1 /*
2 * linux/fs/hfs/trans.c
3 *
4 * Copyright (C) 1995-1997 Paul H. Hargrove
5 * This file may be distributed under the terms of the GNU General Public License.
6 *
7 * This file contains routines for converting between the Macintosh
8 * character set and various other encodings. This includes dealing
9 * with ':' vs. '/' as the path-element separator.
10 *
11 * Latin-1 translation based on code contributed by Holger Schemel
12 * (aeglos@valinor.owl.de).
13 *
14 * The '8-bit', '7-bit ASCII' and '7-bit alphanumeric' encodings are
15 * implementations of the three encodings recommended by Apple in the
16 * document "AppleSingle/AppleDouble Formats: Developer's Note
17 * (9/94)". This document is available from Apple's Technical
18 * Information Library from the World Wide Web server
19 * www.info.apple.com.
20 *
21 * The 'CAP' encoding is an implementation of the naming scheme used
22 * by the Columbia AppleTalk Package, available for anonymous FTP from
23 * ????.
24 *
25 * "XXX" in a comment is a note to myself to consider changing something.
26 *
27 * In function preconditions the term "valid" applied to a pointer to
28 * a structure means that the pointer is non-NULL and the structure it
29 * points to has all fields initialized to consistent values.
30 */
31
32 #include "hfs.h"
33 #include <linux/hfs_fs_sb.h>
34 #include <linux/hfs_fs_i.h>
35 #include <linux/hfs_fs.h>
36 #include <linux/compiler.h>
37
38 /*================ File-local variables ================*/
39
/*
 * int->ASCII map for a single hex digit: hex[n] is the lowercase
 * hexadecimal digit for the value n (0 <= n <= 15).  The table is
 * only ever read, so it is const-qualified.
 */
static const char hex[16] = {'0','1','2','3','4','5','6','7',
			     '8','9','a','b','c','d','e','f'};
/*
 * Latin-1 to Mac character set map
 *
 * Element x-128 holds the Macintosh encoding of the character whose
 * Latin-1 encoding is x (128 <= x <= 255).  A value of zero means no
 * Macintosh equivalent has been recorded for that character.
 *
 * For the sake of consistency this map is generated from the Mac to
 * Latin-1 map the first time it is needed (by hfs_latin2mac()).  This
 * means there is just one map to maintain.
 */
static unsigned char latin2mac_map[128]; /* initially all zero */
51
/*
 * Mac to Latin-1 map for the upper 128 characters (both have ASCII in
 * the lower 128 positions)
 *
 * Element x-128 holds the Latin-1 encoding of the character whose
 * Macintosh encoding is x (128 <= x <= 255).  A value of zero means
 * that Latin-1 has no corresponding character.  Every non-zero entry
 * is itself >= 128 and appears only once, which is what allows the
 * inverse (Latin-1 to Mac) table to be derived from this one.
 *
 * The table is never written, so it is const-qualified.
 */
static const unsigned char mac2latin_map[128] = {
	/* Mac 0x80-0x87 */ 0xC4, 0xC5, 0xC7, 0xC9, 0xD1, 0xD6, 0xDC, 0xE1,
	/* Mac 0x88-0x8F */ 0xE0, 0xE2, 0xE4, 0xE3, 0xE5, 0xE7, 0xE9, 0xE8,
	/* Mac 0x90-0x97 */ 0xEA, 0xEB, 0xED, 0xEC, 0xEE, 0xEF, 0xF1, 0xF3,
	/* Mac 0x98-0x9F */ 0xF2, 0xF4, 0xF6, 0xF5, 0xFA, 0xF9, 0xFB, 0xFC,
	/* Mac 0xA0-0xA7 */ 0x00, 0xB0, 0xA2, 0xA3, 0xA7, 0xB7, 0xB6, 0xDF,
	/* Mac 0xA8-0xAF */ 0xAE, 0xA9, 0x00, 0xB4, 0xA8, 0x00, 0xC6, 0xD8,
	/* Mac 0xB0-0xB7 */ 0x00, 0xB1, 0x00, 0x00, 0xA5, 0xB5, 0xF0, 0x00,
	/* Mac 0xB8-0xBF */ 0x00, 0x00, 0x00, 0xAA, 0xBA, 0x00, 0xE6, 0xF8,
	/* Mac 0xC0-0xC7 */ 0xBF, 0xA1, 0xAC, 0x00, 0x00, 0x00, 0x00, 0xAB,
	/* Mac 0xC8-0xCF */ 0xBB, 0x00, 0xA0, 0xC0, 0xC3, 0xD5, 0x00, 0x00,
	/* Mac 0xD0-0xD7 */ 0xAD, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF7, 0x00,
	/* Mac 0xD8-0xDF */ 0xFF, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00,
	/* Mac 0xE0-0xE7 */ 0x00, 0x00, 0xB8, 0x00, 0x00, 0xC2, 0xCA, 0xC1,
	/* Mac 0xE8-0xEF */ 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xD3, 0xD4,
	/* Mac 0xF0-0xF7 */ 0x00, 0xD2, 0xDA, 0xDB, 0xD9, 0x00, 0x00, 0x00,
	/* Mac 0xF8-0xFF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
74
75 /*================ File-local functions ================*/
76
/*
 * dehex()
 *
 * Convert one ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F')
 * to the integer it represents (0-15).  Any other character yields
 * 0xff, so a caller can detect an invalid digit by testing the high
 * nibble of the result.
 */
static inline __attribute_const__ unsigned char dehex(char c)
{
	unsigned char value = 0xff;	/* "not a hex digit" marker */

	if ((c >= '0') && (c <= '9'))
		value = c - '0';
	else if ((c >= 'a') && (c <= 'f'))
		value = c - 'a' + 10;
	else if ((c >= 'A') && (c <= 'F'))
		value = c - 'A' + 10;

	return value;
}
94
95 /*================ Global functions ================*/
96
97 /*
98 * hfs_mac2nat()
99 *
100 * Given a 'Pascal String' (a string preceded by a length byte) in
101 * the Macintosh character set produce the corresponding filename using
102 * the Netatalk name-mangling scheme, returning the length of the
103 * mangled filename. Note that the output string is not NULL terminated.
104 *
105 * The name-mangling works as follows:
106 * Characters 32-126 (' '-'~') except '/' and any initial '.' are passed
107 * unchanged from input to output. The remaining characters are replaced
108 * by three characters: ':xx' where xx is the hexadecimal representation
109 * of the character, using lowercase 'a' through 'f'.
110 */
hfs_mac2nat(char * out,const struct hfs_name * in)111 int hfs_mac2nat(char *out, const struct hfs_name *in) {
112 unsigned char c;
113 const unsigned char *p = in->Name;
114 int len = in->Len;
115 int count = 0;
116
117 /* Special case for .AppleDesktop which in the
118 distant future may be a pseudodirectory. */
119 if (strncmp(".AppleDesktop", p, len) == 0) {
120 strncpy(out, p, 13);
121 return 13;
122 }
123
124 while (len--) {
125 c = *p++;
126 if ((c<32) || (c=='/') || (c>126) || (!count && (c=='.'))) {
127 *out++ = ':';
128 *out++ = hex[(c>>4) & 0xf];
129 *out++ = hex[c & 0xf];
130 count += 3;
131 } else {
132 *out++ = c;
133 count++;
134 }
135 }
136 return count;
137 }
138
139 /*
140 * hfs_mac2cap()
141 *
142 * Given a 'Pascal String' (a string preceded by a length byte) in
143 * the Macintosh character set produce the corresponding filename using
144 * the CAP name-mangling scheme, returning the length of the mangled
145 * filename. Note that the output string is not NULL terminated.
146 *
147 * The name-mangling works as follows:
148 * Characters 32-126 (' '-'~') except '/' are passed unchanged from
149 * input to output. The remaining characters are replaced by three
150 * characters: ':xx' where xx is the hexadecimal representation of the
151 * character, using lowercase 'a' through 'f'.
152 */
hfs_mac2cap(char * out,const struct hfs_name * in)153 int hfs_mac2cap(char *out, const struct hfs_name *in) {
154 unsigned char c;
155 const unsigned char *p = in->Name;
156 int len = in->Len;
157 int count = 0;
158
159 while (len--) {
160 c = *p++;
161 if ((c<32) || (c=='/') || (c>126)) {
162 *out++ = ':';
163 *out++ = hex[(c>>4) & 0xf];
164 *out++ = hex[c & 0xf];
165 count += 3;
166 } else {
167 *out++ = c;
168 count++;
169 }
170 }
171 return count;
172 }
173
174 /*
175 * hfs_mac2eight()
176 *
177 * Given a 'Pascal String' (a string preceded by a length byte) in
178 * the Macintosh character set produce the corresponding filename using
179 * the '8-bit' name-mangling scheme, returning the length of the
180 * mangled filename. Note that the output string is not NULL
181 * terminated.
182 *
183 * This is one of the three recommended naming conventions described
184 * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
185 * Note (9/94)"
186 *
187 * The name-mangling works as follows:
188 * Characters 0, '%' and '/' are replaced by three characters: '%xx'
189 * where xx is the hexadecimal representation of the character, using
190 * lowercase 'a' through 'f'. All other characters are passed
191 * unchanged from input to output. Note that this format is mainly
192 * implemented for completeness and is rather hard to read.
193 */
hfs_mac2eight(char * out,const struct hfs_name * in)194 int hfs_mac2eight(char *out, const struct hfs_name *in) {
195 unsigned char c;
196 const unsigned char *p = in->Name;
197 int len = in->Len;
198 int count = 0;
199
200 while (len--) {
201 c = *p++;
202 if (!c || (c=='/') || (c=='%')) {
203 *out++ = '%';
204 *out++ = hex[(c>>4) & 0xf];
205 *out++ = hex[c & 0xf];
206 count += 3;
207 } else {
208 *out++ = c;
209 count++;
210 }
211 }
212 return count;
213 }
214
215 /*
216 * hfs_mac2seven()
217 *
218 * Given a 'Pascal String' (a string preceded by a length byte) in
219 * the Macintosh character set produce the corresponding filename using
220 * the '7-bit ASCII' name-mangling scheme, returning the length of the
221 * mangled filename. Note that the output string is not NULL
222 * terminated.
223 *
224 * This is one of the three recommended naming conventions described
225 * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
226 * Note (9/94)"
227 *
228 * The name-mangling works as follows:
229 * Characters 0, '%', '/' and 128-255 are replaced by three
230 * characters: '%xx' where xx is the hexadecimal representation of the
231 * character, using lowercase 'a' through 'f'. All other characters
232 * are passed unchanged from input to output. Note that control
233 * characters (including newline) and space are unchanged make reading
234 * these filenames difficult.
235 */
hfs_mac2seven(char * out,const struct hfs_name * in)236 int hfs_mac2seven(char *out, const struct hfs_name *in) {
237 unsigned char c;
238 const unsigned char *p = in->Name;
239 int len = in->Len;
240 int count = 0;
241
242 while (len--) {
243 c = *p++;
244 if (!c || (c=='/') || (c=='%') || (c&0x80)) {
245 *out++ = '%';
246 *out++ = hex[(c>>4) & 0xf];
247 *out++ = hex[c & 0xf];
248 count += 3;
249 } else {
250 *out++ = c;
251 count++;
252 }
253 }
254 return count;
255 }
256
257 /*
258 * hfs_mac2alpha()
259 *
260 * Given a 'Pascal String' (a string preceded by a length byte) in
261 * the Macintosh character set produce the corresponding filename using
262 * the '7-bit alphanumeric' name-mangling scheme, returning the length
263 * of the mangled filename. Note that the output string is not NULL
264 * terminated.
265 *
266 * This is one of the three recommended naming conventions described
267 * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
268 * Note (9/94)"
269 *
270 * The name-mangling works as follows:
271 * The characters 'a'-'z', 'A'-'Z', '0'-'9', '_' and the last '.' in
272 * the filename are passed unchanged from input to output. All
273 * remaining characters (including any '.'s other than the last) are
274 * replaced by three characters: '%xx' where xx is the hexadecimal
275 * representation of the character, using lowercase 'a' through 'f'.
276 */
hfs_mac2alpha(char * out,const struct hfs_name * in)277 int hfs_mac2alpha(char *out, const struct hfs_name *in) {
278 unsigned char c;
279 const unsigned char *p = in->Name;
280 int len = in->Len;
281 int count = 0;
282 const unsigned char *lp; /* last period */
283
284 /* strrchr() would be good here, but 'in' is not null-terminated */
285 for (lp=p+len-1; (lp>=p)&&(*lp!='.'); --lp) {}
286 ++lp;
287
288 while (len--) {
289 c = *p++;
290 if ((p==lp) || ((c>='0')&&(c<='9')) || ((c>='A')&&(c<='Z')) ||
291 ((c>='a')&&(c<='z')) || (c=='_')) {
292 *out++ = c;
293 count++;
294 } else {
295 *out++ = '%';
296 *out++ = hex[(c>>4) & 0xf];
297 *out++ = hex[c & 0xf];
298 count += 3;
299 }
300 }
301 return count;
302 }
303
304 /*
305 * hfs_mac2triv()
306 *
307 * Given a 'Pascal String' (a string preceded by a length byte) in
308 * the Macintosh character set produce the corresponding filename using
309 * the 'trivial' name-mangling scheme, returning the length of the
310 * mangled filename. Note that the output string is not NULL
311 * terminated.
312 *
313 * The name-mangling works as follows:
314 * The character '/', which is illegal in Linux filenames is replaced
315 * by ':' which never appears in HFS filenames. All other characters
316 * are passed unchanged from input to output.
317 */
hfs_mac2triv(char * out,const struct hfs_name * in)318 int hfs_mac2triv(char *out, const struct hfs_name *in) {
319 unsigned char c;
320 const unsigned char *p = in->Name;
321 int len = in->Len;
322 int count = 0;
323
324 while (len--) {
325 c = *p++;
326 if (c=='/') {
327 *out++ = ':';
328 } else {
329 *out++ = c;
330 }
331 count++;
332 }
333 return count;
334 }
335
336 /*
337 * hfs_mac2latin()
338 *
339 * Given a 'Pascal String' (a string preceded by a length byte) in
340 * the Macintosh character set produce the corresponding filename using
341 * the 'Latin-1' name-mangling scheme, returning the length of the
342 * mangled filename. Note that the output string is not NULL
343 * terminated.
344 *
345 * The Macintosh character set and Latin-1 are both extensions of the
346 * ASCII character set. Some, but certainly not all, of the characters
347 * in the Macintosh character set are also in Latin-1 but not with the
348 * same encoding. This name-mangling scheme replaces the characters in
349 * the Macintosh character set that have Latin-1 equivalents by those
350 * equivalents; the characters 32-126, excluding '/' and '%', are
351 * passed unchanged from input to output. The remaining characters
352 * are replaced by three characters: '%xx' where xx is the hexadecimal
353 * representation of the character, using lowercase 'a' through 'f'.
354 *
355 * The array mac2latin_map[] indicates the correspondence between the
356 * two character sets. The byte in element x-128 gives the Latin-1
357 * encoding of the character with encoding x in the Macintosh
358 * character set. A value of zero indicates Latin-1 has no
359 * corresponding character.
360 */
hfs_mac2latin(char * out,const struct hfs_name * in)361 int hfs_mac2latin(char *out, const struct hfs_name *in) {
362 unsigned char c;
363 const unsigned char *p = in->Name;
364 int len = in->Len;
365 int count = 0;
366
367 while (len--) {
368 c = *p++;
369
370 if ((c & 0x80) && mac2latin_map[c & 0x7f]) {
371 *out++ = mac2latin_map[c & 0x7f];
372 count++;
373 } else if ((c>=32) && (c<=126) && (c!='/') && (c!='%')) {
374 *out++ = c;
375 count++;
376 } else {
377 *out++ = '%';
378 *out++ = hex[(c>>4) & 0xf];
379 *out++ = hex[c & 0xf];
380 count += 3;
381 }
382 }
383 return count;
384 }
385
386 /*
387 * hfs_colon2mac()
388 *
389 * Given an ASCII string (not null-terminated) and its length,
390 * generate the corresponding filename in the Macintosh character set
391 * using the 'CAP' name-mangling scheme, returning the length of the
392 * mangled filename. Note that the output string is not NULL
393 * terminated.
394 *
395 * This routine is a inverse to hfs_mac2cap() and hfs_mac2nat().
396 * A ':' not followed by a 2-digit hexadecimal number (or followed
397 * by the codes for NULL or ':') is replaced by a '|'.
398 */
hfs_colon2mac(struct hfs_name * out,const char * in,int len)399 void hfs_colon2mac(struct hfs_name *out, const char *in, int len) {
400 int hi, lo;
401 unsigned char code, c, *count;
402 unsigned char *p = out->Name;
403
404 out->Len = 0;
405 count = &out->Len;
406 while (len-- && (*count < HFS_NAMELEN)) {
407 c = *in++;
408 (*count)++;
409 if (c!=':') {
410 *p++ = c;
411 } else if ((len<2) ||
412 ((hi=dehex(in[0])) & 0xf0) ||
413 ((lo=dehex(in[1])) & 0xf0) ||
414 !(code = (hi << 4) | lo) ||
415 (code == ':')) {
416 *p++ = '|';
417 } else {
418 *p++ = code;
419 len -= 2;
420 in += 2;
421 }
422 }
423 }
424
425 /*
426 * hfs_prcnt2mac()
427 *
428 * Given an ASCII string (not null-terminated) and its length,
429 * generate the corresponding filename in the Macintosh character set
430 * using Apple's three recommended name-mangling schemes, returning
431 * the length of the mangled filename. Note that the output string is
432 * not NULL terminated.
433 *
434 * This routine is a inverse to hfs_mac2alpha(), hfs_mac2seven() and
435 * hfs_mac2eight().
436 * A '%' not followed by a 2-digit hexadecimal number (or followed
437 * by the code for NULL or ':') is unchanged.
438 * A ':' is replaced by a '|'.
439 */
hfs_prcnt2mac(struct hfs_name * out,const char * in,int len)440 void hfs_prcnt2mac(struct hfs_name *out, const char *in, int len) {
441 int hi, lo;
442 unsigned char code, c, *count;
443 unsigned char *p = out->Name;
444
445 out->Len = 0;
446 count = &out->Len;
447 while (len-- && (*count < HFS_NAMELEN)) {
448 c = *in++;
449 (*count)++;
450 if (c==':') {
451 *p++ = '|';
452 } else if (c!='%') {
453 *p++ = c;
454 } else if ((len<2) ||
455 ((hi=dehex(in[0])) & 0xf0) ||
456 ((lo=dehex(in[1])) & 0xf0) ||
457 !(code = (hi << 4) | lo) ||
458 (code == ':')) {
459 *p++ = '%';
460 } else {
461 *p++ = code;
462 len -= 2;
463 in += 2;
464 }
465 }
466 }
467
468 /*
469 * hfs_triv2mac()
470 *
471 * Given an ASCII string (not null-terminated) and its length,
472 * generate the corresponding filename in the Macintosh character set
473 * using the 'trivial' name-mangling scheme, returning the length of
474 * the mangled filename. Note that the output string is not NULL
475 * terminated.
476 *
477 * This routine is a inverse to hfs_mac2triv().
478 * A ':' is replaced by a '/'.
479 */
hfs_triv2mac(struct hfs_name * out,const char * in,int len)480 void hfs_triv2mac(struct hfs_name *out, const char *in, int len) {
481 unsigned char c, *count;
482 unsigned char *p = out->Name;
483
484 out->Len = 0;
485 count = &out->Len;
486 while (len-- && (*count < HFS_NAMELEN)) {
487 c = *in++;
488 (*count)++;
489 if (c==':') {
490 *p++ = '/';
491 } else {
492 *p++ = c;
493 }
494 }
495 }
496
497 /*
498 * hfs_latin2mac()
499 *
500 * Given an Latin-1 string (not null-terminated) and its length,
501 * generate the corresponding filename in the Macintosh character set
502 * using the 'Latin-1' name-mangling scheme, returning the length of
503 * the mangled filename. Note that the output string is not NULL
504 * terminated.
505 *
506 * This routine is a inverse to hfs_latin2cap().
507 * A '%' not followed by a 2-digit hexadecimal number (or followed
508 * by the code for NULL or ':') is unchanged.
509 * A ':' is replaced by a '|'.
510 *
511 * Note that the character map is built the first time it is needed.
512 */
hfs_latin2mac(struct hfs_name * out,const char * in,int len)513 void hfs_latin2mac(struct hfs_name *out, const char *in, int len)
514 {
515 int hi, lo;
516 unsigned char code, c, *count;
517 unsigned char *p = out->Name;
518 static int map_initialized;
519
520 if (!map_initialized) {
521 int i;
522
523 /* build the inverse mapping at run time */
524 for (i = 0; i < 128; i++) {
525 if ((c = mac2latin_map[i])) {
526 latin2mac_map[(int)c - 128] = i + 128;
527 }
528 }
529 map_initialized = 1;
530 }
531
532 out->Len = 0;
533 count = &out->Len;
534 while (len-- && (*count < HFS_NAMELEN)) {
535 c = *in++;
536 (*count)++;
537
538 if (c==':') {
539 *p++ = '|';
540 } else if (c!='%') {
541 if (c<128 || !(*p = latin2mac_map[c-128])) {
542 *p = c;
543 }
544 p++;
545 } else if ((len<2) ||
546 ((hi=dehex(in[0])) & 0xf0) ||
547 ((lo=dehex(in[1])) & 0xf0) ||
548 !(code = (hi << 4) | lo) ||
549 (code == ':')) {
550 *p++ = '%';
551 } else {
552 *p++ = code;
553 len -= 2;
554 in += 2;
555 }
556 }
557 }
558