1 /*
2  * linux/fs/hfs/trans.c
3  *
4  * Copyright (C) 1995-1997  Paul H. Hargrove
5  * This file may be distributed under the terms of the GNU General Public License.
6  *
7  * This file contains routines for converting between the Macintosh
8  * character set and various other encodings.  This includes dealing
9  * with ':' vs. '/' as the path-element separator.
10  *
11  * Latin-1 translation based on code contributed by Holger Schemel
12  * (aeglos@valinor.owl.de).
13  *
14  * The '8-bit', '7-bit ASCII' and '7-bit alphanumeric' encodings are
15  * implementations of the three encodings recommended by Apple in the
16  * document "AppleSingle/AppleDouble Formats: Developer's Note
17  * (9/94)".  This document is available from Apple's Technical
18  * Information Library from the World Wide Web server
19  * www.info.apple.com.
20  *
21  * The 'CAP' encoding is an implementation of the naming scheme used
22  * by the Columbia AppleTalk Package, available for anonymous FTP from
23  * ????.
24  *
25  * "XXX" in a comment is a note to myself to consider changing something.
26  *
27  * In function preconditions the term "valid" applied to a pointer to
28  * a structure means that the pointer is non-NULL and the structure it
29  * points to has all fields initialized to consistent values.
30  */
31 
32 #include "hfs.h"
33 #include <linux/hfs_fs_sb.h>
34 #include <linux/hfs_fs_i.h>
35 #include <linux/hfs_fs.h>
36 #include <linux/compiler.h>
37 
38 /*================ File-local variables ================*/
39 
/*
 * int->ASCII map for a single hex digit (lowercase 'a'-'f').
 * Indexed by a nibble value 0-15.  The table is only ever read, so it
 * is declared const.
 */
static const char hex[16] = {'0','1','2','3','4','5','6','7',
			     '8','9','a','b','c','d','e','f'};
/*
 * Latin-1 to Mac character set map
 *
 * For the sake of consistency this map is generated from the Mac to
 * Latin-1 map the first time it is needed.  This means there is just
 * one map to maintain.  Element x-128 holds the Macintosh encoding of
 * the Latin-1 character x; zero means "no mapping".  This table must
 * stay writable because it is filled in at run time.
 */
static unsigned char latin2mac_map[128]; /* initially all zero */

/*
 * Mac to Latin-1 map for the upper 128 characters (both have ASCII in
 * the lower 128 positions).  Element x-128 holds the Latin-1 encoding
 * of the Macintosh character x; zero means Latin-1 has no
 * corresponding character.  The table is only ever read, so it is
 * declared const.
 */
static const unsigned char mac2latin_map[128] = {
	0xC4, 0xC5, 0xC7, 0xC9, 0xD1, 0xD6, 0xDC, 0xE1,
	0xE0, 0xE2, 0xE4, 0xE3, 0xE5, 0xE7, 0xE9, 0xE8,
	0xEA, 0xEB, 0xED, 0xEC, 0xEE, 0xEF, 0xF1, 0xF3,
	0xF2, 0xF4, 0xF6, 0xF5, 0xFA, 0xF9, 0xFB, 0xFC,
	0x00, 0xB0, 0xA2, 0xA3, 0xA7, 0xB7, 0xB6, 0xDF,
	0xAE, 0xA9, 0x00, 0xB4, 0xA8, 0x00, 0xC6, 0xD8,
	0x00, 0xB1, 0x00, 0x00, 0xA5, 0xB5, 0xF0, 0x00,
	0x00, 0x00, 0x00, 0xAA, 0xBA, 0x00, 0xE6, 0xF8,
	0xBF, 0xA1, 0xAC, 0x00, 0x00, 0x00, 0x00, 0xAB,
	0xBB, 0x00, 0xA0, 0xC0, 0xC3, 0xD5, 0x00, 0x00,
	0xAD, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF7, 0x00,
	0xFF, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0xB8, 0x00, 0x00, 0xC2, 0xCA, 0xC1,
	0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xD3, 0xD4,
	0x00, 0xD2, 0xDA, 0xDB, 0xD9, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
74 
75 /*================ File-local functions ================*/
76 
77 /*
78  * dehex()
79  *
80  * Given a hexadecimal digit in ASCII, return the integer representation.
81  */
dehex(char c)82 static inline __attribute_const__ unsigned char dehex(char c) {
83 	if ((c>='0')&&(c<='9')) {
84 		return c-'0';
85 	}
86 	if ((c>='a')&&(c<='f')) {
87 		return c-'a'+10;
88 	}
89 	if ((c>='A')&&(c<='F')) {
90 		return c-'A'+10;
91 	}
92 	return 0xff;
93 }
94 
95 /*================ Global functions ================*/
96 
97 /*
98  * hfs_mac2nat()
99  *
100  * Given a 'Pascal String' (a string preceded by a length byte) in
101  * the Macintosh character set produce the corresponding filename using
102  * the Netatalk name-mangling scheme, returning the length of the
103  * mangled filename.  Note that the output string is not NULL terminated.
104  *
105  * The name-mangling works as follows:
106  * Characters 32-126 (' '-'~') except '/' and any initial '.' are passed
107  * unchanged from input to output.  The remaining characters are replaced
108  * by three characters: ':xx' where xx is the hexadecimal representation
109  * of the character, using lowercase 'a' through 'f'.
110  */
hfs_mac2nat(char * out,const struct hfs_name * in)111 int hfs_mac2nat(char *out, const struct hfs_name *in) {
112 	unsigned char c;
113 	const unsigned char *p = in->Name;
114 	int len = in->Len;
115 	int count = 0;
116 
117 	/* Special case for .AppleDesktop which in the
118 	   distant future may be a pseudodirectory. */
119 	if (strncmp(".AppleDesktop", p, len) == 0) {
120 		strncpy(out, p, 13);
121 		return 13;
122 	}
123 
124 	while (len--) {
125 		c = *p++;
126 		if ((c<32) || (c=='/') || (c>126) || (!count && (c=='.'))) {
127 			*out++ = ':';
128 			*out++ = hex[(c>>4) & 0xf];
129 			*out++ = hex[c & 0xf];
130 			count += 3;
131 		} else {
132 			*out++ = c;
133 			count++;
134 		}
135 	}
136 	return count;
137 }
138 
139 /*
140  * hfs_mac2cap()
141  *
142  * Given a 'Pascal String' (a string preceded by a length byte) in
143  * the Macintosh character set produce the corresponding filename using
144  * the CAP name-mangling scheme, returning the length of the mangled
145  * filename.  Note that the output string is not NULL terminated.
146  *
147  * The name-mangling works as follows:
148  * Characters 32-126 (' '-'~') except '/' are passed unchanged from
149  * input to output.  The remaining characters are replaced by three
150  * characters: ':xx' where xx is the hexadecimal representation of the
151  * character, using lowercase 'a' through 'f'.
152  */
hfs_mac2cap(char * out,const struct hfs_name * in)153 int hfs_mac2cap(char *out, const struct hfs_name *in) {
154 	unsigned char c;
155 	const unsigned char *p = in->Name;
156 	int len = in->Len;
157 	int count = 0;
158 
159 	while (len--) {
160 		c = *p++;
161 		if ((c<32) || (c=='/') || (c>126)) {
162 			*out++ = ':';
163 			*out++ = hex[(c>>4) & 0xf];
164 			*out++ = hex[c & 0xf];
165 			count += 3;
166 		} else {
167 			*out++ = c;
168 			count++;
169 		}
170 	}
171 	return count;
172 }
173 
174 /*
175  * hfs_mac2eight()
176  *
177  * Given a 'Pascal String' (a string preceded by a length byte) in
178  * the Macintosh character set produce the corresponding filename using
179  * the '8-bit' name-mangling scheme, returning the length of the
180  * mangled filename.  Note that the output string is not NULL
181  * terminated.
182  *
183  * This is one of the three recommended naming conventions described
184  * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
185  * Note (9/94)"
186  *
187  * The name-mangling works as follows:
188  * Characters 0, '%' and '/' are replaced by three characters: '%xx'
189  * where xx is the hexadecimal representation of the character, using
190  * lowercase 'a' through 'f'.  All other characters are passed
191  * unchanged from input to output.  Note that this format is mainly
192  * implemented for completeness and is rather hard to read.
193  */
hfs_mac2eight(char * out,const struct hfs_name * in)194 int hfs_mac2eight(char *out, const struct hfs_name *in) {
195 	unsigned char c;
196 	const unsigned char *p = in->Name;
197 	int len = in->Len;
198 	int count = 0;
199 
200 	while (len--) {
201 		c = *p++;
202 		if (!c || (c=='/') || (c=='%')) {
203 			*out++ = '%';
204 			*out++ = hex[(c>>4) & 0xf];
205 			*out++ = hex[c & 0xf];
206 			count += 3;
207 		} else {
208 			*out++ = c;
209 			count++;
210 		}
211 	}
212 	return count;
213 }
214 
215 /*
216  * hfs_mac2seven()
217  *
218  * Given a 'Pascal String' (a string preceded by a length byte) in
219  * the Macintosh character set produce the corresponding filename using
220  * the '7-bit ASCII' name-mangling scheme, returning the length of the
221  * mangled filename.  Note that the output string is not NULL
222  * terminated.
223  *
224  * This is one of the three recommended naming conventions described
225  * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
226  * Note (9/94)"
227  *
228  * The name-mangling works as follows:
229  * Characters 0, '%', '/' and 128-255 are replaced by three
230  * characters: '%xx' where xx is the hexadecimal representation of the
231  * character, using lowercase 'a' through 'f'.	All other characters
232  * are passed unchanged from input to output.  Note that control
233  * characters (including newline) and space are unchanged make reading
234  * these filenames difficult.
235  */
hfs_mac2seven(char * out,const struct hfs_name * in)236 int hfs_mac2seven(char *out, const struct hfs_name *in) {
237 	unsigned char c;
238 	const unsigned char *p = in->Name;
239 	int len = in->Len;
240 	int count = 0;
241 
242 	while (len--) {
243 		c = *p++;
244 		if (!c || (c=='/') || (c=='%') || (c&0x80)) {
245 			*out++ = '%';
246 			*out++ = hex[(c>>4) & 0xf];
247 			*out++ = hex[c & 0xf];
248 			count += 3;
249 		} else {
250 			*out++ = c;
251 			count++;
252 		}
253 	}
254 	return count;
255 }
256 
257 /*
258  * hfs_mac2alpha()
259  *
260  * Given a 'Pascal String' (a string preceded by a length byte) in
261  * the Macintosh character set produce the corresponding filename using
262  * the '7-bit alphanumeric' name-mangling scheme, returning the length
263  * of the mangled filename.  Note that the output string is not NULL
264  * terminated.
265  *
266  * This is one of the three recommended naming conventions described
267  * in Apple's document "AppleSingle/AppleDouble Formats: Developer's
268  * Note (9/94)"
269  *
270  * The name-mangling works as follows:
271  * The characters 'a'-'z', 'A'-'Z', '0'-'9', '_' and the last '.' in
272  * the filename are passed unchanged from input to output.  All
273  * remaining characters (including any '.'s other than the last) are
274  * replaced by three characters: '%xx' where xx is the hexadecimal
275  * representation of the character, using lowercase 'a' through 'f'.
276  */
hfs_mac2alpha(char * out,const struct hfs_name * in)277 int hfs_mac2alpha(char *out, const struct hfs_name *in) {
278 	unsigned char c;
279 	const unsigned char *p = in->Name;
280 	int len = in->Len;
281 	int count = 0;
282 	const unsigned char *lp;	/* last period */
283 
284 	/* strrchr() would be good here, but 'in' is not null-terminated */
285 	for (lp=p+len-1; (lp>=p)&&(*lp!='.'); --lp) {}
286 	++lp;
287 
288 	while (len--) {
289 		c = *p++;
290 		if ((p==lp) || ((c>='0')&&(c<='9')) || ((c>='A')&&(c<='Z')) ||
291 				((c>='a')&&(c<='z')) || (c=='_')) {
292 			*out++ = c;
293 			count++;
294 		} else {
295 			*out++ = '%';
296 			*out++ = hex[(c>>4) & 0xf];
297 			*out++ = hex[c & 0xf];
298 			count += 3;
299 		}
300 	}
301 	return count;
302 }
303 
304 /*
305  * hfs_mac2triv()
306  *
307  * Given a 'Pascal String' (a string preceded by a length byte) in
308  * the Macintosh character set produce the corresponding filename using
309  * the 'trivial' name-mangling scheme, returning the length of the
310  * mangled filename.  Note that the output string is not NULL
311  * terminated.
312  *
313  * The name-mangling works as follows:
314  * The character '/', which is illegal in Linux filenames is replaced
315  * by ':' which never appears in HFS filenames.	 All other characters
316  * are passed unchanged from input to output.
317  */
hfs_mac2triv(char * out,const struct hfs_name * in)318 int hfs_mac2triv(char *out, const struct hfs_name *in) {
319 	unsigned char c;
320 	const unsigned char *p = in->Name;
321 	int len = in->Len;
322 	int count = 0;
323 
324 	while (len--) {
325 		c = *p++;
326 		if (c=='/') {
327 			*out++ = ':';
328 		} else {
329 			*out++ = c;
330 		}
331 		count++;
332 	}
333 	return count;
334 }
335 
336 /*
337  * hfs_mac2latin()
338  *
339  * Given a 'Pascal String' (a string preceded by a length byte) in
340  * the Macintosh character set produce the corresponding filename using
341  * the 'Latin-1' name-mangling scheme, returning the length of the
342  * mangled filename.  Note that the output string is not NULL
343  * terminated.
344  *
345  * The Macintosh character set and Latin-1 are both extensions of the
346  * ASCII character set.	 Some, but certainly not all, of the characters
347  * in the Macintosh character set are also in Latin-1 but not with the
348  * same encoding.  This name-mangling scheme replaces the characters in
349  * the Macintosh character set that have Latin-1 equivalents by those
350  * equivalents; the characters 32-126, excluding '/' and '%', are
351  * passed unchanged from input to output.  The remaining characters
352  * are replaced by three characters: '%xx' where xx is the hexadecimal
353  * representation of the character, using lowercase 'a' through 'f'.
354  *
355  * The array mac2latin_map[] indicates the correspondence between the
356  * two character sets.	The byte in element x-128 gives the Latin-1
357  * encoding of the character with encoding x in the Macintosh
358  * character set.  A value of zero indicates Latin-1 has no
359  * corresponding character.
360  */
hfs_mac2latin(char * out,const struct hfs_name * in)361 int hfs_mac2latin(char *out, const struct hfs_name *in) {
362 	unsigned char c;
363 	const unsigned char *p = in->Name;
364 	int len = in->Len;
365 	int count = 0;
366 
367 	while (len--) {
368 		c = *p++;
369 
370 		if ((c & 0x80) && mac2latin_map[c & 0x7f]) {
371 			*out++ = mac2latin_map[c & 0x7f];
372 			count++;
373 		} else if ((c>=32) && (c<=126) && (c!='/') && (c!='%')) {
374 			*out++ =  c;
375 			count++;
376 		} else {
377 			*out++ = '%';
378 			*out++ = hex[(c>>4) & 0xf];
379 			*out++ = hex[c & 0xf];
380 			count += 3;
381 		}
382 	}
383 	return count;
384 }
385 
386 /*
387  * hfs_colon2mac()
388  *
389  * Given an ASCII string (not null-terminated) and its length,
390  * generate the corresponding filename in the Macintosh character set
391  * using the 'CAP' name-mangling scheme, returning the length of the
392  * mangled filename.  Note that the output string is not NULL
393  * terminated.
394  *
395  * This routine is a inverse to hfs_mac2cap() and hfs_mac2nat().
396  * A ':' not followed by a 2-digit hexadecimal number (or followed
397  * by the codes for NULL or ':') is replaced by a '|'.
398  */
hfs_colon2mac(struct hfs_name * out,const char * in,int len)399 void hfs_colon2mac(struct hfs_name *out, const char *in, int len) {
400 	int hi, lo;
401 	unsigned char code, c, *count;
402 	unsigned char *p = out->Name;
403 
404 	out->Len = 0;
405 	count = &out->Len;
406 	while (len-- && (*count < HFS_NAMELEN)) {
407 		c = *in++;
408 		(*count)++;
409 		if (c!=':') {
410 			*p++ = c;
411 		} else if ((len<2) ||
412 			   ((hi=dehex(in[0])) & 0xf0) ||
413 			   ((lo=dehex(in[1])) & 0xf0) ||
414 			   !(code = (hi << 4) | lo) ||
415 			   (code == ':')) {
416 			*p++ = '|';
417 		} else {
418 			*p++ = code;
419 			len -= 2;
420 			in += 2;
421 		}
422 	}
423 }
424 
425 /*
426  * hfs_prcnt2mac()
427  *
428  * Given an ASCII string (not null-terminated) and its length,
429  * generate the corresponding filename in the Macintosh character set
430  * using Apple's three recommended name-mangling schemes, returning
431  * the length of the mangled filename.	Note that the output string is
432  * not NULL terminated.
433  *
434  * This routine is a inverse to hfs_mac2alpha(), hfs_mac2seven() and
435  * hfs_mac2eight().
436  * A '%' not followed by a 2-digit hexadecimal number (or followed
437  * by the code for NULL or ':') is unchanged.
438  * A ':' is replaced by a '|'.
439  */
hfs_prcnt2mac(struct hfs_name * out,const char * in,int len)440 void hfs_prcnt2mac(struct hfs_name *out, const char *in, int len) {
441 	int hi, lo;
442 	unsigned char code, c, *count;
443 	unsigned char *p = out->Name;
444 
445 	out->Len = 0;
446 	count = &out->Len;
447 	while (len-- && (*count < HFS_NAMELEN)) {
448 		c = *in++;
449 		(*count)++;
450 		if (c==':') {
451 			*p++ = '|';
452 		} else if (c!='%') {
453 			*p++ = c;
454 		} else if ((len<2) ||
455 			   ((hi=dehex(in[0])) & 0xf0) ||
456 			   ((lo=dehex(in[1])) & 0xf0) ||
457 			   !(code = (hi << 4) | lo) ||
458 			   (code == ':')) {
459 			*p++ = '%';
460 		} else {
461 			*p++ = code;
462 			len -= 2;
463 			in += 2;
464 		}
465 	}
466 }
467 
468 /*
469  * hfs_triv2mac()
470  *
471  * Given an ASCII string (not null-terminated) and its length,
472  * generate the corresponding filename in the Macintosh character set
473  * using the 'trivial' name-mangling scheme, returning the length of
474  * the mangled filename.  Note that the output string is not NULL
475  * terminated.
476  *
477  * This routine is a inverse to hfs_mac2triv().
478  * A ':' is replaced by a '/'.
479  */
hfs_triv2mac(struct hfs_name * out,const char * in,int len)480 void hfs_triv2mac(struct hfs_name *out, const char *in, int len) {
481 	unsigned char c, *count;
482 	unsigned char *p = out->Name;
483 
484 	out->Len = 0;
485 	count = &out->Len;
486 	while (len-- && (*count < HFS_NAMELEN)) {
487 		c = *in++;
488 		(*count)++;
489 		if (c==':') {
490 			*p++ = '/';
491 		} else {
492 			*p++ = c;
493 		}
494 	}
495 }
496 
497 /*
498  * hfs_latin2mac()
499  *
500  * Given an Latin-1 string (not null-terminated) and its length,
501  * generate the corresponding filename in the Macintosh character set
502  * using the 'Latin-1' name-mangling scheme, returning the length of
503  * the mangled filename.  Note that the output string is not NULL
504  * terminated.
505  *
506  * This routine is a inverse to hfs_latin2cap().
507  * A '%' not followed by a 2-digit hexadecimal number (or followed
508  * by the code for NULL or ':') is unchanged.
509  * A ':' is replaced by a '|'.
510  *
511  * Note that the character map is built the first time it is needed.
512  */
hfs_latin2mac(struct hfs_name * out,const char * in,int len)513 void hfs_latin2mac(struct hfs_name *out, const char *in, int len)
514 {
515 	int hi, lo;
516 	unsigned char code, c, *count;
517 	unsigned char *p = out->Name;
518 	static int map_initialized;
519 
520 	if (!map_initialized) {
521 		int i;
522 
523 		/* build the inverse mapping at run time */
524 		for (i = 0; i < 128; i++) {
525 			if ((c = mac2latin_map[i])) {
526 				latin2mac_map[(int)c - 128] = i + 128;
527 			}
528 		}
529 		map_initialized = 1;
530 	}
531 
532 	out->Len = 0;
533 	count = &out->Len;
534 	while (len-- && (*count < HFS_NAMELEN)) {
535 		c = *in++;
536 		(*count)++;
537 
538 		if (c==':') {
539 			*p++ = '|';
540 		} else if (c!='%') {
541 			if (c<128 || !(*p = latin2mac_map[c-128])) {
542 				*p = c;
543 			}
544 			p++;
545 		} else if ((len<2) ||
546 			   ((hi=dehex(in[0])) & 0xf0) ||
547 			   ((lo=dehex(in[1])) & 0xf0) ||
548 			   !(code = (hi << 4) | lo) ||
549 			   (code == ':')) {
550 			*p++ = '%';
551 		} else {
552 			*p++ = code;
553 			len -= 2;
554 			in += 2;
555 		}
556 	}
557 }
558