1 /*
2 * linux/fs/hfsplus/unicode.c
3 *
4 * Copyright (C) 2001
5 * Brad Boyer (flar@allandria.com)
6 * (C) 2003 Ardis Technologies <roman@ardistech.com>
7 *
8 * Handler routines for unicode strings
9 */
10
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
case_fold(u16 c)18 static inline u16 case_fold(u16 c)
19 {
20 u16 tmp;
21
22 tmp = hfsplus_case_fold_table[c >> 8];
23 if (tmp)
24 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25 else
26 tmp = c;
27 return tmp;
28 }
29
30 /* Compare unicode strings, return values like normal strcmp */
hfsplus_strcasecmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 const struct hfsplus_unistr *s2)
33 {
34 u16 len1, len2, c1, c2;
35 const hfsplus_unichr *p1, *p2;
36
37 len1 = be16_to_cpu(s1->length);
38 len2 = be16_to_cpu(s2->length);
39 p1 = s1->unicode;
40 p2 = s2->unicode;
41
42 while (1) {
43 c1 = c2 = 0;
44
45 while (len1 && !c1) {
46 c1 = case_fold(be16_to_cpu(*p1));
47 p1++;
48 len1--;
49 }
50 while (len2 && !c2) {
51 c2 = case_fold(be16_to_cpu(*p2));
52 p2++;
53 len2--;
54 }
55
56 if (c1 != c2)
57 return (c1 < c2) ? -1 : 1;
58 if (!c1 && !c2)
59 return 0;
60 }
61 }
62
63 /* Compare names as a sequence of 16-bit unsigned integers */
hfsplus_strcmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 const struct hfsplus_unistr *s2)
66 {
67 u16 len1, len2, c1, c2;
68 const hfsplus_unichr *p1, *p2;
69 int len;
70
71 len1 = be16_to_cpu(s1->length);
72 len2 = be16_to_cpu(s2->length);
73 p1 = s1->unicode;
74 p2 = s2->unicode;
75
76 for (len = min(len1, len2); len > 0; len--) {
77 c1 = be16_to_cpu(*p1);
78 c2 = be16_to_cpu(*p2);
79 if (c1 != c2)
80 return c1 < c2 ? -1 : 1;
81 p1++;
82 p2++;
83 }
84
85 return len1 < len2 ? -1 :
86 len1 > len2 ? 1 : 0;
87 }
88
89
/*
 * Constants for the arithmetic composition of Hangul syllables performed in
 * hfsplus_uni2asc().  The names follow the Hangul composition algorithm of
 * the Unicode standard.  Only SBase, LBase, VBase, TBase, VCount and TCount
 * are referenced by the code in this file that is visible here.
 */
#define Hangul_SBase	0xac00	/* added to the composed syllable index */
#define Hangul_LBase	0x1100	/* subtracted from the leading unit */
#define Hangul_VBase	0x1161	/* subtracted from the second (vowel) unit */
#define Hangul_TBase	0x11a7	/* subtracted from the third unit; index 0 means "none" */
#define Hangul_SCount	11172
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
99
100
/*
 * Binary-search one node of the composition table for the code unit @cc.
 *
 * As read by this function, a node at @p looks like this: p[1] holds the
 * number of entries, and entry i (1-based) consists of a key at p[2 * i]
 * and an offset at p[2 * i + 1].  Keys are searched as a sorted sequence.
 *
 * Returns a pointer into hfsplus_compose_table at the offset stored with
 * the matching key, or NULL if the node is empty or @cc is not present.
 */
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
	int lo = 1;
	int hi = p[1];
	int mid;
	u16 key;

	if (hi == 0)
		return NULL;
	/* Quick reject when cc lies outside the first/last key. */
	if (cc < p[2 * lo] || cc > p[2 * hi])
		return NULL;

	while (lo <= hi) {
		mid = (lo + hi) / 2;
		key = p[2 * mid];
		if (cc == key)
			return hfsplus_compose_table + p[2 * mid + 1];
		if (cc > key)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
120
hfsplus_uni2asc(struct super_block * sb,const struct hfsplus_unistr * ustr,char * astr,int * len_p)121 int hfsplus_uni2asc(struct super_block *sb,
122 const struct hfsplus_unistr *ustr,
123 char *astr, int *len_p)
124 {
125 const hfsplus_unichr *ip;
126 struct nls_table *nls = HFSPLUS_SB(sb)->nls;
127 u8 *op;
128 u16 cc, c0, c1;
129 u16 *ce1, *ce2;
130 int i, len, ustrlen, res, compose;
131
132 op = astr;
133 ip = ustr->unicode;
134 ustrlen = be16_to_cpu(ustr->length);
135 len = *len_p;
136 ce1 = NULL;
137 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
138
139 while (ustrlen > 0) {
140 c0 = be16_to_cpu(*ip++);
141 ustrlen--;
142 /* search for single decomposed char */
143 if (likely(compose))
144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145 if (ce1 && (cc = ce1[0])) {
146 /* start of a possibly decomposed Hangul char */
147 if (cc != 0xffff)
148 goto done;
149 if (!ustrlen)
150 goto same;
151 c1 = be16_to_cpu(*ip) - Hangul_VBase;
152 if (c1 < Hangul_VCount) {
153 /* compose the Hangul char */
154 cc = (c0 - Hangul_LBase) * Hangul_VCount;
155 cc = (cc + c1) * Hangul_TCount;
156 cc += Hangul_SBase;
157 ip++;
158 ustrlen--;
159 if (!ustrlen)
160 goto done;
161 c1 = be16_to_cpu(*ip) - Hangul_TBase;
162 if (c1 > 0 && c1 < Hangul_TCount) {
163 cc += c1;
164 ip++;
165 ustrlen--;
166 }
167 goto done;
168 }
169 }
170 while (1) {
171 /* main loop for common case of not composed chars */
172 if (!ustrlen)
173 goto same;
174 c1 = be16_to_cpu(*ip);
175 if (likely(compose))
176 ce1 = hfsplus_compose_lookup(
177 hfsplus_compose_table, c1);
178 if (ce1)
179 break;
180 switch (c0) {
181 case 0:
182 c0 = 0x2400;
183 break;
184 case '/':
185 c0 = ':';
186 break;
187 }
188 res = nls->uni2char(c0, op, len);
189 if (res < 0) {
190 if (res == -ENAMETOOLONG)
191 goto out;
192 *op = '?';
193 res = 1;
194 }
195 op += res;
196 len -= res;
197 c0 = c1;
198 ip++;
199 ustrlen--;
200 }
201 ce2 = hfsplus_compose_lookup(ce1, c0);
202 if (ce2) {
203 i = 1;
204 while (i < ustrlen) {
205 ce1 = hfsplus_compose_lookup(ce2,
206 be16_to_cpu(ip[i]));
207 if (!ce1)
208 break;
209 i++;
210 ce2 = ce1;
211 }
212 if ((cc = ce2[0])) {
213 ip += i;
214 ustrlen -= i;
215 goto done;
216 }
217 }
218 same:
219 switch (c0) {
220 case 0:
221 cc = 0x2400;
222 break;
223 case '/':
224 cc = ':';
225 break;
226 default:
227 cc = c0;
228 }
229 done:
230 res = nls->uni2char(cc, op, len);
231 if (res < 0) {
232 if (res == -ENAMETOOLONG)
233 goto out;
234 *op = '?';
235 res = 1;
236 }
237 op += res;
238 len -= res;
239 }
240 res = 0;
241 out:
242 *len_p = (char *)op - astr;
243 return res;
244 }
245
246 /*
247 * Convert one or more ASCII characters into a single unicode character.
248 * Returns the number of ASCII characters corresponding to the unicode char.
249 */
asc2unichar(struct super_block * sb,const char * astr,int len,wchar_t * uc)250 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
251 wchar_t *uc)
252 {
253 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
254 if (size <= 0) {
255 *uc = '?';
256 size = 1;
257 }
258 switch (*uc) {
259 case 0x2400:
260 *uc = 0;
261 break;
262 case ':':
263 *uc = '/';
264 break;
265 }
266 return size;
267 }
268
269 /* Decomposes a single unicode character. */
decompose_unichar(wchar_t uc,int * size)270 static inline u16 *decompose_unichar(wchar_t uc, int *size)
271 {
272 int off;
273
274 off = hfsplus_decompose_table[(uc >> 12) & 0xf];
275 if (off == 0 || off == 0xffff)
276 return NULL;
277
278 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
279 if (!off)
280 return NULL;
281
282 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
283 if (!off)
284 return NULL;
285
286 off = hfsplus_decompose_table[off + (uc & 0xf)];
287 *size = off & 3;
288 if (*size == 0)
289 return NULL;
290 return hfsplus_decompose_table + (off / 4);
291 }
292
hfsplus_asc2uni(struct super_block * sb,struct hfsplus_unistr * ustr,const char * astr,int len)293 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
294 const char *astr, int len)
295 {
296 int size, dsize, decompose;
297 u16 *dstr, outlen = 0;
298 wchar_t c;
299
300 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
301 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
302 size = asc2unichar(sb, astr, len, &c);
303
304 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
305 if (outlen + dsize > HFSPLUS_MAX_STRLEN)
306 break;
307 do {
308 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
309 } while (--dsize > 0);
310 } else
311 ustr->unicode[outlen++] = cpu_to_be16(c);
312
313 astr += size;
314 len -= size;
315 }
316 ustr->length = cpu_to_be16(outlen);
317 if (len > 0)
318 return -ENAMETOOLONG;
319 return 0;
320 }
321
322 /*
323 * Hash a string to an integer as appropriate for the HFS+ filesystem.
324 * Composed unicode characters are decomposed and case-folding is performed
325 * if the appropriate bits are (un)set on the superblock.
326 */
hfsplus_hash_dentry(const struct dentry * dentry,const struct inode * inode,struct qstr * str)327 int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
328 struct qstr *str)
329 {
330 struct super_block *sb = dentry->d_sb;
331 const char *astr;
332 const u16 *dstr;
333 int casefold, decompose, size, len;
334 unsigned long hash;
335 wchar_t c;
336 u16 c2;
337
338 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
339 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
340 hash = init_name_hash();
341 astr = str->name;
342 len = str->len;
343 while (len > 0) {
344 int uninitialized_var(dsize);
345 size = asc2unichar(sb, astr, len, &c);
346 astr += size;
347 len -= size;
348
349 if (decompose && (dstr = decompose_unichar(c, &dsize))) {
350 do {
351 c2 = *dstr++;
352 if (!casefold || (c2 = case_fold(c2)))
353 hash = partial_name_hash(c2, hash);
354 } while (--dsize > 0);
355 } else {
356 c2 = c;
357 if (!casefold || (c2 = case_fold(c2)))
358 hash = partial_name_hash(c2, hash);
359 }
360 }
361 str->hash = end_name_hash(hash);
362
363 return 0;
364 }
365
366 /*
367 * Compare strings with HFS+ filename ordering.
368 * Composed unicode characters are decomposed and case-folding is performed
369 * if the appropriate bits are (un)set on the superblock.
370 */
hfsplus_compare_dentry(const struct dentry * parent,const struct inode * pinode,const struct dentry * dentry,const struct inode * inode,unsigned int len,const char * str,const struct qstr * name)371 int hfsplus_compare_dentry(const struct dentry *parent,
372 const struct inode *pinode,
373 const struct dentry *dentry, const struct inode *inode,
374 unsigned int len, const char *str, const struct qstr *name)
375 {
376 struct super_block *sb = parent->d_sb;
377 int casefold, decompose, size;
378 int dsize1, dsize2, len1, len2;
379 const u16 *dstr1, *dstr2;
380 const char *astr1, *astr2;
381 u16 c1, c2;
382 wchar_t c;
383
384 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
385 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
386 astr1 = str;
387 len1 = len;
388 astr2 = name->name;
389 len2 = name->len;
390 dsize1 = dsize2 = 0;
391 dstr1 = dstr2 = NULL;
392
393 while (len1 > 0 && len2 > 0) {
394 if (!dsize1) {
395 size = asc2unichar(sb, astr1, len1, &c);
396 astr1 += size;
397 len1 -= size;
398
399 if (decompose)
400 dstr1 = decompose_unichar(c, &dsize1);
401 if (!decompose || !dstr1) {
402 c1 = c;
403 dstr1 = &c1;
404 dsize1 = 1;
405 }
406 }
407
408 if (!dsize2) {
409 size = asc2unichar(sb, astr2, len2, &c);
410 astr2 += size;
411 len2 -= size;
412
413 if (decompose)
414 dstr2 = decompose_unichar(c, &dsize2);
415 if (!decompose || !dstr2) {
416 c2 = c;
417 dstr2 = &c2;
418 dsize2 = 1;
419 }
420 }
421
422 c1 = *dstr1;
423 c2 = *dstr2;
424 if (casefold) {
425 if (!(c1 = case_fold(c1))) {
426 dstr1++;
427 dsize1--;
428 continue;
429 }
430 if (!(c2 = case_fold(c2))) {
431 dstr2++;
432 dsize2--;
433 continue;
434 }
435 }
436 if (c1 < c2)
437 return -1;
438 else if (c1 > c2)
439 return 1;
440
441 dstr1++;
442 dsize1--;
443 dstr2++;
444 dsize2--;
445 }
446
447 if (len1 < len2)
448 return -1;
449 if (len1 > len2)
450 return 1;
451 return 0;
452 }
453