]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * unicode.c | |
3 | * | |
4 | * PURPOSE | |
5 | * Routines for converting between UTF-8 and OSTA Compressed Unicode. | |
6 | * Also handles filename mangling | |
7 | * | |
8 | * DESCRIPTION | |
9 | * OSTA Compressed Unicode is explained in the OSTA UDF specification. | |
10 | * http://www.osta.org/ | |
11 | * UTF-8 is explained in IETF RFC 3629. | |
12 | * https://www.rfc-editor.org/rfc/rfc3629.txt | |
13 | * | |
14 | * CONTACTS | |
15 | * E-mail regarding any portion of the Linux UDF file system should be | |
16 | * directed to the development team's mailing list (run by majordomo): | |
17 | * linux_udf@hpesjro.fc.hp.com | |
18 | * | |
19 | * COPYRIGHT | |
20 | * This file is distributed under the terms of the GNU General Public | |
21 | * License (GPL). Copies of the GPL can be obtained from: | |
22 | * ftp://prep.ai.mit.edu/pub/gnu/GPL | |
23 | * Each contributing author retains all rights to their own work. | |
24 | */ | |
25 | ||
26 | #include "udfdecl.h" | |
27 | ||
28 | #include <linux/kernel.h> | |
29 | #include <linux/string.h> /* for memset */ | |
30 | #include <linux/nls.h> | |
31 | #include <linux/udf_fs.h> | |
32 | ||
33 | #include "udf_sb.h" | |
34 | ||
/* Forward declaration; the definition appears later in this file. */
static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen,
				  uint8_t *fidName, int fidNameLen);
36 | ||
37 | static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) | |
38 | { | |
39 | if ( (!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN-2) ) | |
40 | return 0; | |
41 | memset(dest, 0, sizeof(struct ustr)); | |
42 | memcpy(dest->u_name, src, strlen); | |
43 | dest->u_cmpID = 0x08; | |
44 | dest->u_len = strlen; | |
45 | return strlen; | |
46 | } | |
47 | ||
48 | /* | |
49 | * udf_build_ustr | |
50 | */ | |
51 | int udf_build_ustr(struct ustr *dest, dstring *ptr, int size) | |
52 | { | |
53 | int usesize; | |
54 | ||
55 | if ( (!dest) || (!ptr) || (!size) ) | |
56 | return -1; | |
57 | ||
58 | memset(dest, 0, sizeof(struct ustr)); | |
59 | usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size; | |
60 | dest->u_cmpID=ptr[0]; | |
61 | dest->u_len=ptr[size-1]; | |
62 | memcpy(dest->u_name, ptr+1, usesize-1); | |
63 | return 0; | |
64 | } | |
65 | ||
66 | /* | |
67 | * udf_build_ustr_exact | |
68 | */ | |
69 | static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) | |
70 | { | |
71 | if ( (!dest) || (!ptr) || (!exactsize) ) | |
72 | return -1; | |
73 | ||
74 | memset(dest, 0, sizeof(struct ustr)); | |
75 | dest->u_cmpID=ptr[0]; | |
76 | dest->u_len=exactsize-1; | |
77 | memcpy(dest->u_name, ptr+1, exactsize-1); | |
78 | return 0; | |
79 | } | |
80 | ||
81 | /* | |
82 | * udf_ocu_to_utf8 | |
83 | * | |
84 | * PURPOSE | |
85 | * Convert OSTA Compressed Unicode to the UTF-8 equivalent. | |
86 | * | |
87 | * DESCRIPTION | |
88 | * This routine is only called by udf_filldir(). | |
89 | * | |
90 | * PRE-CONDITIONS | |
91 | * utf Pointer to UTF-8 output buffer. | |
92 | * ocu Pointer to OSTA Compressed Unicode input buffer | |
93 | * of size UDF_NAME_LEN bytes. | |
94 | * both of type "struct ustr *" | |
95 | * | |
96 | * POST-CONDITIONS | |
97 | * <return> Zero on success. | |
98 | * | |
99 | * HISTORY | |
100 | * November 12, 1997 - Andrew E. Mileski | |
101 | * Written, tested, and released. | |
102 | */ | |
103 | int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) | |
104 | { | |
105 | uint8_t *ocu; | |
106 | uint32_t c; | |
107 | uint8_t cmp_id, ocu_len; | |
108 | int i; | |
109 | ||
110 | ocu = ocu_i->u_name; | |
111 | ||
112 | ocu_len = ocu_i->u_len; | |
113 | cmp_id = ocu_i->u_cmpID; | |
114 | utf_o->u_len = 0; | |
115 | ||
116 | if (ocu_len == 0) | |
117 | { | |
118 | memset(utf_o, 0, sizeof(struct ustr)); | |
119 | utf_o->u_cmpID = 0; | |
120 | utf_o->u_len = 0; | |
121 | return 0; | |
122 | } | |
123 | ||
124 | if ((cmp_id != 8) && (cmp_id != 16)) | |
125 | { | |
126 | printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); | |
127 | return 0; | |
128 | } | |
129 | ||
130 | for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;) | |
131 | { | |
132 | ||
133 | /* Expand OSTA compressed Unicode to Unicode */ | |
134 | c = ocu[i++]; | |
135 | if (cmp_id == 16) | |
136 | c = (c << 8) | ocu[i++]; | |
137 | ||
138 | /* Compress Unicode to UTF-8 */ | |
139 | if (c < 0x80U) | |
140 | utf_o->u_name[utf_o->u_len++] = (uint8_t)c; | |
141 | else if (c < 0x800U) | |
142 | { | |
143 | utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); | |
144 | utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); | |
145 | } | |
146 | else | |
147 | { | |
148 | utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); | |
149 | utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f)); | |
150 | utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); | |
151 | } | |
152 | } | |
153 | utf_o->u_cmpID=8; | |
154 | ||
155 | return utf_o->u_len; | |
156 | } | |
157 | ||
158 | /* | |
159 | * | |
160 | * udf_utf8_to_ocu | |
161 | * | |
162 | * PURPOSE | |
163 | * Convert UTF-8 to the OSTA Compressed Unicode equivalent. | |
164 | * | |
165 | * DESCRIPTION | |
166 | * This routine is only called by udf_lookup(). | |
167 | * | |
168 | * PRE-CONDITIONS | |
169 | * ocu Pointer to OSTA Compressed Unicode output | |
170 | * buffer of size UDF_NAME_LEN bytes. | |
171 | * utf Pointer to UTF-8 input buffer. | |
172 | * utf_len Length of UTF-8 input buffer in bytes. | |
173 | * | |
174 | * POST-CONDITIONS | |
175 | * <return> Zero on success. | |
176 | * | |
177 | * HISTORY | |
178 | * November 12, 1997 - Andrew E. Mileski | |
179 | * Written, tested, and released. | |
180 | */ | |
181 | static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) | |
182 | { | |
183 | unsigned c, i, max_val, utf_char; | |
184 | int utf_cnt, u_len; | |
185 | ||
186 | memset(ocu, 0, sizeof(dstring) * length); | |
187 | ocu[0] = 8; | |
188 | max_val = 0xffU; | |
189 | ||
190 | try_again: | |
191 | u_len = 0U; | |
192 | utf_char = 0U; | |
193 | utf_cnt = 0U; | |
194 | for (i = 0U; i < utf->u_len; i++) | |
195 | { | |
196 | c = (uint8_t)utf->u_name[i]; | |
197 | ||
198 | /* Complete a multi-byte UTF-8 character */ | |
199 | if (utf_cnt) | |
200 | { | |
201 | utf_char = (utf_char << 6) | (c & 0x3fU); | |
202 | if (--utf_cnt) | |
203 | continue; | |
204 | } | |
205 | else | |
206 | { | |
207 | /* Check for a multi-byte UTF-8 character */ | |
208 | if (c & 0x80U) | |
209 | { | |
210 | /* Start a multi-byte UTF-8 character */ | |
211 | if ((c & 0xe0U) == 0xc0U) | |
212 | { | |
213 | utf_char = c & 0x1fU; | |
214 | utf_cnt = 1; | |
215 | } | |
216 | else if ((c & 0xf0U) == 0xe0U) | |
217 | { | |
218 | utf_char = c & 0x0fU; | |
219 | utf_cnt = 2; | |
220 | } | |
221 | else if ((c & 0xf8U) == 0xf0U) | |
222 | { | |
223 | utf_char = c & 0x07U; | |
224 | utf_cnt = 3; | |
225 | } | |
226 | else if ((c & 0xfcU) == 0xf8U) | |
227 | { | |
228 | utf_char = c & 0x03U; | |
229 | utf_cnt = 4; | |
230 | } | |
231 | else if ((c & 0xfeU) == 0xfcU) | |
232 | { | |
233 | utf_char = c & 0x01U; | |
234 | utf_cnt = 5; | |
235 | } | |
236 | else | |
237 | goto error_out; | |
238 | continue; | |
239 | } else | |
240 | /* Single byte UTF-8 character (most common) */ | |
241 | utf_char = c; | |
242 | } | |
243 | ||
244 | /* Choose no compression if necessary */ | |
245 | if (utf_char > max_val) | |
246 | { | |
247 | if ( 0xffU == max_val ) | |
248 | { | |
249 | max_val = 0xffffU; | |
250 | ocu[0] = (uint8_t)0x10U; | |
251 | goto try_again; | |
252 | } | |
253 | goto error_out; | |
254 | } | |
255 | ||
256 | if (max_val == 0xffffU) | |
257 | { | |
258 | ocu[++u_len] = (uint8_t)(utf_char >> 8); | |
259 | } | |
260 | ocu[++u_len] = (uint8_t)(utf_char & 0xffU); | |
261 | } | |
262 | ||
263 | ||
264 | if (utf_cnt) | |
265 | { | |
266 | error_out: | |
267 | ocu[++u_len] = '?'; | |
268 | printk(KERN_DEBUG "udf: bad UTF-8 character\n"); | |
269 | } | |
270 | ||
271 | ocu[length - 1] = (uint8_t)u_len + 1; | |
272 | return u_len + 1; | |
273 | } | |
274 | ||
275 | static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr *ocu_i) | |
276 | { | |
277 | uint8_t *ocu; | |
278 | uint32_t c; | |
279 | uint8_t cmp_id, ocu_len; | |
280 | int i; | |
281 | ||
282 | ocu = ocu_i->u_name; | |
283 | ||
284 | ocu_len = ocu_i->u_len; | |
285 | cmp_id = ocu_i->u_cmpID; | |
286 | utf_o->u_len = 0; | |
287 | ||
288 | if (ocu_len == 0) | |
289 | { | |
290 | memset(utf_o, 0, sizeof(struct ustr)); | |
291 | utf_o->u_cmpID = 0; | |
292 | utf_o->u_len = 0; | |
293 | return 0; | |
294 | } | |
295 | ||
296 | if ((cmp_id != 8) && (cmp_id != 16)) | |
297 | { | |
298 | printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); | |
299 | return 0; | |
300 | } | |
301 | ||
302 | for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;) | |
303 | { | |
304 | /* Expand OSTA compressed Unicode to Unicode */ | |
305 | c = ocu[i++]; | |
306 | if (cmp_id == 16) | |
307 | c = (c << 8) | ocu[i++]; | |
308 | ||
309 | utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len], | |
310 | UDF_NAME_LEN - utf_o->u_len); | |
311 | } | |
312 | utf_o->u_cmpID=8; | |
313 | ||
314 | return utf_o->u_len; | |
315 | } | |
316 | ||
317 | static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int length) | |
318 | { | |
319 | unsigned len, i, max_val; | |
320 | uint16_t uni_char; | |
321 | int u_len; | |
322 | ||
323 | memset(ocu, 0, sizeof(dstring) * length); | |
324 | ocu[0] = 8; | |
325 | max_val = 0xffU; | |
326 | ||
327 | try_again: | |
328 | u_len = 0U; | |
329 | for (i = 0U; i < uni->u_len; i++) | |
330 | { | |
331 | len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char); | |
332 | if (len <= 0) | |
333 | continue; | |
334 | ||
335 | if (uni_char > max_val) | |
336 | { | |
337 | max_val = 0xffffU; | |
338 | ocu[0] = (uint8_t)0x10U; | |
339 | goto try_again; | |
340 | } | |
341 | ||
342 | if (max_val == 0xffffU) | |
343 | ocu[++u_len] = (uint8_t)(uni_char >> 8); | |
344 | ocu[++u_len] = (uint8_t)(uni_char & 0xffU); | |
345 | i += len - 1; | |
346 | } | |
347 | ||
348 | ocu[length - 1] = (uint8_t)u_len + 1; | |
349 | return u_len + 1; | |
350 | } | |
351 | ||
352 | int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, int flen) | |
353 | { | |
354 | struct ustr filename, unifilename; | |
355 | int len; | |
356 | ||
357 | if (udf_build_ustr_exact(&unifilename, sname, flen)) | |
358 | { | |
359 | return 0; | |
360 | } | |
361 | ||
362 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) | |
363 | { | |
364 | if (!udf_CS0toUTF8(&filename, &unifilename) ) | |
365 | { | |
366 | udf_debug("Failed in udf_get_filename: sname = %s\n", sname); | |
367 | return 0; | |
368 | } | |
369 | } | |
370 | else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) | |
371 | { | |
372 | if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) ) | |
373 | { | |
374 | udf_debug("Failed in udf_get_filename: sname = %s\n", sname); | |
375 | return 0; | |
376 | } | |
377 | } | |
378 | else | |
379 | return 0; | |
380 | ||
381 | if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len, | |
382 | unifilename.u_name, unifilename.u_len))) | |
383 | { | |
384 | return len; | |
385 | } | |
386 | return 0; | |
387 | } | |
388 | ||
389 | int udf_put_filename(struct super_block *sb, const uint8_t *sname, uint8_t *dname, int flen) | |
390 | { | |
391 | struct ustr unifilename; | |
392 | int namelen; | |
393 | ||
394 | if ( !(udf_char_to_ustr(&unifilename, sname, flen)) ) | |
395 | { | |
396 | return 0; | |
397 | } | |
398 | ||
399 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) | |
400 | { | |
401 | if ( !(namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN)) ) | |
402 | { | |
403 | return 0; | |
404 | } | |
405 | } | |
406 | else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) | |
407 | { | |
408 | if ( !(namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &unifilename, UDF_NAME_LEN)) ) | |
409 | { | |
410 | return 0; | |
411 | } | |
412 | } | |
413 | else | |
414 | return 0; | |
415 | ||
416 | return namelen; | |
417 | } | |
418 | ||
#define ILLEGAL_CHAR_MARK	'_'	/* replaces '/' and NUL in names */
#define EXT_MARK		'.'	/* separates name and extension */
#define CRC_MARK		'#'	/* introduces the 4-digit CRC suffix */
#define EXT_SIZE		5	/* longest extension that is preserved */

/* True for bytes that are replaced by ILLEGAL_CHAR_MARK. */
static inline int udf_illegal_name_char(uint8_t ch)
{
	return ch == '/' || ch == 0;
}

/*
 * udf_translate_to_linux
 *
 * PURPOSE
 *	Copy a decoded UDF name into newName, replacing bytes Linux cannot
 *	use and making mangled names unique with a CRC suffix.
 *
 * DESCRIPTION
 *	Runs of '/' and NUL collapse into a single ILLEGAL_CHAR_MARK.  The
 *	last EXT_MARK with at most EXT_SIZE bytes after it starts the
 *	extension.  When the name is "." or "..", contained illegal bytes,
 *	or did not fit in 256 bytes, the result becomes
 *	<name> CRC_MARK <4 hex digits> [EXT_MARK <extension>], where the hex
 *	digits come from udf_crc() over fidName.
 *
 * PRE-CONDITIONS
 *	newName		Output buffer.
 *	udfName		Decoded name to translate.
 *	udfLen		Length of udfName in bytes.
 *	fidName		Original identifier bytes fed to udf_crc().
 *	fidNameLen	Length of fidName in bytes.
 *
 * POST-CONDITIONS
 *	<return>	Number of bytes written to newName.
 */
static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, uint8_t *fidName, int fidNameLen)
{
	static const uint8_t hex_digits[] = "0123456789ABCDEF";
	uint8_t ext[EXT_SIZE];
	uint8_t ch;
	unsigned short crc;
	int src, shift;
	int dst = 0;
	int crc_needed = 0;
	int has_ext = 0, ext_src = 0, ext_dst = 0;
	int ext_len = 0;

	if (udfName[0] == '.' &&
	    (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
		/* "." and ".." are copied verbatim and always get a CRC. */
		crc_needed = 1;
		dst = udfLen;
		memcpy(newName, udfName, udfLen);
	} else {
		for (src = 0; src < udfLen; src++) {
			ch = udfName[src];

			if (udf_illegal_name_char(ch)) {
				crc_needed = 1;
				ch = ILLEGAL_CHAR_MARK;
				/* Swallow the rest of a run of illegal bytes. */
				while (src + 1 < udfLen &&
				       udf_illegal_name_char(udfName[src + 1]))
					src++;
			}

			if (ch == EXT_MARK && (udfLen - src - 1) <= EXT_SIZE) {
				if (udfLen == src + 1) {
					/* A trailing dot is not an extension. */
					has_ext = 0;
				} else {
					has_ext = 1;
					ext_src = src;
					ext_dst = dst;
				}
			}

			if (dst < 256)
				newName[dst++] = ch;
			else
				crc_needed = 1;
		}
	}

	if (!crc_needed)
		return dst;

	if (has_ext) {
		int max_name_len;

		/* Re-scan the extension, cleaning it like the main name. */
		for (src = 0; src < EXT_SIZE && ext_src + src + 1 < udfLen; src++) {
			ch = udfName[ext_src + src + 1];

			if (udf_illegal_name_char(ch)) {
				ch = ILLEGAL_CHAR_MARK;
				while (ext_src + src + 2 < udfLen &&
				       (src + 1 < EXT_SIZE &&
					udf_illegal_name_char(udfName[ext_src + src + 2])))
					src++;
			}
			ext[ext_len++] = ch;
		}

		max_name_len = 250 - ext_len;
		if (dst > max_name_len)
			dst = max_name_len;
		else
			dst = ext_dst;
	} else if (dst > 250) {
		dst = 250;
	}

	newName[dst++] = CRC_MARK;
	crc = udf_crc(fidName, fidNameLen, 0);
	/* Four hex digits, most significant nibble first. */
	for (shift = 12; shift >= 0; shift -= 4)
		newName[dst++] = hex_digits[(crc >> shift) & 0xf];

	if (has_ext) {
		newName[dst++] = EXT_MARK;
		for (src = 0; src < ext_len; src++)
			newName[dst++] = ext[src];
	}

	return dst;
}