7c9d25ae |
1 | /** @file\r |
2 | Language Library implementation that provides functions for language conversion\r |
3 | between ISO 639-2 and RFC 4646 language codes.\r |
4 | \r |
5 | Copyright (c) 2009, Intel Corporation<BR>\r |
6 | All rights reserved. This program and the accompanying materials\r |
7 | are licensed and made available under the terms and conditions of the BSD License\r |
8 | which accompanies this distribution. The full text of the license may be found at\r |
9 | http://opensource.org/licenses/bsd-license.php\r |
10 | \r |
11 | THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r |
12 | WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r |
13 | \r |
14 | **/\r |
15 | \r |
16 | #include <Library/BaseLib.h>\r |
17 | #include <Library/DebugLib.h>\r |
18 | #include <Library/MemoryAllocationLib.h>\r |
19 | #include <Library/LanguageLib.h>\r |
20 | \r |
21 | //\r |
22 | // Lookup table of ISO639-2 3 character language codes to ISO 639-1 2 character language codes\r |
23 | // Each entry is 5 CHAR8 values long. The first 3 CHAR8 values are the ISO 639-2 code.\r |
24 | // The last 2 CHAR8 values are the ISO 639-1 code.\r |
25 | //\r |
26 | // ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported.\r |
27 | //\r |
28 | // Commonly used language codes such as English and French are put in the front of the table for quick match.\r |
29 | //\r |
30 | GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8 mIso639ToRfc4646ConversionTable[] =\r |
31 | "\\r |
32 | engen\\r |
33 | frafr\\r |
34 | aaraa\\r |
35 | abkab\\r |
36 | aveae\\r |
37 | afraf\\r |
38 | akaak\\r |
39 | amham\\r |
40 | argan\\r |
41 | araar\\r |
42 | asmas\\r |
43 | avaav\\r |
44 | aymay\\r |
45 | azeaz\\r |
46 | bakba\\r |
47 | belbe\\r |
48 | bulbg\\r |
49 | bihbh\\r |
50 | bisbi\\r |
51 | bambm\\r |
52 | benbn\\r |
53 | bodbo\\r |
54 | brebr\\r |
55 | bosbs\\r |
56 | catca\\r |
57 | chece\\r |
58 | chach\\r |
59 | cosco\\r |
60 | crecr\\r |
61 | cescs\\r |
62 | chucu\\r |
63 | chvcv\\r |
64 | cymcy\\r |
65 | danda\\r |
66 | deude\\r |
67 | divdv\\r |
68 | dzodz\\r |
69 | eweee\\r |
70 | ellel\\r |
71 | epoeo\\r |
72 | spaes\\r |
73 | estet\\r |
74 | euseu\\r |
75 | fasfa\\r |
76 | fulff\\r |
77 | finfi\\r |
78 | fijfj\\r |
79 | faofo\\r |
80 | fryfy\\r |
81 | glega\\r |
82 | glagd\\r |
83 | glggl\\r |
84 | grngn\\r |
85 | gujgu\\r |
86 | glvgv\\r |
87 | hauha\\r |
88 | hebhe\\r |
89 | hinhi\\r |
90 | hmoho\\r |
91 | hrvhr\\r |
92 | hatht\\r |
93 | hunhu\\r |
94 | hyehy\\r |
95 | herhz\\r |
96 | inaia\\r |
97 | indid\\r |
98 | ileie\\r |
99 | iboig\\r |
100 | iiiii\\r |
101 | ipkik\\r |
102 | idoio\\r |
103 | islis\\r |
104 | itait\\r |
105 | ikuiu\\r |
106 | jpnja\\r |
107 | javjv\\r |
108 | katka\\r |
109 | konkg\\r |
110 | kikki\\r |
111 | kuakj\\r |
112 | kazkk\\r |
113 | kalkl\\r |
114 | khmkm\\r |
115 | kankn\\r |
116 | korko\\r |
117 | kaukr\\r |
118 | kasks\\r |
119 | kurku\\r |
120 | komkv\\r |
121 | corkw\\r |
122 | kirky\\r |
123 | latla\\r |
124 | ltzlb\\r |
125 | luglg\\r |
126 | limli\\r |
127 | linln\\r |
128 | laolo\\r |
129 | litlt\\r |
130 | lublu\\r |
131 | lavlv\\r |
132 | mlgmg\\r |
133 | mahmh\\r |
134 | mrimi\\r |
135 | mkdmk\\r |
136 | malml\\r |
137 | monmn\\r |
138 | marmr\\r |
139 | msams\\r |
140 | mltmt\\r |
141 | myamy\\r |
142 | nauna\\r |
143 | nobnb\\r |
144 | ndend\\r |
145 | nepne\\r |
146 | ndong\\r |
147 | nldnl\\r |
148 | nnonn\\r |
149 | norno\\r |
150 | nblnr\\r |
151 | navnv\\r |
152 | nyany\\r |
153 | ocioc\\r |
154 | ojioj\\r |
155 | ormom\\r |
156 | orior\\r |
157 | ossos\\r |
158 | panpa\\r |
159 | plipi\\r |
160 | polpl\\r |
161 | pusps\\r |
162 | porpt\\r |
163 | quequ\\r |
164 | rohrm\\r |
165 | runrn\\r |
166 | ronro\\r |
167 | rusru\\r |
168 | kinrw\\r |
169 | sansa\\r |
170 | srdsc\\r |
171 | sndsd\\r |
172 | smese\\r |
173 | sagsg\\r |
174 | sinsi\\r |
175 | slksk\\r |
176 | slvsl\\r |
177 | smosm\\r |
178 | snasn\\r |
179 | somso\\r |
180 | sqisq\\r |
181 | srpsr\\r |
182 | sswss\\r |
183 | sotst\\r |
184 | sunsu\\r |
185 | swesv\\r |
186 | swasw\\r |
187 | tamta\\r |
188 | telte\\r |
189 | tgktg\\r |
190 | thath\\r |
191 | tirti\\r |
192 | tuktk\\r |
193 | tgltl\\r |
194 | tsntn\\r |
195 | tonto\\r |
196 | turtr\\r |
197 | tsots\\r |
198 | tattt\\r |
199 | twitw\\r |
200 | tahty\\r |
201 | uigug\\r |
202 | ukruk\\r |
203 | urdur\\r |
204 | uzbuz\\r |
205 | venve\\r |
206 | vievi\\r |
207 | volvo\\r |
208 | wlnwa\\r |
209 | wolwo\\r |
210 | xhoxh\\r |
211 | yidyi\\r |
212 | yoryo\\r |
213 | zhaza\\r |
214 | zhozh\\r |
215 | zulzu\\r |
216 | ";\r |
217 | \r |
218 | /**\r |
219 | Converts upper case ASCII characters in an ASCII string to lower case ASCII \r |
220 | characters in an ASCII string.\r |
221 | \r |
222 | If a an ASCII character in Source is in the range 'A'..'Z', then it is converted \r |
223 | to an ASCII character in the range 'a'..'z' in Destination. Otherwise, no \r |
224 | conversion is performed. Length ASCII characters from Source are convertered and\r |
225 | stored in Destination.\r |
226 | \r |
227 | @param Destination An ASCII string to store the results of the conversion.\r |
228 | @param Source The source ASCII string of the conversion.\r |
229 | @param Length The number of ASCII characters to convert.\r |
230 | \r |
231 | **/\r |
232 | VOID\r |
233 | EFIAPI\r |
234 | InternalLanguageLibToLower (\r |
235 | OUT CHAR8 *Destination,\r |
236 | IN CONST CHAR8 *Source,\r |
237 | IN UINTN Length\r |
238 | )\r |
239 | {\r |
240 | for (; Length > 0; Length--, Destination++, Source++) {\r |
60ef6427 |
241 | *Destination = (*Source >= 'A' && *Source <= 'Z') ? (CHAR8)(*Source + ('a' - 'A')) : *Source;\r |
7c9d25ae |
242 | }\r |
243 | }\r |
244 | \r |
245 | /**\r |
246 | Convert an ISO 639-2 language code to a RFC 4646 language code.\r |
247 | If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1\r |
248 | code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646\r |
249 | language code is composed of only a primary language subtag.\r |
250 | \r |
251 | If Iso639Language is NULL, then ASSERT.\r |
252 | If Rfc4646Language is NULL, then ASSERT.\r |
253 | \r |
254 | @param[out] Rfc4646Language Pointers to a buffer large enough for an ASCII string\r |
255 | which reprsents a RFC 4646 language code containging only\r |
256 | either a ISO 639-1 or ISO 639-2 primary language subtag.\r |
257 | This string is Null-terminated.\r |
258 | @param[in] Iso639Language Pointer to a 3-letter ASCII string which represents\r |
259 | an ISO 639-2 language code. This string is not required\r |
260 | to be Null-terminated.\r |
261 | \r |
262 | @retval TRUE The ISO 639-2 language code was converted to a ISO 639-1 code.\r |
263 | @retval FALSE The language code does not have corresponding ISO 639-1 code.\r |
264 | \r |
265 | **/\r |
266 | BOOLEAN\r |
267 | EFIAPI\r |
268 | ConvertIso639ToRfc4646 (\r |
269 | OUT CHAR8 *Rfc4646Language,\r |
270 | IN CONST CHAR8 *Iso639Language\r |
271 | )\r |
272 | {\r |
273 | CONST CHAR8 *Match;\r |
274 | \r |
275 | ASSERT (Iso639Language != NULL);\r |
276 | ASSERT (Rfc4646Language != NULL);\r |
277 | \r |
278 | //\r |
279 | // Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language\r |
280 | //\r |
281 | InternalLanguageLibToLower (Rfc4646Language, Iso639Language, 3);\r |
282 | Rfc4646Language[3] = '\0';\r |
283 | \r |
284 | Match = mIso639ToRfc4646ConversionTable;\r |
285 | do {\r |
286 | Match = AsciiStrStr (Match, Rfc4646Language);\r |
287 | if (Match == NULL) {\r |
288 | return FALSE;\r |
289 | }\r |
290 | if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 0) {\r |
291 | break;\r |
292 | }\r |
293 | ++Match;\r |
294 | } while (TRUE);\r |
295 | Rfc4646Language[0] = Match[3];\r |
296 | Rfc4646Language[1] = Match[4];\r |
297 | Rfc4646Language[2] = '\0';\r |
298 | return TRUE;\r |
299 | }\r |
300 | \r |
301 | /**\r |
302 | Convert a RFC 4646 language code to an ISO 639-2 language code. The primary language\r |
303 | subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary\r |
304 | language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2\r |
305 | code (T code if applies). Else the ISO 639-2 code is returned.\r |
306 | \r |
307 | If Rfc4646Language is NULL, then ASSERT.\r |
308 | If Iso639Language is NULL, then ASSERT.\r |
309 | \r |
310 | @param[out] Iso639Language Pointers to a buffer large enough for a 3-letter ASCII string\r |
311 | which reprsents an ISO 639-2 language code. The string is Null-terminated.\r |
312 | @param[in] Rfc4646Language Pointer to a RFC 4646 language code string. This string is terminated\r |
313 | by a NULL or a ';' character.\r |
314 | \r |
315 | @retval TRUE Language code converted successfully.\r |
316 | @retval FALSE The RFC 4646 language code is invalid or unsupported.\r |
317 | \r |
318 | **/\r |
319 | BOOLEAN\r |
320 | EFIAPI\r |
321 | ConvertRfc4646ToIso639 (\r |
322 | OUT CHAR8 *Iso639Language,\r |
323 | IN CONST CHAR8 *Rfc4646Language\r |
324 | )\r |
325 | {\r |
326 | CONST CHAR8 *Match;\r |
327 | \r |
328 | ASSERT (Rfc4646Language != NULL);\r |
329 | ASSERT (Iso639Language != NULL);\r |
330 | \r |
331 | //\r |
332 | // RFC 4646 language code check before determining \r |
333 | // if the primary language subtag is ISO 639-1 or 639-2 code\r |
334 | //\r |
335 | if (Rfc4646Language[0] == '\0' || Rfc4646Language[1] == '\0') {\r |
336 | return FALSE;\r |
337 | }\r |
338 | \r |
339 | //\r |
340 | // Check if the primary language subtag is ISO 639-1 code\r |
341 | //\r |
342 | if (Rfc4646Language[2] == ';' || Rfc4646Language[2] == '-' || Rfc4646Language[2] == '\0') {\r |
343 | //\r |
344 | // Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language\r |
345 | //\r |
346 | InternalLanguageLibToLower (Iso639Language, Rfc4646Language, 2);\r |
347 | //\r |
348 | // Convert ISO 639-1 code to ISO 639-2 code\r |
349 | //\r |
350 | Iso639Language[2] = '\0';\r |
351 | Match = mIso639ToRfc4646ConversionTable;\r |
352 | do {\r |
353 | Match = AsciiStrStr (Match, Iso639Language);\r |
354 | if (Match == NULL) {\r |
355 | return FALSE;\r |
356 | }\r |
357 | if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 3) {\r |
358 | break;\r |
359 | }\r |
360 | ++Match;\r |
361 | } while (TRUE);\r |
362 | Rfc4646Language = Match - 3;\r |
363 | } else if (!(Rfc4646Language[3] == ';' || Rfc4646Language[3] == '-' || Rfc4646Language[3] == '\0')) {\r |
364 | return FALSE;\r |
365 | }\r |
366 | Iso639Language[0] = Rfc4646Language[0];\r |
367 | Iso639Language[1] = Rfc4646Language[1];\r |
368 | Iso639Language[2] = Rfc4646Language[2];\r |
369 | Iso639Language[3] = '\0';\r |
370 | return TRUE; \r |
371 | }\r |
372 | \r |
373 | /**\r |
374 | Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes.\r |
375 | Caller is responsible for freeing the allocated buffer.\r |
376 | \r |
377 | If Iso639Languages is NULL, then ASSERT.\r |
378 | \r |
379 | @param[in] Iso639Languages Pointers to a Null-terminated ISO 639-2 language codes string containing\r |
380 | one or more ISO 639-2 3-letter language codes.\r |
381 | \r |
382 | @retval NULL Invalid ISO 639-2 language code found.\r |
383 | @retval NULL Out of memory.\r |
384 | @retval !NULL Pointer to the allocate buffer containing the Null-terminated converted language codes string.\r |
385 | This string is composed of one or more RFC4646 language codes each of which has only\r |
386 | ISO 639-1 2-letter primary language subtag.\r |
387 | \r |
388 | **/\r |
389 | CHAR8 *\r |
390 | EFIAPI\r |
391 | ConvertLanguagesIso639ToRfc4646 (\r |
392 | IN CONST CHAR8 *Iso639Languages\r |
393 | )\r |
394 | {\r |
395 | UINTN Length;\r |
396 | UINTN Iso639Index;\r |
397 | UINTN Rfc4646Index;\r |
398 | CHAR8 *Rfc4646Languages;\r |
399 | \r |
400 | ASSERT (Iso639Languages != NULL);\r |
401 | \r |
402 | //\r |
403 | // The length of ISO 639-2 lanugage codes string must be multiple of 3\r |
404 | //\r |
405 | Length = AsciiStrLen (Iso639Languages);\r |
406 | if (Length % 3) {\r |
407 | return NULL;\r |
408 | }\r |
409 | \r |
410 | //\r |
411 | // Allocate buffer for RFC 4646 language codes string\r |
412 | //\r |
413 | Rfc4646Languages = AllocatePool (Length + (Length / 3));\r |
414 | if (Rfc4646Languages == NULL) {\r |
415 | return NULL;\r |
416 | }\r |
417 | \r |
418 | for (Iso639Index = 0, Rfc4646Index = 0; Iso639Languages[Iso639Index] != '\0'; Iso639Index += 3) {\r |
419 | if (ConvertIso639ToRfc4646 (&Rfc4646Languages[Rfc4646Index], &Iso639Languages[Iso639Index])) {\r |
420 | Rfc4646Index += 2;\r |
421 | } else {\r |
422 | Rfc4646Index += 3;\r |
423 | }\r |
424 | Rfc4646Languages[Rfc4646Index++] = ';';\r |
425 | }\r |
426 | Rfc4646Languages[Rfc4646Index - 1] = '\0';\r |
427 | return Rfc4646Languages;\r |
428 | }\r |
429 | \r |
430 | /**\r |
431 | Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes.\r |
432 | The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code.\r |
433 | Caller is responsible for freeing the allocated buffer.\r |
434 | \r |
435 | If Rfc4646Languages is NULL, then ASSERT.\r |
436 | \r |
437 | @param[in] Rfc4646Languages Pointers to a Null-terminated RFC 4646 language codes string containing\r |
438 | one or more RFC 4646 language codes.\r |
439 | \r |
440 | @retval NULL Invalid or unsupported RFC 4646 language code found.\r |
441 | @retval NULL Out of memory.\r |
442 | @retval !NULL Pointer to the allocate buffer containing the Null-terminated converted language codes string.\r |
443 | This string is composed of one or more ISO 639-2 language codes.\r |
444 | \r |
445 | **/\r |
446 | CHAR8 *\r |
447 | EFIAPI\r |
448 | ConvertLanguagesRfc4646ToIso639 (\r |
449 | IN CONST CHAR8 *Rfc4646Languages\r |
450 | )\r |
451 | {\r |
452 | UINTN NumLanguages;\r |
453 | UINTN Iso639Index;\r |
454 | UINTN Rfc4646Index;\r |
455 | CHAR8 *Iso639Languages;\r |
456 | \r |
457 | ASSERT (Rfc4646Languages != NULL);\r |
458 | \r |
459 | //\r |
460 | // Determine the number of languages in the RFC 4646 language codes string\r |
461 | //\r |
462 | for (Rfc4646Index = 0, NumLanguages = 1; Rfc4646Languages[Rfc4646Index] != '\0'; Rfc4646Index++) {\r |
463 | if (Rfc4646Languages[Rfc4646Index] == ';') {\r |
464 | NumLanguages++;\r |
465 | }\r |
466 | }\r |
467 | \r |
468 | //\r |
469 | // Allocate buffer for ISO 639-2 language codes string\r |
470 | //\r |
471 | Iso639Languages = AllocateZeroPool (NumLanguages * 3 + 1);\r |
472 | if (Iso639Languages == NULL) {\r |
473 | return NULL;\r |
474 | }\r |
475 | \r |
476 | //\r |
477 | // Do the conversion for each RFC 4646 language code\r |
478 | //\r |
479 | for (Rfc4646Index = 0, Iso639Index = 0; Rfc4646Languages[Rfc4646Index] != '\0';) {\r |
480 | if (ConvertRfc4646ToIso639 (&Iso639Languages[Iso639Index], &Rfc4646Languages[Rfc4646Index])) {\r |
481 | Iso639Index += 3;\r |
482 | } else {\r |
483 | FreePool (Iso639Languages);\r |
484 | return NULL;\r |
485 | }\r |
486 | //\r |
487 | // Locate next language code\r |
488 | //\r |
489 | while (Rfc4646Languages[Rfc4646Index] != ';' && Rfc4646Languages[Rfc4646Index] != '\0') {\r |
490 | Rfc4646Index++;\r |
491 | }\r |
492 | if (Rfc4646Languages[Rfc4646Index] == ';') {\r |
493 | Rfc4646Index++;\r |
494 | }\r |
495 | }\r |
496 | Iso639Languages[Iso639Index] = '\0';\r |
497 | return Iso639Languages;\r |
498 | }\r |
499 | \r |