]> git.proxmox.com Git - mirror_edk2.git/blob - EdkCompatibilityPkg/Compatibility/Library/LanguageLib.c
9015865c2f0eab52ed34708c00b9e0ab5847c36b
[mirror_edk2.git] / EdkCompatibilityPkg / Compatibility / Library / LanguageLib.c
1 /** @file
2 Language Library implementation that provides functions for language conversion
3 between ISO 639-2 and RFC 4646 language codes.
4
5 Copyright (c) 2009, Intel Corporation<BR>
6 All rights reserved. This program and the accompanying materials
7 are licensed and made available under the terms and conditions of the BSD License
8 which accompanies this distribution. The full text of the license may be found at
9 http://opensource.org/licenses/bsd-license.php
10
11 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13
14 **/
15
16 #include <Library/BaseLib.h>
17 #include <Library/DebugLib.h>
18 #include <Library/MemoryAllocationLib.h>
19 #include <Library/LanguageLib.h>
20
21 //
22 // Lookup table of ISO639-2 3 character language codes to ISO 639-1 2 character language codes
23 // Each entry is 5 CHAR8 values long. The first 3 CHAR8 values are the ISO 639-2 code.
24 // The last 2 CHAR8 values are the ISO 639-1 code.
25 //
26 // ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported.
27 //
28 // Commonly used language codes such as English and French are put in the front of the table for quick match.
29 //
30 GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8 mIso639ToRfc4646ConversionTable[] =
31 "\
32 engen\
33 frafr\
34 aaraa\
35 abkab\
36 aveae\
37 afraf\
38 akaak\
39 amham\
40 argan\
41 araar\
42 asmas\
43 avaav\
44 aymay\
45 azeaz\
46 bakba\
47 belbe\
48 bulbg\
49 bihbh\
50 bisbi\
51 bambm\
52 benbn\
53 bodbo\
54 brebr\
55 bosbs\
56 catca\
57 chece\
58 chach\
59 cosco\
60 crecr\
61 cescs\
62 chucu\
63 chvcv\
64 cymcy\
65 danda\
66 deude\
67 divdv\
68 dzodz\
69 eweee\
70 ellel\
71 epoeo\
72 spaes\
73 estet\
74 euseu\
75 fasfa\
76 fulff\
77 finfi\
78 fijfj\
79 faofo\
80 fryfy\
81 glega\
82 glagd\
83 glggl\
84 grngn\
85 gujgu\
86 glvgv\
87 hauha\
88 hebhe\
89 hinhi\
90 hmoho\
91 hrvhr\
92 hatht\
93 hunhu\
94 hyehy\
95 herhz\
96 inaia\
97 indid\
98 ileie\
99 iboig\
100 iiiii\
101 ipkik\
102 idoio\
103 islis\
104 itait\
105 ikuiu\
106 jpnja\
107 javjv\
108 katka\
109 konkg\
110 kikki\
111 kuakj\
112 kazkk\
113 kalkl\
114 khmkm\
115 kankn\
116 korko\
117 kaukr\
118 kasks\
119 kurku\
120 komkv\
121 corkw\
122 kirky\
123 latla\
124 ltzlb\
125 luglg\
126 limli\
127 linln\
128 laolo\
129 litlt\
130 lublu\
131 lavlv\
132 mlgmg\
133 mahmh\
134 mrimi\
135 mkdmk\
136 malml\
137 monmn\
138 marmr\
139 msams\
140 mltmt\
141 myamy\
142 nauna\
143 nobnb\
144 ndend\
145 nepne\
146 ndong\
147 nldnl\
148 nnonn\
149 norno\
150 nblnr\
151 navnv\
152 nyany\
153 ocioc\
154 ojioj\
155 ormom\
156 orior\
157 ossos\
158 panpa\
159 plipi\
160 polpl\
161 pusps\
162 porpt\
163 quequ\
164 rohrm\
165 runrn\
166 ronro\
167 rusru\
168 kinrw\
169 sansa\
170 srdsc\
171 sndsd\
172 smese\
173 sagsg\
174 sinsi\
175 slksk\
176 slvsl\
177 smosm\
178 snasn\
179 somso\
180 sqisq\
181 srpsr\
182 sswss\
183 sotst\
184 sunsu\
185 swesv\
186 swasw\
187 tamta\
188 telte\
189 tgktg\
190 thath\
191 tirti\
192 tuktk\
193 tgltl\
194 tsntn\
195 tonto\
196 turtr\
197 tsots\
198 tattt\
199 twitw\
200 tahty\
201 uigug\
202 ukruk\
203 urdur\
204 uzbuz\
205 venve\
206 vievi\
207 volvo\
208 wlnwa\
209 wolwo\
210 xhoxh\
211 yidyi\
212 yoryo\
213 zhaza\
214 zhozh\
215 zulzu\
216 ";
217
218 /**
219 Converts upper case ASCII characters in an ASCII string to lower case ASCII
220 characters in an ASCII string.
221
222 If a an ASCII character in Source is in the range 'A'..'Z', then it is converted
223 to an ASCII character in the range 'a'..'z' in Destination. Otherwise, no
224 conversion is performed. Length ASCII characters from Source are convertered and
225 stored in Destination.
226
227 @param Destination An ASCII string to store the results of the conversion.
228 @param Source The source ASCII string of the conversion.
229 @param Length The number of ASCII characters to convert.
230
231 **/
232 VOID
233 EFIAPI
234 InternalLanguageLibToLower (
235 OUT CHAR8 *Destination,
236 IN CONST CHAR8 *Source,
237 IN UINTN Length
238 )
239 {
240 for (; Length > 0; Length--, Destination++, Source++) {
241 *Destination = (*Source >= 'A' && *Source <= 'Z') ? *Source + ('a' - 'A') : *Source;
242 }
243 }
244
245 /**
246 Convert an ISO 639-2 language code to a RFC 4646 language code.
247 If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1
248 code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646
249 language code is composed of only a primary language subtag.
250
251 If Iso639Language is NULL, then ASSERT.
252 If Rfc4646Language is NULL, then ASSERT.
253
254 @param[out] Rfc4646Language Pointers to a buffer large enough for an ASCII string
255 which reprsents a RFC 4646 language code containging only
256 either a ISO 639-1 or ISO 639-2 primary language subtag.
257 This string is Null-terminated.
258 @param[in] Iso639Language Pointer to a 3-letter ASCII string which represents
259 an ISO 639-2 language code. This string is not required
260 to be Null-terminated.
261
262 @retval TRUE The ISO 639-2 language code was converted to a ISO 639-1 code.
263 @retval FALSE The language code does not have corresponding ISO 639-1 code.
264
265 **/
266 BOOLEAN
267 EFIAPI
268 ConvertIso639ToRfc4646 (
269 OUT CHAR8 *Rfc4646Language,
270 IN CONST CHAR8 *Iso639Language
271 )
272 {
273 CONST CHAR8 *Match;
274
275 ASSERT (Iso639Language != NULL);
276 ASSERT (Rfc4646Language != NULL);
277
278 //
279 // Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language
280 //
281 InternalLanguageLibToLower (Rfc4646Language, Iso639Language, 3);
282 Rfc4646Language[3] = '\0';
283
284 Match = mIso639ToRfc4646ConversionTable;
285 do {
286 Match = AsciiStrStr (Match, Rfc4646Language);
287 if (Match == NULL) {
288 return FALSE;
289 }
290 if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 0) {
291 break;
292 }
293 ++Match;
294 } while (TRUE);
295 Rfc4646Language[0] = Match[3];
296 Rfc4646Language[1] = Match[4];
297 Rfc4646Language[2] = '\0';
298 return TRUE;
299 }
300
301 /**
302 Convert a RFC 4646 language code to an ISO 639-2 language code. The primary language
303 subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary
304 language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2
305 code (T code if applies). Else the ISO 639-2 code is returned.
306
307 If Rfc4646Language is NULL, then ASSERT.
308 If Iso639Language is NULL, then ASSERT.
309
310 @param[out] Iso639Language Pointers to a buffer large enough for a 3-letter ASCII string
311 which reprsents an ISO 639-2 language code. The string is Null-terminated.
312 @param[in] Rfc4646Language Pointer to a RFC 4646 language code string. This string is terminated
313 by a NULL or a ';' character.
314
315 @retval TRUE Language code converted successfully.
316 @retval FALSE The RFC 4646 language code is invalid or unsupported.
317
318 **/
319 BOOLEAN
320 EFIAPI
321 ConvertRfc4646ToIso639 (
322 OUT CHAR8 *Iso639Language,
323 IN CONST CHAR8 *Rfc4646Language
324 )
325 {
326 CONST CHAR8 *Match;
327
328 ASSERT (Rfc4646Language != NULL);
329 ASSERT (Iso639Language != NULL);
330
331 //
332 // RFC 4646 language code check before determining
333 // if the primary language subtag is ISO 639-1 or 639-2 code
334 //
335 if (Rfc4646Language[0] == '\0' || Rfc4646Language[1] == '\0') {
336 return FALSE;
337 }
338
339 //
340 // Check if the primary language subtag is ISO 639-1 code
341 //
342 if (Rfc4646Language[2] == ';' || Rfc4646Language[2] == '-' || Rfc4646Language[2] == '\0') {
343 //
344 // Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language
345 //
346 InternalLanguageLibToLower (Iso639Language, Rfc4646Language, 2);
347 //
348 // Convert ISO 639-1 code to ISO 639-2 code
349 //
350 Iso639Language[2] = '\0';
351 Match = mIso639ToRfc4646ConversionTable;
352 do {
353 Match = AsciiStrStr (Match, Iso639Language);
354 if (Match == NULL) {
355 return FALSE;
356 }
357 if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 3) {
358 break;
359 }
360 ++Match;
361 } while (TRUE);
362 Rfc4646Language = Match - 3;
363 } else if (!(Rfc4646Language[3] == ';' || Rfc4646Language[3] == '-' || Rfc4646Language[3] == '\0')) {
364 return FALSE;
365 }
366 Iso639Language[0] = Rfc4646Language[0];
367 Iso639Language[1] = Rfc4646Language[1];
368 Iso639Language[2] = Rfc4646Language[2];
369 Iso639Language[3] = '\0';
370 return TRUE;
371 }
372
373 /**
374 Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes.
375 Caller is responsible for freeing the allocated buffer.
376
377 If Iso639Languages is NULL, then ASSERT.
378
379 @param[in] Iso639Languages Pointers to a Null-terminated ISO 639-2 language codes string containing
380 one or more ISO 639-2 3-letter language codes.
381
382 @retval NULL Invalid ISO 639-2 language code found.
383 @retval NULL Out of memory.
384 @retval !NULL Pointer to the allocate buffer containing the Null-terminated converted language codes string.
385 This string is composed of one or more RFC4646 language codes each of which has only
386 ISO 639-1 2-letter primary language subtag.
387
388 **/
389 CHAR8 *
390 EFIAPI
391 ConvertLanguagesIso639ToRfc4646 (
392 IN CONST CHAR8 *Iso639Languages
393 )
394 {
395 UINTN Length;
396 UINTN Iso639Index;
397 UINTN Rfc4646Index;
398 CHAR8 *Rfc4646Languages;
399
400 ASSERT (Iso639Languages != NULL);
401
402 //
403 // The length of ISO 639-2 lanugage codes string must be multiple of 3
404 //
405 Length = AsciiStrLen (Iso639Languages);
406 if (Length % 3) {
407 return NULL;
408 }
409
410 //
411 // Allocate buffer for RFC 4646 language codes string
412 //
413 Rfc4646Languages = AllocatePool (Length + (Length / 3));
414 if (Rfc4646Languages == NULL) {
415 return NULL;
416 }
417
418 for (Iso639Index = 0, Rfc4646Index = 0; Iso639Languages[Iso639Index] != '\0'; Iso639Index += 3) {
419 if (ConvertIso639ToRfc4646 (&Rfc4646Languages[Rfc4646Index], &Iso639Languages[Iso639Index])) {
420 Rfc4646Index += 2;
421 } else {
422 Rfc4646Index += 3;
423 }
424 Rfc4646Languages[Rfc4646Index++] = ';';
425 }
426 Rfc4646Languages[Rfc4646Index - 1] = '\0';
427 return Rfc4646Languages;
428 }
429
430 /**
431 Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes.
432 The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code.
433 Caller is responsible for freeing the allocated buffer.
434
435 If Rfc4646Languages is NULL, then ASSERT.
436
437 @param[in] Rfc4646Languages Pointers to a Null-terminated RFC 4646 language codes string containing
438 one or more RFC 4646 language codes.
439
440 @retval NULL Invalid or unsupported RFC 4646 language code found.
441 @retval NULL Out of memory.
442 @retval !NULL Pointer to the allocate buffer containing the Null-terminated converted language codes string.
443 This string is composed of one or more ISO 639-2 language codes.
444
445 **/
446 CHAR8 *
447 EFIAPI
448 ConvertLanguagesRfc4646ToIso639 (
449 IN CONST CHAR8 *Rfc4646Languages
450 )
451 {
452 UINTN NumLanguages;
453 UINTN Iso639Index;
454 UINTN Rfc4646Index;
455 CHAR8 *Iso639Languages;
456
457 ASSERT (Rfc4646Languages != NULL);
458
459 //
460 // Determine the number of languages in the RFC 4646 language codes string
461 //
462 for (Rfc4646Index = 0, NumLanguages = 1; Rfc4646Languages[Rfc4646Index] != '\0'; Rfc4646Index++) {
463 if (Rfc4646Languages[Rfc4646Index] == ';') {
464 NumLanguages++;
465 }
466 }
467
468 //
469 // Allocate buffer for ISO 639-2 language codes string
470 //
471 Iso639Languages = AllocateZeroPool (NumLanguages * 3 + 1);
472 if (Iso639Languages == NULL) {
473 return NULL;
474 }
475
476 //
477 // Do the conversion for each RFC 4646 language code
478 //
479 for (Rfc4646Index = 0, Iso639Index = 0; Rfc4646Languages[Rfc4646Index] != '\0';) {
480 if (ConvertRfc4646ToIso639 (&Iso639Languages[Iso639Index], &Rfc4646Languages[Rfc4646Index])) {
481 Iso639Index += 3;
482 } else {
483 FreePool (Iso639Languages);
484 return NULL;
485 }
486 //
487 // Locate next language code
488 //
489 while (Rfc4646Languages[Rfc4646Index] != ';' && Rfc4646Languages[Rfc4646Index] != '\0') {
490 Rfc4646Index++;
491 }
492 if (Rfc4646Languages[Rfc4646Index] == ';') {
493 Rfc4646Index++;
494 }
495 }
496 Iso639Languages[Iso639Index] = '\0';
497 return Iso639Languages;
498 }
499