2 Language Library implementation that provides functions for language conversion
3 between ISO 639-2 and RFC 4646 language codes.
5 Copyright (c) 2009, Intel Corporation<BR>
6 All rights reserved. This program and the accompanying materials
7 are licensed and made available under the terms and conditions of the BSD License
8 which accompanies this distribution. The full text of the license may be found at
9 http://opensource.org/licenses/bsd-license.php
11 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
16 #include <Library/BaseLib.h>
17 #include <Library/DebugLib.h>
18 #include <Library/MemoryAllocationLib.h>
19 #include <Library/LanguageLib.h>
22 // Lookup table of ISO 639-2 3-character language codes to ISO 639-1 2-character language codes
23 // Each entry is 5 CHAR8 values long. The first 3 CHAR8 values are the ISO 639-2 code.
24 // The last 2 CHAR8 values are the ISO 639-1 code.
26 // ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported.
28 // Commonly used language codes such as English and French are put in the front of the table for quick match.
30 GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8 mIso639ToRfc4646ConversionTable
[] =
219 Converts upper case ASCII characters in an ASCII string to lower case ASCII
220 characters in an ASCII string.
222 If an ASCII character in Source is in the range 'A'..'Z', then it is converted
223 to an ASCII character in the range 'a'..'z' in Destination. Otherwise, no
224 conversion is performed. Length ASCII characters from Source are converted and
225 stored in Destination.
227 @param Destination An ASCII string to store the results of the conversion.
228 @param Source The source ASCII string of the conversion.
229 @param Length The number of ASCII characters to convert.
234 InternalLanguageLibToLower (
235 OUT CHAR8
*Destination
,
236 IN CONST CHAR8
*Source
,
240 for (; Length
> 0; Length
--, Destination
++, Source
++) {
241 *Destination
= (*Source
>= 'A' && *Source
<= 'Z') ? (CHAR8
)(*Source
+ ('a' - 'A')) : *Source
;
246 Convert an ISO 639-2 language code to an RFC 4646 language code.
247 If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1
248 code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646
249 language code is composed of only a primary language subtag.
251 If Iso639Language is NULL, then ASSERT.
252 If Rfc4646Language is NULL, then ASSERT.
254 @param[out] Rfc4646Language Pointer to a buffer large enough for an ASCII string
255 which represents an RFC 4646 language code containing only
256 either an ISO 639-1 or ISO 639-2 primary language subtag.
257 This string is Null-terminated.
258 @param[in] Iso639Language Pointer to a 3-letter ASCII string which represents
259 an ISO 639-2 language code. This string is not required
260 to be Null-terminated.
262 @retval TRUE The ISO 639-2 language code was converted to an ISO 639-1 code.
263 @retval FALSE The language code does not have a corresponding ISO 639-1 code.
268 ConvertIso639ToRfc4646 (
269 OUT CHAR8
*Rfc4646Language
,
270 IN CONST CHAR8
*Iso639Language
275 ASSERT (Iso639Language
!= NULL
);
276 ASSERT (Rfc4646Language
!= NULL
);
279 // Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language
281 InternalLanguageLibToLower (Rfc4646Language
, Iso639Language
, 3);
282 Rfc4646Language
[3] = '\0';
284 Match
= mIso639ToRfc4646ConversionTable
;
286 Match
= AsciiStrStr (Match
, Rfc4646Language
);
290 if (((Match
- mIso639ToRfc4646ConversionTable
) % 5) == 0) {
295 Rfc4646Language
[0] = Match
[3];
296 Rfc4646Language
[1] = Match
[4];
297 Rfc4646Language
[2] = '\0';
302 Convert an RFC 4646 language code to an ISO 639-2 language code. The primary language
303 subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary
304 language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2
305 code (the T code, if applicable). Else the ISO 639-2 code is returned.
307 If Rfc4646Language is NULL, then ASSERT.
308 If Iso639Language is NULL, then ASSERT.
310 @param[out] Iso639Language Pointer to a buffer large enough for a 3-letter ASCII string
311 which represents an ISO 639-2 language code. The string is Null-terminated.
312 @param[in] Rfc4646Language Pointer to a RFC 4646 language code string. This string is terminated
313 by a NULL or a ';' character.
315 @retval TRUE Language code converted successfully.
316 @retval FALSE The RFC 4646 language code is invalid or unsupported.
321 ConvertRfc4646ToIso639 (
322 OUT CHAR8
*Iso639Language
,
323 IN CONST CHAR8
*Rfc4646Language
328 ASSERT (Rfc4646Language
!= NULL
);
329 ASSERT (Iso639Language
!= NULL
);
332 // RFC 4646 language code check before determining
333 // if the primary language subtag is ISO 639-1 or 639-2 code
335 if (Rfc4646Language
[0] == '\0' || Rfc4646Language
[1] == '\0') {
340 // Check if the primary language subtag is ISO 639-1 code
342 if (Rfc4646Language
[2] == ';' || Rfc4646Language
[2] == '-' || Rfc4646Language
[2] == '\0') {
344 // Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language
346 InternalLanguageLibToLower (Iso639Language
, Rfc4646Language
, 2);
348 // Convert ISO 639-1 code to ISO 639-2 code
350 Iso639Language
[2] = '\0';
351 Match
= mIso639ToRfc4646ConversionTable
;
353 Match
= AsciiStrStr (Match
, Iso639Language
);
357 if (((Match
- mIso639ToRfc4646ConversionTable
) % 5) == 3) {
362 Rfc4646Language
= Match
- 3;
363 } else if (!(Rfc4646Language
[3] == ';' || Rfc4646Language
[3] == '-' || Rfc4646Language
[3] == '\0')) {
366 Iso639Language
[0] = Rfc4646Language
[0];
367 Iso639Language
[1] = Rfc4646Language
[1];
368 Iso639Language
[2] = Rfc4646Language
[2];
369 Iso639Language
[3] = '\0';
374 Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes.
375 Caller is responsible for freeing the allocated buffer.
377 If Iso639Languages is NULL, then ASSERT.
379 @param[in] Iso639Languages Pointers to a Null-terminated ISO 639-2 language codes string containing
380 one or more ISO 639-2 3-letter language codes.
382 @retval NULL Invalid ISO 639-2 language code found.
383 @retval NULL Out of memory.
384 @retval !NULL Pointer to the allocated buffer containing the Null-terminated converted language codes string.
385 This string is composed of one or more RFC4646 language codes each of which has only
386 ISO 639-1 2-letter primary language subtag.
391 ConvertLanguagesIso639ToRfc4646 (
392 IN CONST CHAR8
*Iso639Languages
398 CHAR8
*Rfc4646Languages
;
400 ASSERT (Iso639Languages
!= NULL
);
403 // The length of the ISO 639-2 language codes string must be a multiple of 3
405 Length
= AsciiStrLen (Iso639Languages
);
411 // Allocate buffer for RFC 4646 language codes string
413 Rfc4646Languages
= AllocatePool (Length
+ (Length
/ 3));
414 if (Rfc4646Languages
== NULL
) {
418 for (Iso639Index
= 0, Rfc4646Index
= 0; Iso639Languages
[Iso639Index
] != '\0'; Iso639Index
+= 3) {
419 if (ConvertIso639ToRfc4646 (&Rfc4646Languages
[Rfc4646Index
], &Iso639Languages
[Iso639Index
])) {
424 Rfc4646Languages
[Rfc4646Index
++] = ';';
426 Rfc4646Languages
[Rfc4646Index
- 1] = '\0';
427 return Rfc4646Languages
;
431 Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes.
432 The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code.
433 Caller is responsible for freeing the allocated buffer.
435 If Rfc4646Languages is NULL, then ASSERT.
437 @param[in] Rfc4646Languages Pointers to a Null-terminated RFC 4646 language codes string containing
438 one or more RFC 4646 language codes.
440 @retval NULL Invalid or unsupported RFC 4646 language code found.
441 @retval NULL Out of memory.
442 @retval !NULL Pointer to the allocated buffer containing the Null-terminated converted language codes string.
443 This string is composed of one or more ISO 639-2 language codes.
448 ConvertLanguagesRfc4646ToIso639 (
449 IN CONST CHAR8
*Rfc4646Languages
455 CHAR8
*Iso639Languages
;
457 ASSERT (Rfc4646Languages
!= NULL
);
460 // Determine the number of languages in the RFC 4646 language codes string
462 for (Rfc4646Index
= 0, NumLanguages
= 1; Rfc4646Languages
[Rfc4646Index
] != '\0'; Rfc4646Index
++) {
463 if (Rfc4646Languages
[Rfc4646Index
] == ';') {
469 // Allocate buffer for ISO 639-2 language codes string
471 Iso639Languages
= AllocateZeroPool (NumLanguages
* 3 + 1);
472 if (Iso639Languages
== NULL
) {
477 // Do the conversion for each RFC 4646 language code
479 for (Rfc4646Index
= 0, Iso639Index
= 0; Rfc4646Languages
[Rfc4646Index
] != '\0';) {
480 if (ConvertRfc4646ToIso639 (&Iso639Languages
[Iso639Index
], &Rfc4646Languages
[Rfc4646Index
])) {
483 FreePool (Iso639Languages
);
487 // Locate next language code
489 while (Rfc4646Languages
[Rfc4646Index
] != ';' && Rfc4646Languages
[Rfc4646Index
] != '\0') {
492 if (Rfc4646Languages
[Rfc4646Index
] == ';') {
496 Iso639Languages
[Iso639Index
] = '\0';
497 return Iso639Languages
;