2 UCS2 to UTF8 manipulation library.
4 Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.<BR>
5 (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>
7 SPDX-License-Identifier: BSD-2-Clause-Patent
11 #include <Library/BaseLib.h>
12 #include <Library/BaseMemoryLib.h>
13 #include <Library/BaseUcs2Utf8Lib.h>
14 #include <Library/DebugLib.h>
15 #include <Library/MemoryAllocationLib.h>
18 Since each UCS2 character is encoded as 1-3 bytes in UTF8,
19 this function is used to retrieve the UTF8 encoding size for a UCS2 character.
21 @param[in] Utf8Buffer The buffer for UTF8 encoded data.
23 @retval Return the size of the UTF8 encoded sequence, or 0 if it is not for UCS2.
// Visible fragment of the routine that classifies a UTF8 sequence by its
// lead byte. The signature, local declarations and return statements are
// not part of this listing.
// The input pointer must not be NULL.
35 ASSERT (Utf8Buffer
!= NULL
);
// Only the first byte of the buffer is inspected.
37 TempChar
= *Utf8Buffer
;
// Lead byte 1111xxxx: all four high bits are set.
38 if ((TempChar
& 0xF0) == 0xF0) {
41 // This format is not for UCS2.
// High bit set: the byte is not plain 7-bit ASCII (0xxxxxxx).
47 if ((TempChar
& 0x80) == 0x80) {
// Top two bits set (11xxxxxx): lead byte of a multi-byte sequence.
48 if ((TempChar
& 0xC0) == 0xC0) {
// Top three bits set (111xxxxx). 1111xxxx was already handled above, so
// at this point the byte has the form 1110xxxx.
51 if ((TempChar
& 0xE0) == 0xE0) {
62 Since each UCS2 character can be represented by the format: \uXXXX, this function
63 is used to retrieve the UCS2 character from a Unicode format.
64 The caller MUST make sure there are at least 6 bytes in the input UTF8 buffer.
66 @param[in] Utf8Buffer The buffer for UTF8 encoded data.
67 @param[out] Ucs2Char The converted UCS2 character.
69 @retval EFI_INVALID_PARAMETER Non-ASCII characters were found in the hexadecimal
70 digit string, so it can't be converted to a UCS2 character.
72 @retval EFI_SUCCESS The UCS2 character has been retrieved.
// Visible fragment of the routine that decodes a "\uXXXX" escape into one
// UCS2 character. The signature, the remaining local declarations and the
// final return are not part of this listing.
84 CHAR8 Ucs2CharFormat
[UNICODE_FORMAT_CHAR_SIZE
]; /// two Hexadecimal digits Ascii string, like "3F"
// Inspect the four bytes at offsets 2..5 (the bytes after the two-byte
// prefix) and reject the input if any of them has its high bit set.
// NOTE(review): only the high bit is tested; an ASCII byte that is not a
// hexadecimal digit is not rejected here. How AsciiStrHexToUintn treats
// such input is not visible in this file - confirm.
86 for (Index
= 0; Index
< 4; Index
++) {
87 if ((*(Utf8Buffer
+ 2 + Index
) & 0x80) != 0x00) {
88 return EFI_INVALID_PARAMETER
;
// Clear the whole scratch buffer before the digit pairs are copied in.
92 ZeroMem (Ucs2CharFormat
, UNICODE_FORMAT_CHAR_SIZE
);
95 // Get the First Number, Offset is 2
// Copy UNICODE_FORMAT_CHAR_LEN bytes starting at offset 2 and convert them
// as a hexadecimal ASCII string; the result is truncated to 8 bits.
97 CopyMem (Ucs2CharFormat
, Utf8Buffer
+ 2, UNICODE_FORMAT_CHAR_LEN
);
98 Num1
= (UINT8
) AsciiStrHexToUintn (Ucs2CharFormat
);
101 // Get the Second Number, Offset is 4
// Same conversion for the bytes starting at offset 4.
103 CopyMem (Ucs2CharFormat
, Utf8Buffer
+ 4, UNICODE_FORMAT_CHAR_LEN
);
104 Num2
= (UINT8
) AsciiStrHexToUintn (Ucs2CharFormat
);
107 // Ucs2Char is Little-Endian
// The output is written byte-wise: Num2 (from offset 4) goes to the first
// byte of *Ucs2Char and Num1 (from offset 2) goes to the second byte.
109 *((CHAR8
*) Ucs2Char
) = Num2
;
110 *(((CHAR8
*) Ucs2Char
) + 1) = Num1
;
116 Convert a UCS2 character to UTF8 encoding string.
118 @param[in] Ucs2Char The provided UCS2 character.
119 @param[out] Utf8Buffer The converted UTF8 encoded data.
121 @retval Return the size of UTF8 encoding data for this UCS2 character.
// Visible fragment of the routine that encodes one UCS2 character as UTF8.
// The rest of the signature, the local declarations and the return
// statements are not part of this listing.
127 OUT CHAR8
*Utf8Buffer
// The output buffer must not be NULL.
132 ASSERT (Utf8Buffer
!= NULL
);
// Work on the 16-bit numeric value of the character for range comparisons.
134 Ucs2Number
= (UINT16
) Ucs2Char
;
// Values up to 0x007F: a single output byte is written.
135 if (Ucs2Number
<= 0x007F) {
138 // UTF8 format: 0xxxxxxx
140 *Utf8Buffer
= Ucs2Char
& 0x7F;
// Values 0x0080..0x07FF: two output bytes are written.
143 } else if (Ucs2Number
>= 0x0080 && Ucs2Number
<= 0x07FF) {
146 // UTF8 format: 110xxxxx 10xxxxxx
// Second byte: low 6 bits with the 10 prefix.
148 *(Utf8Buffer
+ 1) = (Ucs2Char
& 0x3F) | 0x80;
// First byte: the next 5 bits with the 110 prefix.
149 *Utf8Buffer
= ((Ucs2Char
>> 6) & 0x1F) | 0xC0;
// All remaining values: three output bytes are written.
152 } else { /// Ucs2Number >= 0x0800 && Ucs2Number <= 0xFFFF
155 // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
// Third byte: bits 0-5 with the 10 prefix.
157 *(Utf8Buffer
+ 2) = (Ucs2Char
& 0x3F) | 0x80;
// Second byte: bits 6-11 with the 10 prefix.
158 *(Utf8Buffer
+ 1) = ((Ucs2Char
>> 6) & 0x3F) | 0x80;
// First byte: bits 12-15 with the 1110 prefix.
159 *Utf8Buffer
= ((Ucs2Char
>> 12) & 0x0F) | 0xE0;
165 Convert a UTF8 encoded data to a UCS2 character.
167 @param[in] Utf8Buffer The provided UTF8 encoded data.
168 @param[out] Ucs2Char The converted UCS2 character.
170 @retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid or
171 not for UCS2 character.
172 @retval EFI_SUCCESS The converted UCS2 character.
// Visible fragment of the routine that decodes one UTF8 sequence into a
// UCS2 character. The rest of the signature, the local declarations, the
// dispatch on Utf8Size and the success returns are not part of this listing.
177 IN CHAR8
*Utf8Buffer
,
// Both pointers must be non-NULL.
187 ASSERT (Utf8Buffer
!= NULL
&& Ucs2Char
!= NULL
);
// Start from a zeroed output character.
188 ZeroMem (Ucs2Char
, sizeof (CHAR16
));
// Byte-wise view of the output: index 0 is written as the low byte and
// index 1 as the high byte below.
// NOTE(review): this assumes a little-endian CHAR16 layout - confirm.
189 Ucs2Buffer
= (CHAR8
*) Ucs2Char
;
// Sequence length is derived from the lead byte.
191 Utf8Size
= GetUTF8SizeForUCS2 (Utf8Buffer
);
197 // UTF8 format: 0xxxxxxx
// One-byte form: the high bit must be clear, otherwise the input is rejected.
199 TempChar1
= *Utf8Buffer
;
200 if ((TempChar1
& 0x80) != 0x00) {
201 return EFI_INVALID_PARAMETER
;
// The byte value becomes the low byte; the high byte is 0.
204 *Ucs2Buffer
= TempChar1
;
205 *(Ucs2Buffer
+ 1) = 0;
211 // UTF8 format: 110xxxxx 10xxxxxx
// Two-byte form: the lead byte must match 110xxxxx.
213 TempChar1
= *Utf8Buffer
;
214 if ((TempChar1
& 0xE0) != 0xC0) {
215 return EFI_INVALID_PARAMETER
;
// The second byte must match 10xxxxxx.
218 TempChar2
= *(Utf8Buffer
+ 1);
219 if ((TempChar2
& 0xC0) != 0x80) {
220 return EFI_INVALID_PARAMETER
;
// Low byte: low 2 payload bits of the lead byte shifted above the 6 payload
// bits of the second byte (the store through a CHAR8 pointer keeps 8 bits).
223 *Ucs2Buffer
= (TempChar1
<< 6) + (TempChar2
& 0x3F);
// High byte: the remaining 3 payload bits of the lead byte.
224 *(Ucs2Buffer
+ 1) = (TempChar1
>> 2) & 0x07;
230 // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
// Three-byte form: the lead byte must match 1110xxxx.
232 TempChar1
= *Utf8Buffer
;
233 if ((TempChar1
& 0xF0) != 0xE0) {
234 return EFI_INVALID_PARAMETER
;
// The second and third bytes must each match 10xxxxxx.
237 TempChar2
= *(Utf8Buffer
+ 1);
238 if ((TempChar2
& 0xC0) != 0x80) {
239 return EFI_INVALID_PARAMETER
;
242 TempChar3
= *(Utf8Buffer
+ 2);
243 if ((TempChar3
& 0xC0) != 0x80) {
244 return EFI_INVALID_PARAMETER
;
// Low byte: low 2 payload bits of the second byte above the 6 payload bits
// of the third byte.
247 *Ucs2Buffer
= (TempChar2
<< 6) + (TempChar3
& 0x3F);
// High byte: 4 payload bits of the lead byte above the upper 4 payload bits
// of the second byte.
248 *(Ucs2Buffer
+ 1) = (TempChar1
<< 4) + ((TempChar2
>> 2) & 0x0F);
// Presumably the fall-through for any other Utf8Size value; the enclosing
// construct is not visible here - confirm.
254 return EFI_INVALID_PARAMETER
;
261 Convert a UCS2 string to a UTF8 encoded string.
263 @param[in] Ucs2Str The provided UCS2 string.
264 @param[out] Utf8StrAddr The converted UTF8 string address. The caller
265 is responsible for freeing this string.
267 @retval EFI_INVALID_PARAMETER One or more parameters are invalid.
268 @retval EFI_OUT_OF_RESOURCES System runs out of resources.
269 @retval EFI_SUCCESS The UTF8 encoded string has been converted.
// Visible fragment of the routine that converts a UCS2 string into a newly
// allocated UTF8 string. The rest of the signature, the other local
// declarations (including the initial values of Utf8StrLength and
// Utf8StrIndex) and the final return are not part of this listing.
275 OUT CHAR8
**Utf8StrAddr
// Scratch buffer that receives the encoding of one character at a time,
// and the number of bytes the encoder reported for it.
283 CHAR8 Utf8Buffer
[UTF8_BUFFER_FOR_UCS2_MAX_SIZE
];
284 UINT8 Utf8BufferSize
;
// Reject NULL input or output pointers.
286 if (Ucs2Str
== NULL
|| Utf8StrAddr
== NULL
) {
287 return EFI_INVALID_PARAMETER
;
// Number of characters in the source string.
290 Ucs2StrLength
= StrLen (Ucs2Str
);
// First pass: encode each character into the scratch buffer only to add up
// the total number of UTF8 bytes required.
293 for (Ucs2StrIndex
= 0; Ucs2StrIndex
< Ucs2StrLength
; Ucs2StrIndex
++) {
295 ZeroMem (Utf8Buffer
, sizeof (Utf8Buffer
));
296 Utf8BufferSize
= UCS2CharToUTF8 (Ucs2Str
[Ucs2StrIndex
], Utf8Buffer
);
297 Utf8StrLength
+= Utf8BufferSize
;
// Allocate the zero-filled result with one extra byte for the terminator.
300 Utf8Str
= AllocateZeroPool (Utf8StrLength
+ 1);
301 if (Utf8Str
== NULL
) {
302 return EFI_OUT_OF_RESOURCES
;
// Second pass: encode each character again and append its bytes to the
// result at the running offset Utf8StrIndex.
306 for (Ucs2StrIndex
= 0; Ucs2StrIndex
< Ucs2StrLength
; Ucs2StrIndex
++) {
308 ZeroMem (Utf8Buffer
, sizeof (Utf8Buffer
));
309 Utf8BufferSize
= UCS2CharToUTF8 (Ucs2Str
[Ucs2StrIndex
], Utf8Buffer
);
311 CopyMem (Utf8Str
+ Utf8StrIndex
, Utf8Buffer
, Utf8BufferSize
);
312 Utf8StrIndex
+= Utf8BufferSize
;
// Terminate the result and hand the buffer to the caller.
315 Utf8Str
[Utf8StrIndex
] = '\0';
316 *Utf8StrAddr
= Utf8Str
;
322 Convert a UTF8 encoded string to a UCS2 string.
324 @param[in] Utf8Str The provided UTF8 encoded string.
325 @param[out] Ucs2StrAddr The converted UCS2 string address. The caller
326 is responsible for freeing this string.
328 @retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid to
329 convert to UCS2 string.
330 One or more parameters are invalid.
331 @retval EFI_OUT_OF_RESOURCES System runs out of resources.
332 @retval EFI_SUCCESS The UCS2 string has been converted.
// Visible fragment of the routine that converts a UTF8 string into a newly
// allocated UCS2 string. The rest of the signature, the other local
// declarations, several loop bodies / else-branches and the final return
// are not part of this listing.
338 OUT CHAR16
**Ucs2StrAddr
// Byte length of the UTF8 sequence currently being decoded.
345 UINT8 Utf8BufferSize
;
// Reject NULL input or output pointers.
348 if (Utf8Str
== NULL
|| Ucs2StrAddr
== NULL
) {
349 return EFI_INVALID_PARAMETER
;
353 // It is not an Ascii string, calculate string length.
// Scan for the terminating NUL byte; the loop body is not visible here,
// presumably it advances Utf8StrLength - confirm.
356 while (*(Utf8Str
+ Utf8StrLength
) != '\0') {
361 // UCS2 string shall not be longer than the UTF8 string.
// Temporary output sized for one CHAR16 per input byte plus a terminator.
363 Ucs2StrTemp
= AllocateZeroPool ((Utf8StrLength
+ 1) * sizeof (CHAR16
));
364 if (Ucs2StrTemp
== NULL
) {
365 return EFI_OUT_OF_RESOURCES
;
// Walk the input until its terminating NUL.
370 while (Utf8Str
[Utf8StrIndex
] != '\0') {
// If the text at the current position starts with the two bytes "\u" and
// at least UNICODE_FORMAT_LEN bytes remain, try to decode it as an escape.
372 if (CompareMem (Utf8Str
+ Utf8StrIndex
, "\\u", 2) == 0 &&
373 Utf8StrLength
- Utf8StrIndex
>= UNICODE_FORMAT_LEN
) {
375 Status
= GetUCS2CharByFormat (Utf8Str
+ Utf8StrIndex
, Ucs2StrTemp
+ Ucs2StrIndex
);
// On success the whole escape is consumed.
376 if (!EFI_ERROR (Status
)) {
378 Utf8StrIndex
+= UNICODE_FORMAT_LEN
;
// Copies the literal two-character string "\u" into the output at the
// current position. Presumably this is the path taken when the escape
// could not be decoded; the enclosing branch is not visible - confirm.
382 StrCpyS (Ucs2StrTemp
+ Ucs2StrIndex
, 3, L
"\\u");
// Ordinary UTF8 data: determine the sequence length from the lead byte and
// fail if it is not valid for UCS2 or runs past the end of the input.
389 Utf8BufferSize
= GetUTF8SizeForUCS2 (Utf8Str
+ Utf8StrIndex
);
390 if (Utf8BufferSize
== 0 || Utf8StrLength
- Utf8StrIndex
< Utf8BufferSize
) {
// Release the temporary buffer before reporting the error.
392 FreePool (Ucs2StrTemp
);
393 return EFI_INVALID_PARAMETER
;
// Decode the sequence directly into the next output slot.
396 Status
= UTF8ToUCS2Char (Utf8Str
+ Utf8StrIndex
, Ucs2StrTemp
+ Ucs2StrIndex
);
397 if (EFI_ERROR (Status
)) {
// Release the temporary buffer before reporting the error.
399 FreePool (Ucs2StrTemp
);
400 return EFI_INVALID_PARAMETER
;
// Advance past the bytes just decoded.
404 Utf8StrIndex
+= Utf8BufferSize
;
// Allocate the final result sized for the characters actually produced,
// plus a terminator.
408 *Ucs2StrAddr
= AllocateZeroPool ((Ucs2StrIndex
+ 1) * sizeof (CHAR16
));
409 if (*Ucs2StrAddr
== NULL
) {
// Release the temporary buffer before reporting the error.
411 FreePool (Ucs2StrTemp
);
412 return EFI_OUT_OF_RESOURCES
;
// Copy the converted string into the result, terminate it explicitly and
// release the temporary buffer.
415 StrCpyS (*Ucs2StrAddr
, Ucs2StrIndex
+ 1, Ucs2StrTemp
);
416 *(*Ucs2StrAddr
+ Ucs2StrIndex
) = L
'\0';
417 FreePool (Ucs2StrTemp
);