]> git.proxmox.com Git - mirror_edk2.git/blob - RedfishPkg/Library/BaseUcs2Utf8Lib/BaseUcs2Utf8Lib.c
RedfishPkg/Ucs2Utf8lib: UCS2 to UTF8 manipulation library
[mirror_edk2.git] / RedfishPkg / Library / BaseUcs2Utf8Lib / BaseUcs2Utf8Lib.c
1 /** @file
2 UCS2 to UTF8 manipulation library.
3
4 Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.<BR>
5 (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>
6
7 SPDX-License-Identifier: BSD-2-Clause-Patent
8
9 **/
10 #include <Uefi.h>
11 #include <Library/BaseLib.h>
12 #include <Library/BaseMemoryLib.h>
13 #include <Library/BaseUcs2Utf8Lib.h>
14 #include <Library/DebugLib.h>
15 #include <Library/MemoryAllocationLib.h>
16
17 /**
18 Since each UCS2 character can be represented by 1-3 UTF8 encoded characters,
19 this function is used to retrieve the UTF8 encoding size for a UCS2 character.
20
21 @param[in] Utf8Buffer The buffer for UTF8 encoded data.
22
23 @retval Return the size of UTF8 encoding string or 0 if it is not for
24 UCS2 format.
25
26 **/
27 UINT8
28 GetUTF8SizeForUCS2 (
29 IN CHAR8 *Utf8Buffer
30 )
31 {
32 CHAR8 TempChar;
33 UINT8 Utf8Size;
34
35 ASSERT (Utf8Buffer != NULL);
36
37 TempChar = *Utf8Buffer;
38 if ((TempChar & 0xF0) == 0xF0) {
39
40 //
41 // This format is not for UCS2.
42 //
43 return 0;
44 }
45
46 Utf8Size = 1;
47 if ((TempChar & 0x80) == 0x80) {
48 if ((TempChar & 0xC0) == 0xC0) {
49
50 Utf8Size ++;
51 if ((TempChar & 0xE0) == 0xE0) {
52
53 Utf8Size ++;
54 }
55 }
56 }
57
58 return Utf8Size;
59 }
60
61 /**
62 Since each UCS2 character can be represented by the format: \uXXXX, this function
63 is used to retrieve the UCS2 character from a Unicode format.
64 Call MUST make sure there are at least 6 Bytes in the input UTF8 buffer.
65
66 @param[in] Utf8Buffer The buffer for UTF8 encoded data.
67 @param[out] Ucs2Char The converted UCS2 character.
68
69 @retval EFI_INVALID_PARAMETER Non-Ascii characters found in the hexadecimal
70 digits string, and can't be converted to a UCS2
71 character.
72 @retval EFI_SUCCESS The UCS2 character has been retrieved.
73
74 **/
75 EFI_STATUS
76 GetUCS2CharByFormat (
77 IN CHAR8 *Utf8Buffer,
78 OUT CHAR16 *Ucs2Char
79 )
80 {
81 UINT8 Num1;
82 UINT8 Num2;
83 UINT8 Index;
84 CHAR8 Ucs2CharFormat[UNICODE_FORMAT_CHAR_SIZE]; /// two Hexadecimal digits Ascii string, like "3F"
85
86 for (Index = 0; Index < 4; Index ++) {
87 if ((*(Utf8Buffer + 2 + Index) & 0x80) != 0x00) {
88 return EFI_INVALID_PARAMETER;
89 }
90 }
91
92 ZeroMem (Ucs2CharFormat, UNICODE_FORMAT_CHAR_SIZE);
93
94 //
95 // Get the First Number, Offset is 2
96 //
97 CopyMem (Ucs2CharFormat, Utf8Buffer + 2, UNICODE_FORMAT_CHAR_LEN);
98 Num1 = (UINT8) AsciiStrHexToUintn (Ucs2CharFormat);
99
100 //
101 // Get the Second Number, Offset is 4
102 //
103 CopyMem (Ucs2CharFormat, Utf8Buffer + 4, UNICODE_FORMAT_CHAR_LEN);
104 Num2 = (UINT8) AsciiStrHexToUintn (Ucs2CharFormat);
105
106 //
107 // Ucs2Char is Little-Endian
108 //
109 *((CHAR8 *) Ucs2Char) = Num2;
110 *(((CHAR8 *) Ucs2Char) + 1) = Num1;
111
112 return EFI_SUCCESS;
113 }
114
115 /**
116 Convert a UCS2 character to UTF8 encoding string.
117
118 @param[in] Ucs2Char The provided UCS2 character.
119 @param[out] Utf8Buffer The converted UTF8 encoded data.
120
121 @retval Return the size of UTF8 encoding data for this UCS2 character.
122
123 **/
124 UINT8
125 UCS2CharToUTF8 (
126 IN CHAR16 Ucs2Char,
127 OUT CHAR8 *Utf8Buffer
128 )
129 {
130 UINT16 Ucs2Number;
131
132 ASSERT (Utf8Buffer != NULL);
133
134 Ucs2Number = (UINT16) Ucs2Char;
135 if (Ucs2Number <= 0x007F) {
136
137 //
138 // UTF8 format: 0xxxxxxx
139 //
140 *Utf8Buffer = Ucs2Char & 0x7F;
141 return 1;
142
143 } else if (Ucs2Number >= 0x0080 && Ucs2Number <= 0x07FF) {
144
145 //
146 // UTF8 format: 110xxxxx 10xxxxxx
147 //
148 *(Utf8Buffer + 1) = (Ucs2Char & 0x3F) | 0x80;
149 *Utf8Buffer = ((Ucs2Char >> 6) & 0x1F) | 0xC0;
150 return 2;
151
152 } else { /// Ucs2Number >= 0x0800 && Ucs2Number <= 0xFFFF
153
154 //
155 // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
156 //
157 *(Utf8Buffer + 2) = (Ucs2Char & 0x3F) | 0x80;
158 *(Utf8Buffer + 1) = ((Ucs2Char >> 6) & 0x3F) | 0x80;
159 *Utf8Buffer = ((Ucs2Char >> 12) & 0x0F) | 0xE0;
160 return 3;
161 }
162 }
163
164 /**
165 Convert a UTF8 encoded data to a UCS2 character.
166
167 @param[in] Utf8Buffer The provided UTF8 encoded data.
168 @param[out] Ucs2Char The converted UCS2 character.
169
170 @retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid or
171 not for UCS2 character.
172 @retval EFI_SUCCESS The converted UCS2 character.
173
174 **/
175 EFI_STATUS
176 UTF8ToUCS2Char (
177 IN CHAR8 *Utf8Buffer,
178 OUT CHAR16 *Ucs2Char
179 )
180 {
181 UINT8 Utf8Size;
182 CHAR8 *Ucs2Buffer;
183 CHAR8 TempChar1;
184 CHAR8 TempChar2;
185 CHAR8 TempChar3;
186
187 ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);
188 ZeroMem (Ucs2Char, sizeof (CHAR16));
189 Ucs2Buffer = (CHAR8 *) Ucs2Char;
190
191 Utf8Size = GetUTF8SizeForUCS2 (Utf8Buffer);
192 switch (Utf8Size) {
193
194 case 1:
195
196 //
197 // UTF8 format: 0xxxxxxx
198 //
199 TempChar1 = *Utf8Buffer;
200 if ((TempChar1 & 0x80) != 0x00) {
201 return EFI_INVALID_PARAMETER;
202 }
203
204 *Ucs2Buffer = TempChar1;
205 *(Ucs2Buffer + 1) = 0;
206 break;
207
208 case 2:
209
210 //
211 // UTF8 format: 110xxxxx 10xxxxxx
212 //
213 TempChar1 = *Utf8Buffer;
214 if ((TempChar1 & 0xE0) != 0xC0) {
215 return EFI_INVALID_PARAMETER;
216 }
217
218 TempChar2 = *(Utf8Buffer + 1);
219 if ((TempChar2 & 0xC0) != 0x80) {
220 return EFI_INVALID_PARAMETER;
221 }
222
223 *Ucs2Buffer = (TempChar1 << 6) + (TempChar2 & 0x3F);
224 *(Ucs2Buffer + 1) = (TempChar1 >> 2) & 0x07;
225 break;
226
227 case 3:
228
229 //
230 // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
231 //
232 TempChar1 = *Utf8Buffer;
233 if ((TempChar1 & 0xF0) != 0xE0) {
234 return EFI_INVALID_PARAMETER;
235 }
236
237 TempChar2 = *(Utf8Buffer + 1);
238 if ((TempChar2 & 0xC0) != 0x80) {
239 return EFI_INVALID_PARAMETER;
240 }
241
242 TempChar3 = *(Utf8Buffer + 2);
243 if ((TempChar3 & 0xC0) != 0x80) {
244 return EFI_INVALID_PARAMETER;
245 }
246
247 *Ucs2Buffer = (TempChar2 << 6) + (TempChar3 & 0x3F);
248 *(Ucs2Buffer + 1) = (TempChar1 << 4) + ((TempChar2 >> 2) & 0x0F);
249
250 break;
251
252 default:
253
254 return EFI_INVALID_PARAMETER;
255 }
256
257 return EFI_SUCCESS;
258 }
259
260 /**
261 Convert a UCS2 string to a UTF8 encoded string.
262
263 @param[in] Ucs2Str The provided UCS2 string.
264 @param[out] Utf8StrAddr The converted UTF8 string address. Caller
265 is responsible for Free this string.
266
267 @retval EFI_INVALID_PARAMETER One or more parameters are invalid.
268 @retval EFI_OUT_OF_RESOURCES System runs out of resources.
269 @retval EFI_SUCCESS The UTF8 encoded string has been converted.
270
271 **/
272 EFI_STATUS
273 UCS2StrToUTF8 (
274 IN CHAR16 *Ucs2Str,
275 OUT CHAR8 **Utf8StrAddr
276 )
277 {
278 UINTN Ucs2StrIndex;
279 UINTN Ucs2StrLength;
280 CHAR8 *Utf8Str;
281 UINTN Utf8StrLength;
282 UINTN Utf8StrIndex;
283 CHAR8 Utf8Buffer[UTF8_BUFFER_FOR_UCS2_MAX_SIZE];
284 UINT8 Utf8BufferSize;
285
286 if (Ucs2Str == NULL || Utf8StrAddr == NULL) {
287 return EFI_INVALID_PARAMETER;
288 }
289
290 Ucs2StrLength = StrLen (Ucs2Str);
291 Utf8StrLength = 0;
292
293 for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex ++) {
294
295 ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
296 Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
297 Utf8StrLength += Utf8BufferSize;
298 }
299
300 Utf8Str = AllocateZeroPool (Utf8StrLength + 1);
301 if (Utf8Str == NULL) {
302 return EFI_OUT_OF_RESOURCES;
303 }
304
305 Utf8StrIndex = 0;
306 for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex ++) {
307
308 ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
309 Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
310
311 CopyMem (Utf8Str + Utf8StrIndex, Utf8Buffer, Utf8BufferSize);
312 Utf8StrIndex += Utf8BufferSize;
313 }
314
315 Utf8Str[Utf8StrIndex] = '\0';
316 *Utf8StrAddr = Utf8Str;
317
318 return EFI_SUCCESS;
319 }
320
321 /**
322 Convert a UTF8 encoded string to a UCS2 string.
323
324 @param[in] Utf8Str The provided UTF8 encoded string.
325 @param[out] Ucs2StrAddr The converted UCS2 string address. Caller
326 is responsible for Free this string.
327
328 @retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid to
329 convert to UCS2 string.
330 One or more parameters are invalid.
331 @retval EFI_OUT_OF_RESOURCES System runs out of resources.
332 @retval EFI_SUCCESS The UCS2 string has been converted.
333
334 **/
335 EFI_STATUS
336 UTF8StrToUCS2 (
337 IN CHAR8 *Utf8Str,
338 OUT CHAR16 **Ucs2StrAddr
339 )
340 {
341 EFI_STATUS Status;
342 UINTN Utf8StrIndex;
343 UINTN Utf8StrLength;
344 UINTN Ucs2StrIndex;
345 UINT8 Utf8BufferSize;
346 CHAR16 *Ucs2StrTemp;
347
348 if (Utf8Str == NULL || Ucs2StrAddr == NULL) {
349 return EFI_INVALID_PARAMETER;
350 }
351
352 //
353 // It is not an Ascii string, calculate string length.
354 //
355 Utf8StrLength = 0;
356 while (*(Utf8Str + Utf8StrLength) != '\0') {
357 Utf8StrLength ++;
358 }
359
360 //
361 // UCS2 string shall not be longer than the UTF8 string.
362 //
363 Ucs2StrTemp = AllocateZeroPool ((Utf8StrLength + 1) * sizeof (CHAR16));
364 if (Ucs2StrTemp == NULL) {
365 return EFI_OUT_OF_RESOURCES;
366 }
367
368 Utf8StrIndex = 0;
369 Ucs2StrIndex = 0;
370 while (Utf8Str[Utf8StrIndex] != '\0') {
371
372 if (CompareMem (Utf8Str + Utf8StrIndex, "\\u", 2) == 0 &&
373 Utf8StrLength - Utf8StrIndex >= UNICODE_FORMAT_LEN) {
374
375 Status = GetUCS2CharByFormat (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
376 if (!EFI_ERROR (Status)) {
377
378 Utf8StrIndex += UNICODE_FORMAT_LEN;
379 Ucs2StrIndex ++;
380 } else {
381
382 StrCpyS (Ucs2StrTemp + Ucs2StrIndex, 3, L"\\u");
383
384 Ucs2StrIndex += 2;
385 Utf8StrIndex += 2;
386 }
387 } else {
388
389 Utf8BufferSize = GetUTF8SizeForUCS2 (Utf8Str + Utf8StrIndex);
390 if (Utf8BufferSize == 0 || Utf8StrLength - Utf8StrIndex < Utf8BufferSize) {
391
392 FreePool (Ucs2StrTemp);
393 return EFI_INVALID_PARAMETER;
394 }
395
396 Status = UTF8ToUCS2Char (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
397 if (EFI_ERROR (Status)) {
398
399 FreePool (Ucs2StrTemp);
400 return EFI_INVALID_PARAMETER;
401 }
402
403 Ucs2StrIndex ++;
404 Utf8StrIndex += Utf8BufferSize;
405 }
406 }
407
408 *Ucs2StrAddr = AllocateZeroPool ((Ucs2StrIndex + 1) * sizeof (CHAR16));
409 if (*Ucs2StrAddr == NULL) {
410
411 FreePool (Ucs2StrTemp);
412 return EFI_OUT_OF_RESOURCES;
413 }
414
415 StrCpyS (*Ucs2StrAddr, Ucs2StrIndex + 1, Ucs2StrTemp);
416 *(*Ucs2StrAddr + Ucs2StrIndex) = L'\0';
417 FreePool (Ucs2StrTemp);
418
419 return EFI_SUCCESS;
420 }
421