--- /dev/null
+/** @file\r
+ UCS2 to UTF8 manipulation library.\r
+\r
+ Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.<BR>\r
+ (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>\r
+\r
+ SPDX-License-Identifier: BSD-2-Clause-Patent\r
+\r
+**/\r
+#include <Uefi.h>\r
+#include <Library/BaseLib.h>\r
+#include <Library/BaseMemoryLib.h>\r
+#include <Library/BaseUcs2Utf8Lib.h>\r
+#include <Library/DebugLib.h>\r
+#include <Library/MemoryAllocationLib.h>\r
+\r
+/**\r
+  Report how many UTF8 bytes encode the UCS2 character that starts at\r
+  Utf8Buffer, judging only by the first (lead) byte.\r
+\r
+  Lead byte pattern and result:\r
+    1111xxxx  -> 0 (cannot be represented as a single UCS2 character)\r
+    1110xxxx  -> 3\r
+    110xxxxx  -> 2\r
+    otherwise -> 1 (this includes a 10xxxxxx byte; the following bytes are\r
+                    not inspected here)\r
+\r
+  @param[in]    Utf8Buffer        The buffer for UTF8 encoded data.\r
+\r
+  @retval       Return the size of UTF8 encoding string or 0 if it is not for\r
+                UCS2 format.\r
+\r
+**/\r
+UINT8\r
+GetUTF8SizeForUCS2 (\r
+  IN    CHAR8      *Utf8Buffer\r
+  )\r
+{\r
+  CHAR8  LeadByte;\r
+\r
+  ASSERT (Utf8Buffer != NULL);\r
+\r
+  LeadByte = *Utf8Buffer;\r
+\r
+  //\r
+  // Test the longest prefix first so that each check only has to look at\r
+  // the bits that distinguish it from the shorter forms.\r
+  //\r
+  if ((LeadByte & 0xF0) == 0xF0) {\r
+    //\r
+    // This format is not for UCS2.\r
+    //\r
+    return 0;\r
+  }\r
+\r
+  if ((LeadByte & 0xE0) == 0xE0) {\r
+    return 3;\r
+  }\r
+\r
+  if ((LeadByte & 0xC0) == 0xC0) {\r
+    return 2;\r
+  }\r
+\r
+  return 1;\r
+}\r
+\r
+/**\r
+  Since each UCS2 character can be represented by the format: \uXXXX, this function\r
+  is used to retrieve the UCS2 character from a Unicode format.\r
+  Caller MUST make sure there are at least 6 Bytes in the input UTF8 buffer.\r
+\r
+  The first two bytes (expected to be "\u") are skipped without being\r
+  examined. The following four bytes must each be a hexadecimal digit\r
+  (0-9, a-f, A-F); they are read most significant digit first.\r
+\r
+  @param[in]    Utf8Buffer        The buffer for UTF8 encoded data.\r
+  @param[out]   Ucs2Char          The converted UCS2 character.\r
+\r
+  @retval       EFI_INVALID_PARAMETER     Utf8Buffer or Ucs2Char is NULL, or one of\r
+                                          the four characters after "\u" is not a\r
+                                          hexadecimal digit. Ucs2Char is not\r
+                                          written in this case.\r
+  @retval       EFI_SUCCESS               The UCS2 character has been retrieved.\r
+\r
+**/\r
+EFI_STATUS\r
+GetUCS2CharByFormat (\r
+  IN    CHAR8         *Utf8Buffer,\r
+  OUT   CHAR16        *Ucs2Char\r
+  )\r
+{\r
+  UINT8   Index;\r
+  UINT8   Nibble;\r
+  UINT16  Value;\r
+  CHAR8   Digit;\r
+\r
+  if (Utf8Buffer == NULL || Ucs2Char == NULL) {\r
+    return EFI_INVALID_PARAMETER;\r
+  }\r
+\r
+  Value = 0;\r
+  for (Index = 0; Index < 4; Index ++) {\r
+    //\r
+    // Hexadecimal digits start at offset 2, right after "\u".\r
+    //\r
+    Digit = Utf8Buffer[2 + Index];\r
+\r
+    //\r
+    // Validate every digit explicitly. A generic hex string parser would\r
+    // stop silently at the first non-hex character, so a malformed escape\r
+    // would be decoded into a wrong character instead of being rejected.\r
+    //\r
+    if (Digit >= '0' && Digit <= '9') {\r
+      Nibble = (UINT8) (Digit - '0');\r
+    } else if (Digit >= 'a' && Digit <= 'f') {\r
+      Nibble = (UINT8) (Digit - 'a' + 10);\r
+    } else if (Digit >= 'A' && Digit <= 'F') {\r
+      Nibble = (UINT8) (Digit - 'A' + 10);\r
+    } else {\r
+      return EFI_INVALID_PARAMETER;\r
+    }\r
+\r
+    Value = (UINT16) ((Value << 4) | Nibble);\r
+  }\r
+\r
+  //\r
+  // Store as a whole CHAR16 so the result does not depend on byte order.\r
+  //\r
+  *Ucs2Char = (CHAR16) Value;\r
+\r
+  return EFI_SUCCESS;\r
+}\r
+\r
+/**\r
+  Encode one UCS2 character as UTF8.\r
+\r
+  Depending on the value of the character, 1, 2 or 3 bytes are written to\r
+  Utf8Buffer; no terminator is appended.\r
+\r
+  @param[in]    Ucs2Char          The provided UCS2 character.\r
+  @param[out]   Utf8Buffer        The converted UTF8 encoded data. Must have room\r
+                                  for up to 3 bytes.\r
+\r
+  @retval      Return the size of UTF8 encoding data for this UCS2 character.\r
+\r
+**/\r
+UINT8\r
+UCS2CharToUTF8 (\r
+  IN  CHAR16     Ucs2Char,\r
+  OUT CHAR8      *Utf8Buffer\r
+  )\r
+{\r
+  UINT16  CodePoint;\r
+  UINT8   EncodedSize;\r
+\r
+  ASSERT (Utf8Buffer != NULL);\r
+\r
+  CodePoint = (UINT16) Ucs2Char;\r
+\r
+  if (CodePoint < 0x0080) {\r
+    //\r
+    // 0x0000 - 0x007F, UTF8 format: 0xxxxxxx\r
+    //\r
+    Utf8Buffer[0] = (CHAR8) (CodePoint & 0x7F);\r
+    EncodedSize   = 1;\r
+  } else if (CodePoint < 0x0800) {\r
+    //\r
+    // 0x0080 - 0x07FF, UTF8 format: 110xxxxx 10xxxxxx\r
+    //\r
+    Utf8Buffer[0] = (CHAR8) (0xC0 | ((CodePoint >> 6) & 0x1F));\r
+    Utf8Buffer[1] = (CHAR8) (0x80 | (CodePoint & 0x3F));\r
+    EncodedSize   = 2;\r
+  } else {\r
+    //\r
+    // 0x0800 - 0xFFFF, UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx\r
+    //\r
+    Utf8Buffer[0] = (CHAR8) (0xE0 | ((CodePoint >> 12) & 0x0F));\r
+    Utf8Buffer[1] = (CHAR8) (0x80 | ((CodePoint >> 6) & 0x3F));\r
+    Utf8Buffer[2] = (CHAR8) (0x80 | (CodePoint & 0x3F));\r
+    EncodedSize   = 3;\r
+  }\r
+\r
+  return EncodedSize;\r
+}\r
+\r
+/**\r
+  Convert a UTF8 encoded data to a UCS2 character.\r
+\r
+  The sequence length is taken from the lead byte via GetUTF8SizeForUCS2();\r
+  the lead byte and every continuation byte (10xxxxxx) are then validated\r
+  before the character is assembled. *Ucs2Char is set to 0 on entry and is\r
+  left at 0 when an error is returned.\r
+\r
+  @param[in]    Utf8Buffer        The provided UTF8 encoded data. Must contain as\r
+                                  many bytes as the lead byte announces.\r
+  @param[out]   Ucs2Char          The converted UCS2 character.\r
+\r
+  @retval       EFI_INVALID_PARAMETER     The UTF8 encoded string is not valid or\r
+                                          not for UCS2 character.\r
+  @retval       EFI_SUCCESS               The converted UCS2 character.\r
+\r
+**/\r
+EFI_STATUS\r
+UTF8ToUCS2Char (\r
+  IN    CHAR8         *Utf8Buffer,\r
+  OUT   CHAR16        *Ucs2Char\r
+  )\r
+{\r
+  UINT8  Utf8Size;\r
+  UINT8  Byte1;\r
+  UINT8  Byte2;\r
+  UINT8  Byte3;\r
+\r
+  ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);\r
+  *Ucs2Char = 0;\r
+\r
+  Utf8Size = GetUTF8SizeForUCS2 (Utf8Buffer);\r
+\r
+  //\r
+  // Work on unsigned copies of the bytes: CHAR8 may be a signed type, and\r
+  // left-shifting a negative value is undefined behavior in C.\r
+  //\r
+  Byte1 = (UINT8) Utf8Buffer[0];\r
+\r
+  switch (Utf8Size) {\r
+\r
+    case 1:\r
+\r
+      //\r
+      // UTF8 format: 0xxxxxxx\r
+      // A size of 1 is also reported for a stray 10xxxxxx byte, which is\r
+      // rejected here.\r
+      //\r
+      if ((Byte1 & 0x80) != 0x00) {\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      *Ucs2Char = (CHAR16) Byte1;\r
+      break;\r
+\r
+    case 2:\r
+\r
+      //\r
+      // UTF8 format: 110xxxxx 10xxxxxx\r
+      //\r
+      if ((Byte1 & 0xE0) != 0xC0) {\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      Byte2 = (UINT8) Utf8Buffer[1];\r
+      if ((Byte2 & 0xC0) != 0x80) {\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      *Ucs2Char = (CHAR16) (((Byte1 & 0x1F) << 6) | (Byte2 & 0x3F));\r
+      break;\r
+\r
+    case 3:\r
+\r
+      //\r
+      // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx\r
+      //\r
+      if ((Byte1 & 0xF0) != 0xE0) {\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      Byte2 = (UINT8) Utf8Buffer[1];\r
+      if ((Byte2 & 0xC0) != 0x80) {\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      Byte3 = (UINT8) Utf8Buffer[2];\r
+      if ((Byte3 & 0xC0) != 0x80) {\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      *Ucs2Char = (CHAR16) (((Byte1 & 0x0F) << 12) |\r
+                            ((Byte2 & 0x3F) << 6)  |\r
+                            (Byte3 & 0x3F));\r
+      break;\r
+\r
+    default:\r
+\r
+      //\r
+      // Size 0: the lead byte does not start a sequence that fits in UCS2.\r
+      //\r
+      return EFI_INVALID_PARAMETER;\r
+  }\r
+\r
+  return EFI_SUCCESS;\r
+}\r
+\r
+/**\r
+  Convert a UCS2 string to a UTF8 encoded string.\r
+\r
+  The conversion makes two passes over Ucs2Str: the first one adds up the\r
+  encoded size of every character, the second one encodes into a newly\r
+  allocated, NUL-terminated buffer of exactly that size.\r
+\r
+  @param[in]    Ucs2Str                The provided UCS2 string.\r
+  @param[out]   Utf8StrAddr            The converted UTF8 string address. Caller\r
+                                       is responsible for Free this string.\r
+\r
+  @retval       EFI_INVALID_PARAMETER  One or more parameters are invalid.\r
+  @retval       EFI_OUT_OF_RESOURCES   System runs out of resources.\r
+  @retval       EFI_SUCCESS            The UTF8 encoded string has been converted.\r
+\r
+**/\r
+EFI_STATUS\r
+UCS2StrToUTF8 (\r
+  IN  CHAR16     *Ucs2Str,\r
+  OUT CHAR8      **Utf8StrAddr\r
+  )\r
+{\r
+  UINTN  CharCount;\r
+  UINTN  CharIndex;\r
+  UINTN  TotalBytes;\r
+  UINTN  WriteOffset;\r
+  CHAR8  *Encoded;\r
+  CHAR8  Scratch[UTF8_BUFFER_FOR_UCS2_MAX_SIZE];\r
+\r
+  if (Ucs2Str == NULL || Utf8StrAddr == NULL) {\r
+    return EFI_INVALID_PARAMETER;\r
+  }\r
+\r
+  CharCount = StrLen (Ucs2Str);\r
+\r
+  //\r
+  // Pass 1: measure. The encoded bytes land in a scratch buffer and are\r
+  // discarded; only the per-character size is of interest.\r
+  //\r
+  TotalBytes = 0;\r
+  CharIndex  = 0;\r
+  while (CharIndex < CharCount) {\r
+    TotalBytes += UCS2CharToUTF8 (Ucs2Str[CharIndex], Scratch);\r
+    CharIndex ++;\r
+  }\r
+\r
+  //\r
+  // One extra byte for the terminator.\r
+  //\r
+  Encoded = AllocateZeroPool (TotalBytes + 1);\r
+  if (Encoded == NULL) {\r
+    return EFI_OUT_OF_RESOURCES;\r
+  }\r
+\r
+  //\r
+  // Pass 2: encode each character directly at its place in the output.\r
+  //\r
+  WriteOffset = 0;\r
+  CharIndex   = 0;\r
+  while (CharIndex < CharCount) {\r
+    WriteOffset += UCS2CharToUTF8 (Ucs2Str[CharIndex], Encoded + WriteOffset);\r
+    CharIndex ++;\r
+  }\r
+\r
+  Encoded[WriteOffset] = '\0';\r
+  *Utf8StrAddr         = Encoded;\r
+\r
+  return EFI_SUCCESS;\r
+}\r
+\r
+/**\r
+  Convert a UTF8 encoded string to a UCS2 string.\r
+\r
+  Besides plain UTF8 sequences, a "\uXXXX" escape found in the input is\r
+  handed to GetUCS2CharByFormat() and replaced by the single UCS2 character\r
+  it returns. If GetUCS2CharByFormat() rejects the escape, the two characters\r
+  "\u" are copied literally and decoding resumes right after them.\r
+\r
+  @param[in]    Utf8Str                The provided UTF8 encoded string.\r
+  @param[out]   Ucs2StrAddr            The converted UCS2 string address. Caller\r
+                                       is responsible for Free this string.\r
+\r
+  @retval       EFI_INVALID_PARAMETER  The UTF8 encoded string is not valid to\r
+                                       convert to UCS2 string.\r
+                                       One or more parameters are invalid.\r
+  @retval       EFI_OUT_OF_RESOURCES   System runs out of resources.\r
+  @retval       EFI_SUCCESS            The UCS2 string has been converted.\r
+\r
+**/\r
+EFI_STATUS\r
+UTF8StrToUCS2 (\r
+  IN  CHAR8      *Utf8Str,\r
+  OUT CHAR16     **Ucs2StrAddr\r
+  )\r
+{\r
+  EFI_STATUS  Status;\r
+  UINTN       Utf8StrIndex;\r
+  UINTN       Utf8StrLength;\r
+  UINTN       Ucs2StrIndex;\r
+  UINT8       Utf8BufferSize;\r
+  CHAR16      *Ucs2StrTemp;\r
+\r
+  if (Utf8Str == NULL || Ucs2StrAddr == NULL) {\r
+    return EFI_INVALID_PARAMETER;\r
+  }\r
+\r
+  //\r
+  // It is not an Ascii string, calculate string length.\r
+  //\r
+  Utf8StrLength = 0;\r
+  while (*(Utf8Str + Utf8StrLength) != '\0') {\r
+    Utf8StrLength ++;\r
+  }\r
+\r
+  //\r
+  // UCS2 string shall not be longer than the UTF8 string, so a work buffer\r
+  // with one CHAR16 per input byte (plus terminator) is always large enough.\r
+  //\r
+  Ucs2StrTemp = AllocateZeroPool ((Utf8StrLength + 1) * sizeof (CHAR16));\r
+  if (Ucs2StrTemp == NULL) {\r
+    return EFI_OUT_OF_RESOURCES;\r
+  }\r
+\r
+  Utf8StrIndex = 0;\r
+  Ucs2StrIndex = 0;\r
+  while (Utf8Str[Utf8StrIndex] != '\0') {\r
+\r
+    if (CompareMem (Utf8Str + Utf8StrIndex, "\\u", 2) == 0 &&\r
+        Utf8StrLength - Utf8StrIndex >= UNICODE_FORMAT_LEN) {\r
+\r
+      //\r
+      // "\uXXXX" escape with all of its characters available.\r
+      //\r
+      Status = GetUCS2CharByFormat (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);\r
+      if (!EFI_ERROR (Status)) {\r
+\r
+        Utf8StrIndex += UNICODE_FORMAT_LEN;\r
+        Ucs2StrIndex ++;\r
+      } else {\r
+\r
+        //\r
+        // Not a usable escape: keep "\u" as two ordinary characters. They\r
+        // are stored directly instead of through a string-copy service, so\r
+        // no return status is left unchecked.\r
+        //\r
+        Ucs2StrTemp[Ucs2StrIndex]     = L'\\';\r
+        Ucs2StrTemp[Ucs2StrIndex + 1] = L'u';\r
+\r
+        Ucs2StrIndex += 2;\r
+        Utf8StrIndex += 2;\r
+      }\r
+    } else {\r
+\r
+      //\r
+      // Regular UTF8 sequence. Reject it when the lead byte is not valid for\r
+      // UCS2 or when the string ends before the sequence is complete.\r
+      //\r
+      Utf8BufferSize = GetUTF8SizeForUCS2 (Utf8Str + Utf8StrIndex);\r
+      if (Utf8BufferSize == 0 || Utf8StrLength - Utf8StrIndex < Utf8BufferSize) {\r
+\r
+        FreePool (Ucs2StrTemp);\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      Status = UTF8ToUCS2Char (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);\r
+      if (EFI_ERROR (Status)) {\r
+\r
+        FreePool (Ucs2StrTemp);\r
+        return EFI_INVALID_PARAMETER;\r
+      }\r
+\r
+      Ucs2StrIndex ++;\r
+      Utf8StrIndex += Utf8BufferSize;\r
+    }\r
+  }\r
+\r
+  //\r
+  // Hand back a buffer trimmed to the number of characters actually produced.\r
+  //\r
+  *Ucs2StrAddr = AllocateZeroPool ((Ucs2StrIndex + 1) * sizeof (CHAR16));\r
+  if (*Ucs2StrAddr == NULL) {\r
+\r
+    FreePool (Ucs2StrTemp);\r
+    return EFI_OUT_OF_RESOURCES;\r
+  }\r
+\r
+  //\r
+  // The element count is known, so copy it as raw memory. A string-copy\r
+  // service could fail on very long strings (its status was previously\r
+  // ignored, yielding an empty result with EFI_SUCCESS) and would stop at a\r
+  // NUL produced by a "\u0000" escape.\r
+  //\r
+  CopyMem (*Ucs2StrAddr, Ucs2StrTemp, Ucs2StrIndex * sizeof (CHAR16));\r
+  *(*Ucs2StrAddr + Ucs2StrIndex) = L'\0';\r
+  FreePool (Ucs2StrTemp);\r
+\r
+  return EFI_SUCCESS;\r
+}\r
+\r