+++ /dev/null
-/** @file\r
- Copyright (c) 2016, Daryl McDaniel. All rights reserved.<BR>\r
- Copyright (c) 2012, Intel Corporation. All rights reserved.<BR>\r
- This program and the accompanying materials\r
- are licensed and made available under the terms and conditions of the BSD License\r
- which accompanies this distribution. The full text of the license may be found at\r
- http://opensource.org/licenses/bsd-license.php\r
-\r
- THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
- WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
-**/\r
-#include <assert.h>\r
-#include <string.h>\r
-#include <errno.h>\r
-#include <stdlib.h>\r
-#include <wchar.h>\r
-#include <sys/types.h>\r
-#include <limits.h>\r
-\r
-typedef int ch_UCS4;\r
-\r
-static mbstate_t LocalConvState = {0};\r
-\r
-/** Map a UTF-8 encoded prefix byte to a sequence length.\r
- Zero means illegal prefix, but valid surrogate if < 0xC0.\r
- One indicates an ASCII-7 equivalent character.\r
- Two, three, and four are the first byte for 2, 3, and 4 byte sequences, respectively.\r
- See RFC 3629 for details.\r
-\r
- TABLE ENCODING:\r
- Low Nibble decodes the first byte into the number of bytes in the sequence.\r
- A value of zero indicates an invalid byte.\r
- The High Nibble encodes a bit mask to be used to match against the high nibble of the second byte.\r
-\r
- example:\r
- SequenceLength = code[c0] & 0x0F;\r
- Mask = 0x80 | code[c0];\r
-\r
- Surrogate bytes are valid if: code[cX] & Mask > 0x80;\r
-\r
-*/\r
-static\r
-UINT8 utf8_code_length[256] = {\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 00-0F */\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 70-7F */\r
- 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, /* 80-8F */\r
- 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, /* 90-9F */\r
- 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, /* A0-AF */\r
- 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, /* B0-BF */\r
- 0x00, 0x00, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, /* C0-C1 + C2-CF */\r
- 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, /* D0-DF */\r
- 0x43, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x33, 0x73, 0x73, /* E0-EF */\r
- 0x64, 0x74, 0x74, 0x74, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* F0-F4 + F5-FF */\r
-};\r
-\r
-/** Process one byte of a multibyte character.\r
-\r
- @param[in] ch One byte of a multibyte character.\r
- @param[in,out] ps Pointer to a conversion state object.\r
-\r
- @retval -2 ch is an incomplete but potentially valid character.\r
- @retval -1 ch is not valid in this context.\r
- @retval 1:4 The length, in bytes, of the character ch just completed.\r
-**/\r
-static\r
-int\r
-ProcessOneByte(unsigned char ch, mbstate_t *ps)\r
-{\r
- UINT32 Mask;\r
- UINT32 Length;\r
- int RetVal = 0;\r
-\r
- if(ps->A > 3) {\r
- // We are in an invalid state\r
- ps->A = 0; // Initial State\r
- }\r
- ps->C[ps->A] = ch; // Save the current byte\r
- Mask = utf8_code_length[ch];\r
-\r
- if(ps->A == 0) { // Initial State. First byte of sequence.\r
- ps->E = Mask | 0x80;\r
- Length = Mask & 0xF;\r
- switch(Length) {\r
- case 0: // State 0, Code 0\r
- errno = EILSEQ;\r
- RetVal = -1;\r
- ps->E = 1; // Consume this byte\r
- break;\r
- case 1: // State 0, Code 1\r
- // ASCII-7 Character\r
- ps->B = ps->D[0] = ch;\r
- RetVal = 1;\r
- break;\r
- default: // State 0, Code 2, 3, 4\r
- ps->A = 1; // Next state is State-1\r
- RetVal = -2; // Incomplete but potentially valid character\r
- break;\r
- }\r
- }\r
- else {\r
- // We are in state 1, 2, or 3 and processing a surrogate byte\r
- Length = ps->E & 0xF;\r
- if((Mask & ps->E) > 0x80) {\r
- // This byte is valid\r
- switch(ps->A) { // Process based upon our current state\r
- case 1: // Second byte of the sequence.\r
- if(Length == 2) { // State 1, Code 2\r
- Length = ((ps->C[0] & 0x1f) << 6) + (ps->C[1] & 0x3f);\r
- assert ((Length > 0x007F) && (Length <= 0x07FF));\r
- ps->B = ps->D[0] = (UINT16)Length;\r
- ps->A = 0; // Next state is State-0\r
- RetVal = 2;\r
- }\r
- else { // This isn't the last byte, get more. State 1, Code 3 or 4\r
- ps->A = 2;\r
- RetVal = -2;\r
- }\r
- break;\r
- case 2: // Third byte of the sequence\r
- if(Length == 3) {\r
- Length = ((ps->C[0] & 0x0f) << 12) + ((ps->C[1] & 0x3f) << 6) + (ps->C[2] & 0x3f);\r
- assert ((Length > 0x07FF) && (Length <= 0xFFFF));\r
- ps->B = ps->D[0] = (UINT16)Length;\r
- ps->A = 0; // Next state is State-0\r
- RetVal = 3;\r
- }\r
- else {\r
- ps->A = 3;\r
- RetVal = -2;\r
- }\r
- break;\r
- case 3: // Fourth byte of the sequence\r
- if(Length == 4) {\r
- Length = ((ps->C[0] & 0x7) << 18) + ((ps->C[1] & 0x3f) << 12) +\r
- ((ps->C[2] & 0x3f) << 6) + (ps->C[3] & 0x3f);\r
- ps->B = Length;\r
- assert ((Length > 0xFFFF) && (Length <= 0x10ffff));\r
-\r
- /* compute and append the two surrogates: */\r
-\r
- /* translate from 10000..10FFFF to 0..FFFF */\r
- Length -= 0x10000;\r
-\r
- /* high surrogate = top 10 bits added to D800 */\r
- ps->D[0] = (UINT16)(0xD800 + (Length >> 10));\r
-\r
- /* low surrogate = bottom 10 bits added to DC00 */\r
- ps->D[1] = (UINT16)(0xDC00 + (Length & 0x03FF));\r
- ps->A = 0; // Next state is State-0\r
- RetVal = 4;\r
- }\r
- else {\r
- errno = EILSEQ;\r
- ps->A = 0;\r
- RetVal = -1;\r
- ps->E = 4; // Can't happen, but consume this byte anyway\r
- }\r
- break;\r
- }\r
- }\r
- else { // Invalid surrogate byte\r
- errno = EILSEQ;\r
- ps->A = 0; // Next is State-0\r
- RetVal = -1;\r
- ps->E = 0; // Don't Consume, it may be an initial byte\r
- }\r
- }\r
- return RetVal;\r
-}\r
-\r
-/** Convert one Multibyte sequence.\r
-\r
- @param[out] Dest Pointer to output location, or NULL\r
- @param[in] Src Multibyte Source (UTF8)\r
- @param[in] Len Max Number of bytes to convert\r
- @param[in] pS Pointer to State struct., or NULL\r
-\r
- @retval -2 Bytes processed comprise an incomplete, but potentially valid, character.\r
- @retval -1 An encoding error was encountered. ps->E indicates the number of bytes consumed.\r
- @retval 0 Either Src is NULL or it points to a NUL character.\r
- @retval 1:N N bytes were consumed producing a valid wide character.\r
-**/\r
-int\r
-DecodeOneStateful(\r
- wchar_t *Dest, // Pointer to output location, or NULL\r
- const char *Src, // Multibyte Source (UTF8)\r
- ssize_t Len, // Max Number of bytes to convert\r
- mbstate_t *pS // Pointer to State struct., or NULL\r
- )\r
-{\r
- const char *SrcEnd;\r
- int NumConv;\r
- unsigned char ch;\r
-\r
- if(pS == NULL) {\r
- pS = &LocalConvState;\r
- }\r
- NumConv = 0;\r
- if(Src != NULL) {\r
- if(*Src != 0) {\r
- SrcEnd = Src + Len;\r
- while(Src < SrcEnd) {\r
- ch = (unsigned char)*Src++;\r
- NumConv = ProcessOneByte(ch, pS);\r
- if(NumConv != -2) {\r
- break;\r
- }\r
- }\r
- }\r
- else if(Dest != NULL) {\r
- *Dest = 0;\r
- }\r
- }\r
- if((NumConv > 0) && (Dest != NULL)) {\r
- Dest[0] = pS->D[0];\r
- if(NumConv == 4) {\r
- Dest[1] = pS->D[1];\r
- }\r
- }\r
- return NumConv;\r
-}\r
-\r
-/* Determine the number of bytes needed to represent a Wide character\r
- as a MBCS character.\r
-\r
- A single wide character may convert into a one, two, three, or four byte\r
- narrow (MBCS or UTF-8) character. The number of MBCS bytes can be determined\r
- as follows.\r
-\r
- If WCS char < 0x00000080 One Byte\r
- Else if WCS char < 0x0000D800 Two Bytes\r
- Else Three Bytes\r
-\r
- Since UEFI only supports the Unicode Base Multilingual Plane (BMP),\r
- Four-byte characters are not supported.\r
-\r
- @param[in] InCh Wide character to test.\r
-\r
- @retval -1 Improperly formed character\r
- @retval 0 InCh is 0x0000\r
- @retval >0 Number of bytes needed for the MBCS character\r
-*/\r
-int\r
-EFIAPI\r
-OneWcToMcLen(const wchar_t InCh)\r
-{\r
- ssize_t NumBytes;\r
-\r
- if(InCh == 0) { // Is this a NUL, 0x0000 ?\r
- NumBytes = 0;\r
- }\r
- else if(InCh < 0x0080) { // Is this a 1-byte character?\r
- NumBytes = 1;\r
- }\r
- else if(InCh < 0x0800) { // Is this a 2-byte character?\r
- NumBytes = 2;\r
- }\r
- else if((InCh >= 0xD800) && (InCh < 0xE000)) { // Is this a surrogate?\r
- NumBytes = -1;\r
- }\r
- else {\r
- NumBytes = 3; // Otherwise, it must be a 3-byte character.\r
- }\r
- return (int)NumBytes; // Return extimate of required bytes.\r
-}\r
-\r
-/* Determine the number of bytes needed to represent a Wide character string\r
- as a MBCS string of given maximum length. Will optionally return the number\r
- of wide characters that would be consumed.\r
-\r
- A single wide character may convert into a one, two, three, or four byte\r
- narrow (MBCS or UTF-8) character. The number of MBCS bytes can be determined\r
- as follows.\r
-\r
- If WCS char < 0x00000080 One Byte\r
- Else if WCS char < 0x00000800 Two Bytes\r
- Else if WCS char < 0x00010000 Three Bytes\r
- Else Four Bytes\r
-\r
- Since UEFI only supports the Unicode Base Multilingual Plane (BMP),\r
- Four-byte characters should not be encountered.\r
-\r
- @param[in] Src Pointer to a wide character string.\r
- @param[in] Limit Maximum number of bytes the converted string may occupy.\r
- @param[out] NumChar Pointer to where to store the number of wide characters\r
- consumed, or NULL.\r
-\r
- @return The number of bytes required to convert Src to MBCS,\r
- not including the terminating NUL. If NumChar is not NULL, the number\r
- of characters represented by the return value will be written to\r
- where it points.\r
-*/\r
-size_t\r
-EFIAPI\r
-EstimateWtoM(const wchar_t * Src, size_t Limit, size_t *NumChar)\r
-{\r
- ssize_t Estimate;\r
- size_t CharCount;\r
- ssize_t NumBytes;\r
- wchar_t EChar;\r
-\r
- Estimate = 0;\r
- CharCount = 0;\r
- EChar = *Src++; // Get the initial character and point to next\r
- while(((NumBytes = OneWcToMcLen(EChar)) > 0) &&\r
- ((size_t)(Estimate + NumBytes) < Limit))\r
- { // Until one of the source characters is NUL\r
- ++CharCount; // Count this character.\r
- Estimate += NumBytes; // Count the Bytes for this character\r
- EChar = *Src++; // Get the next source character and point to the next.\r
- }\r
- if(NumChar != NULL) {\r
- *NumChar = CharCount;\r
- }\r
- return (size_t)Estimate; // Return esimate of required bytes.\r
-}\r
-\r
-/* Determine the number of characters in a MBCS string.\r
- MBCS characters are one to four bytes long. By examining the first byte\r
- of a MBCS character, one can determine the number of bytes comprising the\r
- character.\r
-\r
- 0x00 - 0x7F One\r
- 0xC0 - 0xDF Two\r
- 0xE0 - 0xEF Three\r
- 0xF0 - 0xF7 Four\r
-\r
- Since UEFI only supports the Unicode Base Multilingual Plane (BMP),\r
- Four-byte characters should not be encountered.\r
-\r
- @param[in] Src The string to examine\r
-\r
- @return The number of characters represented by the MBCS string.\r
-**/\r
-size_t\r
-EFIAPI\r
-CountMbcsChars(const char *Src)\r
-{\r
- size_t Count;\r
- char EChar;\r
-\r
- Count = 0;\r
- EChar = *Src++;\r
- while(EChar != 0) {\r
- if(EChar < 0x80) {\r
- ++Count;\r
- }\r
- else if(EChar < 0xE0) {\r
- Count += 2;\r
- ++Src;\r
- }\r
- else if(EChar < 0xF0) {\r
- Count += 3;\r
- Src += 2;\r
- }\r
- else {\r
- // Ill-formed character\r
- break;\r
- }\r
- }\r
- return Count;\r
-}\r
-\r
/** Convert a wide character (UTF16) into a multibyte character (UTF8)

    The character is encoded into a small local buffer as one, two, or three
    bytes and then, if Dest is not a null pointer, copied to Dest.  Values in
    the surrogate range 0xD800-0xDFFF are rejected.

    It is the caller's responsibility to ensure that Dest is large enough to
    hold the resulting MBCS sequence.

    @param    Dest    Pointer to the buffer in which to place the converted sequence, or NULL.
    @param    ch      The wide character to convert.

    @retval   -1      ch is a surrogate.  errno is set to EILSEQ and nothing is stored.
    @retval   >=0     The number of bytes in the encoded sequence.
**/
ssize_t
EncodeUtf8(char *Dest, wchar_t ch)
{
  char    Encoded[4];   // Staging area for the encoded bytes
  int     Count;        // Number of bytes placed in Encoded

  if (ch < 0x80) {
    /* One byte: the value is stored unchanged */
    Encoded[0] = (char) ch;
    Count = 1;
  }
  else if (ch < 0x0800) {
    /* Two bytes: 110xxxxx 10xxxxxx */
    Encoded[0] = (char)(0xc0 | (ch >> 6));
    Encoded[1] = (char)(0x80 | (ch & 0x3f));
    Count = 2;
  }
  else if ((ch >= 0xD800) && (ch < 0xE000)) {
    /* Surrogates have no UTF-8 form -- Shouldn't happen in UEFI */
    errno = EILSEQ;
    return -1;
  }
  else {
    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    Encoded[0] = (char)(0xe0 | (ch >> 12));
    Encoded[1] = (char)(0x80 | ((ch >> 6) & 0x3f));
    Encoded[2] = (char)(0x80 | (ch & 0x3f));
    Count = 3;
  }

  if (Dest != NULL) {
    memcpy(Dest, Encoded, Count);
  }
  return Count;
}
-\r
-// ######################## Narrow to Wide Conversions #######################\r
-\r
-/** If ps is not a null pointer, the mbsinit function determines whether the\r
- pointed-to mbstate_t object describes an initial conversion state.\r
-\r
- @param[in] ps Pointer to the conversion state object to test.\r
-\r
- @return The mbsinit function returns nonzero if ps is a null pointer\r
- or if the pointed-to object describes an initial conversion\r
- state; otherwise, it returns zero.\r
-\r
- Declared in: wchar.h\r
-**/\r
-int\r
-mbsinit(const mbstate_t *ps)\r
-{\r
- if((ps == NULL) || (ps->A == 0)) {\r
- return 1;\r
- }\r
- return 0;\r
-}\r
-\r
/** Determine the length of the next multibyte character.

    Implemented as a call to:<BR>
@verbatim
    mbrtowc(NULL, s, n, pS)
@endverbatim
    so no wide character is stored.

    @param[in]  s     Pointer to a multibyte character sequence.
    @param[in]  n     Maximum number of bytes to examine.
    @param[in]  pS    Pointer to the conversion state object.

    @retval   0       The next n or fewer characters complete a NUL.
    @retval   1..n    The number of bytes that complete the multibyte character.
    @retval   -2      The next n bytes contribute to an incomplete (but potentially valid) multibyte character.
    @retval   -1      An encoding error occurred.

    Declared in: wchar.h
**/
size_t
mbrlen(
  const char *s,
  size_t n,
  mbstate_t *pS
  )
{
  size_t  Result;

  Result = mbrtowc(NULL, s, n, pS);
  return Result;
}
-\r
/** Determine the number of bytes comprising a multibyte character.

    Implemented as mbrlen(s, n, NULL) with the result converted to int.

    @param[in]  s     Points to a multibyte character.
    @param[in]  n     The maximum number of bytes in a multibyte character.

    @return   Returns 0 if s points to the null character, the number of bytes
              contained in the multibyte character if the next n or fewer
              bytes form a valid multibyte character, or -1 if they do not.

    Declared in: stdlib.h
**/
int
mblen(
  const char *s,
  size_t n
  )
{
  size_t  Result;

  Result = mbrlen(s, n, NULL);
  return (int)Result;
}
-\r
/** Convert the next multibyte character into a wide character, restartably.

    Inspects at most n bytes beginning with the byte pointed to by s to
    determine the number of bytes needed to complete the next multibyte
    character.  If the character is complete and valid and pwc is not a null
    pointer, the corresponding wide character is stored through pwc.

    The work is done by DecodeOneStateful(); its int result is returned
    converted to size_t.

    @param[out]   pwc     Pointer to where the resulting wide character is to be stored.
    @param[in]    s       Pointer to a multibyte character "string".
    @param[in]    n       The maximum number of bytes to inspect.
    @param[in]    ps      Pointer to a conversion state object.

    @retval   0             if the next n or fewer bytes complete the multibyte
                            character that corresponds to the null wide
                            character (which is the value stored).
    @retval   between_1_and_n_inclusive   if the next n or fewer bytes complete
                            a valid multibyte character (which is the value
                            stored); the value returned is the number of bytes
                            that complete the multibyte character.
    @retval   (size_t)(-2)  if the next n bytes contribute to an incomplete
                            (but potentially valid) multibyte character, and
                            all n bytes have been processed (no value is stored).
    @retval   (size_t)(-1)  if an encoding error occurs, in which case the next
                            n or fewer bytes do not contribute to a complete and
                            valid multibyte character (no value is stored); the
                            value of the macro EILSEQ is stored in errno, and
                            the conversion state is unspecified.

    Declared in: wchar.h
**/
size_t
mbrtowc(
  wchar_t *pwc,
  const char *s,
  size_t n,
  mbstate_t *ps
  )
{
  return (size_t)DecodeOneStateful(pwc, s, (ssize_t)n, ps);
}
-\r
/** Convert a multibyte character into a wide character.

    Implemented as mbrtowc(pwc, s, n, NULL) with the result converted to int.

    @param[out]   pwc   Pointer to a wide-character object to receive the converted character.
    @param[in]    s     Pointer to a multibyte character to convert.
    @param[in]    n     Maximum number of bytes in a multibyte character.

    @return   Returns 0 if s points to the null character, the number of bytes
              contained in the converted multibyte character if the next n or
              fewer bytes form a valid multibyte character, or -1 if they do
              not form a valid multibyte character.

    Declared in: stdlib.h
**/
int
mbtowc(
  wchar_t *pwc,
  const char *s,
  size_t n
  )
{
  size_t  Result;

  Result = mbrtowc(pwc, s, n, NULL);
  return (int)Result;
}
-\r
/** Convert a multibyte (UTF-8) string into a wide (UTF-16) string, restartably.

    Converts a sequence of multibyte characters, beginning in the conversion
    state described by ps, from the array indirectly pointed to by src.  If
    dst is not a null pointer, the converted characters are stored into the
    array pointed to by dst.  Conversion continues up to and including a
    terminating null character, which is also stored.  Conversion stops
    earlier when a byte sequence does not form a valid multibyte character, or
    (if dst is not a null pointer) when the len elements of dst are used up.

    Each character is decoded into a two-element scratch buffer before being
    copied out, because a four-byte sequence yields a surrogate pair.  A pair
    is only stored when two elements remain; otherwise conversion stops before
    that character.  When dst is a null pointer len is ignored, so the
    function can be used to measure a string.

    If dst is not a null pointer, the pointer object pointed to by src is
    assigned either a null pointer (if conversion stopped due to reaching a
    terminating null character) or the address just past the last multibyte
    character converted (if any).

    @param[out]   dst   Pointer to where the resulting wide character sequence is stored, or NULL.
    @param[in]    src   Pointer to a pointer to the multibyte character sequence to convert.
    @param[in]    len   Maximum number of wide characters to be stored into dst.
    @param[in]    ps    Pointer to a conversion state object.

    @return   If the input conversion encounters a sequence of bytes that do
              not form a valid multibyte character, the value reported by the
              decoder, (size_t)(-1) or (size_t)(-2), is returned; errno holds
              EILSEQ for an encoding error.  Otherwise, it returns the number
              of multibyte characters successfully converted, not including
              the terminating null character (if any).  A character stored as
              a surrogate pair counts once.  Returns 0 if src or *src is NULL.

    Declared in: wchar.h
**/
size_t
mbsrtowcs(
  wchar_t *dst,
  const char **src,
  size_t len,
  mbstate_t *ps
  )
{
  wchar_t       Units[2];     // One decoded character (possibly a pair)
  const char   *MySrc;
  size_t        RetVal;
  size_t        Needed;
  int           x;

  if((src == NULL) || (*src == NULL)) {
    return 0;
  }

  RetVal = 0;
  MySrc  = *src;
  while((dst == NULL) || (len != 0)) {
    x = DecodeOneStateful(Units, MySrc, MB_LEN_MAX, ps);
    if(x < 0) {               // Incomplete character or encoding error
      return (size_t)x;
    }
    if(x == 0) {              // Encountered NUL character: done.
      if(dst != NULL) {
        *dst = 0;
        *src = NULL;
      }
      break;
    }

    // Successfully decoded a character
    Needed = (x == 4) ? 2 : 1;
    if(dst != NULL) {
      if(Needed > len) {
        break;                // A surrogate pair will not fit; stop before it
      }
      dst[0] = Units[0];
      if(Needed == 2) {
        dst[1] = Units[1];
      }
      dst += Needed;
      len -= Needed;
      *src = MySrc + x;
    }
    MySrc += x;
    ++RetVal;
  }
  return RetVal;
}
-\r
/** Convert a multibyte character string into a wide-character string.

    Implemented as mbsrtowcs(Dest, &Src, Limit, NULL).  Only the local copy of
    the Src pointer is updated by that call.

    If copying takes place between objects that overlap,
    the behavior is undefined.

    @param[out]   Dest    Pointer to the array to receive the converted string.  May be NULL.
    @param[in]    Src     Pointer to the string to be converted.  May be NULL.
    @param[in]    Limit   Maximum number of elements to be written to Dest.

    @return   The value returned by mbsrtowcs: (size_t)(-1) if an invalid
              multibyte character is encountered, otherwise the count of
              converted characters, not including a terminating null wide
              character, if any.

    Declared in: stdlib.h
**/
size_t
mbstowcs(
  wchar_t *Dest,
  const char *Src,
  size_t Limit
  )
{
  const char  *Cursor;

  Cursor = Src;
  return mbsrtowcs(Dest, &Cursor, Limit, NULL);
}
-\r
/** The btowc function determines whether C constitutes a valid single-byte
    character in the initial shift state.

    The byte is taken from the value of c rather than from its first byte in
    memory, so the result does not depend on byte order.  Decoding uses a
    private, zeroed conversion state: the test is therefore always made in the
    initial state, and a lead byte passed here cannot leave the shared
    internal state in the middle of a sequence.

    @param[in]    c     A narrow character to test or convert to wide.

    @return   The btowc function returns WEOF if c has the value EOF or if
              (unsigned char)c does not constitute a valid single-byte
              character in the initial shift state.  Otherwise, it returns the
              wide character representation of that character.

    Declared in: wchar.h
**/
wint_t
btowc(int c)
{
  mbstate_t       State;
  unsigned char   Byte;
  wchar_t         Dest;
  wint_t          RetVal;
  int             x;

  if (c == EOF) {
    return WEOF;
  }

  memset(&State, 0, sizeof State);      // Initial conversion state
  Byte   = (unsigned char)c;
  RetVal = WEOF;
  x = DecodeOneStateful(&Dest, (const char *)&Byte, 1, &State);
  if(x == 0) {
    RetVal = 0;
  }
  else if(x == 1) {
    RetVal = (wint_t)Dest;
  }
  return RetVal;
}
-\r
-// ######################## Wide to Narrow Conversions #######################\r
-\r
-/**\r
-If S is a null pointer, the wcrtomb function is equivalent to the call:<BR>\r
-@verbatim\r
- wcrtomb(buf, L'\0', ps)\r
-@endverbatim\r
-where buf is an internal buffer.\r
-\r
-If S is not a null pointer, the wcrtomb function determines the number of bytes needed\r
-to represent the multibyte character that corresponds to the wide character given by wc\r
-(including any shift sequences), and stores the multibyte character representation in the\r
-array whose first element is pointed to by S. At most MB_CUR_MAX bytes are stored. If\r
-wc is a null wide character, a null byte is stored, preceded by any shift sequence needed\r
-to restore the initial shift state; the resulting state described is the initial conversion state.\r
-\r
- @param[out] Dest Pointer to the location in which to store the resulting\r
- multibyte character. Otherwise, NULL to reset the\r
- conversion state.\r
- @param[in] wchar The wide character to convert.\r
- @param[in,out] pS Pointer to a conversion state object, or NULL.\r
-\r
- @return The wcrtomb function returns the number of bytes stored in the\r
- array object (including any shift sequences). When wc is not a\r
- valid wide character, an encoding error occurs: the function\r
- stores the value of the macro EILSEQ in errno and\r
- returns (size_t)(-1); the conversion state is unspecified.\r
-\r
- Declared in: wchar.h\r
-**/\r
-size_t\r
-wcrtomb(\r
- char *Dest,\r
- wchar_t wchar,\r
- mbstate_t *pS\r
- )\r
-{\r
- size_t RetVal;\r
-\r
- /* Dest may be NULL */\r
- if (Dest == NULL) {\r
- RetVal = 1;\r
- }\r
- else {\r
- if (wchar == L'\0') {\r
- *Dest = '\0';\r
- RetVal = 1;\r
- }\r
- else {\r
- RetVal = EncodeUtf8(Dest, wchar);\r
- }\r
- }\r
- if(pS == NULL) {\r
- pS = &LocalConvState;\r
- }\r
- pS->A = 0; // Set ps to the initial conversion state\r
-\r
- return RetVal;\r
-}\r
-\r
/** Convert a wide character into a multibyte character.

    When s is not a null pointer the conversion is performed by
    wcrtomb(s, wchar, NULL) and its result is returned as an int.

    @param[out]   s       Pointer to the object to receive the converted multibyte character.
    @param[in]    wchar   Wide character to be converted.

    @return   If s is a null pointer, the wctomb function returns zero because
              the multibyte encoding is not state-dependent.  If s is not a
              null pointer, the wctomb function returns -1 if the value of
              wchar does not correspond to a valid multibyte character, or
              returns the number of bytes that are contained in the multibyte
              character corresponding to the value of wchar.

    Declared in: stdlib.h
**/
int
wctomb(
  char *s,
  wchar_t wchar
  )
{
  int   Result;

  Result = 0;       // Answer for the "state-dependent encoding?" query
  if (s != NULL) {
    Result = (int)wcrtomb(s, wchar, NULL);
  }
  return Result;
}
-\r
-/** The wcsrtombs function converts a sequence of wide characters from the array\r
- indirectly pointed to by Src into a sequence of corresponding multibyte\r
- characters that begins in the conversion state described by the object\r
- pointed to by ps.\r
-\r
- If Dest is not a null pointer, the converted characters are stored into the\r
- array pointed to by Dest. Conversion continues up to and including a\r
- terminating null wide character, which is also stored. Conversion stops\r
- earlier in two cases: when a wide character is reached that does not\r
- correspond to a valid multibyte character, or (if Dest is not a null\r
- pointer) when the next multibyte character would exceed the limit of Limit\r
- total bytes to be stored into the array pointed to by Dest. Each conversion\r
- takes place as if by a call to the wcrtomb function.)\r
-\r
- If Dest is not a null pointer, the pointer object pointed to by Src is\r
- assigned either a null pointer (if conversion stopped due to reaching\r
- a terminating null wide character) or the address just past the last wide\r
- character converted (if any). If conversion stopped due to reaching a\r
- terminating null wide character, the resulting state described is the\r
- initial conversion state.\r
-\r
- @param[in] Dest\r
- @param[in,out] Src\r
- @param[in] Limit Max number of bytes to store in Dest.\r
- @param[in,out] ps\r
-\r
- @return If conversion stops because a wide character is reached that\r
- does not correspond to a valid multibyte character, an\r
- encoding error occurs: the wcsrtombs function stores the\r
- value of the macro EILSEQ in errno and returns (size_t)(-1);\r
- the conversion state is unspecified. Otherwise, it returns\r
- the number of bytes in the resulting multibyte character\r
- sequence, not including the terminating null character (if any).\r
-\r
- Declared in: wchar.h\r
-**/\r
-size_t\r
-wcsrtombs(\r
- char *Dest,\r
- const wchar_t **Src,\r
- size_t Limit,\r
- mbstate_t *ps\r
-)\r
-{\r
- size_t NumStored;\r
- ssize_t MaxBytes;\r
- int count;\r
- wchar_t InCh;\r
-\r
- NumStored = 0;\r
- MaxBytes = (ssize_t)Limit;\r
-\r
- /* Dest may be NULL */\r
- /* Src may be NULL */\r
- /* ps appears to be unused */\r
-\r
- if (Src == NULL || *Src == NULL)\r
- return (0);\r
-\r
- if (Dest == NULL) {\r
- NumStored = EstimateWtoM(*Src, ASCII_STRING_MAX, NULL);\r
- }\r
- else {\r
- if((MaxBytes < 0) || (MaxBytes > ASCII_STRING_MAX)) {\r
- MaxBytes = ASCII_STRING_MAX;\r
- }\r
- while ((MaxBytes > 0) && (OneWcToMcLen(InCh = *(*Src)++) <= MaxBytes)) {\r
- if(InCh == 0) {\r
- *Src = NULL;\r
- *Dest = 0; // NUL terminate Dest string, but don't count the NUL\r
- break;\r
- }\r
- count = (int)wcrtomb(Dest, InCh, NULL);\r
- if(count >= 0) {\r
- Dest += count;\r
- MaxBytes -= count;\r
- NumStored += count;\r
- }\r
- else {\r
- NumStored = (size_t)(-1);\r
- }\r
- }\r
- }\r
-\r
-\r
- return NumStored;\r
-}\r
-\r
/** Convert a wide-character string into a multibyte character string.

    Implemented as wcsrtombs(Dest, &Src, Limit, NULL).  Only the local copy of
    the Src pointer is updated by that call.

    If copying takes place between objects that overlap,
    the behavior is undefined.

    @param[out]   Dest    Pointer to the array to receive the converted string.  May be NULL.
    @param[in]    Src     Pointer to the string to be converted.
    @param[in]    Limit   Maximum number of bytes to be written to Dest.

    @return   The value returned by wcsrtombs: (size_t)(-1) if a wide
              character is encountered that does not correspond to a valid
              multibyte character, otherwise the number of bytes in the
              resulting multibyte character sequence, not including the
              terminating null character (if any).

    Declared in: stdlib.h
**/
size_t
wcstombs(
  char           *Dest,
  const wchar_t  *Src,
  size_t          Limit
)
{
  const wchar_t  *Cursor;

  Cursor = Src;
  return wcsrtombs(Dest, &Cursor, Limit, NULL);
}
-\r
/** Convert a wide character to its single-byte form, if it has one.

    wctob needs to be consistent with wcrtomb: the single-byte test uses
    OneWcToMcLen(), so a character is accepted exactly when its multibyte
    form is one byte long.  The null wide character maps to 0.

    @param[in]    c     The wide character to convert.

    @return   The wctob function returns EOF if c does not correspond to a multibyte
              character with length one in the initial shift state.  Otherwise, it
              returns the single-byte representation of that character as an
              unsigned char converted to an int.

    Declared in: wchar.h
**/
int
wctob(wint_t c)
{
  if(c == 0) {
    return 0;
  }
  if(OneWcToMcLen((const wchar_t)c) != 1) {
    return EOF;             // Needs more than one byte, or is not encodable
  }
  return (int)(c & 0xFF);
}