--- /dev/null
+/** @file\r
+ Copyright (c) 2012, Intel Corporation. All rights reserved.<BR>\r
+ This program and the accompanying materials\r
+ are licensed and made available under the terms and conditions of the BSD License\r
+ which accompanies this distribution. The full text of the license may be found at\r
+ http://opensource.org/licenses/bsd-license.php\r
+\r
+ THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
+**/\r
+#include <assert.h>\r
+#include <string.h>\r
+#include <errno.h>\r
+#include <stdlib.h>\r
+#include <wchar.h>\r
+#include <sys/types.h>\r
+\r
+typedef int ch_UCS4;\r
+\r
+static mbstate_t LocalConvState = {0};\r
+\r
+/** Classify every possible byte value according to its role in a UTF-8 sequence.\r
+ See RFC 3629 for details.\r
+\r
+ TABLE ENCODING:\r
+ Lead bytes (00-7F and C2-F4):\r
+   The Low Nibble is the number of bytes in the sequence the byte starts:\r
+   one for an ASCII-7 equivalent character, or two, three, or four for the\r
+   first byte of a 2, 3, or 4 byte sequence, respectively.\r
+   The High Nibble is a bit mask that selects which second bytes may follow\r
+   this particular lead byte.\r
+\r
+ Continuation (trailing) bytes (80-BF):\r
+   The Low Nibble is zero, so these bytes are rejected as the first byte of a sequence.\r
+   The High Nibble identifies the sub-range the byte belongs to:\r
+   0x9 for 80-8F, 0xA for 90-9F, and 0xC for A0-BF.\r
+\r
+ Illegal bytes (C0, C1, and F5-FF):\r
+   The whole entry is zero.\r
+\r
+ example:\r
+ SequenceLength = code[c0] & 0x0F;\r
+ Mask = 0x80 | code[c0];\r
+\r
+ The second byte, cX, is acceptable after lead byte c0 if: (code[cX] & Mask) > 0x80;\r
+ Note the parentheses: in C, the > operator binds tighter than the & operator.\r
+\r
+*/\r
+static\r
+UINT8 utf8_code_length[256] = {\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 00-0F */\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,\r
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 70-7F */\r
+ 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, /* 80-8F */\r
+ 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, /* 90-9F */\r
+ 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, /* A0-AF */\r
+ 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, /* B0-BF */\r
+ 0x00, 0x00, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, /* C0-C1 + C2-CF */\r
+ 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, /* D0-DF */\r
+ 0x43, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x33, 0x73, 0x73, /* E0-EF */\r
+ 0x64, 0x74, 0x74, 0x74, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* F0-F4 + F5-FF */\r
+};\r
+\r
+/** Process one byte of a multibyte (UTF-8) character.\r
+\r
+  The conversion state in ps is used as follows:\r
+    - A     Index (0..3) of the byte expected next within the current sequence;\r
+            zero is the initial state.\r
+    - B     The most recently decoded code point.\r
+    - C[]   The bytes of the sequence collected so far.\r
+    - D[]   The decoded character as one UTF-16 unit, or as a high/low\r
+            surrogate pair for a four-byte sequence.\r
+    - E     While a sequence is in progress: the sequence length in the low\r
+            nibble and the mask used to validate the next byte in the high\r
+            nibble.  After an error: the number of bytes to consume.\r
+\r
+  @param[in]      ch    The byte to process.\r
+  @param[in,out]  ps    Conversion state.  Must not be NULL.\r
+\r
+  @retval -2    ch was accepted but the character is still incomplete.\r
+  @retval -1    ch is not valid at this position.  errno is set to EILSEQ and\r
+                the next byte will be treated as the first byte of a sequence.\r
+  @retval 1:4   ch completed a character; the value is the length, in bytes,\r
+                of the completed sequence.\r
+**/\r
+static\r
+int\r
+ProcessOneByte(unsigned char ch, mbstate_t *ps)\r
+{\r
+  UINT32    Mask;\r
+  UINT32    Length;\r
+  int       RetVal = 0;\r
+\r
+  if(ps->A > 3) {\r
+    // We are in an invalid state\r
+    ps->A = 0;    // Initial State\r
+  }\r
+  ps->C[ps->A] = ch;      // Save the current character\r
+  Mask = utf8_code_length[ch];\r
+\r
+  if(ps->A == 0) {    // Initial State.  First byte of sequence.\r
+    ps->E   = Mask | 0x80;\r
+    Length  = Mask & 0xF;\r
+    switch(Length) {\r
+      case 0:                       // State 0, Code 0\r
+        // Continuation byte or illegal byte where a lead byte is required\r
+        errno = EILSEQ;\r
+        RetVal = -1;\r
+        ps->E = 1;        // Consume this character\r
+        break;\r
+      case 1:                       // State 0, Code 1\r
+        // ASCII-7 Character\r
+        ps->B = ps->D[0] = ch;\r
+        RetVal = 1;\r
+        break;\r
+      default:                      // State 0, Code 2, 3, 4\r
+        ps->A = 1;        // Next state is State-1\r
+        RetVal = -2;      // Incomplete but potentially valid character\r
+        break;\r
+    }\r
+  }\r
+  else {\r
+    // We are in state 1, 2, or 3 and processing a continuation byte\r
+    Length = ps->E & 0xF;\r
+    if((Mask & ps->E) > 0x80) {\r
+      // This byte is valid\r
+      switch(ps->A) {   // Process based upon our current state\r
+        case 1:   // Second byte of the sequence.\r
+          if(Length == 2) {   // State 1, Code 2\r
+            Length = ((ps->C[0] & 0x1f) << 6) + (ps->C[1] & 0x3f);\r
+            assert ((Length > 0x007F) && (Length <= 0x07FF));\r
+            ps->B = ps->D[0] = (UINT16)Length;\r
+            ps->A = 0;      // Next state is State-0\r
+            RetVal = 2;\r
+          }\r
+          else {    // This isn't the last character, get more.  State 1, Code 3 or 4\r
+            // The lead byte's mask restricts the SECOND byte only (it rejects\r
+            // overlong forms, UTF-16 surrogates, and values above 0x10FFFF).\r
+            // Any continuation byte, 80-BF, is legal in the third and fourth\r
+            // positions, so open the mask up while keeping the length nibble.\r
+            ps->E = 0xF0 | Length;\r
+            ps->A = 2;\r
+            RetVal = -2;\r
+          }\r
+          break;\r
+        case 2:   // Third byte of the sequence\r
+          if(Length == 3) {\r
+            Length = ((ps->C[0] & 0x0f) << 12) + ((ps->C[1] & 0x3f) << 6) + (ps->C[2] & 0x3f);\r
+            assert ((Length > 0x07FF) && (Length <= 0xFFFF));\r
+            ps->B = ps->D[0] = (UINT16)Length;\r
+            ps->A = 0;      // Next state is State-0\r
+            RetVal = 3;\r
+          }\r
+          else {\r
+            ps->A = 3;\r
+            RetVal = -2;\r
+          }\r
+          break;\r
+        case 3:   // Fourth byte of the sequence\r
+          if(Length == 4) {\r
+            Length = ((ps->C[0] & 0x7) << 18) + ((ps->C[1] & 0x3f) << 12) +\r
+                     ((ps->C[2] & 0x3f) << 6) + (ps->C[3] & 0x3f);\r
+            ps->B = Length;\r
+            assert ((Length > 0xFFFF) && (Length <= 0x10ffff));\r
+\r
+            /*  compute and append the two surrogates: */\r
+\r
+            /*  translate from 10000..10FFFF to 0..FFFF */\r
+            Length -= 0x10000;\r
+\r
+            /*  high surrogate = top 10 bits added to D800 */\r
+            ps->D[0] = (UINT16)(0xD800 + (Length >> 10));\r
+\r
+            /*  low surrogate = bottom 10 bits added to DC00 */\r
+            ps->D[1] = (UINT16)(0xDC00 + (Length & 0x03FF));\r
+            ps->A = 0;      // Next state is State-0\r
+            RetVal = 4;\r
+          }\r
+          else {\r
+            errno = EILSEQ;\r
+            ps->A = 0;\r
+            RetVal = -1;\r
+            ps->E = 4;      // Can't happen, but consume this character anyway\r
+          }\r
+          break;\r
+      }\r
+    }\r
+    else {                // Invalid continuation byte\r
+      errno = EILSEQ;\r
+      ps->A = 0;          // Next is State-0\r
+      RetVal = -1;\r
+      ps->E = 0;          // Don't Consume, it may be an initial byte\r
+    }\r
+  }\r
+  return RetVal;\r
+}\r
+\r
+/** Convert one Multibyte (UTF-8) sequence into its wide-character form.\r
+\r
+  Bytes are fed to ProcessOneByte until a character is completed, an encoding\r
+  error is detected, or Len bytes have been examined.\r
+\r
+  @param[out]     Dest  Pointer to the output location, or NULL to discard the\r
+                        result.  A four-byte sequence stores TWO elements\r
+                        (a high and a low surrogate), so Dest must have room\r
+                        for two wide characters.\r
+  @param[in]      Src   Multibyte Source (UTF8), or NULL.\r
+  @param[in]      Len   Maximum number of bytes to examine.\r
+  @param[in,out]  pS    Pointer to the conversion state, or NULL to use the\r
+                        state shared by this file.\r
+\r
+  @retval -2    Bytes processed comprise an incomplete, but potentially valid, character.\r
+                Also returned when Len allows no byte to be examined.\r
+  @retval -1    An encoding error was encountered.  ps->E indicates the number of bytes consumed.\r
+  @retval  0    Either Src is NULL or it points to a NUL character.  A null wide\r
+                character is stored through Dest (when Src and Dest are not NULL)\r
+                and the conversion state is returned to the initial state.\r
+  @retval 1:N   N bytes were consumed producing a valid wide character.\r
+**/\r
+int\r
+DecodeOneStateful(\r
+  wchar_t    *Dest,       // Pointer to output location, or NULL\r
+  const char *Src,        // Multibyte Source (UTF8)\r
+  ssize_t     Len,        // Max Number of bytes to convert\r
+  mbstate_t  *pS          // Pointer to State struct., or NULL\r
+  )\r
+{\r
+  const char   *SrcEnd;\r
+  int           NumConv;\r
+\r
+  if(pS == NULL) {\r
+    pS = &LocalConvState;\r
+  }\r
+  if((Src == NULL) || (*Src == '\0')) {\r
+    // The null character converts to the null wide character, which is the\r
+    // value stored, and leaves the conversion in the initial state.\r
+    if((Src != NULL) && (Dest != NULL)) {\r
+      *Dest = L'\0';\r
+    }\r
+    pS->A = 0;\r
+    return 0;\r
+  }\r
+\r
+  // Until a byte has been processed the character is, by definition, incomplete.\r
+  // Starting from zero would wrongly report that a NUL had been found.\r
+  NumConv = -2;\r
+  if(Len > 0) {\r
+    SrcEnd = Src + Len;\r
+    while(Src < SrcEnd) {\r
+      NumConv = ProcessOneByte((unsigned char)*Src++, pS);\r
+      if(NumConv != -2) {\r
+        break;\r
+      }\r
+    }\r
+  }\r
+  if((NumConv > 0) && (Dest != NULL)) {\r
+    Dest[0] = pS->D[0];\r
+    if(NumConv == 4) {\r
+      Dest[1] = pS->D[1];   // Low surrogate of the pair\r
+    }\r
+  }\r
+  return NumConv;\r
+}\r
+\r
+/** Convert wide characters (UTF16) into multibyte characters (UTF8)\r
+\r
+ @param s Pointer to the wide-character string to convert\r
+ @param size Number of wide characters in s. size <= wcslen(s);\r
+\r
+ @return A newly allocated buffer containing the converted string is returned,\r
+ or NULL if an error occurred. Global variable errno contains more\r
+ information if NULL is returned.\r
+**/\r
+ssize_t\r
+EncodeUtf8(char *Dest, wchar_t *s, ssize_t size)\r
+{\r
+ char *p; /* next free byte in build buffer */\r
+ char *v; /* next free byte in destination */\r
+ ssize_t nneeded; /* number of result bytes needed */\r
+ int i; /* index into s of next input byte */\r
+ int NumInBuff; // number of bytes in Buff\r
+ char Buff[4]; // Buffer into which each character is built\r
+\r
+ assert(s != NULL);\r
+ assert(size >= 0);\r
+\r
+ v = Dest;\r
+ nneeded = 0;\r
+ if((size * MB_LEN_MAX) / MB_LEN_MAX != size) {\r
+ // size is too large and resulted in overflow when multiplied by MB_LEN_MAX\r
+ errno = EINVAL;\r
+ return (ssize_t)-1;\r
+ }\r
+\r
+ for (i = 0; i < size;) {\r
+ ch_UCS4 ch = s[i++];\r
+ p = Buff;\r
+\r
+ if (ch < 0x80) {\r
+ /* Encode ASCII -- One Byte */\r
+ *p++ = (char) ch;\r
+ }\r
+ else if (ch < 0x0800) {\r
+ /* Encode Latin-1 -- Two Byte */\r
+ *p++ = (char)(0xc0 | (ch >> 6));\r
+ *p++ = (char)(0x80 | (ch & 0x3f));\r
+ }\r
+ else {\r
+ /* Encode UCS2 Unicode ordinals -- Three Byte */\r
+ /* Special case: check for high surrogate -- Shouldn't happen in UEFI */\r
+ if (0xD800 <= ch && ch <= 0xDBFF && i < size) {\r
+ ch_UCS4 ch2 = s[i];\r
+ /* Check for low surrogate and combine the two to\r
+ form a UCS4 value */\r
+ if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {\r
+ ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;\r
+ i++;\r
+ /* Encode UCS4 Unicode ordinals -- Four Byte */\r
+ *p++ = (char)(0xf0 | (ch >> 18));\r
+ *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));\r
+ *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));\r
+ *p++ = (char)(0x80 | (ch & 0x3f));\r
+ continue;\r
+ }\r
+ /* Fall through: handles isolated high surrogates */\r
+ }\r
+ *p++ = (char)(0xe0 | (ch >> 12));\r
+ *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));\r
+ *p++ = (char)(0x80 | (ch & 0x3f));\r
+ }\r
+ /* At this point, Buff holds the converted character which is NumInBuff bytes long.\r
+ NumInBuff is the value 1, 2, 3, or 4\r
+ */\r
+ NumInBuff = (int)(p - Buff); // Number of bytes in Buff\r
+ if(Dest != NULL) { // Save character if Dest is not NULL\r
+ memcpy(v, Buff, NumInBuff);\r
+ v += NumInBuff;\r
+ }\r
+ nneeded += NumInBuff; // Keep track of the number of bytes put into Dest\r
+ }\r
+ if(Dest != NULL) {\r
+ // Terminate the destination string.\r
+ *v = '\0';\r
+ }\r
+ return nneeded; // Tell the caller\r
+}\r
+\r
+// ######################## Narrow to Wide Conversions #######################\r
+\r
+/** Report whether a conversion state object describes the initial conversion state.\r
+\r
+  A state is initial when no multibyte sequence is in progress, which this\r
+  implementation records as a zero in member A.\r
+\r
+  @param[in]  ps    Pointer to the conversion state to examine, or NULL.\r
+\r
+  @return   Nonzero if ps is a null pointer or if the pointed-to object\r
+            describes an initial conversion state; otherwise, zero.\r
+\r
+  Declared in: wchar.h\r
+**/\r
+int\r
+mbsinit(const mbstate_t *ps)\r
+{\r
+  return ((ps != NULL) && (ps->A != 0)) ? 0 : 1;\r
+}\r
+\r
+/** The mbrlen function is equivalent to the call:<BR>\r
+@verbatim\r
+  mbrtowc(NULL, s, n, ps != NULL ? ps : &internal)\r
+@endverbatim\r
+  where internal is the mbstate_t object for the mbrlen function, except that\r
+  the expression designated by ps is evaluated only once.\r
+\r
+  @param[in]      s     Pointer to the multibyte character to measure, or NULL.\r
+  @param[in]      n     Maximum number of bytes to examine.\r
+  @param[in,out]  ps    Conversion state, or NULL to use the state that is\r
+                        private to mbrlen.\r
+\r
+  @return   The mbrlen function returns a value between zero and n,\r
+            inclusive, (size_t)(-2), or (size_t)(-1).\r
+\r
+  Declared in: wchar.h\r
+**/\r
+size_t\r
+mbrlen(\r
+  const char *s,\r
+  size_t n,\r
+  mbstate_t *ps\r
+  )\r
+{\r
+  // mbrlen owns its internal state.  Passing NULL through to mbrtowc would\r
+  // instead share, and disturb, the state that belongs to mbrtowc.\r
+  static mbstate_t  MbrlenState = {0};\r
+\r
+  return mbrtowc(NULL, s, n, (ps != NULL) ? ps : &MbrlenState);\r
+}\r
+\r
+/** Determine the number of bytes comprising a multibyte character.\r
+\r
+  If S is not a null pointer, the mblen function determines the number of bytes\r
+  contained in the multibyte character pointed to by S.  Except that the\r
+  conversion state of the mbtowc function is not affected, it is equivalent to\r
+  mbtowc((wchar_t *)0, S, N);\r
+\r
+  @param[in]  s   NULL to query whether multibyte characters have\r
+                  state-dependent encodings.  Otherwise, points to a\r
+                  multibyte character.\r
+  @param[in]  n   The maximum number of bytes in a multibyte character.\r
+\r
+  @return   If S is a null pointer, the mblen function returns a nonzero or\r
+            zero value, if multibyte character encodings, respectively, do\r
+            or do not have state-dependent encodings.  If S is not a null\r
+            pointer, the mblen function either returns 0 (if S points to the\r
+            null character), or returns the number of bytes that are contained\r
+            in the multibyte character (if the next N or fewer bytes form a\r
+            valid multibyte character), or returns -1 (if they do not form a\r
+            valid multibyte character).\r
+\r
+  Declared in: stdlib.h\r
+**/\r
+int\r
+mblen(\r
+  const char *s,\r
+  size_t n\r
+  )\r
+{\r
+  int   Length;\r
+\r
+  Length = (int)mbrlen(s, n, NULL);\r
+\r
+  // mbrlen distinguishes an incomplete character (-2) from an encoding\r
+  // error (-1).  mblen reports both as -1, the only failure value it defines.\r
+  return (Length < 0) ? -1 : Length;\r
+}\r
+\r
+/** Convert one multibyte character into a wide character, restartably.\r
+\r
+If S is a null pointer, the mbrtowc function is equivalent to the call:<BR>\r
+@verbatim\r
+        mbrtowc(NULL, "", 1, ps)\r
+@endverbatim\r
+\r
+In this case, the values of the parameters pwc and n are ignored.\r
+\r
+If S is not a null pointer, the mbrtowc function inspects at most n bytes beginning with\r
+the byte pointed to by S to determine the number of bytes needed to complete the next\r
+multibyte character (including any shift sequences). If the function determines that the\r
+next multibyte character is complete and valid, it determines the value of the\r
+corresponding wide character and then, if pwc is not a null pointer, stores that value in\r
+the object pointed to by pwc. If the corresponding wide character is the null wide\r
+character, the resulting state described is the initial conversion state.\r
+\r
+NOTE: the conversion is performed by DecodeOneStateful, which stores a four-byte\r
+sequence as TWO wide characters (a surrogate pair) in pwc[0] and pwc[1].\r
+\r
+  @param[out]     pwc   Location to receive the wide character, or NULL.\r
+  @param[in]      s     Pointer to the multibyte character to convert, or NULL.\r
+  @param[in]      n     Maximum number of bytes to examine.\r
+  @param[in,out]  ps    Conversion state, or NULL to use the state shared by this file.\r
+\r
+  @retval   0                         if the next n or fewer bytes complete the multibyte\r
+                                      character that corresponds to the null wide\r
+                                      character (which is the value stored).\r
+  @retval   between_1_and_n_inclusive if the next n or fewer bytes complete\r
+                                      a valid multibyte character (which is the value\r
+                                      stored); the value returned is the number of bytes\r
+                                      that complete the multibyte character.\r
+  @retval   (size_t)(-2)              if the next n bytes contribute to an incomplete\r
+                                      (but potentially valid) multibyte character, and\r
+                                      all n bytes have been processed (no value is stored).\r
+  @retval   (size_t)(-1)              if an encoding error occurs, in which case the next\r
+                                      n or fewer bytes do not contribute to a complete and\r
+                                      valid multibyte character (no value is stored); the\r
+                                      value of the macro EILSEQ is stored in errno, and\r
+                                      the conversion state is unspecified.\r
+\r
+  Declared in: wchar.h\r
+**/\r
+size_t\r
+mbrtowc(\r
+  wchar_t *pwc,\r
+  const char *s,\r
+  size_t n,\r
+  mbstate_t *ps\r
+  )\r
+{\r
+  ssize_t   MaxBytes;\r
+  int       RetVal;\r
+\r
+  // A UTF-8 sequence is never longer than four bytes, so the decoder always\r
+  // reaches a verdict within four bytes.  Clamping n keeps a very large n,\r
+  // such as SIZE_MAX, from turning negative when converted to ssize_t.\r
+  MaxBytes = (n > 4) ? (ssize_t)4 : (ssize_t)n;\r
+\r
+  RetVal = DecodeOneStateful(pwc, s, MaxBytes, ps);\r
+  return (size_t)RetVal;\r
+}\r
+\r
+/** Convert a multibyte character into a wide character.\r
+\r
+  If S is not a null pointer, the mbtowc function inspects at most N bytes\r
+  beginning with the byte pointed to by S to determine the number of bytes\r
+  needed to complete the next multibyte character (including any shift\r
+  sequences). If the function determines that the next multibyte character\r
+  is complete and valid, it determines the value of the corresponding wide\r
+  character and then, if Pwc is not a null pointer, stores that value in\r
+  the object pointed to by Pwc. If the corresponding wide character is the\r
+  null wide character, the function is left in the initial conversion state.\r
+\r
+  @param[out]   pwc   Pointer to a wide-character object to receive the converted character.\r
+  @param[in]    s     Pointer to a multibyte character to convert.\r
+  @param[in]    n     Maximum number of bytes in a multibyte character.\r
+\r
+  @return   If S is a null pointer, the mbtowc function returns a nonzero or\r
+            zero value, if multibyte character encodings, respectively, do\r
+            or do not have state-dependent encodings. If S is not a null\r
+            pointer, the mbtowc function either returns 0 (if S points to\r
+            the null character), or returns the number of bytes that are\r
+            contained in the converted multibyte character (if the next N or\r
+            fewer bytes form a valid multibyte character), or returns -1\r
+            (if they do not form a valid multibyte character).\r
+\r
+            In no case will the value returned be greater than N or the value\r
+            of the MB_CUR_MAX macro.\r
+\r
+  Declared in: stdlib.h\r
+**/\r
+int\r
+mbtowc(\r
+  wchar_t *pwc,\r
+  const char *s,\r
+  size_t n\r
+  )\r
+{\r
+  int   Length;\r
+\r
+  Length = (int)mbrtowc(pwc, s, n, NULL);\r
+\r
+  // mbrtowc distinguishes an incomplete character (-2) from an encoding\r
+  // error (-1).  mbtowc reports both as -1, the only failure value it defines.\r
+  return (Length < 0) ? -1 : Length;\r
+}\r
+\r
+/** Convert a multibyte character string into a wide-character string, restartably.\r
+\r
+The mbsrtowcs function converts a sequence of multibyte characters that begins in the\r
+conversion state described by the object pointed to by ps, from the array indirectly\r
+pointed to by src into a sequence of corresponding wide characters. If dst is not a null\r
+pointer, the converted characters are stored into the array pointed to by dst. Conversion\r
+continues up to and including a terminating null character, which is also stored.\r
+Conversion stops earlier in two cases: when a sequence of bytes is encountered that does\r
+not form a valid multibyte character, or (if dst is not a null pointer) when len wide\r
+characters have been stored into the array pointed to by dst.\r
+\r
+A four-byte multibyte character is converted into a surrogate pair, which occupies\r
+two elements of dst.  A pair is never split: if only one element remains, conversion\r
+stops before the pair.\r
+\r
+If dst is not a null pointer, the pointer object pointed to by src is assigned either a null\r
+pointer (if conversion stopped due to reaching a terminating null character) or the address\r
+just past the last multibyte character converted (if any).  If dst is a null pointer,\r
+len is ignored and the object pointed to by src is not modified.\r
+\r
+  @param[out]     dst   Array to receive the wide characters, or NULL to only count them.\r
+  @param[in,out]  src   Address of the pointer to the multibyte string to convert.\r
+  @param[in]      len   Maximum number of elements to store into dst.\r
+  @param[in,out]  ps    Conversion state, or NULL to use the state shared by this file.\r
+\r
+  @return   If the input conversion encounters a sequence of bytes that do\r
+            not form a valid multibyte character, an encoding error occurs:\r
+            the mbsrtowcs function stores the value of the macro EILSEQ in\r
+            errno and returns (size_t)(-1); the conversion state is\r
+            unspecified.  Otherwise, it returns the number of wide characters\r
+            produced, not including the terminating null character (if any).\r
+\r
+  Declared in: wchar.h\r
+**/\r
+size_t\r
+mbsrtowcs(\r
+  wchar_t      *dst,\r
+  const char  **src,\r
+  size_t        len,\r
+  mbstate_t    *ps\r
+  )\r
+{\r
+  const char   *MySrc;\r
+  wchar_t       Wide[2];    // One decoded character: one unit or a surrogate pair\r
+  size_t        Stored;     // Number of wide characters produced so far\r
+  size_t        Units;      // Number of wide characters in the current character\r
+  int           x;\r
+\r
+  if((src == NULL) || (*src == NULL)) {\r
+    return 0;\r
+  }\r
+\r
+  MySrc  = *src;\r
+  Stored = 0;\r
+  while((dst == NULL) || (Stored < len)) {\r
+    // Decode into a local buffer so that dst is written only after it is\r
+    // known that the whole character fits.\r
+    x = DecodeOneStateful(Wide, MySrc, MB_LEN_MAX, ps);\r
+    if(x < 0) {\r
+      // Encoding error.  A sequence that is still incomplete after\r
+      // MB_LEN_MAX bytes can never become valid, so it is an error as well.\r
+      errno = EILSEQ;\r
+      return (size_t)(-1);\r
+    }\r
+    if(x == 0) {      // Encountered NUL character: done.\r
+      if(dst != NULL) {\r
+        dst[Stored] = 0;\r
+        *src = NULL;\r
+      }\r
+      return Stored;\r
+    }\r
+\r
+    // Successfully decoded a character\r
+    Units = (x == 4) ? 2 : 1;\r
+    if(dst != NULL) {\r
+      if(Units > (len - Stored)) {\r
+        break;        // The surrogate pair does not fit; leave it unconverted.\r
+      }\r
+      dst[Stored] = Wide[0];\r
+      if(Units == 2) {\r
+        dst[Stored + 1] = Wide[1];\r
+      }\r
+    }\r
+    Stored += Units;\r
+    MySrc  += x;\r
+    if(dst != NULL) {\r
+      *src = MySrc;\r
+    }\r
+  }\r
+  return Stored;\r
+}\r
+\r
+/** Convert a multibyte character string into a wide-character string.\r
+\r
+  The mbstowcs function converts a sequence of multibyte characters that\r
+  begins in the initial shift state from the array pointed to by Src into\r
+  a sequence of corresponding wide characters and stores not more than limit\r
+  wide characters into the array pointed to by Dest.  No multibyte\r
+  characters that follow a null character (which is converted into a null\r
+  wide character) will be examined or converted.  Each multibyte character\r
+  is converted as if by a call to the mbtowc function, except that the\r
+  conversion state of the mbtowc function is not affected.\r
+\r
+  No more than Limit elements will be modified in the array pointed to by Dest.\r
+  If copying takes place between objects that overlap,\r
+  the behavior is undefined.\r
+\r
+  @param[out]   pwcs  Pointer to the array to receive the converted string.  May be NULL.\r
+  @param[in]    s     Pointer to the string to be converted.  May be NULL.\r
+  @param[in]    n     Maximum number of elements to be written to pwcs.\r
+\r
+  @return   If an invalid multibyte character is encountered, the mbstowcs\r
+            function returns (size_t)(-1). Otherwise, the mbstowcs function\r
+            returns the number of array elements modified, not including a\r
+            terminating null wide character, if any.\r
+\r
+  Declared in: stdlib.h\r
+**/\r
+size_t\r
+mbstowcs(\r
+  wchar_t *pwcs,\r
+  const char *s,\r
+  size_t n\r
+  )\r
+{\r
+  mbstate_t   State;\r
+\r
+  // Use a private state that starts out as the initial state (all zero).\r
+  // Passing NULL would pick up whatever an earlier, possibly unfinished,\r
+  // conversion left in the shared state, and would disturb it in turn.\r
+  memset(&State, 0, sizeof(State));\r
+\r
+  return mbsrtowcs(pwcs, &s, n, &State);\r
+}\r
+\r
+/** The btowc function determines whether C constitutes a valid single-byte\r
+    character in the initial shift state.\r
+\r
+  @param[in]  c   The byte to convert, as an unsigned char converted to int, or EOF.\r
+\r
+  @return   The btowc function returns WEOF if c has the value EOF or if\r
+            (unsigned char)C does not constitute a valid single-byte\r
+            character in the initial shift state. Otherwise, it returns the\r
+            wide character representation of that character.\r
+\r
+  Declared in: wchar.h\r
+**/\r
+wint_t\r
+btowc(int c)\r
+{\r
+  mbstate_t   State;\r
+  wchar_t     Dest;\r
+  char        Byte;\r
+  int         x;\r
+\r
+  if (c == EOF) {\r
+    return WEOF;\r
+  }\r
+\r
+  // Copy the byte into a char object.  Reading it through a pointer to the\r
+  // int would pick up the wrong byte on a big-endian machine.\r
+  Byte = (char)(unsigned char)c;\r
+  Dest = 0;\r
+\r
+  // Convert from the initial state, using a private state so that a lead or\r
+  // continuation byte does not leave the shared state in mid-sequence.\r
+  memset(&State, 0, sizeof(State));\r
+\r
+  x = DecodeOneStateful(&Dest, &Byte, 1, &State);\r
+  if(x == 0) {\r
+    return 0;\r
+  }\r
+  if(x == 1) {\r
+    return (wint_t)Dest;\r
+  }\r
+  return WEOF;\r
+}\r
+\r
+// ######################## Wide to Narrow Conversions #######################\r
+\r
+/** Convert one wide character into its multibyte (UTF-8) representation.\r
+\r
+If S is a null pointer, the wcrtomb function is equivalent to the call:<BR>\r
+@verbatim\r
+        wcrtomb(buf, L'\0', ps)\r
+@endverbatim\r
+where buf is an internal buffer.\r
+\r
+If S is not a null pointer, the multibyte representation of wchar is stored in the\r
+array whose first element is pointed to by S.  A null wide character stores a single\r
+null byte.  Any other character is converted by EncodeUtf8, which also writes a\r
+terminating null byte after the converted bytes.\r
+\r
+  @param[out]     s       Array to receive the multibyte character, or NULL.\r
+  @param[in]      wchar   The wide character to convert.\r
+  @param[in,out]  ps      Conversion state.  Not used by this implementation.\r
+\r
+  @return   The number of bytes stored in the array object, not counting the\r
+            terminating null byte written after a non-null character.\r
+            One is returned if S is a null pointer or wchar is the null wide character.\r
+\r
+  Declared in: wchar.h\r
+**/\r
+size_t\r
+wcrtomb(\r
+  char *s,\r
+  wchar_t wchar,\r
+  mbstate_t *ps\r
+  )\r
+{\r
+  /* s may be NULL: behave as if a null wide character had been converted */\r
+  if (s == NULL) {\r
+    return 1;\r
+  }\r
+\r
+  if (wchar == L'\0') {\r
+    *s = '\0';\r
+    return 1;\r
+  }\r
+\r
+  return (size_t)EncodeUtf8(s, &wchar, 1);\r
+}\r
+\r
+/** Convert a wide character into a multibyte character.\r
+\r
+  The wctomb function determines the number of bytes needed to represent the\r
+  multibyte character corresponding to the wide character given by wchar,\r
+  and stores the multibyte character representation in the array whose first\r
+  element is pointed to by s (if s is not a null pointer).  The conversion\r
+  itself is delegated to wcrtomb.\r
+\r
+  @param[out]   s       Pointer to the object to receive the converted multibyte character.\r
+  @param[in]    wchar   Wide character to be converted.\r
+\r
+  @return   If s is a null pointer, zero is returned: multibyte character\r
+            encodings do not have state-dependent encodings.  If s is not a\r
+            null pointer, the value returned by wcrtomb, converted to int,\r
+            is returned.\r
+\r
+  Declared in: stdlib.h\r
+**/\r
+int\r
+wctomb(\r
+  char *s,\r
+  wchar_t wchar\r
+  )\r
+{\r
+  /* A NULL s is a query about state-dependent encodings; there are none. */\r
+  return (s != NULL) ? (int)wcrtomb(s, wchar, NULL) : 0;\r
+}\r
+\r
+/** The wcsrtombs function converts a sequence of wide characters from the array\r
+    indirectly pointed to by pwcs into a sequence of corresponding multibyte\r
+    (UTF-8) characters.\r
+\r
+    If s is not a null pointer, the converted characters are stored into the\r
+    array pointed to by s.  Conversion continues up to and including a\r
+    terminating null wide character, which is also stored if room remains.\r
+    Conversion stops earlier when the next multibyte character would exceed\r
+    the limit of n total bytes to be stored into the array pointed to by s.\r
+    A multibyte character is never stored partially.\r
+\r
+    A high surrogate immediately followed by a low surrogate is converted as\r
+    one character.  Each character is converted by EncodeUtf8.\r
+\r
+    If s is not a null pointer, the pointer object pointed to by pwcs is\r
+    assigned either a null pointer (if conversion stopped due to reaching\r
+    a terminating null wide character) or the address just past the last wide\r
+    character converted (if any).  If s is a null pointer, n is ignored, the\r
+    object pointed to by pwcs is not modified, and only the required number\r
+    of bytes is computed.\r
+\r
+  @param[out]     s     Array to receive the multibyte string, or NULL.\r
+  @param[in,out]  pwcs  Address of the pointer to the wide-character string.\r
+  @param[in]      n     Maximum number of bytes to store into s.\r
+  @param[in,out]  ps    Conversion state.  Not used by this implementation.\r
+\r
+  @return   The number of bytes in the resulting multibyte character\r
+            sequence, not including the terminating null character (if any),\r
+            or (size_t)(-1) if a character could not be converted.\r
+\r
+  Declared in: wchar.h\r
+**/\r
+size_t\r
+wcsrtombs(\r
+  char           *s,\r
+  const wchar_t **pwcs,\r
+  size_t          n,\r
+  mbstate_t      *ps\r
+)\r
+{\r
+  const wchar_t  *Src;        // Next wide character to convert\r
+  size_t          Count;      // Number of bytes produced so far\r
+  ssize_t         NumBytes;   // Number of bytes in the current character\r
+  int             Units;      // Number of wide characters in the current character\r
+  char            Buff[8];    // One converted character plus EncodeUtf8's NUL\r
+\r
+  /* s may be NULL */\r
+  /* pwcs may be NULL */\r
+  /* ps appears to be unused */\r
+\r
+  if (pwcs == NULL || *pwcs == NULL)\r
+    return (0);\r
+\r
+  Src   = *pwcs;\r
+  Count = 0;\r
+  while (*Src != 0) {\r
+    /* A surrogate pair is two wide characters but a single multibyte character. */\r
+    Units = 1;\r
+    if ((0xD800 <= Src[0]) && (Src[0] <= 0xDBFF) &&\r
+        (0xDC00 <= Src[1]) && (Src[1] <= 0xDFFF)) {\r
+      Units = 2;\r
+    }\r
+\r
+    /* Build the character in a local buffer: EncodeUtf8 appends a NUL, and\r
+       nothing may be written to s until the character is known to fit. */\r
+    NumBytes = EncodeUtf8(Buff, (wchar_t *)Src, Units);\r
+    if (NumBytes < 0) {\r
+      return (size_t)(-1);\r
+    }\r
+\r
+    if (s != NULL) {\r
+      if ((size_t)NumBytes > (n - Count)) {\r
+        /* Out of room.  Report where conversion stopped. */\r
+        *pwcs = Src;\r
+        return Count;\r
+      }\r
+      memcpy(s + Count, Buff, (size_t)NumBytes);\r
+    }\r
+    Count += (size_t)NumBytes;\r
+    Src   += Units;\r
+  }\r
+\r
+  /* Reached the terminating null wide character. */\r
+  if (s != NULL) {\r
+    if (Count < n) {\r
+      s[Count] = '\0';\r
+      *pwcs = NULL;\r
+    }\r
+    else {\r
+      /* No room for the terminator; it remains to be converted. */\r
+      *pwcs = Src;\r
+    }\r
+  }\r
+  return Count;\r
+}\r
+\r
+/** Convert a wide-character string into a multibyte character string.\r
+\r
+  This is the non-restartable form of wcsrtombs: the whole conversion is\r
+  delegated to wcsrtombs, with a null conversion state pointer.  The caller's\r
+  source pointer is passed by value, so it is never modified.\r
+\r
+  @param[out]   s     Pointer to the array to receive the converted string.  May be NULL.\r
+  @param[in]    pwcs  Pointer to the string to be converted.\r
+  @param[in]    n     Maximum number of bytes to be written to s.\r
+\r
+  @return   The value returned by wcsrtombs.\r
+\r
+  Declared in: stdlib.h\r
+**/\r
+size_t\r
+wcstombs(\r
+  char           *s,\r
+  const wchar_t  *pwcs,\r
+  size_t          n\r
+)\r
+{\r
+  const wchar_t  *Cursor;   // Scratch copy that wcsrtombs is free to update\r
+  size_t          Result;\r
+\r
+  /* s may be NULL */\r
+  Cursor = pwcs;\r
+  Result = wcsrtombs(s, &Cursor, n, NULL);\r
+  return Result;\r
+}\r
+\r
+/** The wctob function determines whether C corresponds to a member of the extended\r
+    character set whose multibyte character representation is a single byte when in the initial\r
+    shift state.\r
+\r
+  @param[in]  c   The wide character to convert, or WEOF.\r
+\r
+  @return   The wctob function returns EOF if C does not correspond to a multibyte\r
+            character with length one in the initial shift state. Otherwise, it\r
+            returns the single-byte representation of that character as an\r
+            unsigned char converted to an int.\r
+\r
+  Declared in: wchar.h\r
+**/\r
+int\r
+wctob(wint_t c)\r
+{\r
+  /*  wctob needs to be consistent with wcrtomb.\r
+      In UTF-8 only the values 0x00 through 0x7F are represented by a single\r
+      byte; every other value needs two or more bytes, or is not a character.\r
+  */\r
+  if (c == WEOF) {\r
+    return EOF;\r
+  }\r
+  if ((c & ~(wint_t)0x7F) != 0) {\r
+    return EOF;     // Not representable as a single byte\r
+  }\r
+  return (int)c;\r
+}\r