StdLib: Fix several problems where characters were not being correctly converted...

author darylm503 <darylm503@6f19259b-4bc3-4df7-8a09-765794883524>

Fri, 5 Oct 2012 22:00:43 +0000 (22:00 +0000)

committer darylm503 <darylm503@6f19259b-4bc3-4df7-8a09-765794883524>

Fri, 5 Oct 2012 22:00:43 +0000 (22:00 +0000)
author darylm503 <darylm503@6f19259b-4bc3-4df7-8a09-765794883524>
Fri, 5 Oct 2012 22:00:43 +0000 (22:00 +0000)
committer darylm503 <darylm503@6f19259b-4bc3-4df7-8a09-765794883524>
Fri, 5 Oct 2012 22:00:43 +0000 (22:00 +0000)
diff --git a/StdLib/LibC/Locale/multibyte_Utf8.c b/StdLib/LibC/Locale/multibyte_Utf8.c

index 3f29f2942af45debdfa33d821e732b4fc7e8264d..36e2cb379e72b4ed8e5f60b70e9f8c1b0ba9ed47 100644 (file)
--- a/StdLib/LibC/Locale/multibyte_Utf8.c
+++ b/StdLib/LibC/Locale/multibyte_Utf8.c
@@ -15,9 +15,9 @@
  #include  <wchar.h>\r
  #include  <sys/types.h>\r
  \r
  #include  <wchar.h>\r
  #include  <sys/types.h>\r
  \r
-typedef      int  ch_UCS4;\r
+typedef      int      ch_UCS4;\r
  \r
  \r
-static  mbstate_t         LocalConvState = {0};\r
+static  mbstate_t     LocalConvState = {0};\r
  \r
  /** Map a UTF-8 encoded prefix byte to a sequence length.\r
      Zero means illegal prefix, but valid surrogate if < 0xC0.\r
  \r
  /** Map a UTF-8 encoded prefix byte to a sequence length.\r
      Zero means illegal prefix, but valid surrogate if < 0xC0.\r
@@ -59,12 +59,12 @@ UINT8 utf8_code_length[256] = {
  \r
  /** Process one byte of a multibyte character.\r
  \r
  \r
  /** Process one byte of a multibyte character.\r
  \r
-    @param  ch\r
-    @param  ps\r
+    @param[in]      ch    One byte of a multibyte character.\r
+    @param[in,out]  ps    Pointer to a conversion state object.\r
  \r
  \r
-    @retval   -2\r
-    @retval   -1\r
-    @retval   1:4\r
+    @retval   -2      ch is an incomplete but potentially valid character.\r
+    @retval   -1      ch is not valid in this context.\r
+    @retval   1:4     The length, in bytes, of the character ch just completed.\r
  **/\r
  static\r
  int\r
  **/\r
  static\r
  int\r
@@ -174,10 +174,10 @@ ProcessOneByte(unsigned char ch, mbstate_t *ps)
  \r
  /** Convert one Multibyte sequence.\r
  \r
  \r
  /** Convert one Multibyte sequence.\r
  \r
-    @param  Dest\r
-    @param  Src\r
-    @param  Len\r
-    @param  pS\r
+    @param[out]   Dest      Pointer to output location, or NULL\r
+    @param[in]    Src       Multibyte Source (UTF8)\r
+    @param[in]    Len       Max Number of bytes to convert\r
+    @param[in]    pS        Pointer to State struct., or NULL\r
  \r
      @retval   -2      Bytes processed comprise an incomplete, but potentially valid, character.\r
      @retval   -1      An encoding error was encountered.  ps->E indicates the number of bytes consumed.\r
  \r
      @retval   -2      Bytes processed comprise an incomplete, but potentially valid, character.\r
      @retval   -1      An encoding error was encountered.  ps->E indicates the number of bytes consumed.\r
@@ -219,87 +219,212 @@ DecodeOneStateful(
    return NumConv;\r
  }\r
  \r
    return NumConv;\r
  }\r
  \r
-/** Convert wide characters (UTF16) into multibyte characters (UTF8)\r
+/*  Determine the number of bytes needed to represent a Wide character\r
+    as a MBCS character.\r
+\r
+    A single wide character may convert into a one, two, three, or four byte\r
+    narrow (MBCS or UTF-8) character.  The number of MBCS bytes can be determined\r
+    as follows.\r
+\r
+    If WCS char      < 0x00000080      One Byte\r
+    Else if WCS char < 0x00000800      Two Bytes\r
+    Else                               Three Bytes\r
+\r
+    Since UEFI only supports the Unicode Base Multilingual Plane (BMP),\r
+    Four-byte characters are not supported.\r
+\r
+    @param[in]    InCh      Wide character to test.\r
+\r
+    @retval     -1      Improperly formed character\r
+    @retval      0      InCh is 0x0000\r
+    @retval     >0      Number of bytes needed for the MBCS character\r
+*/\r
+int\r
+EFIAPI\r
+OneWcToMcLen(const wchar_t InCh)\r
+{\r
+  ssize_t   NumBytes;\r
+\r
+  if(InCh == 0) {             //    Is this a NUL, 0x0000 ?\r
+    NumBytes = 0;\r
+  }\r
+  else if(InCh < 0x0080) {    //    Is this a 1-byte character?\r
+    NumBytes = 1;\r
+  }\r
+  else if(InCh < 0x0800) {    //    Is this a 2-byte character?\r
+    NumBytes = 2;\r
+  }\r
+  else if((InCh >= 0xD800) && (InCh < 0xE000)) {    //    Is this a surrogate?\r
+    NumBytes = -1;\r
+  }\r
+  else {\r
+    NumBytes = 3;             //    Otherwise, it must be a 3-byte character.\r
+  }\r
+  return (int)NumBytes;      // Return estimate of required bytes.\r
+}\r
+\r
+/*  Determine the number of bytes needed to represent a Wide character string\r
+    as a MBCS string of given maximum length.  Will optionally return the number\r
+    of wide characters that would be consumed.\r
+\r
+    A single wide character may convert into a one, two, three, or four byte\r
+    narrow (MBCS or UTF-8) character.  The number of MBCS bytes can be determined\r
+    as follows.\r
+\r
+    If WCS char      < 0x00000080      One Byte\r
+    Else if WCS char < 0x00000800      Two Bytes\r
+    Else if WCS char < 0x00010000      Three Bytes\r
+    Else                               Four Bytes\r
+\r
+    Since UEFI only supports the Unicode Base Multilingual Plane (BMP),\r
+    Four-byte characters should not be encountered.\r
+\r
+    @param[in]    Src       Pointer to a wide character string.\r
+    @param[in]    Limit     Maximum number of bytes the converted string may occupy.\r
+    @param[out]   NumChar   Pointer to where to store the number of wide characters, or NULL.\r
+\r
+    @return     The number of bytes required to convert Src to MBCS,\r
+                not including the terminating NUL.  If NumChar is not NULL, the number\r
+                of characters represented by the return value will be written to\r
+                where it points.\r
+*/\r
+size_t\r
+EFIAPI\r
+EstimateWtoM(const wchar_t * Src, size_t Limit, size_t *NumChar)\r
+{\r
+  ssize_t    Estimate;\r
+  size_t    CharCount;\r
+  ssize_t   NumBytes;\r
+  wchar_t   EChar;\r
+\r
+  Estimate  = 0;\r
+  CharCount = 0;\r
+  EChar = *Src++;               // Get the initial character and point to next\r
+  while(((NumBytes = OneWcToMcLen(EChar)) > 0)  &&\r
+        ((size_t)(Estimate + NumBytes) < Limit))\r
+  {                             // Until one of the source characters is NUL\r
+    ++CharCount;                //    Count this character.\r
+    Estimate += NumBytes;       //    Count the Bytes for this character\r
+    EChar = *Src++;             //    Get the next source character and point to the next.\r
+  }\r
+  if(NumChar != NULL) {\r
+    *NumChar = CharCount;\r
+  }\r
+  return (size_t)Estimate;      // Return estimate of required bytes.\r
+}\r
+\r
+/*  Determine the number of characters in a MBCS string.\r
+    MBCS characters are one to four bytes long.  By examining the first byte\r
+    of a MBCS character, one can determine the number of bytes comprising the\r
+    character.\r
+\r
+    0x00 - 0x7F     One\r
+    0xC0 - 0xDF     Two\r
+    0xE0 - 0xEF     Three\r
+    0xF0 - 0xF7     Four\r
+\r
+    Since UEFI only supports the Unicode Base Multilingual Plane (BMP),\r
+    Four-byte characters should not be encountered.\r
+\r
+    @param[in]    Src     The string to examine\r
+\r
+    @return   The number of characters represented by the MBCS string.\r
+**/\r
+size_t\r
+EFIAPI\r
+CountMbcsChars(const char *Src)\r
+{\r
+  size_t      Count;\r
+  char        EChar;\r
+\r
+  Count = 0;\r
+  EChar = *Src++;\r
+  while(EChar != 0) {\r
+    if(EChar < 0x80) {\r
+      ++Count;\r
+    }\r
+    else if(EChar < 0xE0) {\r
+      Count += 2;\r
+      ++Src;\r
+    }\r
+    else if(EChar < 0xF0) {\r
+      Count += 3;\r
+      Src += 2;\r
+    }\r
+    else {\r
+      // Ill-formed character\r
+      break;\r
+    }\r
+  }\r
+  return Count;\r
+}\r
+\r
+/** Convert a wide character (UTF16) into a multibyte character (UTF8)\r
+\r
+    Converts a wide character into a corresponding multibyte character that\r
+    begins in the conversion state described by the object pointed to by ps.\r
+    If dst is not a null pointer, the converted character is then stored into\r
+    the array pointed to by dst.\r
+\r
+    It is the caller's responsibility to ensure that Dest is large enough to\r
+    hold the resulting MBCS sequence.\r
  \r
      @param  s       Pointer to the wide-character string to convert\r
  \r
      @param  s       Pointer to the wide-character string to convert\r
-    @param  size    Number of wide characters in s.  size <= wcslen(s);\r
+    @param  Dest    Pointer to the buffer in which to place the converted sequence, or NULL.\r
  \r
  \r
-    @return A newly allocated buffer containing the converted string is returned,\r
-            or NULL if an error occurred.  Global variable errno contains more\r
-            information if NULL is returned.\r
+    @retval   -1    An error occurred.  The error reason is in errno.\r
+    @retval   >=0   The number of bytes stored into Dest.\r
  **/\r
  ssize_t\r
  **/\r
  ssize_t\r
-EncodeUtf8(char *Dest, wchar_t *s, ssize_t size)\r
+EncodeUtf8(char *Dest, wchar_t ch)\r
  {\r
    char       *p;              /* next free byte in build buffer */\r
  {\r
    char       *p;              /* next free byte in build buffer */\r
-  char       *v;              /* next free byte in destination */\r
-  ssize_t     nneeded;        /* number of result bytes needed */\r
-  int         i;              /* index into s of next input byte */\r
    int         NumInBuff;      // number of bytes in Buff\r
    char        Buff[4];        // Buffer into which each character is built\r
  \r
    int         NumInBuff;      // number of bytes in Buff\r
    char        Buff[4];        // Buffer into which each character is built\r
  \r
-  assert(s != NULL);\r
-  assert(size >= 0);\r
-\r
-  v = Dest;\r
-  nneeded = 0;\r
-  if((size * MB_LEN_MAX) / MB_LEN_MAX != size) {\r
-    // size is too large and resulted in overflow when multiplied by MB_LEN_MAX\r
-    errno = EINVAL;\r
-    return (ssize_t)-1;\r
-  }\r
-\r
- for (i = 0; i < size;) {\r
-    ch_UCS4 ch = s[i++];\r
      p = Buff;\r
  \r
      p = Buff;\r
  \r
-    if (ch < 0x80) {\r
-      /* Encode ASCII -- One Byte */\r
-      *p++ = (char) ch;\r
-    }\r
-    else if (ch < 0x0800) {\r
-      /* Encode Latin-1 -- Two Byte */\r
-      *p++ = (char)(0xc0 | (ch >> 6));\r
-      *p++ = (char)(0x80 | (ch & 0x3f));\r
-    }\r
-    else {\r
+  NumInBuff = 0;\r
+  if (ch < 0x80) {\r
+    /* Encode ASCII -- One Byte */\r
+    *p++ = (char) ch;\r
+    NumInBuff = 1;\r
+  }\r
+  else if (ch < 0x0800) {\r
+    /* Encode Latin-1 -- Two Byte */\r
+    *p++ = (char)(0xc0 | (ch >> 6));\r
+    *p++ = (char)(0x80 | (ch & 0x3f));\r
+    NumInBuff = 2;\r
+  }\r
+  else {\r
        /* Encode UCS2 Unicode ordinals -- Three Byte */\r
        /* Encode UCS2 Unicode ordinals -- Three Byte */\r
-      /* Special case: check for high surrogate -- Shouldn't happen in UEFI */\r
-      if (0xD800 <= ch && ch <= 0xDBFF && i < size) {\r
-        ch_UCS4 ch2 = s[i];\r
-        /* Check for low surrogate and combine the two to\r
-           form a UCS4 value */\r
-        if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {\r
-          ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;\r
-          i++;\r
-          /* Encode UCS4 Unicode ordinals -- Four Byte */\r
-          *p++ = (char)(0xf0 | (ch >> 18));\r
-          *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));\r
-          *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));\r
-          *p++ = (char)(0x80 | (ch & 0x3f));\r
-          continue;\r
-        }\r
-        /* Fall through: handles isolated high surrogates */\r
+    /* Special case: check for surrogate -- Shouldn't happen in UEFI */\r
+    if (0xD800 <= ch && ch < 0xE000) {\r
+      errno = EILSEQ;\r
+      return -1;\r
        }\r
        }\r
+    else {\r
        *p++ = (char)(0xe0 | (ch >> 12));\r
        *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));\r
        *p++ = (char)(0x80 | (ch & 0x3f));\r
        *p++ = (char)(0xe0 | (ch >> 12));\r
        *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));\r
        *p++ = (char)(0x80 | (ch & 0x3f));\r
+      NumInBuff = 3;\r
      }\r
      }\r
-    /*  At this point, Buff holds the converted character which is NumInBuff bytes long.\r
-        NumInBuff is the value 1, 2, 3, or 4\r
-    */\r
-    NumInBuff = (int)(p - Buff);     // Number of bytes in Buff\r
-    if(Dest != NULL) {        // Save character if Dest is not NULL\r
-      memcpy(v, Buff, NumInBuff);\r
-      v += NumInBuff;\r
-    }\r
-    nneeded += NumInBuff;     // Keep track of the number of bytes put into Dest\r
    }\r
    }\r
-  if(Dest != NULL) {\r
-    // Terminate the destination string.\r
-    *v = '\0';\r
+  /*  At this point, Buff holds the converted character which is NumInBuff bytes long.\r
+      NumInBuff is the value 1, 2, 3, or 4\r
+  */\r
+  if(Dest != NULL) {        // Save character if Dest is not NULL\r
+    memcpy(Dest, Buff, NumInBuff);\r
+\r
+    if(ch != 0) {\r
+      // Terminate the destination string.\r
+      Dest[NumInBuff] = '\0';\r
+    }\r
+    else {\r
+      NumInBuff = 0;\r
+    }\r
    }\r
    }\r
-  return nneeded;             // Tell the caller\r
+  return NumInBuff;             // Tell the caller\r
  }\r
  \r
  // ########################  Narrow to Wide Conversions #######################\r
  }\r
  \r
  // ########################  Narrow to Wide Conversions #######################\r
@@ -307,6 +432,8 @@ EncodeUtf8(char *Dest, wchar_t *s, ssize_t size)
  /** If ps is not a null pointer, the mbsinit function determines whether the\r
      pointed-to mbstate_t object describes an initial conversion state.\r
  \r
  /** If ps is not a null pointer, the mbsinit function determines whether the\r
      pointed-to mbstate_t object describes an initial conversion state.\r
  \r
+    @param[in]  ps    Pointer to the conversion state object to test.\r
+\r
      @return     The mbsinit function returns nonzero if ps is a null pointer\r
                  or if the pointed-to object describes an initial conversion\r
                  state; otherwise, it returns zero.\r
      @return     The mbsinit function returns nonzero if ps is a null pointer\r
                  or if the pointed-to object describes an initial conversion\r
                  state; otherwise, it returns zero.\r
@@ -329,8 +456,14 @@ mbsinit(const mbstate_t *ps)
      where internal is the mbstate_t object for the mbrlen function, except that\r
      the expression designated by ps is evaluated only once.\r
  \r
      where internal is the mbstate_t object for the mbrlen function, except that\r
      the expression designated by ps is evaluated only once.\r
  \r
-    @return   The mbrlen function returns a value between zero and n,\r
-              inclusive, (size_t)(-2), or (size_t)(-1).\r
+    @param[in]  s     Pointer to a multibyte character sequence.\r
+    @param[in]  n     Maximum number of bytes to examine.\r
+    @param[in]  pS    Pointer to the conversion state object.\r
+\r
+    @retval   0       The next n or fewer bytes complete a NUL character.\r
+    @retval   1..n    The number of bytes that complete the multibyte character.\r
+    @retval   -2      The next n bytes contribute to an incomplete (but potentially valid) multibyte character.\r
+    @retval   -1      An encoding error occurred.\r
  \r
      Declared in: wchar.h\r
  **/\r
  \r
      Declared in: wchar.h\r
  **/\r
@@ -338,10 +471,10 @@ size_t
  mbrlen(\r
    const char *s,\r
    size_t n,\r
  mbrlen(\r
    const char *s,\r
    size_t n,\r
-  mbstate_t *ps\r
+  mbstate_t *pS\r
    )\r
  {\r
    )\r
  {\r
-  return mbrtowc(NULL, s, n, ps);\r
+  return mbrtowc(NULL, s, n, pS);\r
  }\r
  \r
  /** Determine the number of bytes comprising a multibyte character.\r
  }\r
  \r
  /** Determine the number of bytes comprising a multibyte character.\r
@@ -392,6 +525,11 @@ corresponding wide character and then, if pwc is not a null pointer, stores that
  the object pointed to by pwc. If the corresponding wide character is the null wide\r
  character, the resulting state described is the initial conversion state.\r
  \r
  the object pointed to by pwc. If the corresponding wide character is the null wide\r
  character, the resulting state described is the initial conversion state.\r
  \r
+    @param[out]   pwc   Pointer to where the resulting wide character is to be stored.\r
+    @param[in]     s    Pointer to a multibyte character "string".\r
+    @param[in]     n    The maximum number of bytes to inspect.\r
+    @param[in]     ps   Pointer to a conversion state object.\r
+\r
      @retval   0             if the next n or fewer bytes complete the multibyte\r
                              character that corresponds to the null wide\r
                              character (which is the value stored).\r
      @retval   0             if the next n or fewer bytes complete the multibyte\r
                              character that corresponds to the null wide\r
                              character (which is the value stored).\r
@@ -480,6 +618,11 @@ just past the last multibyte character converted (if any). If conversion stopped
  reaching a terminating null character and if dst is not a null pointer, the resulting state\r
  described is the initial conversion state.\r
  \r
  reaching a terminating null character and if dst is not a null pointer, the resulting state\r
  described is the initial conversion state.\r
  \r
+    @param[out]   dst   Pointer to where the resulting wide character sequence is stored.\r
+    @param[in]    src   Pointer to a pointer to the multibyte character sequence to convert.\r
+    @param[in]    len   Maximum number of wide characters to be stored into dst.\r
+    @param[in]    ps    Pointer to a conversion state object.\r
+\r
      @return   If the input conversion encounters a sequence of bytes that do\r
                not form a valid multibyte character, an encoding error occurs:\r
                the mbsrtowcs function stores the value of the macro EILSEQ in\r
      @return   If the input conversion encounters a sequence of bytes that do\r
                not form a valid multibyte character, an encoding error occurs:\r
                the mbsrtowcs function stores the value of the macro EILSEQ in\r
@@ -564,21 +707,23 @@ mbsrtowcs(
  **/\r
  size_t\r
  mbstowcs(\r
  **/\r
  size_t\r
  mbstowcs(\r
-  wchar_t *pwcs,\r
-  const char *s,\r
-  size_t n\r
+  wchar_t *Dest,\r
+  const char *Src,\r
+  size_t Limit\r
    )\r
  {\r
  \r
    )\r
  {\r
  \r
-  /* pwcs may be NULL */\r
-  /* s may be NULL */\r
+  /* Dest may be NULL */\r
+  /* Src may be NULL */\r
  \r
  \r
-  return mbsrtowcs(pwcs, &s, n, NULL);\r
+  return mbsrtowcs(Dest, &Src, Limit, NULL);\r
  }\r
  \r
  /** The btowc function determines whether C constitutes a valid single-byte\r
      character in the initial shift state.\r
  \r
  }\r
  \r
  /** The btowc function determines whether C constitutes a valid single-byte\r
      character in the initial shift state.\r
  \r
+    @param[in]    C   A narrow character to test or convert to wide.\r
+\r
      @return   The btowc function returns WEOF if c has the value EOF or if\r
                (unsigned char)C does not constitute a valid single-byte\r
                character in the initial shift state. Otherwise, it returns the\r
      @return   The btowc function returns WEOF if c has the value EOF or if\r
                (unsigned char)C does not constitute a valid single-byte\r
                character in the initial shift state. Otherwise, it returns the\r
@@ -621,6 +766,12 @@ array whose first element is pointed to by S. At most MB_CUR_MAX bytes are store
  wc is a null wide character, a null byte is stored, preceded by any shift sequence needed\r
  to restore the initial shift state; the resulting state described is the initial conversion state.\r
  \r
  wc is a null wide character, a null byte is stored, preceded by any shift sequence needed\r
  to restore the initial shift state; the resulting state described is the initial conversion state.\r
  \r
+    @param[out]     Dest    Pointer to the location in which to store the resulting\r
+                            multibyte character.  Otherwise, NULL to reset the\r
+                            conversion state.\r
+    @param[in]      wchar   The wide character to convert.\r
+    @param[in,out]  pS      Pointer to a conversion state object, or NULL.\r
+\r
      @return   The wcrtomb function returns the number of bytes stored in the\r
                array object (including any shift sequences). When wc is not a\r
                valid wide character, an encoding error occurs: the function\r
      @return   The wcrtomb function returns the number of bytes stored in the\r
                array object (including any shift sequences). When wc is not a\r
                valid wide character, an encoding error occurs: the function\r
@@ -631,26 +782,31 @@ to restore the initial shift state; the resulting state described is the initial
  **/\r
  size_t\r
  wcrtomb(\r
  **/\r
  size_t\r
  wcrtomb(\r
-  char *s,\r
+  char *Dest,\r
    wchar_t wchar,\r
    wchar_t wchar,\r
-  mbstate_t *ps\r
+  mbstate_t *pS\r
    )\r
  {\r
    size_t    RetVal;\r
  \r
    )\r
  {\r
    size_t    RetVal;\r
  \r
-  /* s may be NULL */\r
-  if (s == NULL) {\r
+  /* Dest may be NULL */\r
+  if (Dest == NULL) {\r
      RetVal = 1;\r
    }\r
    else {\r
      if (wchar == L'\0') {\r
      RetVal = 1;\r
    }\r
    else {\r
      if (wchar == L'\0') {\r
-      *s = '\0';\r
+      *Dest = '\0';\r
        RetVal = 1;\r
      }\r
      else {\r
        RetVal = 1;\r
      }\r
      else {\r
-      RetVal = EncodeUtf8(s, &wchar, 1);\r
+      RetVal = EncodeUtf8(Dest, wchar);\r
      }\r
    }\r
      }\r
    }\r
+  if(pS == NULL) {\r
+    pS = &LocalConvState;\r
+  }\r
+  pS->A = 0;      // Set ps to the initial conversion state\r
+\r
    return RetVal;\r
  }\r
  \r
    return RetVal;\r
  }\r
  \r
@@ -698,27 +854,31 @@ wctomb(
  }\r
  \r
  /** The wcsrtombs function converts a sequence of wide characters from the array\r
  }\r
  \r
  /** The wcsrtombs function converts a sequence of wide characters from the array\r
-    indirectly pointed to by S into a sequence of corresponding multibyte\r
+    indirectly pointed to by Src into a sequence of corresponding multibyte\r
      characters that begins in the conversion state described by the object\r
      pointed to by ps.\r
  \r
      characters that begins in the conversion state described by the object\r
      pointed to by ps.\r
  \r
-    If S is not a null pointer, the converted characters\r
-    are then stored into the array pointed to by S.  Conversion continues\r
-    up to and including a terminating null wide character, which is also\r
-    stored. Conversion stops earlier in two cases: when a wide character is\r
-    reached that does not correspond to a valid multibyte character, or\r
-    (if S is not a null pointer) when the next multibyte character would\r
-    exceed the limit of N total bytes to be stored into the array pointed\r
-    to by S. Each conversion takes place as if by a call to the wcrtomb\r
-    function.)\r
-\r
-    If S is not a null pointer, the pointer object pointed to by pwcs is\r
+    If Dest is not a null pointer, the converted characters are stored into the\r
+    array pointed to by Dest.  Conversion continues up to and including a\r
+    terminating null wide character, which is also stored. Conversion stops\r
+    earlier in two cases: when a wide character is reached that does not\r
+    correspond to a valid multibyte character, or (if Dest is not a null\r
+    pointer) when the next multibyte character would exceed the limit of Limit\r
+    total bytes to be stored into the array pointed to by Dest. Each conversion\r
+    takes place as if by a call to the wcrtomb function.)\r
+\r
+    If Dest is not a null pointer, the pointer object pointed to by Src is\r
      assigned either a null pointer (if conversion stopped due to reaching\r
      a terminating null wide character) or the address just past the last wide\r
      character converted (if any). If conversion stopped due to reaching a\r
      terminating null wide character, the resulting state described is the\r
      initial conversion state.\r
  \r
      assigned either a null pointer (if conversion stopped due to reaching\r
      a terminating null wide character) or the address just past the last wide\r
      character converted (if any). If conversion stopped due to reaching a\r
      terminating null wide character, the resulting state described is the\r
      initial conversion state.\r
  \r
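+    @param[out]     Dest    Pointer to the array that receives the multibyte characters, or NULL.\r
+    @param[in,out]  Src     Pointer to a pointer to the wide-character sequence to convert.\r
+    @param[in]      Limit   Max number of bytes to store in Dest.\r
+    @param[in,out]  ps      Pointer to a conversion state object.\r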
+\r
      @return     If conversion stops because a wide character is reached that\r
                  does not correspond to a valid multibyte character, an\r
                  encoding error occurs: the wcsrtombs function stores the\r
      @return     If conversion stops because a wide character is reached that\r
                  does not correspond to a valid multibyte character, an\r
                  encoding error occurs: the wcsrtombs function stores the\r
@@ -731,38 +891,50 @@ wctomb(
  **/\r
  size_t\r
  wcsrtombs(\r
  **/\r
  size_t\r
  wcsrtombs(\r
-  char *s,\r
-  const wchar_t **pwcs,\r
-  size_t n,\r
-  mbstate_t *ps\r
+  char           *Dest,\r
+  const wchar_t **Src,\r
+  size_t          Limit,\r
+  mbstate_t      *ps\r
  )\r
  {\r
  )\r
  {\r
-  int count = 0;\r
+  size_t  NumStored;\r
+  ssize_t MaxBytes;\r
+  int     count;\r
+  wchar_t InCh;\r
  \r
  \r
-  /* s may be NULL */\r
-  /* pwcs may be NULL */\r
+  NumStored = 0;\r
+  MaxBytes  = (ssize_t)Limit;\r
+\r
+  /* Dest may be NULL */\r
+  /* Src may be NULL */\r
    /* ps appears to be unused */\r
  \r
    /* ps appears to be unused */\r
  \r
-  if (pwcs == NULL || *pwcs == NULL)\r
+  if (Src == NULL || *Src == NULL)\r
      return (0);\r
  \r
      return (0);\r
  \r
-  if (s == NULL) {\r
-    while (*(*pwcs)++ != 0)\r
-      count++;\r
-    return(count);\r
+  if (Dest == NULL) {\r
+    NumStored = EstimateWtoM(*Src, MaxBytes, NULL);\r
    }\r
    }\r
-\r
-  if (n != 0) {\r
-    do {\r
-      if ((*s++ = (char) *(*pwcs)++) == 0) {\r
-        *pwcs = NULL;\r
+  else {\r
+    while (OneWcToMcLen(InCh = *(*Src)++) <= MaxBytes) {\r
+      if(InCh == 0) {\r
+        *Src = NULL;\r
          break;\r
        }\r
          break;\r
        }\r
-      count++;\r
-    } while (--n != 0);\r
+      count = (int)wcrtomb(Dest, InCh, NULL);\r
+      if(count >= 0) {\r
+        Dest += count;\r
+        MaxBytes -= count;\r
+        NumStored += count;\r
+      }\r
+      else {\r
+        NumStored = (size_t)(-1);\r
+      }\r
+    }\r
    }\r
  \r
    }\r
  \r
-  return count;\r
+\r
+  return NumStored;\r
  }\r
  \r
  /** Convert a wide-character string into a multibyte character string.\r
  }\r
  \r
  /** Convert a wide-character string into a multibyte character string.\r
@@ -794,19 +966,23 @@ wcsrtombs(
  **/\r
  size_t\r
  wcstombs(\r
  **/\r
  size_t\r
  wcstombs(\r
-  char *s,\r
-  const wchar_t *pwcs,\r
-  size_t n\r
+  char           *Dest,\r
+  const wchar_t  *Src,\r
+  size_t          Limit\r
  )\r
  {\r
  )\r
  {\r
-  /* s may be NULL */\r
-  return wcsrtombs(s, &pwcs, n, NULL);\r
+  /* Dest may be NULL */\r
+  return wcsrtombs(Dest, &Src, Limit, NULL);\r
  }\r
  \r
  /** The wctob function determines whether C corresponds to a member of the extended\r
      character set whose multibyte character representation is a single byte when in the initial\r
      shift state.\r
  \r
  }\r
  \r
  /** The wctob function determines whether C corresponds to a member of the extended\r
      character set whose multibyte character representation is a single byte when in the initial\r
      shift state.\r
  \r
+    wctob needs to be consistent with wcrtomb.\r
+    If wcrtomb says that a character is representable in 1 byte,\r
+    then wctob needs to also represent the character as 1 byte.\r
+\r
      @return     The wctob function returns EOF if C does not correspond to a multibyte\r
                  character with length one in the initial shift state. Otherwise, it\r
                  returns the single-byte representation of that character as an\r
      @return     The wctob function returns EOF if C does not correspond to a multibyte\r
                  character with length one in the initial shift state. Otherwise, it\r
                  returns the single-byte representation of that character as an\r
@@ -817,13 +993,14 @@ wcstombs(
  int\r
  wctob(wint_t c)\r
  {\r
  int\r
  wctob(wint_t c)\r
  {\r
-  /*  wctob needs to be consistent with wcrtomb.\r
-      if wcrtomb says that a character is representable in 1 byte,\r
-      which this implementation always says, then wctob needs to\r
-      also represent the character as 1 byte.\r
-  */\r
-  if (c == WEOF) {\r
-    return EOF;\r
+  int   RetVal;\r
+\r
+  RetVal = EOF;\r
+  if(c == 0) {\r
+    RetVal = 0;\r
+  }\r
+  else if (OneWcToMcLen((const wchar_t)c) == 1) {\r
+    RetVal = (int)(c & 0xFF);\r
    }\r
    }\r
-  return (int)(c & 0xFF);\r
+  return RetVal;\r
  }\r
  }\r
diff --git a/StdLib/LibC/Main/Main.c b/StdLib/LibC/Main/Main.c

index 523965fa4368b53d975b3b99027d34593954747b..9afffff7c6b9109ad5e368f8cfba0ec8aa3ddf0f 100644 (file)
--- a/StdLib/LibC/Main/Main.c
+++ b/StdLib/LibC/Main/Main.c
@@ -113,10 +113,9 @@ DEBUG_CODE_END();
    for(count = 0; count < Argc; ++count) {\r
      nArgv[count] = string;\r
      AVsz = wcstombs(string, Argv[count], nArgvSize);\r
    for(count = 0; count < Argc; ++count) {\r
      nArgv[count] = string;\r
      AVsz = wcstombs(string, Argv[count], nArgvSize);\r
-    string[AVsz] = 0;   /* NULL terminate the argument */\r
      DEBUG((DEBUG_INFO, "Cvt[%d] %d \"%s\" --> \"%a\"\n", (INT32)count, (INT32)AVsz, Argv[count], nArgv[count]));\r
      DEBUG((DEBUG_INFO, "Cvt[%d] %d \"%s\" --> \"%a\"\n", (INT32)count, (INT32)AVsz, Argv[count], nArgv[count]));\r
-    string += AVsz + 1;\r
-    nArgvSize -= AVsz + 1;\r
+    string += AVsz;\r
+    nArgvSize -= AVsz;\r
      if(nArgvSize < 0) {\r
        Print(L"ABORTING: Internal Argv[%d] conversion error.\n", count);\r
        exit(EXIT_FAILURE);\r
      if(nArgvSize < 0) {\r
        Print(L"ABORTING: Internal Argv[%d] conversion error.\n", count);\r
        exit(EXIT_FAILURE);\r
author	darylm503 <darylm503@6f19259b-4bc3-4df7-8a09-765794883524>
	Fri, 5 Oct 2012 22:00:43 +0000 (22:00 +0000)
committer	darylm503 <darylm503@6f19259b-4bc3-4df7-8a09-765794883524>
	Fri, 5 Oct 2012 22:00:43 +0000 (22:00 +0000)
StdLib/LibC/Locale/multibyte_Utf8.c		patch \| blob \| blame \| history
StdLib/LibC/Main/Main.c		patch \| blob \| blame \| history