]>
git.proxmox.com Git - mirror_edk2.git/blob - StdLib/LibC/Locale/multibyte_Utf8.c
3f29f2942af45debdfa33d821e732b4fc7e8264d
2 Copyright (c) 2012, Intel Corporation. All rights reserved.<BR>
3 This program and the accompanying materials
4 are licensed and made available under the terms and conditions of the BSD License
5 which accompanies this distribution. The full text of the license may be found at
6 http://opensource.org/licenses/bsd-license.php
8 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
9 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
16 #include <sys/types.h>
20 static mbstate_t LocalConvState
= {0};
22 /** Map a UTF-8 encoded prefix byte to a sequence length.
23 Zero means illegal prefix, but valid surrogate if < 0xC0.
24 One indicates an ASCII-7 equivalent character.
25 Two, three, and four are the first byte for 2, 3, and 4 byte sequences, respectively.
26 See RFC 3629 for details.
29 Low Nibble decodes the first byte into the number of bytes in the sequence.
30 A value of zero indicates an invalid byte.
31 The High Nibble encodes a bit mask to be used to match against the high nibble of the second byte.
34 SequenceLength = code[c0] & 0x0F;
35 Mask = 0x80 | code[c0];
37 Surrogate bytes are valid if: (code[cX] & Mask) > 0x80;
41 UINT8 utf8_code_length
[256] = {
42 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 00-0F */
43 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
44 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
45 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
46 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
47 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
48 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
49 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 70-7F */
50 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, /* 80-8F */
51 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, 0xA0, /* 90-9F */
52 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, /* A0-AF */
53 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, /* B0-BF */
54 0x00, 0x00, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, /* C0-C1 + C2-CF */
55 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, /* D0-DF */
56 0x43, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x33, 0x73, 0x73, /* E0-EF */
57 0x64, 0x74, 0x74, 0x74, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* F0-F4 + F5-FF */
60 /** Process one byte of a multibyte character.
71 ProcessOneByte(unsigned char ch
, mbstate_t *ps
)
78 // We are in an invalid state
79 ps
->A
= 0; // Initial State
81 ps
->C
[ps
->A
] = ch
; // Save the current character
82 Mask
= utf8_code_length
[ch
];
84 if(ps
->A
== 0) { // Initial State. First byte of sequence.
88 case 0: // State 0, Code 0
91 ps
->E
= 1; // Consume this character
93 case 1: // State 0, Code 1
95 ps
->B
= ps
->D
[0] = ch
;
98 default: // State 0, Code 2, 3, 4
99 ps
->A
= 1; // Next state is State-1
100 RetVal
= -2; // Incomplete but potentially valid character
105 // We are in state 1, 2, or 3 and processing a surrogate byte
106 Length
= ps
->E
& 0xF;
107 if((Mask
& ps
->E
) > 0x80) {
108 // This byte is valid
109 switch(ps
->A
) { // Process based upon our current state
110 case 1: // Second byte of the sequence.
111 if(Length
== 2) { // State 1, Code 2
112 Length
= ((ps
->C
[0] & 0x1f) << 6) + (ps
->C
[1] & 0x3f);
113 assert ((Length
> 0x007F) && (Length
<= 0x07FF));
114 ps
->B
= ps
->D
[0] = (UINT16
)Length
;
115 ps
->A
= 0; // Next state is State-0
118 else { // This isn't the last character, get more. State 1, Code 3 or 4
123 case 2: // Third byte of the sequence
125 Length
= ((ps
->C
[0] & 0x0f) << 12) + ((ps
->C
[1] & 0x3f) << 6) + (ps
->C
[2] & 0x3f);
126 assert ((Length
> 0x07FF) && (Length
<= 0xFFFF));
127 ps
->B
= ps
->D
[0] = (UINT16
)Length
;
128 ps
->A
= 0; // Next state is State-0
136 case 3: // Fourth byte of the sequence
138 Length
= ((ps
->C
[0] & 0x7) << 18) + ((ps
->C
[1] & 0x3f) << 12) +
139 ((ps
->C
[2] & 0x3f) << 6) + (ps
->C
[3] & 0x3f);
141 assert ((Length
> 0xFFFF) && (Length
<= 0x10ffff));
143 /* compute and append the two surrogates: */
145 /* translate from 10000..10FFFF to 0..FFFF */
148 /* high surrogate = top 10 bits added to D800 */
149 ps
->D
[0] = (UINT16
)(0xD800 + (Length
>> 10));
151 /* low surrogate = bottom 10 bits added to DC00 */
152 ps
->D
[1] = (UINT16
)(0xDC00 + (Length
& 0x03FF));
153 ps
->A
= 0; // Next state is State-0
160 ps
->E
= 4; // Can't happen, but consume this character anyway
165 else { // Invalid surrogate character
167 ps
->A
= 0; // Next is State-0
169 ps
->E
= 0; // Don't Consume, it may be an initial byte
175 /** Convert one Multibyte sequence.
182 @retval -2 Bytes processed comprise an incomplete, but potentially valid, character.
183 @retval -1 An encoding error was encountered. ps->E indicates the number of bytes consumed.
184 @retval 0 Either Src is NULL or it points to a NUL character.
185 @retval 1:N N bytes were consumed producing a valid wide character.
189 wchar_t *Dest
, // Pointer to output location, or NULL
190 const char *Src
, // Multibyte Source (UTF8)
191 ssize_t Len
, // Max Number of bytes to convert
192 mbstate_t *pS
// Pointer to State struct., or NULL
199 if((Src
== NULL
) || (*Src
== '\0')) {
203 pS
= &LocalConvState
;
207 while(Src
< SrcEnd
) {
208 ch
= (unsigned char)*Src
++;
209 NumConv
= ProcessOneByte(ch
, pS
);
213 if((NumConv
> 0) && (Dest
!= NULL
)) {
222 /** Convert wide characters (UTF16) into multibyte characters (UTF8)
224 @param s Pointer to the wide-character string to convert
225 @param size Number of wide characters in s. size <= wcslen(s);
227 @return The number of bytes produced by the conversion. The converted bytes
228 are also stored at Dest when Dest is not NULL. On error, global variable
229 errno may contain more information (TODO: confirm the error return value).
232 EncodeUtf8(char *Dest
, wchar_t *s
, ssize_t size
)
234 char *p
; /* next free byte in build buffer */
235 char *v
; /* next free byte in destination */
236 ssize_t nneeded
; /* number of result bytes needed */
237 int i
; /* index into s of next input byte */
238 int NumInBuff
; // number of bytes in Buff
239 char Buff
[4]; // Buffer into which each character is built
246 if((size
* MB_LEN_MAX
) / MB_LEN_MAX
!= size
) {
247 // size is too large and resulted in overflow when multiplied by MB_LEN_MAX
252 for (i
= 0; i
< size
;) {
257 /* Encode ASCII -- One Byte */
260 else if (ch
< 0x0800) {
261 /* Encode values below 0x0800 (includes Latin-1) -- Two Byte */
262 *p
++ = (char)(0xc0 | (ch
>> 6));
263 *p
++ = (char)(0x80 | (ch
& 0x3f));
266 /* Encode UCS2 Unicode ordinals -- Three Byte */
267 /* Special case: check for high surrogate -- Shouldn't happen in UEFI */
268 if (0xD800 <= ch
&& ch
<= 0xDBFF && i
< size
) {
270 /* Check for low surrogate and combine the two to
272 if (0xDC00 <= ch2
&& ch2
<= 0xDFFF) {
273 ch
= ((ch
- 0xD800) << 10 | (ch2
- 0xDC00)) + 0x10000;
275 /* Encode UCS4 Unicode ordinals -- Four Byte */
276 *p
++ = (char)(0xf0 | (ch
>> 18));
277 *p
++ = (char)(0x80 | ((ch
>> 12) & 0x3f));
278 *p
++ = (char)(0x80 | ((ch
>> 6) & 0x3f));
279 *p
++ = (char)(0x80 | (ch
& 0x3f));
282 /* Fall through: handles isolated high surrogates */
284 *p
++ = (char)(0xe0 | (ch
>> 12));
285 *p
++ = (char)(0x80 | ((ch
>> 6) & 0x3f));
286 *p
++ = (char)(0x80 | (ch
& 0x3f));
288 /* At this point, Buff holds the converted character which is NumInBuff bytes long.
289 NumInBuff is the value 1, 2, 3, or 4
291 NumInBuff
= (int)(p
- Buff
); // Number of bytes in Buff
292 if(Dest
!= NULL
) { // Save character if Dest is not NULL
293 memcpy(v
, Buff
, NumInBuff
);
296 nneeded
+= NumInBuff
; // Keep track of the number of bytes put into Dest
299 // Terminate the destination string.
302 return nneeded
; // Tell the caller
305 // ######################## Narrow to Wide Conversions #######################
307 /** If ps is not a null pointer, the mbsinit function determines whether the
308 pointed-to mbstate_t object describes an initial conversion state.
310 @return The mbsinit function returns nonzero if ps is a null pointer
311 or if the pointed-to object describes an initial conversion
312 state; otherwise, it returns zero.
317 mbsinit(const mbstate_t *ps
)
319 if((ps
== NULL
) || (ps
->A
== 0)) {
325 /** The mbrlen function is equivalent to the call:<BR>
327 mbrtowc(NULL, s, n, ps != NULL ? ps : &internal)
329 where internal is the mbstate_t object for the mbrlen function, except that
330 the expression designated by ps is evaluated only once.
332 @return The mbrlen function returns a value between zero and n,
333 inclusive, (size_t)(-2), or (size_t)(-1).
344 return mbrtowc(NULL
, s
, n
, ps
);
347 /** Determine the number of bytes comprising a multibyte character.
349 If S is not a null pointer, the mblen function determines the number of bytes
350 contained in the multibyte character pointed to by S. Except that the
351 conversion state of the mbtowc function is not affected, it is equivalent to
352 mbtowc((wchar_t *)0, S, N);
354 @param[in] S NULL to query whether multibyte characters have
355 state-dependent encodings. Otherwise, points to a
357 @param[in] N The maximum number of bytes in a multibyte character.
359 @return If S is a null pointer, the mblen function returns a nonzero or
360 zero value, if multibyte character encodings, respectively, do
361 or do not have state-dependent encodings. If S is not a null
362 pointer, the mblen function either returns 0 (if S points to the
363 null character), or returns the number of bytes that are contained
364 in the multibyte character (if the next N or fewer bytes form a
365 valid multibyte character), or returns -1 (if they do not form a
366 valid multibyte character).
368 Declared in: stdlib.h
376 return (int)mbrlen(s
, n
, NULL
);
380 If S is a null pointer, the mbrtowc function is equivalent to the call:<BR>
382 mbrtowc(NULL, "", 1, ps)
385 In this case, the values of the parameters pwc and n are ignored.
387 If S is not a null pointer, the mbrtowc function inspects at most n bytes beginning with
388 the byte pointed to by S to determine the number of bytes needed to complete the next
389 multibyte character (including any shift sequences). If the function determines that the
390 next multibyte character is complete and valid, it determines the value of the
391 corresponding wide character and then, if pwc is not a null pointer, stores that value in
392 the object pointed to by pwc. If the corresponding wide character is the null wide
393 character, the resulting state described is the initial conversion state.
395 @retval 0 if the next n or fewer bytes complete the multibyte
396 character that corresponds to the null wide
397 character (which is the value stored).
398 @retval between_1_and_n_inclusive if the next n or fewer bytes complete
399 a valid multibyte character (which is the value
400 stored); the value returned is the number of bytes
401 that complete the multibyte character.
402 @retval (size_t)(-2) if the next n bytes contribute to an incomplete
403 (but potentially valid) multibyte character, and
404 all n bytes have been processed (no value is stored).
405 @retval (size_t)(-1) if an encoding error occurs, in which case the next
406 n or fewer bytes do not contribute to a complete and
407 valid multibyte character (no value is stored); the
408 value of the macro EILSEQ is stored in errno, and
409 the conversion state is unspecified.
423 RetVal
= DecodeOneStateful(pwc
, s
, (ssize_t
)n
, ps
);
424 return (size_t)RetVal
;
427 /** Convert a multibyte character into a wide character.
429 If S is not a null pointer, the mbtowc function inspects at most N bytes
430 beginning with the byte pointed to by S to determine the number of bytes
431 needed to complete the next multibyte character (including any shift
432 sequences). If the function determines that the next multibyte character
433 is complete and valid, it determines the value of the corresponding wide
434 character and then, if Pwc is not a null pointer, stores that value in
435 the object pointed to by Pwc. If the corresponding wide character is the
436 null wide character, the function is left in the initial conversion state.
438 @param[out] Pwc Pointer to a wide-character object to receive the converted character.
439 @param[in] S Pointer to a multibyte character to convert.
440 @param[in] N Maximum number of bytes in a multibyte character.
442 @return If S is a null pointer, the mbtowc function returns a nonzero or
443 zero value, if multibyte character encodings, respectively, do
444 or do not have state-dependent encodings. If S is not a null
445 pointer, the mbtowc function either returns 0 (if S points to
446 the null character), or returns the number of bytes that are
447 contained in the converted multibyte character (if the next N or
448 fewer bytes form a valid multibyte character), or returns -1
449 (if they do not form a valid multibyte character).
451 In no case will the value returned be greater than N or the value
452 of the MB_CUR_MAX macro.
454 Declared in: stdlib.h
463 return (int)mbrtowc(pwc
, s
, n
, NULL
);
467 The mbsrtowcs function converts a sequence of multibyte characters that begins in the
468 conversion state described by the object pointed to by ps, from the array indirectly
469 pointed to by src into a sequence of corresponding wide characters. If dst is not a null
470 pointer, the converted characters are stored into the array pointed to by dst. Conversion
471 continues up to and including a terminating null character, which is also stored.
472 Conversion stops earlier in two cases: when a sequence of bytes is encountered that does
473 not form a valid multibyte character, or (if dst is not a null pointer) when len wide
474 characters have been stored into the array pointed to by dst. Each conversion takes
475 place as if by a call to the mbrtowc function.
477 If dst is not a null pointer, the pointer object pointed to by src is assigned either a null
478 pointer (if conversion stopped due to reaching a terminating null character) or the address
479 just past the last multibyte character converted (if any). If conversion stopped due to
480 reaching a terminating null character and if dst is not a null pointer, the resulting state
481 described is the initial conversion state.
483 @return If the input conversion encounters a sequence of bytes that do
484 not form a valid multibyte character, an encoding error occurs:
485 the mbsrtowcs function stores the value of the macro EILSEQ in
486 errno and returns (size_t)(-1); the conversion state is
487 unspecified. Otherwise, it returns the number of multibyte
488 characters successfully converted, not including the terminating
489 null character (if any).
505 if((src
== NULL
) || (*src
== NULL
) || (**src
== '\0')) {
510 for(x
= 1 ; (len
!= 0) && (x
> 0); --len
) {
511 x
= DecodeOneStateful(dst
, MySrc
, MB_LEN_MAX
, ps
);
513 case -2: // Incomplete character
514 case -1: // Encoding error
517 case 0: // Encountered NUL character: done.
523 default: // Successfully decoded a character, continue with next
539 /** Convert a multibyte character string into a wide-character string.
541 The mbstowcs function converts a sequence of multibyte characters that
542 begins in the initial shift state from the array pointed to by Src into
543 a sequence of corresponding wide characters and stores not more than limit
544 wide characters into the array pointed to by Dest. No multibyte
545 characters that follow a null character (which is converted into a null
546 wide character) will be examined or converted. Each multibyte character
547 is converted as if by a call to the mbtowc function, except that the
548 conversion state of the mbtowc function is not affected.
550 No more than Limit elements will be modified in the array pointed to by Dest.
551 If copying takes place between objects that overlap,
552 the behavior is undefined.
554 @param[out] Dest Pointer to the array to receive the converted string.
555 @param[in] Src Pointer to the string to be converted.
556 @param[in] Limit Maximum number of elements to be written to Dest.
558 @return If an invalid multibyte character is encountered, the mbstowcs
559 function returns (size_t)(-1). Otherwise, the mbstowcs function
560 returns the number of array elements modified, not including a
561 terminating null wide character, if any.
563 Declared in: stdlib.h
573 /* pwcs may be NULL */
576 return mbsrtowcs(pwcs
, &s
, n
, NULL
);
579 /** The btowc function determines whether C constitutes a valid single-byte
580 character in the initial shift state.
582 @return The btowc function returns WEOF if c has the value EOF or if
583 (unsigned char)C does not constitute a valid single-byte
584 character in the initial shift state. Otherwise, it returns the
585 wide character representation of that character.
594 wint_t RetVal
= WEOF
;
598 x
= DecodeOneStateful(&Dest
, (const char *)&c
, 1, NULL
);
603 RetVal
= (wint_t)Dest
;
608 // ######################## Wide to Narrow Conversions #######################
611 If S is a null pointer, the wcrtomb function is equivalent to the call:<BR>
613 wcrtomb(buf, L'\0', ps)
615 where buf is an internal buffer.
617 If S is not a null pointer, the wcrtomb function determines the number of bytes needed
618 to represent the multibyte character that corresponds to the wide character given by wc
619 (including any shift sequences), and stores the multibyte character representation in the
620 array whose first element is pointed to by S. At most MB_CUR_MAX bytes are stored. If
621 wc is a null wide character, a null byte is stored, preceded by any shift sequence needed
622 to restore the initial shift state; the resulting state described is the initial conversion state.
624 @return The wcrtomb function returns the number of bytes stored in the
625 array object (including any shift sequences). When wc is not a
626 valid wide character, an encoding error occurs: the function
627 stores the value of the macro EILSEQ in errno and
628 returns (size_t)(-1); the conversion state is unspecified.
646 if (wchar
== L
'\0') {
651 RetVal
= EncodeUtf8(s
, &wchar
, 1);
657 /** Convert a wide character into a multibyte character.
659 The wctomb function determines the number of bytes needed to represent the
660 multibyte character corresponding to the wide character given by WC
661 (including any shift sequences), and stores the multibyte character
662 representation in the array whose first element is pointed to by S (if S is
663 not a null pointer). At most MB_CUR_MAX characters are stored. If WC is a
664 null wide character, a null byte is stored, preceded by any shift sequence
665 needed to restore the initial shift state, and the function is left in the
666 initial conversion state.
668 @param[out] S Pointer to the object to receive the converted multibyte character.
669 @param[in] WC Wide character to be converted.
671 @return If S is a null pointer, the wctomb function returns a nonzero or
672 zero value, if multibyte character encodings, respectively, do or
673 do not have state-dependent encodings. If S is not a null pointer,
674 the wctomb function returns -1 if the value of WC does not
675 correspond to a valid multibyte character, or returns the number
676 of bytes that are contained in the multibyte character
677 corresponding to the value of WC.
679 In no case will the value returned be greater than the value of
680 the MB_CUR_MAX macro.
682 Declared in: stdlib.h
691 If s is NULL just return whether MB Characters have state
692 dependent encodings -- they don't.
697 return (int)wcrtomb(s
, wchar
, NULL
);
700 /** The wcsrtombs function converts a sequence of wide characters from the array
701 indirectly pointed to by pwcs into a sequence of corresponding multibyte
702 characters that begins in the conversion state described by the object
705 If S is not a null pointer, the converted characters
706 are then stored into the array pointed to by S. Conversion continues
707 up to and including a terminating null wide character, which is also
708 stored. Conversion stops earlier in two cases: when a wide character is
709 reached that does not correspond to a valid multibyte character, or
710 (if S is not a null pointer) when the next multibyte character would
711 exceed the limit of N total bytes to be stored into the array pointed
712 to by S. Each conversion takes place as if by a call to the wcrtomb
715 If S is not a null pointer, the pointer object pointed to by pwcs is
716 assigned either a null pointer (if conversion stopped due to reaching
717 a terminating null wide character) or the address just past the last wide
718 character converted (if any). If conversion stopped due to reaching a
719 terminating null wide character, the resulting state described is the
720 initial conversion state.
722 @return If conversion stops because a wide character is reached that
723 does not correspond to a valid multibyte character, an
724 encoding error occurs: the wcsrtombs function stores the
725 value of the macro EILSEQ in errno and returns (size_t)(-1);
726 the conversion state is unspecified. Otherwise, it returns
727 the number of bytes in the resulting multibyte character
728 sequence, not including the terminating null character (if any).
735 const wchar_t **pwcs
,
743 /* pwcs may be NULL */
744 /* ps appears to be unused */
746 if (pwcs
== NULL
|| *pwcs
== NULL
)
750 while (*(*pwcs
)++ != 0)
757 if ((*s
++ = (char) *(*pwcs
)++) == 0) {
768 /** Convert a wide-character string into a multibyte character string.
770 The wcstombs function converts a sequence of wide characters from the
771 array pointed to by Src into a sequence of corresponding multibyte
772 characters that begins in the initial shift state, and stores these
773 multibyte characters into the array pointed to by Dest, stopping if a
774 multibyte character would exceed the limit of Limit total bytes or if a
775 null character is stored. Each wide character is converted as if by
776 a call to the wctomb function, except that the conversion state of
777 the wctomb function is not affected.
779 No more than Limit bytes will be modified in the array pointed to by Dest.
780 If copying takes place between objects that overlap,
781 the behavior is undefined.
783 @param[out] Dest Pointer to the array to receive the converted string.
784 @param[in] Src Pointer to the string to be converted.
785 @param[in] Limit Maximum number of bytes to be written to Dest.
787 @return If a wide character is encountered that does not correspond to a
788 valid multibyte character, the wcstombs function returns
789 (size_t)(-1). Otherwise, the wcstombs function returns the number
790 of bytes modified, not including a terminating null character,
793 Declared in: stdlib.h
803 return wcsrtombs(s
, &pwcs
, n
, NULL
);
806 /** The wctob function determines whether C corresponds to a member of the extended
807 character set whose multibyte character representation is a single byte when in the initial
810 @return The wctob function returns EOF if C does not correspond to a multibyte
811 character with length one in the initial shift state. Otherwise, it
812 returns the single-byte representation of that character as an
813 unsigned char converted to an int.
820 /* wctob needs to be consistent with wcrtomb.
821 if wcrtomb says that a character is representable in 1 byte,
822 which this implementation always says, then wctob needs to
823 also represent the character as 1 byte.
828 return (int)(c
& 0xFF);