[ceph.git] / ceph / src / boost / boost / nowide / utf / utf.hpp

//
//  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
//  Copyright (c) 2020 Alexander Grund
//
//  Distributed under the Boost Software License, Version 1.0. (See
//  accompanying file LICENSE or copy at
//  http://www.boost.org/LICENSE_1_0.txt)
//
#ifndef BOOST_NOWIDE_UTF_HPP_INCLUDED
#define BOOST_NOWIDE_UTF_HPP_INCLUDED

#include <boost/nowide/config.hpp>
#include <cstdint>

namespace boost {
namespace nowide {
    ///
    /// \brief Namespace that holds basic operations on UTF encoded sequences
    ///
    /// All functions defined in this namespace do not require linking with Boost.Nowide library.
    /// Extracted from Boost.Locale
    ///
    namespace utf {

        ///
        /// \brief Integral type wide enough to store any Unicode code point
        ///
        using code_point = uint32_t;

        ///
        /// \brief Sentinel value reported for an illegal sequence; it is not a valid code point
        ///
        static const code_point illegal = 0xFFFFFFFFu;

        ///
        /// \brief Sentinel value reported when the input ends before a sequence is complete;
        /// it is not a valid code point
        ///
        static const code_point incomplete = 0xFFFFFFFEu;

        ///
        /// \brief Tell whether \a v is a valid code point, i.e. it is not larger than
        /// U+10FFFF and does not lie in the surrogate range [U+D800, U+DFFF]
        ///
        inline bool is_valid_codepoint(code_point v)
        {
            const bool is_surrogate = (v >= 0xD800) && (v <= 0xDFFF);
            const bool in_unicode_range = (v <= 0x10FFFF);
            return in_unicode_range && !is_surrogate;
        }

#ifdef BOOST_NOWIDE_DOXYGEN
        ///
        /// \brief UTF Traits class - functions to convert UTF sequences to and from Unicode code points
        ///
        /// This declaration is only visible when BOOST_NOWIDE_DOXYGEN is defined; otherwise the
        /// partial specializations for \a size 1 (UTF-8), 2 (UTF-16) and 4 (UTF-32) are used.
        ///
        template<typename CharType, int size = sizeof(CharType)>
        struct utf_traits
        {
            ///
            /// The type of the character (code unit)
            ///
            using char_type = CharType;
            ///
            /// Read one code point from the range [p,e) and return it.
            ///
            /// - If the range ends before the sequence is complete, returns \ref incomplete
            /// - If an illegal sequence is detected, returns \ref illegal
            ///
            /// Requirements
            ///
            /// - Iterator is a valid input iterator
            /// - NOTE: the UTF-8 implementation steps the iterator back with `p -= n` when the decoded
            ///   value is not a valid, shortest-form code point, so there the iterator must also
            ///   support that operation
            ///
            /// Postconditions
            ///
            /// - On success \a p points one past the last code unit of the decoded sequence
            ///
            template<typename Iterator>
            static code_point decode(Iterator& p, Iterator e);

            ///
            /// Maximal width of a valid sequence in code units:
            ///
            /// - UTF-8  - 4
            /// - UTF-16 - 2
            /// - UTF-32 - 1
            ///
            static const int max_width;
            ///
            /// The width of a specific code point in code units.
            ///
            /// Requirement: \a value is a valid Unicode code point
            /// Returns a value in the range [1..max_width]
            ///
            static int width(code_point value);

            ///
            /// Get the size of the trail part of a variable length encoded sequence,
            /// i.e. the number of code units that follow the lead code unit \a c.
            ///
            /// Returns -1 if \a c is not a valid lead code unit
            ///
            static int trail_length(char_type c);
            ///
            /// Returns true if \a c is a trail code unit, always false for UTF-32
            ///
            static bool is_trail(char_type c);
            ///
            /// Returns true if \a c is a lead code unit, always true for UTF-32
            ///
            static bool is_lead(char_type c);

            ///
            /// Convert the valid Unicode code point \a value to a UTF sequence.
            ///
            /// Requirements:
            ///
            /// - \a value is a valid code point
            /// - \a out is an output iterator that is able to accept at least width(value) units
            ///
            /// Returns the iterator past the last written code unit.
            ///
            template<typename Iterator>
            static Iterator encode(code_point value, Iterator out);
            ///
            /// Decodes the valid UTF sequence pointed to by \a p into a code point
            /// and advances \a p past that sequence.
            ///
            /// If the sequence is invalid or \a p points to the end, the behavior is undefined
            ///
            template<typename Iterator>
            static code_point decode_valid(Iterator& p);
        };

#else

        template<typename CharType, int size = sizeof(CharType)>
        struct utf_traits;

        /// UTF-8 traits: chosen for character types whose size is 1
        template<typename CharType>
        struct utf_traits<CharType, 1>
        {
            using char_type = CharType;

            /// Longest sequence handled by this class, in code units
            static const int max_width = 4;

            /// Number of trail bytes that follow the lead byte \a ci:
            /// 0 for a single byte character, -1 if \a ci is not accepted as a lead byte
            static int trail_length(char_type ci)
            {
                const unsigned char byte = ci;
                if(byte < 0x80)
                    return 0;
                // 0x80..0xBF are trail bytes; 0xC0 and 0xC1 could only encode values below 0x80
                if(BOOST_UNLIKELY(byte < 0xC2))
                    return -1;
                if(byte < 0xE0)
                    return 1;
                if(byte < 0xF0)
                    return 2;
                // Anything above 0xF4 is rejected
                return BOOST_LIKELY(byte <= 0xF4) ? 3 : -1;
            }

            /// Number of bytes used to encode \a value
            static int width(code_point value)
            {
                if(value <= 0x7F)
                    return 1;
                if(value <= 0x7FF)
                    return 2;
                return BOOST_LIKELY(value <= 0xFFFF) ? 3 : 4;
            }

            /// True if \a ci has the bit pattern 10xxxxxx of a trail byte
            static bool is_trail(char_type ci)
            {
                const unsigned char byte = ci;
                return (byte >> 6) == 0x2;
            }

            /// True if \a ci is not a trail byte
            static bool is_lead(char_type ci)
            {
                const bool trail = is_trail(ci);
                return !trail;
            }

            /// Read one code point from [p, e).
            ///
            /// Returns \ref incomplete if the range ends before the sequence does and
            /// \ref illegal for an invalid lead byte, an invalid trail byte or a decoded value
            /// that is not a valid code point in its shortest encoding.
            /// In the last case \a p is moved back so that it points right behind the lead byte.
            template<typename Iterator>
            static code_point decode(Iterator& p, Iterator e)
            {
                if(BOOST_UNLIKELY(p == e))
                    return incomplete;

                const unsigned char lead = *p++;

                // The lead byte is fully validated by trail_length
                const int trail_size = trail_length(lead);
                if(BOOST_UNLIKELY(trail_size < 0))
                    return illegal;

                // Only single byte characters have no trail, handle them first
                if(trail_size == 0)
                    return lead;

                // Payload bits of the lead byte: 5, 4 or 3 bits for 1, 2 or 3 trail bytes
                code_point c = lead & ((1 << (6 - trail_size)) - 1);

                // Append 6 payload bits for each trail byte
                for(int remaining = trail_size; remaining > 0; --remaining)
                {
                    if(BOOST_UNLIKELY(p == e))
                        return incomplete;
                    const unsigned char next = *p++;
                    if(!is_trail(next))
                        return illegal;
                    c = (c << 6) | (next & 0x3F);
                }

                // Reject surrogates, out of range values and non-shortest encodings
                const bool acceptable = is_valid_codepoint(c) && (width(c) == trail_size + 1);
                if(BOOST_UNLIKELY(!acceptable))
                {
                    p -= trail_size;
                    return illegal;
                }

                return c;
            }

            /// Read one code point from a sequence that is assumed to be valid; nothing is checked
            template<typename Iterator>
            static code_point decode_valid(Iterator& p)
            {
                const unsigned char lead = *p++;
                if(lead < 0xC0)
                    return lead;

                // Sequences of 4 bytes (outside of the BMP) are expected to be rare
                const int trail_size = (lead < 0xE0) ? 1 : (BOOST_LIKELY(lead < 0xF0) ? 2 : 3);

                code_point c = lead & ((1 << (6 - trail_size)) - 1);
                for(int remaining = trail_size; remaining > 0; --remaining)
                {
                    c = (c << 6) | (static_cast<unsigned char>(*p++) & 0x3F);
                }

                return c;
            }

            /// Write \a value as UTF-8 to \a out and return the iterator past the last written byte
            template<typename Iterator>
            static Iterator encode(code_point value, Iterator out)
            {
                switch(width(value))
                {
                case 1: *out++ = static_cast<char_type>(value); break;
                case 2:
                    *out++ = static_cast<char_type>((value >> 6) | 0xC0);
                    *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
                    break;
                case 3:
                    *out++ = static_cast<char_type>((value >> 12) | 0xE0);
                    *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
                    *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
                    break;
                default:
                    *out++ = static_cast<char_type>((value >> 18) | 0xF0);
                    *out++ = static_cast<char_type>(((value >> 12) & 0x3F) | 0x80);
                    *out++ = static_cast<char_type>(((value >> 6) & 0x3F) | 0x80);
                    *out++ = static_cast<char_type>((value & 0x3F) | 0x80);
                    break;
                }
                return out;
            }
        }; // utf8

        template<typename CharType>
        struct utf_traits<CharType, 2>
        {
            using char_type = CharType;

            // See RFC 2781
            static bool is_single_codepoint(uint16_t x)
            {
                // Ranges [U+0000, 0+D7FF], [U+E000, U+FFFF] are numerically equal in UTF-16
                return x <= 0xD7FF || x >= 0xE000;
            }
            static bool is_first_surrogate(uint16_t x)
            {
                // Range [U+D800, 0+DBFF]: High surrogate
                return 0xD800 <= x && x <= 0xDBFF;
            }
            static bool is_second_surrogate(uint16_t x)
            {
                // Range [U+DC00, 0+DFFF]: Low surrogate
                return 0xDC00 <= x && x <= 0xDFFF;
            }
            static code_point combine_surrogate(uint16_t w1, uint16_t w2)
            {
                return ((code_point(w1 & 0x3FF) << 10) | (w2 & 0x3FF)) + 0x10000;
            }
            static int trail_length(char_type c)
            {
                if(is_first_surrogate(c))
                    return 1;
                if(is_second_surrogate(c))
                    return -1;
                return 0;
            }
            /// Return true if c is trail code unit, always false for UTF-32
            static bool is_trail(char_type c)
            {
                return is_second_surrogate(c);
            }
            /// Return true if c is lead code unit, always true of UTF-32
            static bool is_lead(char_type c)
            {
                return !is_second_surrogate(c);
            }

            template<typename It>
            static code_point decode(It& current, It last)
            {
                if(BOOST_UNLIKELY(current == last))
                    return incomplete;
                uint16_t w1 = *current++;
                if(BOOST_LIKELY(is_single_codepoint(w1)))
                {
                    return w1;
                }
                // Now it's either a high or a low surrogate, the latter is invalid
                if(w1 >= 0xDC00)
                    return illegal;
                if(current == last)
                    return incomplete;
                uint16_t w2 = *current++;
                if(!is_second_surrogate(w2))
                    return illegal;
                return combine_surrogate(w1, w2);
            }
            template<typename It>
            static code_point decode_valid(It& current)
            {
                uint16_t w1 = *current++;
                if(BOOST_LIKELY(is_single_codepoint(w1)))
                {
                    return w1;
                }
                uint16_t w2 = *current++;
                return combine_surrogate(w1, w2);
            }

            static const int max_width = 2;
            static int width(code_point u) // LCOV_EXCL_LINE
            {
                return u >= 0x10000 ? 2 : 1;
            }
            template<typename It>
            static It encode(code_point u, It out)
            {
                if(BOOST_LIKELY(u <= 0xFFFF))
                {
                    *out++ = static_cast<char_type>(u);
                } else
                {
                    u -= 0x10000;
                    *out++ = static_cast<char_type>(0xD800 | (u >> 10));
                    *out++ = static_cast<char_type>(0xDC00 | (u & 0x3FF));
                }
                return out;
            }
        }; // utf16;

        /// UTF-32 traits: chosen for character types whose size is 4
        template<typename CharType>
        struct utf_traits<CharType, 4>
        {
            using char_type = CharType;

            /// Every code point is exactly one code unit
            static const int max_width = 1;

            /// 0 if \a c is a valid code point, -1 otherwise
            static int trail_length(char_type c)
            {
                return is_valid_codepoint(c) ? 0 : -1;
            }
            /// There are no trail code units in UTF-32
            static bool is_trail(char_type /*c*/)
            {
                return false;
            }
            /// Every code unit is a lead code unit in UTF-32
            static bool is_lead(char_type /*c*/)
            {
                return true;
            }

            /// Read one code unit without any check and return it as the code point
            template<typename It>
            static code_point decode_valid(It& current)
            {
                const code_point value = *current++;
                return value;
            }

            /// Read one code point from [current, last).
            ///
            /// Returns \ref incomplete if the range is empty and
            /// \ref illegal if the code unit is not a valid code point.
            template<typename It>
            static code_point decode(It& current, It last)
            {
                if(BOOST_UNLIKELY(current == last))
                    return incomplete;
                const code_point value = *current++;
                return BOOST_UNLIKELY(!is_valid_codepoint(value)) ? illegal : value;
            }
            /// Always 1
            static int width(code_point /*u*/)
            {
                return 1;
            }
            /// Write \a u as a single code unit to \a out and return the iterator behind it
            template<typename It>
            static It encode(code_point u, It out)
            {
                const char_type unit = static_cast<char_type>(u);
                *out++ = unit;
                return out;
            }
        }; // utf32

#endif

    } // namespace utf
} // namespace nowide
} // namespace boost

#endif
Commit	Line	Data
20effc67 TL	1	//
	2	// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
	3	// Copyright (c) 2020 Alexander Grund
	4	//
	5	// Distributed under the Boost Software License, Version 1.0. (See
	6	// accompanying file LICENSE or copy at
	7	// http://www.boost.org/LICENSE_1_0.txt)
	8	//
	9	#ifndef BOOST_NOWIDE_UTF_HPP_INCLUDED
	10	#define BOOST_NOWIDE_UTF_HPP_INCLUDED
	11
	12	#include <boost/nowide/config.hpp>
	13	#include <cstdint>
	14
	15	namespace boost {
	16	namespace nowide {
	17	///
	18	/// \brief Namespace that holds basic operations on UTF encoded sequences
	19	///
1e59de90	20	/// All functions defined in this namespace do not require linking with Boost.Nowide library.
20effc67 TL	21	/// Extracted from Boost.Locale
	22	///
	23	namespace utf {
	24
	25	///
	26	/// \brief The integral type that can hold a Unicode code point
	27	///
	28	using code_point = uint32_t;
	29
	30	///
	31	/// \brief Special constant that defines illegal code point
	32	///
	33	static const code_point illegal = 0xFFFFFFFFu;
	34
	35	///
	36	/// \brief Special constant that defines incomplete code point
	37	///
	38	static const code_point incomplete = 0xFFFFFFFEu;
	39
	40	///
	41	/// \brief the function checks if \a v is a valid code point
	42	///
	43	inline bool is_valid_codepoint(code_point v)
	44	{
	45	if(v > 0x10FFFF)
	46	return false;
	47	if(0xD800 <= v && v <= 0xDFFF) // surrogates
	48	return false;
	49	return true;
	50	}
	51
	52	#ifdef BOOST_NOWIDE_DOXYGEN
	53	///
	54	/// \brief UTF Traits class - functions to convert UTF sequences to and from Unicode code points
	55	///
	56	template<typename CharType, int size = sizeof(CharType)>
	57	struct utf_traits
	58	{
	59	///
	60	/// The type of the character
	61	///
	62	using char_type = CharType;
	63	///
	64	/// Read one code point from the range [p,e) and return it.
	65	///
	66	/// - If the sequence that was read is incomplete sequence returns \ref incomplete,
	67	/// - If illegal sequence detected returns \ref illegal
	68	///
	69	/// Requirements
	70	///
	71	/// - Iterator is valid input iterator
	72	///
	73	/// Postconditions
	74	///
	75	/// - p points to the last consumed character
	76	///
	77	template<typename Iterator>
	78	static code_point decode(Iterator& p, Iterator e);
	79
	80	///
	81	/// Maximal width of valid sequence in the code units:
	82	///
	83	/// - UTF-8 - 4
	84	/// - UTF-16 - 2
85	/// - UTF-32 - 1
86	///
87	static const int max_width;
88	///
89	/// The width of specific code point in the code units.
90	///
91	/// Requirement: value is a valid Unicode code point
92	/// Returns value in range [1..max_width]
93	///
94	static int width(code_point value);
95
96	///
97	/// Get the size of the trail part of variable length encoded sequence.
98	///
99	/// Returns -1 if C is not valid lead character
100	///
101	static int trail_length(char_type c);
102	///
103	/// Returns true if c is trail code unit, always false for UTF-32
104	///
105	static bool is_trail(char_type c);
106	///
107	/// Returns true if c is lead code unit, always true of UTF-32
108	///
109	static bool is_lead(char_type c);
110
111	///
112	/// Convert valid Unicode code point \a value to the UTF sequence.
113	///
114	/// Requirements:
115	///
116	/// - \a value is valid code point
117	/// - \a out is an output iterator should be able to accept at least width(value) units
118	///
119	/// Returns the iterator past the last written code unit.
120	///
121	template<typename Iterator>
122	static Iterator encode(code_point value, Iterator out);
123	///
124	/// Decodes valid UTF sequence that is pointed by p into code point.
125	///
126	/// If the sequence is invalid or points to end the behavior is undefined
127	///
128	template<typename Iterator>
129	static code_point decode_valid(Iterator& p);
130	};
131
132	#else
133
134	template<typename CharType, int size = sizeof(CharType)>
135	struct utf_traits;
136
137	template<typename CharType>
138	struct utf_traits<CharType, 1>
139	{
140	using char_type = CharType;
141
142	static int trail_length(char_type ci)
143	{
144	unsigned char c = ci;
145	if(c < 128)
146	return 0;
147	if(BOOST_UNLIKELY(c < 194))
148	return -1;
149	if(c < 224)
150	return 1;
151	if(c < 240)
152	return 2;
153	if(BOOST_LIKELY(c <= 244))
154	return 3;
155	return -1;
156	}
157
158	static const int max_width = 4;
159
160	static int width(code_point value)
161	{
162	if(value <= 0x7F)
163	{
164	return 1;
165	} else if(value <= 0x7FF)
166	{
167	return 2;
168	} else if(BOOST_LIKELY(value <= 0xFFFF))
169	{
170	return 3;
171	} else
172	{
173	return 4;
174	}
175	}
176
177	static bool is_trail(char_type ci)
178	{
179	unsigned char c = ci;
180	return (c & 0xC0) == 0x80;
181	}
182
183	static bool is_lead(char_type ci)
184	{
185	return !is_trail(ci);
186	}
187
188	template<typename Iterator>
189	static code_point decode(Iterator& p, Iterator e)
190	{
191	if(BOOST_UNLIKELY(p == e))
192	return incomplete;
193
194	unsigned char lead = *p++;
195
196	// First byte is fully validated here
197	int trail_size = trail_length(lead);
198
199	if(BOOST_UNLIKELY(trail_size < 0))
200	return illegal;
201
202	// OK as only ASCII may be of size = 0
203	// also optimize for ASCII text
204	if(trail_size == 0)
205	return lead;
206
207	code_point c = lead & ((1 << (6 - trail_size)) - 1);
208
209	// Read the rest
210	unsigned char tmp;
211	switch(trail_size)
212	{
213	case 3:
214	if(BOOST_UNLIKELY(p == e))
215	return incomplete;
216	tmp = *p++;
217	if(!is_trail(tmp))
218	return illegal;
219	c = (c << 6) \| (tmp & 0x3F);
220	BOOST_NOWIDE_FALLTHROUGH;
221	case 2:
222	if(BOOST_UNLIKELY(p == e))
223	return incomplete;
224	tmp = *p++;
225	if(!is_trail(tmp))
226	return illegal;
227	c = (c << 6) \| (tmp & 0x3F);
228	BOOST_NOWIDE_FALLTHROUGH;
229	case 1:
230	if(BOOST_UNLIKELY(p == e))
231	return incomplete;
232	tmp = *p++;
233	if(!is_trail(tmp))
234	return illegal;
235	c = (c << 6) \| (tmp & 0x3F);
236	}
237
1e59de90 TL	238	// Check code point validity:
	239	// - no surrogates and valid range
	240	// - most compact representation
	241	if(BOOST_UNLIKELY(!is_valid_codepoint(c)) \|\| BOOST_UNLIKELY(width(c) != trail_size + 1))
	242	{
	243	p -= trail_size;
20effc67	244	return illegal;
1e59de90	245	}
20effc67 TL	246
	247	return c;
	248	}
	249
	250	template<typename Iterator>
	251	static code_point decode_valid(Iterator& p)
	252	{
	253	unsigned char lead = *p++;
	254	if(lead < 192)
	255	return lead;
	256
	257	int trail_size;
	258
	259	if(lead < 224)
	260	trail_size = 1;
	261	else if(BOOST_LIKELY(lead < 240)) // non-BMP rare
	262	trail_size = 2;
	263	else
	264	trail_size = 3;
	265
	266	code_point c = lead & ((1 << (6 - trail_size)) - 1);
	267
	268	switch(trail_size)
	269	{
	270	case 3: c = (c << 6) \| (static_cast<unsigned char>(*p++) & 0x3F); BOOST_NOWIDE_FALLTHROUGH;
	271	case 2: c = (c << 6) \| (static_cast<unsigned char>(*p++) & 0x3F); BOOST_NOWIDE_FALLTHROUGH;
	272	case 1: c = (c << 6) \| (static_cast<unsigned char>(*p++) & 0x3F);
	273	}
	274
	275	return c;
	276	}
	277
	278	template<typename Iterator>
	279	static Iterator encode(code_point value, Iterator out)
	280	{
	281	if(value <= 0x7F)
	282	{
	283	*out++ = static_cast<char_type>(value);
	284	} else if(value <= 0x7FF)
	285	{
	286	*out++ = static_cast<char_type>((value >> 6) \| 0xC0);
	287	*out++ = static_cast<char_type>((value & 0x3F) \| 0x80);
	288	} else if(BOOST_LIKELY(value <= 0xFFFF))
	289	{
	290	*out++ = static_cast<char_type>((value >> 12) \| 0xE0);
	291	*out++ = static_cast<char_type>(((value >> 6) & 0x3F) \| 0x80);
	292	*out++ = static_cast<char_type>((value & 0x3F) \| 0x80);
	293	} else
	294	{
	295	*out++ = static_cast<char_type>((value >> 18) \| 0xF0);
	296	*out++ = static_cast<char_type>(((value >> 12) & 0x3F) \| 0x80);
	297	*out++ = static_cast<char_type>(((value >> 6) & 0x3F) \| 0x80);
	298	*out++ = static_cast<char_type>((value & 0x3F) \| 0x80);
	299	}
	300	return out;
	301	}
	302	}; // utf8
	303
	304	template<typename CharType>
	305	struct utf_traits<CharType, 2>
	306	{
	307	using char_type = CharType;
	308
	309	// See RFC 2781
1e59de90 TL	310	static bool is_single_codepoint(uint16_t x)
	311	{
	312	// Ranges [U+0000, 0+D7FF], [U+E000, U+FFFF] are numerically equal in UTF-16
	313	return x <= 0xD7FF \|\| x >= 0xE000;
	314	}
20effc67 TL	315	static bool is_first_surrogate(uint16_t x)
20effc67 TL	316	{
1e59de90	317	// Range [U+D800, 0+DBFF]: High surrogate
20effc67 TL	318	return 0xD800 <= x && x <= 0xDBFF;
	319	}
	320	static bool is_second_surrogate(uint16_t x)
	321	{
1e59de90	322	// Range [U+DC00, 0+DFFF]: Low surrogate
20effc67 TL	323	return 0xDC00 <= x && x <= 0xDFFF;
	324	}
	325	static code_point combine_surrogate(uint16_t w1, uint16_t w2)
	326	{
	327	return ((code_point(w1 & 0x3FF) << 10) \| (w2 & 0x3FF)) + 0x10000;
	328	}
	329	static int trail_length(char_type c)
	330	{
	331	if(is_first_surrogate(c))
	332	return 1;
	333	if(is_second_surrogate(c))
	334	return -1;
	335	return 0;
	336	}
1e59de90	337	/// Return true if c is trail code unit, always false for UTF-32
20effc67 TL	338	static bool is_trail(char_type c)
	339	{
	340	return is_second_surrogate(c);
	341	}
1e59de90	342	/// Return true if c is lead code unit, always true of UTF-32
20effc67 TL	343	static bool is_lead(char_type c)
	344	{
	345	return !is_second_surrogate(c);
	346	}
	347
	348	template<typename It>
	349	static code_point decode(It& current, It last)
	350	{
	351	if(BOOST_UNLIKELY(current == last))
	352	return incomplete;
	353	uint16_t w1 = *current++;
1e59de90	354	if(BOOST_LIKELY(is_single_codepoint(w1)))
20effc67 TL	355	{
	356	return w1;
	357	}
1e59de90 TL	358	// Now it's either a high or a low surrogate, the latter is invalid
1e59de90 TL	359	if(w1 >= 0xDC00)
20effc67 TL	360	return illegal;
	361	if(current == last)
	362	return incomplete;
	363	uint16_t w2 = *current++;
1e59de90	364	if(!is_second_surrogate(w2))
20effc67 TL	365	return illegal;
	366	return combine_surrogate(w1, w2);
	367	}
	368	template<typename It>
	369	static code_point decode_valid(It& current)
	370	{
	371	uint16_t w1 = *current++;
1e59de90	372	if(BOOST_LIKELY(is_single_codepoint(w1)))
20effc67 TL	373	{
	374	return w1;
	375	}
	376	uint16_t w2 = *current++;
	377	return combine_surrogate(w1, w2);
	378	}
	379
	380	static const int max_width = 2;
1e59de90	381	static int width(code_point u) // LCOV_EXCL_LINE
20effc67 TL	382	{
	383	return u >= 0x10000 ? 2 : 1;
	384	}
	385	template<typename It>
	386	static It encode(code_point u, It out)
	387	{
	388	if(BOOST_LIKELY(u <= 0xFFFF))
	389	{
	390	*out++ = static_cast<char_type>(u);
	391	} else
	392	{
	393	u -= 0x10000;
	394	*out++ = static_cast<char_type>(0xD800 \| (u >> 10));
	395	*out++ = static_cast<char_type>(0xDC00 \| (u & 0x3FF));
	396	}
	397	return out;
	398	}
	399	}; // utf16;
	400
	401	template<typename CharType>
	402	struct utf_traits<CharType, 4>
	403	{
	404	using char_type = CharType;
	405	static int trail_length(char_type c)
	406	{
	407	if(is_valid_codepoint(c))
	408	return 0;
	409	return -1;
	410	}
	411	static bool is_trail(char_type /c/)
	412	{
	413	return false;
	414	}
	415	static bool is_lead(char_type /c/)
	416	{
	417	return true;
	418	}
	419
	420	template<typename It>
	421	static code_point decode_valid(It& current)
	422	{
	423	return *current++;
	424	}
	425
	426	template<typename It>
	427	static code_point decode(It& current, It last)
	428	{
	429	if(BOOST_UNLIKELY(current == last))
	430	return incomplete;
	431	code_point c = *current++;
	432	if(BOOST_UNLIKELY(!is_valid_codepoint(c)))
	433	return illegal;
	434	return c;
	435	}
	436	static const int max_width = 1;
	437	static int width(code_point /u/)
	438	{
	439	return 1;
	440	}
	441	template<typename It>
	442	static It encode(code_point u, It out)
	443	{
	444	*out++ = static_cast<char_type>(u);
	445	return out;
446	}
20effc67 TL	447	}; // utf32
	448
	449	#endif
	450
	451	} // namespace utf
	452	} // namespace nowide
	453	} // namespace boost
	454
	455	#endif