1 // Tencent is pleased to support the open source community by making RapidJSON available.
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
8 // http://opensource.org/licenses/MIT
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
15 #ifndef RAPIDJSON_ENCODINGS_H_
16 #define RAPIDJSON_ENCODINGS_H_
18 #include "rapidjson.h"
22 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
23 RAPIDJSON_DIAG_OFF(4702) // unreachable code
24 #elif defined(__GNUC__)
26 RAPIDJSON_DIAG_OFF(effc
++)
27 RAPIDJSON_DIAG_OFF(overflow
)
30 RAPIDJSON_NAMESPACE_BEGIN
32 ///////////////////////////////////////////////////////////////////////////////
/*! \class rapidjson::Encoding
    \brief Concept for encoding of Unicode characters.

\code
concept Encoding {
    typename Ch;    //! Type of character. A "character" is actually a code unit in Unicode's definition.

    enum { supportUnicode = 1 }; // or 0 if not supporting Unicode

    //! \brief Encode a Unicode codepoint to an output stream.
    //! \param os Output stream.
    //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
    template<typename OutputStream>
    static void Encode(OutputStream& os, unsigned codepoint);

    //! \brief Decode a Unicode codepoint from an input stream.
    //! \param is Input stream.
    //! \param codepoint Output of the Unicode codepoint.
    //! \return true if a valid codepoint can be decoded from the stream.
    template <typename InputStream>
    static bool Decode(InputStream& is, unsigned* codepoint);

    //! \brief Validate one Unicode codepoint from an encoded stream.
    //! \param is Input stream to obtain codepoint.
    //! \param os Output for copying one codepoint.
    //! \return true if it is valid.
    //! \note This function only validates and copies the codepoint without actually decoding it.
    template <typename InputStream, typename OutputStream>
    static bool Validate(InputStream& is, OutputStream& os);

    // The following functions deal with byte streams.

    //! Take a character from input byte stream, skipping the BOM if it exists.
    template <typename InputByteStream>
    static Ch TakeBOM(InputByteStream& is);

    //! Take a character from input byte stream.
    template <typename InputByteStream>
    static Ch Take(InputByteStream& is);

    //! Put BOM to output byte stream.
    template <typename OutputByteStream>
    static void PutBOM(OutputByteStream& os);

    //! Put a character to output byte stream.
    template <typename OutputByteStream>
    static void Put(OutputByteStream& os, Ch c);
};
\endcode
*/
86 ///////////////////////////////////////////////////////////////////////////////
90 /*! http://en.wikipedia.org/wiki/UTF-8
91 http://tools.ietf.org/html/rfc3629
92 \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
93 \note implements Encoding concept
95 template<typename CharType
= char>
99 enum { supportUnicode
= 1 };
101 template<typename OutputStream
>
102 static void Encode(OutputStream
& os
, unsigned codepoint
) {
103 if (codepoint
<= 0x7F)
104 os
.Put(static_cast<Ch
>(codepoint
& 0xFF));
105 else if (codepoint
<= 0x7FF) {
106 os
.Put(static_cast<Ch
>(0xC0 | ((codepoint
>> 6) & 0xFF)));
107 os
.Put(static_cast<Ch
>(0x80 | ((codepoint
& 0x3F))));
109 else if (codepoint
<= 0xFFFF) {
110 os
.Put(static_cast<Ch
>(0xE0 | ((codepoint
>> 12) & 0xFF)));
111 os
.Put(static_cast<Ch
>(0x80 | ((codepoint
>> 6) & 0x3F)));
112 os
.Put(static_cast<Ch
>(0x80 | (codepoint
& 0x3F)));
115 RAPIDJSON_ASSERT(codepoint
<= 0x10FFFF);
116 os
.Put(static_cast<Ch
>(0xF0 | ((codepoint
>> 18) & 0xFF)));
117 os
.Put(static_cast<Ch
>(0x80 | ((codepoint
>> 12) & 0x3F)));
118 os
.Put(static_cast<Ch
>(0x80 | ((codepoint
>> 6) & 0x3F)));
119 os
.Put(static_cast<Ch
>(0x80 | (codepoint
& 0x3F)));
123 template<typename OutputStream
>
124 static void EncodeUnsafe(OutputStream
& os
, unsigned codepoint
) {
125 if (codepoint
<= 0x7F)
126 PutUnsafe(os
, static_cast<Ch
>(codepoint
& 0xFF));
127 else if (codepoint
<= 0x7FF) {
128 PutUnsafe(os
, static_cast<Ch
>(0xC0 | ((codepoint
>> 6) & 0xFF)));
129 PutUnsafe(os
, static_cast<Ch
>(0x80 | ((codepoint
& 0x3F))));
131 else if (codepoint
<= 0xFFFF) {
132 PutUnsafe(os
, static_cast<Ch
>(0xE0 | ((codepoint
>> 12) & 0xFF)));
133 PutUnsafe(os
, static_cast<Ch
>(0x80 | ((codepoint
>> 6) & 0x3F)));
134 PutUnsafe(os
, static_cast<Ch
>(0x80 | (codepoint
& 0x3F)));
137 RAPIDJSON_ASSERT(codepoint
<= 0x10FFFF);
138 PutUnsafe(os
, static_cast<Ch
>(0xF0 | ((codepoint
>> 18) & 0xFF)));
139 PutUnsafe(os
, static_cast<Ch
>(0x80 | ((codepoint
>> 12) & 0x3F)));
140 PutUnsafe(os
, static_cast<Ch
>(0x80 | ((codepoint
>> 6) & 0x3F)));
141 PutUnsafe(os
, static_cast<Ch
>(0x80 | (codepoint
& 0x3F)));
145 template <typename InputStream
>
146 static bool Decode(InputStream
& is
, unsigned* codepoint
) {
147 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
148 #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
149 #define TAIL() COPY(); TRANS(0x70)
150 typename
InputStream::Ch c
= is
.Take();
152 *codepoint
= static_cast<unsigned char>(c
);
156 unsigned char type
= GetRange(static_cast<unsigned char>(c
));
160 *codepoint
= (0xFF >> type
) & static_cast<unsigned char>(c
);
164 case 2: TAIL(); return result
;
165 case 3: TAIL(); TAIL(); return result
;
166 case 4: COPY(); TRANS(0x50); TAIL(); return result
;
167 case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result
;
168 case 6: TAIL(); TAIL(); TAIL(); return result
;
169 case 10: COPY(); TRANS(0x20); TAIL(); return result
;
170 case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result
;
171 default: return false;
178 template <typename InputStream
, typename OutputStream
>
179 static bool Validate(InputStream
& is
, OutputStream
& os
) {
180 #define COPY() os.Put(c = is.Take())
181 #define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
182 #define TAIL() COPY(); TRANS(0x70)
189 switch (GetRange(static_cast<unsigned char>(c
))) {
190 case 2: TAIL(); return result
;
191 case 3: TAIL(); TAIL(); return result
;
192 case 4: COPY(); TRANS(0x50); TAIL(); return result
;
193 case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result
;
194 case 6: TAIL(); TAIL(); TAIL(); return result
;
195 case 10: COPY(); TRANS(0x20); TAIL(); return result
;
196 case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result
;
197 default: return false;
204 static unsigned char GetRange(unsigned char c
) {
205 // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
206 // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
207 static const unsigned char type
[] = {
208 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
209 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
211 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
212 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
213 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
214 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
215 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
216 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
217 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
222 template <typename InputByteStream
>
223 static CharType
TakeBOM(InputByteStream
& is
) {
224 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
225 typename
InputByteStream::Ch c
= Take(is
);
226 if (static_cast<unsigned char>(c
) != 0xEFu
) return c
;
228 if (static_cast<unsigned char>(c
) != 0xBBu
) return c
;
230 if (static_cast<unsigned char>(c
) != 0xBFu
) return c
;
235 template <typename InputByteStream
>
236 static Ch
Take(InputByteStream
& is
) {
237 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
238 return static_cast<Ch
>(is
.Take());
241 template <typename OutputByteStream
>
242 static void PutBOM(OutputByteStream
& os
) {
243 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
244 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xEFu
));
245 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xBBu
));
246 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xBFu
));
249 template <typename OutputByteStream
>
250 static void Put(OutputByteStream
& os
, Ch c
) {
251 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
252 os
.Put(static_cast<typename
OutputByteStream::Ch
>(c
));
256 ///////////////////////////////////////////////////////////////////////////////
260 /*! http://en.wikipedia.org/wiki/UTF-16
261 http://tools.ietf.org/html/rfc2781
262 \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
263 \note implements Encoding concept
265 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
266 For streaming, use UTF16LE and UTF16BE, which handle endianness.
268 template<typename CharType
= wchar_t>
271 RAPIDJSON_STATIC_ASSERT(sizeof(Ch
) >= 2);
273 enum { supportUnicode
= 1 };
275 template<typename OutputStream
>
276 static void Encode(OutputStream
& os
, unsigned codepoint
) {
277 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputStream::Ch
) >= 2);
278 if (codepoint
<= 0xFFFF) {
279 RAPIDJSON_ASSERT(codepoint
< 0xD800 || codepoint
> 0xDFFF); // Code point itself cannot be surrogate pair
280 os
.Put(static_cast<typename
OutputStream::Ch
>(codepoint
));
283 RAPIDJSON_ASSERT(codepoint
<= 0x10FFFF);
284 unsigned v
= codepoint
- 0x10000;
285 os
.Put(static_cast<typename
OutputStream::Ch
>((v
>> 10) | 0xD800));
286 os
.Put((v
& 0x3FF) | 0xDC00);
291 template<typename OutputStream
>
292 static void EncodeUnsafe(OutputStream
& os
, unsigned codepoint
) {
293 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputStream::Ch
) >= 2);
294 if (codepoint
<= 0xFFFF) {
295 RAPIDJSON_ASSERT(codepoint
< 0xD800 || codepoint
> 0xDFFF); // Code point itself cannot be surrogate pair
296 PutUnsafe(os
, static_cast<typename
OutputStream::Ch
>(codepoint
));
299 RAPIDJSON_ASSERT(codepoint
<= 0x10FFFF);
300 unsigned v
= codepoint
- 0x10000;
301 PutUnsafe(os
, static_cast<typename
OutputStream::Ch
>((v
>> 10) | 0xD800));
302 PutUnsafe(os
, (v
& 0x3FF) | 0xDC00);
306 template <typename InputStream
>
307 static bool Decode(InputStream
& is
, unsigned* codepoint
) {
308 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputStream::Ch
) >= 2);
309 typename
InputStream::Ch c
= is
.Take();
310 if (c
< 0xD800 || c
> 0xDFFF) {
311 *codepoint
= static_cast<unsigned>(c
);
314 else if (c
<= 0xDBFF) {
315 *codepoint
= (static_cast<unsigned>(c
) & 0x3FF) << 10;
317 *codepoint
|= (static_cast<unsigned>(c
) & 0x3FF);
318 *codepoint
+= 0x10000;
319 return c
>= 0xDC00 && c
<= 0xDFFF;
324 template <typename InputStream
, typename OutputStream
>
325 static bool Validate(InputStream
& is
, OutputStream
& os
) {
326 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputStream::Ch
) >= 2);
327 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputStream::Ch
) >= 2);
328 typename
InputStream::Ch c
;
329 os
.Put(static_cast<typename
OutputStream::Ch
>(c
= is
.Take()));
330 if (c
< 0xD800 || c
> 0xDFFF)
332 else if (c
<= 0xDBFF) {
333 os
.Put(c
= is
.Take());
334 return c
>= 0xDC00 && c
<= 0xDFFF;
340 //! UTF-16 little endian encoding.
341 template<typename CharType
= wchar_t>
342 struct UTF16LE
: UTF16
<CharType
> {
343 template <typename InputByteStream
>
344 static CharType
TakeBOM(InputByteStream
& is
) {
345 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
346 CharType c
= Take(is
);
347 return static_cast<uint16_t>(c
) == 0xFEFFu
? Take(is
) : c
;
350 template <typename InputByteStream
>
351 static CharType
Take(InputByteStream
& is
) {
352 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
353 unsigned c
= static_cast<uint8_t>(is
.Take());
354 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 8;
355 return static_cast<CharType
>(c
);
358 template <typename OutputByteStream
>
359 static void PutBOM(OutputByteStream
& os
) {
360 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
361 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFFu
));
362 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFEu
));
365 template <typename OutputByteStream
>
366 static void Put(OutputByteStream
& os
, CharType c
) {
367 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
368 os
.Put(static_cast<typename
OutputByteStream::Ch
>(static_cast<unsigned>(c
) & 0xFFu
));
369 os
.Put(static_cast<typename
OutputByteStream::Ch
>((static_cast<unsigned>(c
) >> 8) & 0xFFu
));
373 //! UTF-16 big endian encoding.
374 template<typename CharType
= wchar_t>
375 struct UTF16BE
: UTF16
<CharType
> {
376 template <typename InputByteStream
>
377 static CharType
TakeBOM(InputByteStream
& is
) {
378 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
379 CharType c
= Take(is
);
380 return static_cast<uint16_t>(c
) == 0xFEFFu
? Take(is
) : c
;
383 template <typename InputByteStream
>
384 static CharType
Take(InputByteStream
& is
) {
385 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
386 unsigned c
= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 8;
387 c
|= static_cast<uint8_t>(is
.Take());
388 return static_cast<CharType
>(c
);
391 template <typename OutputByteStream
>
392 static void PutBOM(OutputByteStream
& os
) {
393 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
394 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFEu
));
395 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFFu
));
398 template <typename OutputByteStream
>
399 static void Put(OutputByteStream
& os
, CharType c
) {
400 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
401 os
.Put(static_cast<typename
OutputByteStream::Ch
>((static_cast<unsigned>(c
) >> 8) & 0xFFu
));
402 os
.Put(static_cast<typename
OutputByteStream::Ch
>(static_cast<unsigned>(c
) & 0xFFu
));
406 ///////////////////////////////////////////////////////////////////////////////
410 /*! http://en.wikipedia.org/wiki/UTF-32
411 \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
412 \note implements Encoding concept
414 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
415 For streaming, use UTF32LE and UTF32BE, which handle endianness.
417 template<typename CharType
= unsigned>
420 RAPIDJSON_STATIC_ASSERT(sizeof(Ch
) >= 4);
422 enum { supportUnicode
= 1 };
424 template<typename OutputStream
>
425 static void Encode(OutputStream
& os
, unsigned codepoint
) {
426 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputStream::Ch
) >= 4);
427 RAPIDJSON_ASSERT(codepoint
<= 0x10FFFF);
431 template<typename OutputStream
>
432 static void EncodeUnsafe(OutputStream
& os
, unsigned codepoint
) {
433 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputStream::Ch
) >= 4);
434 RAPIDJSON_ASSERT(codepoint
<= 0x10FFFF);
435 PutUnsafe(os
, codepoint
);
438 template <typename InputStream
>
439 static bool Decode(InputStream
& is
, unsigned* codepoint
) {
440 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputStream::Ch
) >= 4);
443 return c
<= 0x10FFFF;
446 template <typename InputStream
, typename OutputStream
>
447 static bool Validate(InputStream
& is
, OutputStream
& os
) {
448 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputStream::Ch
) >= 4);
450 os
.Put(c
= is
.Take());
451 return c
<= 0x10FFFF;
455 //! UTF-32 little endian enocoding.
456 template<typename CharType
= unsigned>
457 struct UTF32LE
: UTF32
<CharType
> {
458 template <typename InputByteStream
>
459 static CharType
TakeBOM(InputByteStream
& is
) {
460 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
461 CharType c
= Take(is
);
462 return static_cast<uint32_t>(c
) == 0x0000FEFFu
? Take(is
) : c
;
465 template <typename InputByteStream
>
466 static CharType
Take(InputByteStream
& is
) {
467 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
468 unsigned c
= static_cast<uint8_t>(is
.Take());
469 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 8;
470 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 16;
471 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 24;
472 return static_cast<CharType
>(c
);
475 template <typename OutputByteStream
>
476 static void PutBOM(OutputByteStream
& os
) {
477 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
478 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFFu
));
479 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFEu
));
480 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0x00u
));
481 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0x00u
));
484 template <typename OutputByteStream
>
485 static void Put(OutputByteStream
& os
, CharType c
) {
486 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
487 os
.Put(static_cast<typename
OutputByteStream::Ch
>(c
& 0xFFu
));
488 os
.Put(static_cast<typename
OutputByteStream::Ch
>((c
>> 8) & 0xFFu
));
489 os
.Put(static_cast<typename
OutputByteStream::Ch
>((c
>> 16) & 0xFFu
));
490 os
.Put(static_cast<typename
OutputByteStream::Ch
>((c
>> 24) & 0xFFu
));
494 //! UTF-32 big endian encoding.
495 template<typename CharType
= unsigned>
496 struct UTF32BE
: UTF32
<CharType
> {
497 template <typename InputByteStream
>
498 static CharType
TakeBOM(InputByteStream
& is
) {
499 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
500 CharType c
= Take(is
);
501 return static_cast<uint32_t>(c
) == 0x0000FEFFu
? Take(is
) : c
;
504 template <typename InputByteStream
>
505 static CharType
Take(InputByteStream
& is
) {
506 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
507 unsigned c
= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 24;
508 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 16;
509 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take())) << 8;
510 c
|= static_cast<unsigned>(static_cast<uint8_t>(is
.Take()));
511 return static_cast<CharType
>(c
);
514 template <typename OutputByteStream
>
515 static void PutBOM(OutputByteStream
& os
) {
516 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
517 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0x00u
));
518 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0x00u
));
519 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFEu
));
520 os
.Put(static_cast<typename
OutputByteStream::Ch
>(0xFFu
));
523 template <typename OutputByteStream
>
524 static void Put(OutputByteStream
& os
, CharType c
) {
525 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
526 os
.Put(static_cast<typename
OutputByteStream::Ch
>((c
>> 24) & 0xFFu
));
527 os
.Put(static_cast<typename
OutputByteStream::Ch
>((c
>> 16) & 0xFFu
));
528 os
.Put(static_cast<typename
OutputByteStream::Ch
>((c
>> 8) & 0xFFu
));
529 os
.Put(static_cast<typename
OutputByteStream::Ch
>(c
& 0xFFu
));
533 ///////////////////////////////////////////////////////////////////////////////
537 /*! http://en.wikipedia.org/wiki/ASCII
538 \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
539 \note implements Encoding concept
541 template<typename CharType
= char>
545 enum { supportUnicode
= 0 };
547 template<typename OutputStream
>
548 static void Encode(OutputStream
& os
, unsigned codepoint
) {
549 RAPIDJSON_ASSERT(codepoint
<= 0x7F);
550 os
.Put(static_cast<Ch
>(codepoint
& 0xFF));
553 template<typename OutputStream
>
554 static void EncodeUnsafe(OutputStream
& os
, unsigned codepoint
) {
555 RAPIDJSON_ASSERT(codepoint
<= 0x7F);
556 PutUnsafe(os
, static_cast<Ch
>(codepoint
& 0xFF));
559 template <typename InputStream
>
560 static bool Decode(InputStream
& is
, unsigned* codepoint
) {
561 uint8_t c
= static_cast<uint8_t>(is
.Take());
566 template <typename InputStream
, typename OutputStream
>
567 static bool Validate(InputStream
& is
, OutputStream
& os
) {
568 uint8_t c
= static_cast<uint8_t>(is
.Take());
569 os
.Put(static_cast<typename
OutputStream::Ch
>(c
));
573 template <typename InputByteStream
>
574 static CharType
TakeBOM(InputByteStream
& is
) {
575 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
576 uint8_t c
= static_cast<uint8_t>(Take(is
));
577 return static_cast<Ch
>(c
);
580 template <typename InputByteStream
>
581 static Ch
Take(InputByteStream
& is
) {
582 RAPIDJSON_STATIC_ASSERT(sizeof(typename
InputByteStream::Ch
) == 1);
583 return static_cast<Ch
>(is
.Take());
586 template <typename OutputByteStream
>
587 static void PutBOM(OutputByteStream
& os
) {
588 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
592 template <typename OutputByteStream
>
593 static void Put(OutputByteStream
& os
, Ch c
) {
594 RAPIDJSON_STATIC_ASSERT(sizeof(typename
OutputByteStream::Ch
) == 1);
595 os
.Put(static_cast<typename
OutputByteStream::Ch
>(c
));
599 ///////////////////////////////////////////////////////////////////////////////
//! Runtime-specified UTF encoding type of a stream.
/*! The numeric values are used by AutoUTF as indices into its per-encoding
    function tables, so their order must match those tables.
*/
enum UTFType {
    kUTF8    = 0,   //!< UTF-8.
    kUTF16LE = 1,   //!< UTF-16 little endian.
    kUTF16BE = 2,   //!< UTF-16 big endian.
    kUTF32LE = 3,   //!< UTF-32 little endian.
    kUTF32BE = 4    //!< UTF-32 big endian.
};
611 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
612 /*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType().
614 template<typename CharType
>
618 enum { supportUnicode
= 1 };
620 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
622 template<typename OutputStream
>
623 RAPIDJSON_FORCEINLINE
static void Encode(OutputStream
& os
, unsigned codepoint
) {
624 typedef void (*EncodeFunc
)(OutputStream
&, unsigned);
625 static const EncodeFunc f
[] = { RAPIDJSON_ENCODINGS_FUNC(Encode
) };
626 (*f
[os
.GetType()])(os
, codepoint
);
629 template<typename OutputStream
>
630 RAPIDJSON_FORCEINLINE
static void EncodeUnsafe(OutputStream
& os
, unsigned codepoint
) {
631 typedef void (*EncodeFunc
)(OutputStream
&, unsigned);
632 static const EncodeFunc f
[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe
) };
633 (*f
[os
.GetType()])(os
, codepoint
);
636 template <typename InputStream
>
637 RAPIDJSON_FORCEINLINE
static bool Decode(InputStream
& is
, unsigned* codepoint
) {
638 typedef bool (*DecodeFunc
)(InputStream
&, unsigned*);
639 static const DecodeFunc f
[] = { RAPIDJSON_ENCODINGS_FUNC(Decode
) };
640 return (*f
[is
.GetType()])(is
, codepoint
);
643 template <typename InputStream
, typename OutputStream
>
644 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream
& is
, OutputStream
& os
) {
645 typedef bool (*ValidateFunc
)(InputStream
&, OutputStream
&);
646 static const ValidateFunc f
[] = { RAPIDJSON_ENCODINGS_FUNC(Validate
) };
647 return (*f
[is
.GetType()])(is
, os
);
650 #undef RAPIDJSON_ENCODINGS_FUNC
653 ///////////////////////////////////////////////////////////////////////////////
656 //! Encoding conversion.
657 template<typename SourceEncoding
, typename TargetEncoding
>
659 //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
660 template<typename InputStream
, typename OutputStream
>
661 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream
& is
, OutputStream
& os
) {
663 if (!SourceEncoding::Decode(is
, &codepoint
))
665 TargetEncoding::Encode(os
, codepoint
);
669 template<typename InputStream
, typename OutputStream
>
670 RAPIDJSON_FORCEINLINE
static bool TranscodeUnsafe(InputStream
& is
, OutputStream
& os
) {
672 if (!SourceEncoding::Decode(is
, &codepoint
))
674 TargetEncoding::EncodeUnsafe(os
, codepoint
);
678 //! Validate one Unicode codepoint from an encoded stream.
679 template<typename InputStream
, typename OutputStream
>
680 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream
& is
, OutputStream
& os
) {
681 return Transcode(is
, os
); // Since source/target encoding is different, must transcode.
// Forward declaration only; the definition is not in this header.
template <class Stream>
inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
689 //! Specialization of Transcoder with same source and target encoding.
690 template<typename Encoding
>
691 struct Transcoder
<Encoding
, Encoding
> {
692 template<typename InputStream
, typename OutputStream
>
693 RAPIDJSON_FORCEINLINE
static bool Transcode(InputStream
& is
, OutputStream
& os
) {
694 os
.Put(is
.Take()); // Just copy one code unit. This semantic is different from primary template class.
698 template<typename InputStream
, typename OutputStream
>
699 RAPIDJSON_FORCEINLINE
static bool TranscodeUnsafe(InputStream
& is
, OutputStream
& os
) {
700 PutUnsafe(os
, is
.Take()); // Just copy one code unit. This semantic is different from primary template class.
704 template<typename InputStream
, typename OutputStream
>
705 RAPIDJSON_FORCEINLINE
static bool Validate(InputStream
& is
, OutputStream
& os
) {
706 return Encoding::Validate(is
, os
); // source/target encoding are the same
710 RAPIDJSON_NAMESPACE_END
712 #if defined(__GNUC__) || defined(_MSC_VER)
716 #endif // RAPIDJSON_ENCODINGS_H_