]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP |
2 | #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP | |
3 | ||
4 | #include <boost/assert.hpp> | |
5 | #include <boost/range/iterator_range_core.hpp> | |
6 | ||
7 | #include <utility> | |
8 | ||
9 | namespace boost { namespace property_tree { | |
10 | namespace json_parser { namespace detail | |
11 | { | |
12 | ||
/// Character classification helpers for input whose code unit is wchar_t.
///
/// Each predicate compares a single wchar_t against one or more wide
/// character literals. The struct holds no state, so every member is
/// const and can be called through a const object or reference.
struct external_wide_encoding
{
    typedef wchar_t external_char;

    // --- Line breaks and whitespace --------------------------------------
    bool is_nl(wchar_t c) const { return c == L'\n'; }
    bool is_ws(wchar_t c) const {
        return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
    }

    // --- Pieces of a number ----------------------------------------------
    bool is_minus(wchar_t c) const { return c == L'-'; }
    bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
    bool is_dot(wchar_t c) const { return c == L'.'; }
    bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
    bool is_0(wchar_t c) const { return c == L'0'; }
    /// True for L'0' through L'9'.
    bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
    /// True for L'1' through L'9' only; L'0' is deliberately excluded.
    bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }

    // --- String delimiters and escapes -----------------------------------
    bool is_quote(wchar_t c) const { return c == L'"'; }
    bool is_backslash(wchar_t c) const { return c == L'\\'; }
    bool is_slash(wchar_t c) const { return c == L'/'; }

    // --- Structural punctuation ------------------------------------------
    bool is_comma(wchar_t c) const { return c == L','; }
    bool is_open_bracket(wchar_t c) const { return c == L'['; }
    bool is_close_bracket(wchar_t c) const { return c == L']'; }
    bool is_colon(wchar_t c) const { return c == L':'; }
    bool is_open_brace(wchar_t c) const { return c == L'{'; }
    bool is_close_brace(wchar_t c) const { return c == L'}'; }

    // --- Individual lowercase letters ------------------------------------
    bool is_a(wchar_t c) const { return c == L'a'; }
    bool is_b(wchar_t c) const { return c == L'b'; }
    bool is_e(wchar_t c) const { return c == L'e'; }
    bool is_f(wchar_t c) const { return c == L'f'; }
    bool is_l(wchar_t c) const { return c == L'l'; }
    bool is_n(wchar_t c) const { return c == L'n'; }
    bool is_r(wchar_t c) const { return c == L'r'; }
    bool is_s(wchar_t c) const { return c == L's'; }
    bool is_t(wchar_t c) const { return c == L't'; }
    bool is_u(wchar_t c) const { return c == L'u'; }

    /// Converts one hexadecimal digit to its numeric value.
    ///
    /// @param c  Character to decode; upper- and lowercase letters are
    ///           both accepted.
    /// @return   0..15 for a hexadecimal digit, -1 for anything else.
    ///
    /// Declared const like every other member so that it can be used
    /// through a const encoding object.
    int decode_hexdigit(wchar_t c) const {
        if (c >= L'0' && c <= L'9') return c - L'0';
        if (c >= L'A' && c <= L'F') return c - L'A' + 10;
        if (c >= L'a' && c <= L'f') return c - L'a' + 10;
        return -1;
    }
};
59 | ||
/// Empty tag type parameterised on a boolean.
///
/// is_utf16<true> and is_utf16<false> are distinct types, which lets
/// overloads be selected at compile time by passing an instance of one
/// or the other as an extra argument.
template <bool Utf16>
struct is_utf16
{
};
61 | ||
62 | class wide_wide_encoding : public external_wide_encoding | |
63 | { | |
64 | typedef is_utf16<sizeof(wchar_t) == 2> test_utf16; | |
65 | public: | |
66 | typedef wchar_t internal_char; | |
67 | ||
68 | template <typename Iterator> | |
69 | boost::iterator_range<Iterator> | |
70 | to_internal(Iterator first, Iterator last) const { | |
71 | return boost::make_iterator_range(first, last); | |
72 | } | |
73 | ||
74 | wchar_t to_internal_trivial(wchar_t c) const { | |
75 | BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c)); | |
76 | return c; | |
77 | } | |
78 | ||
79 | template <typename Iterator, typename Sentinel, | |
80 | typename EncodingErrorFn> | |
81 | void skip_codepoint(Iterator& cur, Sentinel end, | |
82 | EncodingErrorFn error_fn) const { | |
83 | transcode_codepoint(cur, end, DoNothing(), error_fn); | |
84 | } | |
85 | ||
86 | template <typename Iterator, typename Sentinel, typename TranscodedFn, | |
87 | typename EncodingErrorFn> | |
88 | void transcode_codepoint(Iterator& cur, Sentinel end, | |
89 | TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const { | |
90 | return transcode_codepoint(cur, end, transcoded_fn, error_fn, | |
91 | test_utf16()); | |
92 | } | |
93 | ||
94 | template <typename TranscodedFn> | |
95 | void feed_codepoint(unsigned codepoint, | |
96 | TranscodedFn transcoded_fn) const { | |
97 | feed_codepoint(codepoint, transcoded_fn, test_utf16()); | |
98 | } | |
99 | ||
100 | template <typename Iterator, typename Sentinel> | |
101 | void skip_introduction(Iterator& cur, Sentinel end) const { | |
102 | // Endianness is already decoded at this level. | |
103 | if (cur != end && *cur == 0xfeff) { | |
104 | ++cur; | |
105 | } | |
106 | } | |
107 | ||
108 | private: | |
109 | struct DoNothing { | |
110 | void operator ()(wchar_t) const {} | |
111 | }; | |
112 | ||
113 | template <typename Iterator, typename Sentinel, typename TranscodedFn, | |
114 | typename EncodingErrorFn> | |
115 | void transcode_codepoint(Iterator& cur, Sentinel, | |
116 | TranscodedFn transcoded_fn, | |
117 | EncodingErrorFn error_fn, | |
118 | is_utf16<false>) const { | |
119 | wchar_t c = *cur; | |
120 | if (c < 0x20) { | |
121 | error_fn(); | |
122 | } | |
123 | transcoded_fn(c); | |
124 | ++cur; | |
125 | } | |
126 | template <typename Iterator, typename Sentinel, typename TranscodedFn, | |
127 | typename EncodingErrorFn> | |
128 | void transcode_codepoint(Iterator& cur, Sentinel end, | |
129 | TranscodedFn transcoded_fn, | |
130 | EncodingErrorFn error_fn, | |
131 | is_utf16<true>) const { | |
132 | wchar_t c = *cur; | |
133 | if (c < 0x20) { | |
134 | error_fn(); | |
135 | } | |
136 | if (is_surrogate_low(c)) { | |
137 | error_fn(); | |
138 | } | |
139 | transcoded_fn(c); | |
140 | ++cur; | |
141 | if (is_surrogate_high(c)) { | |
142 | if (cur == end) { | |
143 | error_fn(); | |
144 | } | |
145 | c = *cur; | |
146 | if (!is_surrogate_low(c)) { | |
147 | error_fn(); | |
148 | } | |
149 | transcoded_fn(c); | |
150 | ++cur; | |
151 | } | |
152 | } | |
153 | ||
154 | template <typename TranscodedFn> | |
155 | void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn, | |
156 | is_utf16<false>) const { | |
157 | transcoded_fn(static_cast<wchar_t>(codepoint)); | |
158 | } | |
159 | template <typename TranscodedFn> | |
160 | void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn, | |
161 | is_utf16<true>) const { | |
162 | if (codepoint < 0x10000) { | |
163 | transcoded_fn(static_cast<wchar_t>(codepoint)); | |
164 | } else { | |
165 | codepoint -= 0x10000; | |
166 | transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800)); | |
167 | transcoded_fn(static_cast<wchar_t>( | |
168 | (codepoint & 0x3ff) | 0xdc00)); | |
169 | } | |
170 | } | |
171 | ||
172 | static bool is_surrogate_high(unsigned codepoint) { | |
173 | return (codepoint & 0xfc00) == 0xd800; | |
174 | } | |
175 | static bool is_surrogate_low(unsigned codepoint) { | |
176 | return (codepoint & 0xfc00) == 0xdc00; | |
177 | } | |
178 | }; | |
179 | ||
180 | }}}} | |
181 | ||
182 | #endif |