]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP |
2 | #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP | |
3 | ||
4 | #include <boost/assert.hpp> | |
5 | #include <boost/range/iterator_range_core.hpp> | |
6 | ||
7 | #include <utility> | |
8 | ||
9 | namespace boost { namespace property_tree { | |
10 | namespace json_parser { namespace detail | |
11 | { | |
12 | ||
// Character-classification policy for external (input) encodings whose
// code units are `char` and compare equal to the ASCII literals used
// below. Every member is a stateless predicate or helper, so all of them
// are const-qualified and usable through a const encoding object.
struct external_ascii_superset_encoding
{
    typedef char external_char;

    // Whitespace classification.
    bool is_nl(char c) const { return c == '\n'; }
    bool is_ws(char c) const {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    // Characters that make up numbers.
    bool is_minus(char c) const { return c == '-'; }
    bool is_plusminus(char c) const { return c == '+' || c == '-'; }
    bool is_dot(char c) const { return c == '.'; }
    bool is_eE(char c) const { return c == 'e' || c == 'E'; }
    bool is_0(char c) const { return c == '0'; }
    bool is_digit(char c) const { return c >= '0' && c <= '9'; }
    // Note: despite the name, this matches the non-zero digits '1'..'9'.
    bool is_digit0(char c) const { return c >= '1' && c <= '9'; }

    // Characters that delimit or escape inside strings.
    bool is_quote(char c) const { return c == '"'; }
    bool is_backslash(char c) const { return c == '\\'; }
    bool is_slash(char c) const { return c == '/'; }

    // Structural punctuation.
    bool is_comma(char c) const { return c == ','; }
    bool is_open_bracket(char c) const { return c == '['; }
    bool is_close_bracket(char c) const { return c == ']'; }
    bool is_colon(char c) const { return c == ':'; }
    bool is_open_brace(char c) const { return c == '{'; }
    bool is_close_brace(char c) const { return c == '}'; }

    // Individual lower-case letters.
    bool is_a(char c) const { return c == 'a'; }
    bool is_b(char c) const { return c == 'b'; }
    bool is_e(char c) const { return c == 'e'; }
    bool is_f(char c) const { return c == 'f'; }
    bool is_l(char c) const { return c == 'l'; }
    bool is_n(char c) const { return c == 'n'; }
    bool is_r(char c) const { return c == 'r'; }
    bool is_s(char c) const { return c == 's'; }
    bool is_t(char c) const { return c == 't'; }
    bool is_u(char c) const { return c == 'u'; }

    // Returns the numeric value (0..15) of the hexadecimal digit `c`,
    // accepting both upper- and lower-case letters, or -1 when `c` is
    // not a hexadecimal digit.
    // Const-qualified like every other member: it reads no object state.
    int decode_hexdigit(char c) const {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return -1;
    }
};
59 | ||
60 | struct utf8_utf8_encoding : external_ascii_superset_encoding | |
61 | { | |
62 | typedef char internal_char; | |
63 | ||
64 | template <typename Iterator> | |
65 | boost::iterator_range<Iterator> | |
66 | to_internal(Iterator first, Iterator last) const { | |
67 | return boost::make_iterator_range(first, last); | |
68 | } | |
69 | ||
70 | char to_internal_trivial(char c) const { | |
71 | BOOST_ASSERT(static_cast<unsigned char>(c) <= 0x7f); | |
72 | return c; | |
73 | } | |
74 | ||
75 | template <typename Iterator, typename Sentinel, | |
76 | typename EncodingErrorFn> | |
77 | void skip_codepoint(Iterator& cur, Sentinel end, | |
78 | EncodingErrorFn error_fn) const { | |
79 | transcode_codepoint(cur, end, DoNothing(), error_fn); | |
80 | } | |
81 | ||
82 | template <typename Iterator, typename Sentinel, typename TranscodedFn, | |
83 | typename EncodingErrorFn> | |
84 | void transcode_codepoint(Iterator& cur, Sentinel end, | |
85 | TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const { | |
86 | unsigned char c = *cur; | |
87 | ++cur; | |
88 | if (c <= 0x7f) { | |
89 | // Solo byte, filter out disallowed codepoints. | |
90 | if (c < 0x20) { | |
91 | error_fn(); | |
92 | } | |
93 | transcoded_fn(c); | |
94 | return; | |
95 | } | |
96 | int trailing = trail_table(c); | |
97 | if (trailing == -1) { | |
98 | // Standalone trailing byte or overly long sequence. | |
99 | error_fn(); | |
100 | } | |
101 | transcoded_fn(c); | |
102 | for (int i = 0; i < trailing; ++i) { | |
103 | if (cur == end || !is_trail(*cur)) { | |
104 | error_fn(); | |
105 | } | |
106 | transcoded_fn(*cur); | |
107 | ++cur; | |
108 | } | |
109 | } | |
110 | ||
111 | template <typename TranscodedFn> | |
112 | void feed_codepoint(unsigned codepoint, | |
113 | TranscodedFn transcoded_fn) const { | |
114 | if (codepoint <= 0x7f) { | |
115 | transcoded_fn(static_cast<char>(codepoint)); | |
116 | } else if (codepoint <= 0x7ff) { | |
117 | transcoded_fn(static_cast<char>(0xc0 | (codepoint >> 6))); | |
118 | transcoded_fn(trail(codepoint)); | |
119 | } else if (codepoint <= 0xffff) { | |
120 | transcoded_fn(static_cast<char>(0xe0 | (codepoint >> 12))); | |
121 | transcoded_fn(trail(codepoint >> 6)); | |
122 | transcoded_fn(trail(codepoint)); | |
123 | } else if (codepoint <= 0x10ffff) { | |
124 | transcoded_fn(static_cast<char>(0xf0 | (codepoint >> 18))); | |
125 | transcoded_fn(trail(codepoint >> 12)); | |
126 | transcoded_fn(trail(codepoint >> 6)); | |
127 | transcoded_fn(trail(codepoint)); | |
128 | } | |
129 | } | |
130 | ||
131 | template <typename Iterator, typename Sentinel> | |
132 | void skip_introduction(Iterator& cur, Sentinel end) const { | |
133 | if (cur != end && static_cast<unsigned char>(*cur) == 0xef) { | |
134 | if (++cur == end) return; | |
135 | if (++cur == end) return; | |
136 | if (++cur == end) return; | |
137 | } | |
138 | } | |
139 | ||
140 | private: | |
141 | struct DoNothing { | |
142 | void operator ()(char) const {} | |
143 | }; | |
144 | ||
145 | bool is_trail(unsigned char c) const { | |
146 | return (c & 0xc0) == 0x80; | |
147 | } | |
148 | ||
149 | int trail_table(unsigned char c) const { | |
150 | static const signed char table[] = { | |
151 | /* not a lead byte */ | |
152 | /* 0x10???sss */ -1, -1, -1, -1, -1, -1, -1, -1, | |
153 | /* 0x110??sss */ 1, 1, 1, 1, /* 1 trailing byte */ | |
154 | /* 0x1110?sss */ 2, 2, /* 2 trailing bytes */ | |
155 | /* 0x11110sss */ 3, /* 3 trailing bytes */ | |
156 | /* 0x11111sss */ -1 /* 4 or 5 trailing bytes, disallowed */ | |
157 | }; | |
158 | return table[(c & 0x7f) >> 3]; | |
159 | } | |
160 | ||
161 | char trail(unsigned unmasked) const { | |
162 | return static_cast<char>(0x80 | (unmasked & 0x3f)); | |
163 | } | |
164 | }; | |
165 | ||
166 | }}}} | |
167 | ||
168 | #endif |