1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_PARSER_HPP
4 #include <boost/property_tree/json_parser/error.hpp>
6 #include <boost/ref.hpp>
7 #include <boost/bind.hpp>
8 #include <boost/format.hpp>
14 namespace boost { namespace property_tree {
15 namespace json_parser { namespace detail
// Input source for the parser, parameterized on the encoding, the iterator
// type and the sentinel type used for the end of input.
// Two typedefs follow: one based on the iterator's value_type, and
// encoding_predicate, a pointer to a const Encoding member function that
// takes one code_unit and returns bool.
18 template <typename Encoding, typename Iterator, typename Sentinel>
22 typedef typename std::iterator_traits<Iterator>::value_type
24 typedef bool (Encoding::*encoding_predicate)(code_unit c) const;
// Initializes the encoding member from the argument. Marked explicit, so an
// Encoding is never implicitly converted into a source.
26 explicit source(Encoding& encoding) : encoding(encoding) {}
// Prepares the source for a new input.
//   filename - stored in this->filename; parse_error() later passes it to
//              json_parser_error.
//   r        - the input range.
// The encoding is then asked to skip any introduction at the start of the
// input via skip_introduction(cur, end).
28 template <typename Range>
29 void set_input(const std::string& filename, const Range& r)
31 this->filename = filename;
34 // Note that there is no backtracking, so if e.g. a UTF-8 file
35 // starts with something that initially looks like a BOM but isn't,
// that misdetection cannot be undone.
37 // However, no valid JSON file can start with a UTF-8 EF byte.
38 encoding.skip_introduction(cur, end);
// Returns true when the current position compares equal to the end of input.
43 bool done() const { return cur == end; }
// Reports a parse failure: throws (through BOOST_PROPERTY_TREE_THROW) a
// json_parser_error built from msg, the stored filename and the line member.
//   msg - description of what went wrong.
45 void parse_error(const char* msg) {
46 BOOST_PROPERTY_TREE_THROW(
47 json_parser_error(msg, filename, line));
51 if (encoding.is_nl(*cur)) {
// Tests the current code unit against an encoding predicate.
//   p - pointer to the Encoding member function to apply to *cur.
//   a - action object.
//       NOTE(review): presumably invoked with the matched code unit - confirm.
// found is true only when input remains and the predicate accepts *cur; the
// cur != end test comes first, so *cur is not evaluated at end of input.
60 template <typename Action>
61 bool have(encoding_predicate p, Action& a) {
62 bool found = cur != end && (encoding.*p)(*cur);
70 bool have(encoding_predicate p) {
75 template <typename Action>
76 void expect(encoding_predicate p, const char* msg, Action& a) {
82 void expect(encoding_predicate p, const char* msg) {
87 code_unit need_cur(const char* msg) {
// Exposes the current-position iterator by non-const reference, so a caller
// holding the reference can read and advance the position itself.
94 Iterator& raw_cur() { return cur; }
// Returns the end-of-input sentinel by value.
95 Sentinel raw_end() { return end; }
// Call operator that accepts a code unit and does nothing with it.
99 void operator ()(code_unit) const {}
// Name of the input; assigned in set_input() and passed to json_parser_error
// by parse_error().
105 std::string filename;
// Adapter that reports a parsed number to the callbacks object.
// This primary template ignores the individual characters it is called with
// and reports the whole number in one call from finish().
// The last template parameter defaults to a type obtained from
// std::iterator_traits<Iterator>; a partial specialization for
// std::input_iterator_tag exists alongside this template.
110 template <typename Callbacks, typename Encoding, typename Iterator,
111 typename = typename std::iterator_traits<Iterator>
113 class number_callback_adapter
// Binds the callbacks and encoding; both first and cur are initialized from
// the cur argument.
116 number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
118 : callbacks(callbacks), encoding(encoding), first(cur), cur(cur)
// Per-character hook: does nothing in this variant.
121 void operator ()(typename Encoding::external_char) {}
// Converts the text delimited by first and cur with encoding.to_internal and
// passes the result to callbacks.on_number.
123 void finish() const {
124 callbacks.on_number(encoding.to_internal(first, cur));
// Copy constructor: declaration only, no body.
128 number_callback_adapter(const number_callback_adapter&);
// Receiver of the on_number event.
130 Callbacks& callbacks;
// Partial specialization of number_callback_adapter selected for
// std::input_iterator_tag. It reports a number piecewise through
// on_begin_number / on_digit / on_end_number instead of as one range.
136 template <typename Callbacks, typename Encoding, typename Iterator>
137 class number_callback_adapter<Callbacks, Encoding, Iterator,
138 std::input_iterator_tag>
// Binds the callbacks and encoding; the first member starts out as true.
141 number_callback_adapter(Callbacks& callbacks, Encoding& encoding,
143 : callbacks(callbacks), encoding(encoding), first(true)
// Per-character hook: converts c with encoding.to_internal_trivial and passes
// it to callbacks.on_digit.
// NOTE(review): the on_begin_number call is presumably guarded by the first
// flag so that it fires only once - confirm.
146 void operator ()(typename Encoding::external_char c) {
148 callbacks.on_begin_number();
151 callbacks.on_digit(encoding.to_internal_trivial(c));
// Signals the end of the number to the callbacks.
154 void finish() const {
155 callbacks.on_end_number();
// Copy constructor: declaration only, no body.
158 number_callback_adapter(const number_callback_adapter&);
// Receiver of the number events.
160 Callbacks& callbacks;
// Adapter that reports string content to the callbacks object.
// This primary template reports runs of code units: the text between
// run_begin and cur is converted with encoding.to_internal and passed to
// callbacks.on_code_units.
// The last template parameter defaults to a type obtained from
// std::iterator_traits<Iterator>; a partial specialization for
// std::input_iterator_tag exists alongside this template.
165 template <typename Callbacks, typename Encoding, typename Iterator,
166 typename = typename std::iterator_traits<Iterator>
168 class string_callback_adapter
171 string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
173 : callbacks(callbacks), encoding(encoding), cur(cur),
182 callbacks.on_code_units(encoding.to_internal(run_begin, cur));
// Has the encoding skip one code point starting at cur.
//   end      - end-of-input sentinel.
//   error_fn - passed through to encoding.skip_codepoint.
//              NOTE(review): presumably invoked on an invalid sequence - confirm.
185 template <typename Sentinel, typename EncodingErrorFn>
186 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
187 encoding.skip_codepoint(cur, end, error_fn);
// Copy constructor: declaration only, no body.
191 string_callback_adapter(const string_callback_adapter&);
// Receiver of the on_code_units event.
193 Callbacks& callbacks;
// Partial specialization of string_callback_adapter selected for
// std::input_iterator_tag. Instead of reporting ranges, it has the encoding
// transcode each code point and forwards the output to callbacks.on_code_unit.
199 template <typename Callbacks, typename Encoding, typename Iterator>
200 class string_callback_adapter<Callbacks, Encoding, Iterator,
201 std::input_iterator_tag>
// Binds the callbacks, the encoding and the cur argument.
204 string_callback_adapter(Callbacks& callbacks, Encoding& encoding,
206 : callbacks(callbacks), encoding(encoding), cur(cur)
// Transcodes one code point starting at cur via encoding.transcode_codepoint.
// The bound functor calls Callbacks::on_code_unit on the callbacks object with
// the argument it is given; boost::ref makes bind hold a reference to
// callbacks rather than a copy.
//   end - end-of-input sentinel.
213 template <typename Sentinel, typename EncodingErrorFn>
214 void process_codepoint(Sentinel end, EncodingErrorFn error_fn) {
215 encoding.transcode_codepoint(cur, end,
216 boost::bind(&Callbacks::on_code_unit,
217 boost::ref(callbacks), _1),
// Copy constructor: declaration only, no body.
222 string_callback_adapter(const string_callback_adapter&);
// Receiver of the on_code_unit events.
224 Callbacks& callbacks;
// JSON parser template, parameterized on the callbacks, the encoding and the
// iterator type (the Sentinel type is used in the typedefs below).
// The typedefs select the number and string adapters from the detail
// namespace for this Callbacks/Encoding/Iterator combination, name the input
// source type, and re-export the source's code_unit type.
229 template <typename Callbacks, typename Encoding, typename Iterator,
233 typedef detail::number_callback_adapter<Callbacks, Encoding, Iterator>
235 typedef detail::string_callback_adapter<Callbacks, Encoding, Iterator>
237 typedef detail::source<Encoding, Iterator, Sentinel> source;
238 typedef typename source::code_unit code_unit;
// Binds the callbacks and encoding, and constructs the input source from the
// same encoding object.
241 parser(Callbacks& callbacks, Encoding& encoding)
242 : callbacks(callbacks), encoding(encoding), src(encoding)
// Sets the input to parse by forwarding both arguments unchanged to
// src.set_input.
//   filename - name of the input.
//   r        - the input range.
245 template <typename Range>
246 void set_input(const std::string& filename, const Range& r) {
247 src.set_input(filename, r);
253 parse_error("garbage after data");
// Tries each value parser in a fixed order - object, array, string, boolean,
// null, number - and returns as soon as one of them reports success.
// If none matches, parse_error is called with "expected value".
258 if (parse_object()) return;
259 if (parse_array()) return;
260 if (parse_string()) return;
261 if (parse_boolean()) return;
262 if (parse_null()) return;
263 if (parse_number()) return;
264 parse_error("expected value");
// Matching of the literal null: the first test is for 'n' (is_n); the
// remaining letters u, l, l are then each demanded with expect(), which is
// given the message "expected 'null'".
269 if (!have(&Encoding::is_n)) {
272 expect(&Encoding::is_u, "expected 'null'");
273 expect(&Encoding::is_l, "expected 'null'");
274 expect(&Encoding::is_l, "expected 'null'");
// Recognizes the literals true and false.
// Once the first letter has matched, every following letter is demanded with
// expect(); each call is given the message naming the literal.
279 bool parse_boolean() {
// 't' seen: require "rue", then report true to the callbacks.
281 if (have(&Encoding::is_t)) {
282 expect(&Encoding::is_r, "expected 'true'");
283 expect(&Encoding::is_u, "expected 'true'");
284 expect(&Encoding::is_e, "expected 'true'");
285 callbacks.on_boolean(true);
// 'f' seen: require "alse", then report false to the callbacks.
288 if (have(&Encoding::is_f)) {
289 expect(&Encoding::is_a, "expected 'false'");
290 expect(&Encoding::is_l, "expected 'false'");
291 expect(&Encoding::is_s, "expected 'false'");
292 expect(&Encoding::is_e, "expected 'false'");
293 callbacks.on_boolean(false);
// Parses a number. Visible structure: an optional minus sign, then either a
// single '0' or an integer part, then the fraction and exponent parts.
299 bool parse_number() {
// The adapter is built on the source's current iterator (taken by reference
// through raw_cur()) and is passed as the action to every have() call below.
302 number_adapter adapter(callbacks, encoding, src.raw_cur());
303 bool started = false;
304 if (have(&Encoding::is_minus, adapter)) {
// Neither a lone '0' nor an integer part matched.
// NOTE(review): the error below is presumably raised only when a minus sign
// was already consumed (tracked by started) - confirm.
307 if (!have(&Encoding::is_0, adapter) && !parse_int_part(adapter)) {
309 parse_error("expected digits after -");
313 parse_frac_part(adapter);
314 parse_exp_part(adapter);
// Parses a quoted string and reports it between callbacks.on_begin_string()
// and callbacks.on_end_string().
319 bool parse_string() {
// The opening quote decides whether this is a string at all.
322 if (!have(&Encoding::is_quote)) {
326 callbacks.on_begin_string();
// The adapter works on the source's current iterator (by reference).
327 string_adapter adapter(callbacks, encoding, src.raw_cur());
// Loop until the current code unit is the closing quote. need_cur is given
// the message "unterminated string".
// NOTE(review): presumably raised when the input ends first - confirm.
328 while (!encoding.is_quote(need_cur("unterminated string"))) {
// A backslash ends the current run of ordinary characters.
329 if (encoding.is_backslash(*src.raw_cur())) {
330 adapter.finish_run();
// Ordinary content: the adapter processes one code point; the bound functor
// calls this->parse_error("invalid code sequence").
335 adapter.process_codepoint(src.raw_end(),
336 boost::bind(&parser::parse_error,
337 this, "invalid code sequence"));
// Flush whatever run is still pending, then close the string.
340 adapter.finish_run();
341 callbacks.on_end_string();
// Array parsing: the opening '[' decides whether this is an array;
// callbacks.on_begin_array() / on_end_array() bracket the contents.
349 if (!have(&Encoding::is_open_bracket)) {
353 callbacks.on_begin_array();
// An immediate ']' is an empty array.
355 if (have(&Encoding::is_close_bracket)) {
356 callbacks.on_end_array();
// The element loop repeats for as long as a ',' follows.
362 } while (have(&Encoding::is_comma));
// After the last element only ']' is acceptable.
363 expect(&Encoding::is_close_bracket, "expected ']' or ','");
364 callbacks.on_end_array();
// Parses an object and reports it between callbacks.on_begin_object() and
// callbacks.on_end_object().
368 bool parse_object() {
// The opening '{' decides whether this is an object at all.
371 if (!have(&Encoding::is_open_brace)) {
375 callbacks.on_begin_object();
// An immediate '}' is an empty object.
377 if (have(&Encoding::is_close_brace)) {
378 callbacks.on_end_object();
// Each member must start with a string key, followed by ':'.
382 if (!parse_string()) {
383 parse_error("expected key string");
386 expect(&Encoding::is_colon, "expected ':'");
// The member loop repeats for as long as a ',' follows.
389 } while (have(&Encoding::is_comma));
// After the last member only '}' is acceptable.
390 expect(&Encoding::is_close_brace, "expected '}' or ','");
391 callbacks.on_end_object();
// Shorthands so the parsing functions can call the input source's operations
// without spelling out src. each time. Each wrapper with a visible body
// forwards its arguments unchanged to the src member function of the same
// name.
// The predicate type is re-exported from the source.
396 typedef typename source::encoding_predicate encoding_predicate;
// Forwards msg to src.parse_error.
398 void parse_error(const char* msg) { src.parse_error(msg); }
// Forwards to src.next.
399 void next() { src.next(); }
// Forwards the predicate and the action to src.have and returns its result.
400 template <typename Action>
401 bool have(encoding_predicate p, Action& a) { return src.have(p, a); }
// Same, for the overload without an action.
402 bool have(encoding_predicate p) { return src.have(p); }
// Forwards the predicate, the message and the action to src.expect.
403 template <typename Action>
404 void expect(encoding_predicate p, const char* msg, Action& a) {
405 src.expect(p, msg, a);
407 void expect(encoding_predicate p, const char* msg) {
// Forwards msg to src.need_cur and returns the code unit it yields.
410 code_unit need_cur(const char* msg) { return src.need_cur(msg); }
413 while (have(&Encoding::is_ws)) {
// Parses the integer part of a number: a first digit matching is_digit0,
// followed by the digits consumed by parse_digits.
//   action - number adapter passed along to have() and parse_digits().
417 bool parse_int_part(number_adapter& action) {
418 if (!have(&Encoding::is_digit0, action)) {
421 parse_digits(action);
// Parses the fraction part of a number. It is introduced by a dot, after
// which a digit is demanded with the message
// "need at least one digit after '.'"; parse_digits then takes the rest.
//   action - number adapter passed along to have() and parse_digits().
425 void parse_frac_part(number_adapter& action) {
426 if (!have(&Encoding::is_dot, action)) {
429 expect(&Encoding::is_digit, "need at least one digit after '.'",
431 parse_digits(action);
// Parses the exponent part of a number. It is introduced by a code unit
// matching is_eE, followed by an optional sign and then a digit, demanded
// with the message "need at least one digit in exponent"; parse_digits then
// takes the rest.
//   action - number adapter passed along to have() and parse_digits().
434 void parse_exp_part(number_adapter& action) {
435 if (!have(&Encoding::is_eE, action)) {
// The sign is optional, so the result of this have() is deliberately unused.
438 have(&Encoding::is_plusminus, action);
439 expect(&Encoding::is_digit, "need at least one digit in exponent",
441 parse_digits(action);
// Consumes digits for as long as have() keeps matching is_digit; zero digits
// is acceptable here.
//   action - number adapter passed along to each have() call.
444 void parse_digits(number_adapter& action) {
445 while (have(&Encoding::is_digit, action)) {
// Handles the character that selects an escape sequence. The tests run in
// order: quote, backslash, slash, b, f, n, r, t, u.
// For b, f, n, r and t the corresponding control character is fed to the
// output; u starts a code point reference handled by parse_codepoint_ref().
// NOTE(review): the trailing parse_error call presumably sits in the final
// else branch, for unrecognized escapes - confirm.
449 void parse_escape() {
450 if (have(&Encoding::is_quote)) {
452 } else if (have(&Encoding::is_backslash)) {
454 } else if (have(&Encoding::is_slash)) {
456 } else if (have(&Encoding::is_b)) {
457 feed(0x08); // backspace
458 } else if (have(&Encoding::is_f)) {
459 feed(0x0c); // formfeed
460 } else if (have(&Encoding::is_n)) {
461 feed(0x0a); // line feed
462 } else if (have(&Encoding::is_r)) {
463 feed(0x0d); // carriage return
464 } else if (have(&Encoding::is_t)) {
465 feed(0x09); // horizontal tab
466 } else if (have(&Encoding::is_u)) {
467 parse_codepoint_ref();
469 parse_error("invalid escape sequence");
// Reads a group of four hexadecimal digits (the loop runs exactly four
// times). Each iteration obtains the current code unit through need_cur and
// decodes it with encoding.decode_hexdigit. "invalid escape sequence" is the
// message used both for need_cur and for the parse_error call.
// NOTE(review): the parse_error call is presumably taken when decode_hexdigit
// signals a non-hex digit - confirm.
473 unsigned parse_hex_quad() {
474 unsigned codepoint = 0;
475 for (int i = 0; i < 4; ++i) {
476 int value = encoding.decode_hexdigit(
477 need_cur("invalid escape sequence"));
479 parse_error("invalid escape sequence");
// True when the bits selected by the mask 0xfc00 equal 0xd800. For a 16-bit
// value that is the range 0xD800-0xDBFF, the UTF-16 high (leading) surrogates.
488 static bool is_surrogate_high(unsigned codepoint) {
489 return (codepoint & 0xfc00) == 0xd800;
// True when the bits selected by the mask 0xfc00 equal 0xdc00. For a 16-bit
// value that is the range 0xDC00-0xDFFF, the UTF-16 low (trailing) surrogates.
491 static bool is_surrogate_low(unsigned codepoint) {
492 return (codepoint & 0xfc00) == 0xdc00;
// Combines a surrogate pair into one code point: the low 10 bits of high
// become bits 10-19, the low 10 bits of low become bits 0-9, and 0x10000 is
// added. Example: high 0xD83D, low 0xDE00 gives 0x1F600.
494 static unsigned combine_surrogates(unsigned high, unsigned low) {
495 return 0x010000 + (((high & 0x3ff) << 10) | (low & 0x3ff));
// Handles a code point reference made of four hex digits, including
// surrogate pairs.
498 void parse_codepoint_ref() {
499 unsigned codepoint = parse_hex_quad();
// A low surrogate may not appear on its own.
500 if (is_surrogate_low(codepoint)) {
501 parse_error("invalid codepoint, stray low surrogate");
// A high surrogate must be followed by a second reference: a backslash, a
// 'u', and another hex quad that is a low surrogate.
503 if (is_surrogate_high(codepoint)) {
504 expect(&Encoding::is_backslash,
505 "invalid codepoint, stray high surrogate");
506 expect(&Encoding::is_u,
507 "expected codepoint reference after high surrogate");
// parse_hex_quad returns unsigned; low is an int and converts back to
// unsigned when passed to the helpers below.
508 int low = parse_hex_quad();
509 if (!is_surrogate_low(low)) {
510 parse_error("expected low surrogate after high surrogate");
// Replace the high surrogate with the code point the pair encodes.
512 codepoint = combine_surrogates(codepoint, low);
// Passes a code point to encoding.feed_codepoint together with a functor that
// calls Callbacks::on_code_unit on the callbacks object with the argument it
// is given. boost::ref makes bind hold a reference to callbacks rather than a
// copy.
//   codepoint - the code point to emit.
517 void feed(unsigned codepoint) {
518 encoding.feed_codepoint(codepoint,
519 boost::bind(&Callbacks::on_code_unit,
520 boost::ref(callbacks), _1));
// Reference to the callbacks object that receives the parse events
// (on_boolean, on_begin_string, on_begin_array, on_begin_object, ...).
523 Callbacks& callbacks;