]>
Commit | Line | Data |
---|---|---|
92f5a8d4 TL |
1 | /*============================================================================= |
2 | Copyright (c) 2017 Daniel James | |
3 | ||
4 | Use, modification and distribution is subject to the Boost Software | |
5 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
6 | http://www.boost.org/LICENSE_1_0.txt) | |
7 | =============================================================================*/ | |
8 | ||
9 | #include "xml_parse.hpp" | |
10 | #include "simple_parse.hpp" | |
11 | #include "stream.hpp" | |
12 | #include "utils.hpp" | |
13 | ||
14 | namespace quickbook | |
15 | { | |
16 | namespace detail | |
17 | { | |
18 | // write_xml_tree | |
19 | ||
20 | void write_xml_tree_impl( | |
21 | std::string& out, xml_element* node, unsigned int depth); | |
22 | ||
23 | void write_xml_tree(xml_element* node) | |
24 | { | |
25 | std::string result; | |
26 | write_xml_tree_impl(result, node, 0); | |
27 | quickbook::detail::out() << result << std::flush; | |
28 | } | |
29 | ||
30 | void write_xml_tree_impl( | |
31 | std::string& out, xml_element* node, unsigned int depth) | |
32 | { | |
33 | if (!node) { | |
34 | return; | |
35 | } | |
36 | ||
37 | for (unsigned i = 0; i < depth; ++i) { | |
38 | out += " "; | |
39 | } | |
40 | switch (node->type_) { | |
41 | case xml_element::element_node: | |
42 | out += "Node: "; | |
43 | out += node->name_; | |
44 | break; | |
45 | case xml_element::element_text: | |
46 | out += "Text"; | |
47 | break; | |
48 | case xml_element::element_html: | |
49 | out += "HTML"; | |
50 | break; | |
51 | default: | |
52 | out += "Unknown node type"; | |
53 | break; | |
54 | } | |
55 | out += "\n"; | |
56 | for (xml_element* it = node->children(); it; it = it->next()) { | |
57 | write_xml_tree_impl(out, it, depth + 1); | |
58 | } | |
59 | } | |
60 | ||
61 | // xml_parse | |
62 | ||
63 | void read_tag( | |
64 | xml_tree_builder&, | |
65 | string_iterator& it, | |
66 | string_iterator start, | |
67 | string_iterator end); | |
68 | void read_close_tag( | |
69 | xml_tree_builder&, | |
70 | string_iterator& it, | |
71 | string_iterator start, | |
72 | string_iterator end); | |
73 | void skip_question_mark_tag( | |
74 | string_iterator& it, string_iterator start, string_iterator end); | |
75 | void skip_exclamation_mark_tag( | |
76 | string_iterator& it, string_iterator start, string_iterator end); | |
77 | quickbook::string_view read_tag_name( | |
78 | string_iterator& it, string_iterator start, string_iterator end); | |
79 | quickbook::string_view read_attribute_value( | |
80 | string_iterator& it, string_iterator start, string_iterator end); | |
81 | quickbook::string_view read_string( | |
82 | string_iterator& it, string_iterator end); | |
83 | ||
84 | xml_tree xml_parse(quickbook::string_view source) | |
85 | { | |
86 | typedef string_iterator iterator; | |
87 | iterator it = source.begin(), end = source.end(); | |
88 | ||
89 | xml_tree_builder builder; | |
90 | ||
91 | while (true) { | |
92 | iterator start = it; | |
93 | read_to(it, end, '<'); | |
94 | if (start != it) { | |
95 | builder.add_element(xml_element::text_node( | |
96 | quickbook::string_view(start, it - start))); | |
97 | } | |
98 | ||
99 | if (it == end) { | |
100 | break; | |
101 | } | |
102 | start = it++; | |
103 | if (it == end) { | |
104 | throw xml_parse_error("Invalid tag", start); | |
105 | } | |
106 | ||
107 | switch (*it) { | |
108 | case '?': | |
109 | skip_question_mark_tag(it, start, end); | |
110 | break; | |
111 | case '!': | |
112 | skip_exclamation_mark_tag(it, start, end); | |
113 | break; | |
114 | case '/': | |
115 | read_close_tag(builder, it, start, end); | |
116 | break; | |
117 | default: | |
118 | read_tag(builder, it, start, end); | |
119 | break; | |
120 | } | |
121 | } | |
122 | ||
123 | return builder.release(); | |
124 | } | |
125 | ||
126 | void read_tag( | |
127 | xml_tree_builder& builder, | |
128 | string_iterator& it, | |
129 | string_iterator start, | |
130 | string_iterator end) | |
131 | { | |
132 | assert(it == start + 1 && it != end); | |
133 | quickbook::string_view name = read_tag_name(it, start, end); | |
134 | xml_element* node = xml_element::node(name); | |
135 | builder.add_element(node); | |
136 | ||
137 | // Read attributes | |
138 | while (true) { | |
139 | read_some_of(it, end, " \t\n\r"); | |
140 | if (it == end) { | |
141 | throw xml_parse_error("Invalid tag", start); | |
142 | } | |
143 | if (*it == '>') { | |
144 | ++it; | |
145 | builder.start_children(); | |
146 | break; | |
147 | } | |
148 | if (*it == '/') { | |
149 | ++it; | |
150 | read_some_of(it, end, " \t\n\r"); | |
151 | if (it == end || *it != '>') { | |
152 | throw xml_parse_error("Invalid tag", start); | |
153 | } | |
154 | ++it; | |
155 | break; | |
156 | } | |
157 | quickbook::string_view attribute_name = | |
158 | read_tag_name(it, start, end); | |
159 | read_some_of(it, end, " \t\n\r"); | |
160 | if (it == end) { | |
161 | throw xml_parse_error("Invalid tag", start); | |
162 | } | |
163 | quickbook::string_view attribute_value; | |
164 | if (*it == '=') { | |
165 | ++it; | |
166 | attribute_value = read_attribute_value(it, start, end); | |
167 | } | |
168 | node->set_attribute( | |
169 | attribute_name, | |
170 | quickbook::detail::decode_string(attribute_value)); | |
171 | } | |
172 | } | |
173 | ||
174 | void read_close_tag( | |
175 | xml_tree_builder& builder, | |
176 | string_iterator& it, | |
177 | string_iterator start, | |
178 | string_iterator end) | |
179 | { | |
180 | assert(it == start + 1 && it != end && *it == '/'); | |
181 | ++it; | |
182 | quickbook::string_view name = read_tag_name(it, start, end); | |
183 | read_some_of(it, end, " \t\n\r"); | |
184 | if (it == end || *it != '>') { | |
185 | throw xml_parse_error("Invalid close tag", start); | |
186 | } | |
187 | ++it; | |
188 | ||
189 | if (!builder.parent() || builder.parent()->name_ != name) { | |
190 | throw xml_parse_error("Close tag doesn't match", start); | |
191 | } | |
192 | ||
193 | builder.end_children(); | |
194 | } | |
195 | ||
196 | void skip_question_mark_tag( | |
197 | string_iterator& it, string_iterator start, string_iterator end) | |
198 | { | |
199 | assert(it == start + 1 && it != end && *it == '?'); | |
200 | ++it; | |
201 | ||
202 | while (true) { | |
203 | read_to_one_of(it, end, "\"'?<>"); | |
204 | if (it == end) { | |
205 | throw xml_parse_error("Invalid tag", start); | |
206 | } | |
207 | switch (*it) { | |
208 | case '"': | |
209 | case '\'': | |
210 | read_string(it, end); | |
211 | break; | |
212 | case '?': | |
213 | if (read(it, end, "?>")) { | |
214 | return; | |
215 | } | |
216 | else { | |
217 | ++it; | |
218 | } | |
219 | break; | |
220 | default: | |
221 | throw xml_parse_error("Invalid tag", start); | |
222 | } | |
223 | } | |
224 | } | |
225 | ||
226 | void skip_exclamation_mark_tag( | |
227 | string_iterator& it, string_iterator start, string_iterator end) | |
228 | { | |
229 | assert(it == start + 1 && it != end && *it == '!'); | |
230 | ++it; | |
231 | ||
232 | if (read(it, end, "--")) { | |
233 | if (read_past(it, end, "-->")) { | |
234 | return; | |
235 | } | |
236 | else { | |
237 | throw xml_parse_error("Invalid comment", start); | |
238 | } | |
239 | } | |
240 | ||
241 | while (true) { | |
242 | read_to_one_of(it, end, "\"'<>"); | |
243 | if (it == end) { | |
244 | throw xml_parse_error("Invalid tag", start); | |
245 | } | |
246 | switch (*it) { | |
247 | case '"': | |
248 | case '\'': | |
249 | read_string(it, end); | |
250 | break; | |
251 | case '>': | |
252 | ++it; | |
253 | return; | |
254 | default: | |
255 | throw xml_parse_error("Invalid tag", start); | |
256 | } | |
257 | } | |
258 | } | |
259 | ||
260 | quickbook::string_view read_tag_name( | |
261 | string_iterator& it, string_iterator start, string_iterator end) | |
262 | { | |
263 | read_some_of(it, end, " \t\n\r"); | |
264 | string_iterator name_start = it; | |
265 | read_some_of( | |
266 | it, end, | |
267 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:-"); | |
268 | if (name_start == it) { | |
269 | throw xml_parse_error("Invalid tag", start); | |
270 | } | |
271 | return quickbook::string_view(name_start, it - name_start); | |
272 | } | |
273 | ||
274 | quickbook::string_view read_attribute_value( | |
275 | string_iterator& it, string_iterator start, string_iterator end) | |
276 | { | |
277 | read_some_of(it, end, " \t\n\r"); | |
278 | if (*it == '"' || *it == '\'') { | |
279 | return read_string(it, end); | |
280 | } | |
281 | else { | |
282 | throw xml_parse_error("Invalid tag", start); | |
283 | } | |
284 | } | |
285 | ||
286 | quickbook::string_view read_string( | |
287 | string_iterator& it, string_iterator end) | |
288 | { | |
289 | assert(it != end && (*it == '"' || *it == '\'')); | |
290 | ||
291 | string_iterator start = it; | |
292 | char deliminator = *it; | |
293 | ++it; | |
294 | read_to(it, end, deliminator); | |
295 | if (it == end) { | |
296 | throw xml_parse_error("Invalid string", start); | |
297 | } | |
298 | ++it; | |
299 | return quickbook::string_view(start + 1, it - start - 2); | |
300 | } | |
301 | } | |
302 | } |