]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2011, 2013 Daniel James | |
3 | ||
4 | Use, modification and distribution is subject to the Boost Software | |
5 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
6 | http://www.boost.org/LICENSE_1_0.txt) | |
7 | =============================================================================*/ | |
8 | ||
9 | #include <cctype> | |
10 | #include "document_state_impl.hpp" | |
11 | #include <boost/make_shared.hpp> | |
12 | #include <boost/unordered_map.hpp> | |
13 | #include <boost/lexical_cast.hpp> | |
14 | #include <boost/foreach.hpp> | |
15 | #include <boost/range/algorithm.hpp> | |
16 | ||
17 | // TODO: This should possibly try to always generate valid XML ids: | |
18 | // http://www.w3.org/TR/REC-xml/#NT-NameStartChar | |
19 | ||
20 | namespace quickbook { | |
21 | // | |
22 | // The maximum size of a generated part of an id. | |
23 | // | |
24 | // Not a strict maximum, sometimes broken because the user | |
25 | // explicitly uses a longer id, or for backwards compatibility. | |
26 | ||
27 | static const std::size_t max_size = 32; | |
28 | ||
29 | typedef std::vector<id_placeholder const*> placeholder_index; | |
30 | placeholder_index index_placeholders(document_state_impl const&, boost::string_ref); | |
31 | ||
32 | void generate_id_block( | |
33 | placeholder_index::iterator, placeholder_index::iterator, | |
34 | std::vector<std::string>& generated_ids); | |
35 | ||
36 | std::vector<std::string> generate_ids(document_state_impl const& state, boost::string_ref xml) | |
37 | { | |
38 | std::vector<std::string> generated_ids(state.placeholders.size()); | |
39 | ||
40 | // Get a list of the placeholders in the order that we wish to | |
41 | // process them. | |
42 | placeholder_index placeholders = index_placeholders(state, xml); | |
43 | ||
44 | typedef std::vector<id_placeholder const*>::iterator iterator; | |
45 | iterator it = placeholders.begin(), end = placeholders.end(); | |
46 | ||
47 | while (it != end) { | |
48 | // We process all the ids that have the same number of dots | |
49 | // together. Note that ids with different parents can clash, e.g. | |
50 | // because of old fashioned id generation or anchors containing | |
51 | // multiple dots. | |
52 | // | |
53 | // So find the group of placeholders with the same number of dots. | |
54 | iterator group_begin = it, group_end = it; | |
55 | while (group_end != end && (*group_end)->num_dots == (*it)->num_dots) | |
56 | ++group_end; | |
57 | ||
58 | generate_id_block(group_begin, group_end, generated_ids); | |
59 | it = group_end; | |
60 | } | |
61 | ||
62 | return generated_ids; | |
63 | } | |
64 | ||
65 | // | |
66 | // index_placeholders | |
67 | // | |
68 | // Create a sorted index of the placeholders, in order | |
69 | // to make numbering duplicates easy. A total order. | |
70 | // | |
71 | ||
72 | struct placeholder_compare | |
73 | { | |
74 | std::vector<unsigned>& order; | |
75 | ||
76 | placeholder_compare(std::vector<unsigned>& order) : order(order) {} | |
77 | ||
78 | bool operator()(id_placeholder const* x, id_placeholder const* y) const | |
79 | { | |
80 | bool x_explicit = x->category.c >= id_category::explicit_id; | |
81 | bool y_explicit = y->category.c >= id_category::explicit_id; | |
82 | ||
83 | return | |
84 | x->num_dots < y->num_dots ? true : | |
85 | x->num_dots > y->num_dots ? false : | |
86 | x_explicit > y_explicit ? true : | |
87 | x_explicit < y_explicit ? false : | |
88 | order[x->index] < order[y->index]; | |
89 | } | |
90 | }; | |
91 | ||
92 | struct get_placeholder_order_callback : xml_processor::callback | |
93 | { | |
94 | document_state_impl const& state; | |
95 | std::vector<unsigned>& order; | |
96 | unsigned count; | |
97 | ||
98 | get_placeholder_order_callback(document_state_impl const& state, | |
99 | std::vector<unsigned>& order) | |
100 | : state(state), | |
101 | order(order), | |
102 | count(0) | |
103 | {} | |
104 | ||
105 | void id_value(boost::string_ref value) | |
106 | { | |
107 | set_placeholder_order(state.get_placeholder(value)); | |
108 | } | |
109 | ||
110 | void set_placeholder_order(id_placeholder const* p) | |
111 | { | |
112 | if (p && !order[p->index]) { | |
113 | set_placeholder_order(p->parent); | |
114 | order[p->index] = ++count; | |
115 | } | |
116 | } | |
117 | }; | |
118 | ||
119 | placeholder_index index_placeholders( | |
120 | document_state_impl const& state, | |
121 | boost::string_ref xml) | |
122 | { | |
123 | // The order that the placeholder appear in the xml source. | |
124 | std::vector<unsigned> order(state.placeholders.size()); | |
125 | ||
126 | xml_processor processor; | |
127 | get_placeholder_order_callback callback(state, order); | |
128 | processor.parse(xml, callback); | |
129 | ||
130 | placeholder_index sorted_placeholders; | |
131 | sorted_placeholders.reserve(state.placeholders.size()); | |
132 | BOOST_FOREACH(id_placeholder const& p, state.placeholders) | |
133 | if (order[p.index]) sorted_placeholders.push_back(&p); | |
134 | boost::sort(sorted_placeholders, placeholder_compare(order)); | |
135 | ||
136 | return sorted_placeholders; | |
137 | } | |
138 | ||
139 | // Resolve and generate ids. | |
140 | ||
141 | struct generate_id_block_type | |
142 | { | |
143 | // The ids which won't require duplicate handling. | |
144 | typedef boost::unordered_map<std::string, id_placeholder const*> | |
145 | chosen_id_map; | |
146 | chosen_id_map chosen_ids; | |
147 | std::vector<std::string>& generated_ids; | |
148 | ||
149 | generate_id_block_type(std::vector<std::string>& generated_ids) : | |
150 | generated_ids(generated_ids) {} | |
151 | ||
152 | void generate(placeholder_index::iterator begin, | |
153 | placeholder_index::iterator end); | |
154 | ||
155 | std::string resolve_id(id_placeholder const*); | |
156 | std::string generate_id(id_placeholder const*, std::string const&); | |
157 | }; | |
158 | ||
159 | void generate_id_block(placeholder_index::iterator begin, | |
160 | placeholder_index::iterator end, | |
161 | std::vector<std::string>& generated_ids) | |
162 | { | |
163 | generate_id_block_type impl(generated_ids); | |
164 | impl.generate(begin, end); | |
165 | } | |
166 | ||
167 | void generate_id_block_type::generate(placeholder_index::iterator begin, | |
168 | placeholder_index::iterator end) | |
169 | { | |
170 | std::vector<std::string> resolved_ids; | |
171 | ||
172 | for (placeholder_index::iterator i = begin; i != end; ++i) | |
173 | resolved_ids.push_back(resolve_id(*i)); | |
174 | ||
175 | unsigned index = 0; | |
176 | for (placeholder_index::iterator i = begin; i != end; ++i, ++index) | |
177 | { | |
178 | generated_ids[(**i).index] = | |
179 | generate_id(*i, resolved_ids[index]); | |
180 | } | |
181 | } | |
182 | ||
183 | std::string generate_id_block_type::resolve_id(id_placeholder const* p) | |
184 | { | |
185 | std::string id = p->parent ? | |
186 | generated_ids[p->parent->index] + "." + p->id : | |
187 | p->id; | |
188 | ||
189 | if (p->category.c > id_category::numbered) { | |
190 | // Reserve the id if it isn't already reserved. | |
191 | chosen_id_map::iterator pos = chosen_ids.emplace(id, p).first; | |
192 | ||
193 | // If it was reserved by a placeholder with a lower category, | |
194 | // then overwrite it. | |
195 | if (p->category.c > pos->second->category.c) | |
196 | pos->second = p; | |
197 | } | |
198 | ||
199 | return id; | |
200 | } | |
201 | ||
202 | std::string generate_id_block_type::generate_id(id_placeholder const* p, | |
203 | std::string const& resolved_id) | |
204 | { | |
205 | if (p->category.c > id_category::numbered && | |
206 | chosen_ids.at(resolved_id) == p) | |
207 | { | |
208 | return resolved_id; | |
209 | } | |
210 | ||
211 | // Split the id into its parent part and child part. | |
212 | // | |
213 | // Note: can't just use the placeholder's parent, as the | |
214 | // placeholder id might contain dots. | |
215 | std::size_t child_start = resolved_id.rfind('.'); | |
216 | std::string parent_id, base_id; | |
217 | ||
218 | if (child_start == std::string::npos) { | |
219 | base_id = normalize_id(resolved_id, max_size - 1); | |
220 | } | |
221 | else { | |
222 | parent_id = resolved_id.substr(0, child_start + 1); | |
223 | base_id = normalize_id(resolved_id.substr(child_start + 1), | |
224 | max_size - 1); | |
225 | } | |
226 | ||
227 | // Since we're adding digits, don't want an id that ends in | |
228 | // a digit. | |
229 | ||
230 | unsigned int length = base_id.size(); | |
231 | ||
232 | if (length > 0 && std::isdigit(base_id[length - 1])) { | |
233 | if (length < max_size - 1) { | |
234 | base_id += '_'; | |
235 | ++length; | |
236 | } | |
237 | else { | |
238 | while (length > 0 && std::isdigit(base_id[length -1])) | |
239 | --length; | |
240 | base_id.erase(length); | |
241 | } | |
242 | } | |
243 | ||
244 | unsigned count = 0; | |
245 | ||
246 | while (true) | |
247 | { | |
248 | std::string postfix = | |
249 | boost::lexical_cast<std::string>(count++); | |
250 | ||
251 | if ((base_id.size() + postfix.size()) > max_size) { | |
252 | // The id is now too long, so reduce the length and | |
253 | // start again. | |
254 | ||
255 | // Would need a lot of ids to get this far.... | |
256 | if (length == 0) throw std::runtime_error("Too many ids"); | |
257 | ||
258 | // Trim a character. | |
259 | --length; | |
260 | ||
261 | // Trim any trailing digits. | |
262 | while (length > 0 && std::isdigit(base_id[length -1])) | |
263 | --length; | |
264 | ||
265 | base_id.erase(length); | |
266 | count = 0; | |
267 | } | |
268 | else { | |
269 | // Try to reserve this id. | |
270 | std::string generated_id = parent_id + base_id + postfix; | |
271 | ||
272 | if (chosen_ids.emplace(generated_id, p).second) { | |
273 | return generated_id; | |
274 | } | |
275 | } | |
276 | } | |
277 | } | |
278 | ||
279 | // | |
280 | // replace_ids | |
281 | // | |
282 | // Return a copy of the xml with all the placeholders replaced by | |
283 | // generated_ids. | |
284 | // | |
285 | ||
286 | struct replace_ids_callback : xml_processor::callback | |
287 | { | |
288 | document_state_impl const& state; | |
289 | std::vector<std::string> const* ids; | |
290 | boost::string_ref::const_iterator source_pos; | |
291 | std::string result; | |
292 | ||
293 | replace_ids_callback(document_state_impl const& state, | |
294 | std::vector<std::string> const* ids) | |
295 | : state(state), | |
296 | ids(ids), | |
297 | source_pos(), | |
298 | result() | |
299 | {} | |
300 | ||
301 | void start(boost::string_ref xml) | |
302 | { | |
303 | source_pos = xml.begin(); | |
304 | } | |
305 | ||
306 | void id_value(boost::string_ref value) | |
307 | { | |
308 | if (id_placeholder const* p = state.get_placeholder(value)) | |
309 | { | |
310 | boost::string_ref id = ids ? | |
311 | (*ids)[p->index] : p->unresolved_id; | |
312 | ||
313 | result.append(source_pos, value.begin()); | |
314 | result.append(id.begin(), id.end()); | |
315 | source_pos = value.end(); | |
316 | } | |
317 | } | |
318 | ||
319 | void finish(boost::string_ref xml) | |
320 | { | |
321 | result.append(source_pos, xml.end()); | |
322 | source_pos = xml.end(); | |
323 | } | |
324 | }; | |
325 | ||
326 | std::string replace_ids(document_state_impl const& state, boost::string_ref xml, | |
327 | std::vector<std::string> const* ids) | |
328 | { | |
329 | xml_processor processor; | |
330 | replace_ids_callback callback(state, ids); | |
331 | processor.parse(xml, callback); | |
332 | return callback.result; | |
333 | } | |
334 | ||
335 | // | |
336 | // normalize_id | |
337 | // | |
338 | // Normalizes generated ids. | |
339 | // | |
340 | ||
341 | std::string normalize_id(boost::string_ref src_id) | |
342 | { | |
343 | return normalize_id(src_id, max_size); | |
344 | } | |
345 | ||
346 | std::string normalize_id(boost::string_ref src_id, std::size_t size) | |
347 | { | |
348 | std::string id(src_id.begin(), src_id.end()); | |
349 | ||
350 | std::size_t src = 0; | |
351 | std::size_t dst = 0; | |
352 | ||
353 | while (src < id.length() && id[src] == '_') { | |
354 | ++src; | |
355 | } | |
356 | ||
357 | if (src == id.length()) { | |
358 | id = "_"; | |
359 | } | |
360 | else { | |
361 | while (src < id.length() && dst < size) { | |
362 | if (id[src] == '_') { | |
363 | do { | |
364 | ++src; | |
365 | } while(src < id.length() && id[src] == '_'); | |
366 | ||
367 | if (src < id.length()) id[dst++] = '_'; | |
368 | } | |
369 | else { | |
370 | id[dst++] = id[src++]; | |
371 | } | |
372 | } | |
373 | ||
374 | id.erase(dst); | |
375 | } | |
376 | ||
377 | return id; | |
378 | } | |
379 | } |