]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2011, 2013 Daniel James | |
3 | ||
4 | Use, modification and distribution is subject to the Boost Software | |
5 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
6 | http://www.boost.org/LICENSE_1_0.txt) | |
7 | =============================================================================*/ | |
8 | ||
9 | #include <cctype> | |
10 | #include "document_state_impl.hpp" | |
11 | #include <boost/make_shared.hpp> | |
12 | #include <boost/unordered_map.hpp> | |
13 | #include <boost/lexical_cast.hpp> | |
14 | #include <boost/foreach.hpp> | |
15 | #include <boost/range/algorithm.hpp> | |
16 | ||
7c673cae FG |
17 | namespace quickbook { |
18 | // | |
19 | // The maximum size of a generated part of an id. | |
20 | // | |
21 | // Not a strict maximum, sometimes broken because the user | |
22 | // explicitly uses a longer id, or for backwards compatibility. | |
23 | ||
// Target upper bound, in characters, for a generated id part.
24 | static const std::size_t max_size = 32; | |
25 | ||
// A list of non-owning pointers to placeholders, in processing order.
26 | typedef std::vector<id_placeholder const*> placeholder_index; | |
// Forward declaration: builds the sorted list of placeholders that are
// referenced by the given xml (defined later in this file).
b32b8144 | 27 | placeholder_index index_placeholders(document_state_impl const&, quickbook::string_view); |
7c673cae FG |
28 | |
// Forward declaration: generates ids for one range of placeholders and
// stores them in generated_ids (defined later in this file).
29 | void generate_id_block( | |
30 | placeholder_index::iterator, placeholder_index::iterator, | |
31 | std::vector<std::string>& generated_ids); | |
32 | ||
b32b8144 | 33 | std::vector<std::string> generate_ids(document_state_impl const& state, quickbook::string_view xml) |
7c673cae FG |
34 | { |
35 | std::vector<std::string> generated_ids(state.placeholders.size()); | |
36 | ||
37 | // Get a list of the placeholders in the order that we wish to | |
38 | // process them. | |
39 | placeholder_index placeholders = index_placeholders(state, xml); | |
40 | ||
41 | typedef std::vector<id_placeholder const*>::iterator iterator; | |
42 | iterator it = placeholders.begin(), end = placeholders.end(); | |
43 | ||
44 | while (it != end) { | |
45 | // We process all the ids that have the same number of dots | |
46 | // together. Note that ids with different parents can clash, e.g. | |
47 | // because of old fashioned id generation or anchors containing | |
48 | // multiple dots. | |
49 | // | |
50 | // So find the group of placeholders with the same number of dots. | |
51 | iterator group_begin = it, group_end = it; | |
52 | while (group_end != end && (*group_end)->num_dots == (*it)->num_dots) | |
53 | ++group_end; | |
54 | ||
55 | generate_id_block(group_begin, group_end, generated_ids); | |
56 | it = group_end; | |
57 | } | |
58 | ||
59 | return generated_ids; | |
60 | } | |
61 | ||
62 | // | |
63 | // index_placeholders | |
64 | // | |
65 | // Create a sorted index of the placeholders, in order | |
66 | // to make numbering duplicates easy. A total order. | |
67 | // | |
68 | ||
69 | struct placeholder_compare | |
70 | { | |
71 | std::vector<unsigned>& order; | |
72 | ||
b32b8144 | 73 | placeholder_compare(std::vector<unsigned>& order_) : order(order_) {} |
7c673cae FG |
74 | |
75 | bool operator()(id_placeholder const* x, id_placeholder const* y) const | |
76 | { | |
77 | bool x_explicit = x->category.c >= id_category::explicit_id; | |
78 | bool y_explicit = y->category.c >= id_category::explicit_id; | |
79 | ||
80 | return | |
81 | x->num_dots < y->num_dots ? true : | |
82 | x->num_dots > y->num_dots ? false : | |
83 | x_explicit > y_explicit ? true : | |
84 | x_explicit < y_explicit ? false : | |
85 | order[x->index] < order[y->index]; | |
86 | } | |
87 | }; | |
88 | ||
89 | struct get_placeholder_order_callback : xml_processor::callback | |
90 | { | |
91 | document_state_impl const& state; | |
92 | std::vector<unsigned>& order; | |
93 | unsigned count; | |
94 | ||
b32b8144 FG |
95 | get_placeholder_order_callback(document_state_impl const& state_, |
96 | std::vector<unsigned>& order_) | |
97 | : state(state_), | |
98 | order(order_), | |
7c673cae FG |
99 | count(0) |
100 | {} | |
101 | ||
b32b8144 | 102 | void id_value(quickbook::string_view value) |
7c673cae FG |
103 | { |
104 | set_placeholder_order(state.get_placeholder(value)); | |
105 | } | |
106 | ||
107 | void set_placeholder_order(id_placeholder const* p) | |
108 | { | |
109 | if (p && !order[p->index]) { | |
110 | set_placeholder_order(p->parent); | |
111 | order[p->index] = ++count; | |
112 | } | |
113 | } | |
114 | }; | |
115 | ||
116 | placeholder_index index_placeholders( | |
117 | document_state_impl const& state, | |
b32b8144 | 118 | quickbook::string_view xml) |
7c673cae FG |
119 | { |
120 | // The order that the placeholder appear in the xml source. | |
121 | std::vector<unsigned> order(state.placeholders.size()); | |
122 | ||
123 | xml_processor processor; | |
124 | get_placeholder_order_callback callback(state, order); | |
125 | processor.parse(xml, callback); | |
126 | ||
127 | placeholder_index sorted_placeholders; | |
128 | sorted_placeholders.reserve(state.placeholders.size()); | |
129 | BOOST_FOREACH(id_placeholder const& p, state.placeholders) | |
130 | if (order[p.index]) sorted_placeholders.push_back(&p); | |
131 | boost::sort(sorted_placeholders, placeholder_compare(order)); | |
132 | ||
133 | return sorted_placeholders; | |
134 | } | |
135 | ||
136 | // Resolve and generate ids. | |
137 | ||
138 | struct generate_id_block_type | |
139 | { | |
140 | // The ids which won't require duplicate handling. | |
141 | typedef boost::unordered_map<std::string, id_placeholder const*> | |
142 | chosen_id_map; | |
143 | chosen_id_map chosen_ids; | |
144 | std::vector<std::string>& generated_ids; | |
145 | ||
b32b8144 FG |
146 | explicit generate_id_block_type(std::vector<std::string>& generated_ids_) : |
147 | generated_ids(generated_ids_) {} | |
7c673cae FG |
148 | |
149 | void generate(placeholder_index::iterator begin, | |
150 | placeholder_index::iterator end); | |
151 | ||
152 | std::string resolve_id(id_placeholder const*); | |
153 | std::string generate_id(id_placeholder const*, std::string const&); | |
154 | }; | |
155 | ||
156 | void generate_id_block(placeholder_index::iterator begin, | |
157 | placeholder_index::iterator end, | |
158 | std::vector<std::string>& generated_ids) | |
159 | { | |
160 | generate_id_block_type impl(generated_ids); | |
161 | impl.generate(begin, end); | |
162 | } | |
163 | ||
164 | void generate_id_block_type::generate(placeholder_index::iterator begin, | |
165 | placeholder_index::iterator end) | |
166 | { | |
167 | std::vector<std::string> resolved_ids; | |
168 | ||
169 | for (placeholder_index::iterator i = begin; i != end; ++i) | |
170 | resolved_ids.push_back(resolve_id(*i)); | |
171 | ||
172 | unsigned index = 0; | |
173 | for (placeholder_index::iterator i = begin; i != end; ++i, ++index) | |
174 | { | |
175 | generated_ids[(**i).index] = | |
176 | generate_id(*i, resolved_ids[index]); | |
177 | } | |
178 | } | |
179 | ||
180 | std::string generate_id_block_type::resolve_id(id_placeholder const* p) | |
181 | { | |
182 | std::string id = p->parent ? | |
183 | generated_ids[p->parent->index] + "." + p->id : | |
184 | p->id; | |
185 | ||
186 | if (p->category.c > id_category::numbered) { | |
187 | // Reserve the id if it isn't already reserved. | |
188 | chosen_id_map::iterator pos = chosen_ids.emplace(id, p).first; | |
189 | ||
190 | // If it was reserved by a placeholder with a lower category, | |
191 | // then overwrite it. | |
192 | if (p->category.c > pos->second->category.c) | |
193 | pos->second = p; | |
194 | } | |
195 | ||
196 | return id; | |
197 | } | |
198 | ||
199 | std::string generate_id_block_type::generate_id(id_placeholder const* p, | |
200 | std::string const& resolved_id) | |
201 | { | |
202 | if (p->category.c > id_category::numbered && | |
203 | chosen_ids.at(resolved_id) == p) | |
204 | { | |
205 | return resolved_id; | |
206 | } | |
207 | ||
208 | // Split the id into its parent part and child part. | |
209 | // | |
210 | // Note: can't just use the placeholder's parent, as the | |
211 | // placeholder id might contain dots. | |
212 | std::size_t child_start = resolved_id.rfind('.'); | |
213 | std::string parent_id, base_id; | |
214 | ||
215 | if (child_start == std::string::npos) { | |
216 | base_id = normalize_id(resolved_id, max_size - 1); | |
217 | } | |
218 | else { | |
219 | parent_id = resolved_id.substr(0, child_start + 1); | |
220 | base_id = normalize_id(resolved_id.substr(child_start + 1), | |
221 | max_size - 1); | |
222 | } | |
223 | ||
224 | // Since we're adding digits, don't want an id that ends in | |
225 | // a digit. | |
226 | ||
b32b8144 | 227 | std::string::size_type length = base_id.size(); |
7c673cae FG |
228 | |
229 | if (length > 0 && std::isdigit(base_id[length - 1])) { | |
230 | if (length < max_size - 1) { | |
231 | base_id += '_'; | |
232 | ++length; | |
233 | } | |
234 | else { | |
235 | while (length > 0 && std::isdigit(base_id[length -1])) | |
236 | --length; | |
237 | base_id.erase(length); | |
238 | } | |
239 | } | |
240 | ||
241 | unsigned count = 0; | |
242 | ||
b32b8144 | 243 | for (;;) |
7c673cae FG |
244 | { |
245 | std::string postfix = | |
246 | boost::lexical_cast<std::string>(count++); | |
247 | ||
248 | if ((base_id.size() + postfix.size()) > max_size) { | |
249 | // The id is now too long, so reduce the length and | |
250 | // start again. | |
251 | ||
252 | // Would need a lot of ids to get this far.... | |
253 | if (length == 0) throw std::runtime_error("Too many ids"); | |
254 | ||
255 | // Trim a character. | |
256 | --length; | |
257 | ||
258 | // Trim any trailing digits. | |
259 | while (length > 0 && std::isdigit(base_id[length -1])) | |
260 | --length; | |
261 | ||
262 | base_id.erase(length); | |
263 | count = 0; | |
264 | } | |
265 | else { | |
266 | // Try to reserve this id. | |
267 | std::string generated_id = parent_id + base_id + postfix; | |
268 | ||
269 | if (chosen_ids.emplace(generated_id, p).second) { | |
270 | return generated_id; | |
271 | } | |
272 | } | |
273 | } | |
274 | } | |
275 | ||
276 | // | |
277 | // replace_ids | |
278 | // | |
279 | // Return a copy of the xml with all the placeholders replaced by | |
280 | // generated_ids. | |
281 | // | |
282 | ||
283 | struct replace_ids_callback : xml_processor::callback | |
284 | { | |
285 | document_state_impl const& state; | |
286 | std::vector<std::string> const* ids; | |
b32b8144 | 287 | string_iterator source_pos; |
7c673cae FG |
288 | std::string result; |
289 | ||
b32b8144 FG |
290 | replace_ids_callback(document_state_impl const& state_, |
291 | std::vector<std::string> const* ids_) | |
292 | : state(state_), | |
293 | ids(ids_), | |
7c673cae FG |
294 | source_pos(), |
295 | result() | |
296 | {} | |
297 | ||
b32b8144 | 298 | void start(quickbook::string_view xml) |
7c673cae FG |
299 | { |
300 | source_pos = xml.begin(); | |
301 | } | |
302 | ||
b32b8144 | 303 | void id_value(quickbook::string_view value) |
7c673cae FG |
304 | { |
305 | if (id_placeholder const* p = state.get_placeholder(value)) | |
306 | { | |
b32b8144 | 307 | quickbook::string_view id = ids ? |
7c673cae FG |
308 | (*ids)[p->index] : p->unresolved_id; |
309 | ||
310 | result.append(source_pos, value.begin()); | |
311 | result.append(id.begin(), id.end()); | |
312 | source_pos = value.end(); | |
313 | } | |
314 | } | |
315 | ||
b32b8144 | 316 | void finish(quickbook::string_view xml) |
7c673cae FG |
317 | { |
318 | result.append(source_pos, xml.end()); | |
319 | source_pos = xml.end(); | |
320 | } | |
321 | }; | |
322 | ||
b32b8144 | 323 | std::string replace_ids(document_state_impl const& state, quickbook::string_view xml, |
7c673cae FG |
324 | std::vector<std::string> const* ids) |
325 | { | |
326 | xml_processor processor; | |
327 | replace_ids_callback callback(state, ids); | |
328 | processor.parse(xml, callback); | |
329 | return callback.result; | |
330 | } | |
331 | ||
332 | // | |
333 | // normalize_id | |
334 | // | |
335 | // Normalizes generated ids. | |
336 | // | |
337 | ||
b32b8144 | 338 | std::string normalize_id(quickbook::string_view src_id) |
7c673cae FG |
339 | { |
340 | return normalize_id(src_id, max_size); | |
341 | } | |
342 | ||
b32b8144 | 343 | std::string normalize_id(quickbook::string_view src_id, std::size_t size) |
7c673cae FG |
344 | { |
345 | std::string id(src_id.begin(), src_id.end()); | |
346 | ||
347 | std::size_t src = 0; | |
348 | std::size_t dst = 0; | |
349 | ||
350 | while (src < id.length() && id[src] == '_') { | |
351 | ++src; | |
352 | } | |
353 | ||
354 | if (src == id.length()) { | |
355 | id = "_"; | |
356 | } | |
357 | else { | |
358 | while (src < id.length() && dst < size) { | |
359 | if (id[src] == '_') { | |
360 | do { | |
361 | ++src; | |
362 | } while(src < id.length() && id[src] == '_'); | |
363 | ||
364 | if (src < id.length()) id[dst++] = '_'; | |
365 | } | |
366 | else { | |
367 | id[dst++] = id[src++]; | |
368 | } | |
369 | } | |
370 | ||
371 | id.erase(dst); | |
372 | } | |
373 | ||
374 | return id; | |
375 | } | |
376 | } |