1 /*=============================================================================
2 Copyright (c) 2011, 2013 Daniel James
4 Use, modification and distribution is subject to the Boost Software
5 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
6 http://www.boost.org/LICENSE_1_0.txt)
7 =============================================================================*/
10 #include "document_state_impl.hpp"
11 #include <boost/make_shared.hpp>
12 #include <boost/unordered_map.hpp>
13 #include <boost/lexical_cast.hpp>
14 #include <boost/foreach.hpp>
15 #include <boost/range/algorithm.hpp>
17 // TODO: This should possibly try to always generate valid XML ids:
18 // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
22 // The maximum size, in characters, of a generated part of an id.
24 // Not a strict maximum: it is sometimes exceeded because the user
25 // explicitly uses a longer id, or for backwards compatibility.
// Used in this file as the default limit of the one-argument normalize_id()
// and as the length budget when generate_id() appends a numeric postfix.
27 static const std::size_t max_size
= 32;
// A list of non-owning pointers to placeholders. index_placeholders()
// returns one of these sorted into the order in which ids are generated.
29 typedef std::vector
<id_placeholder
const*> placeholder_index
;
// Forward declaration; the definition appears further down in this file.
30 placeholder_index
index_placeholders(document_state_impl
const&, boost::string_ref
);
// Forward declaration; the definition appears further down in this file.
// Generates ids for the placeholders in the iterator range [first, second)
// and stores them in generated_ids.
32 void generate_id_block(
33 placeholder_index::iterator
, placeholder_index::iterator
,
34 std::vector
<std::string
>& generated_ids
);
// Generates the final id string for the placeholders in `state`.
//
// state: document state whose `placeholders` are being resolved.
// xml:   xml text handed to index_placeholders() to decide the order in
//        which the placeholders are processed.
//
// The result vector is created with one (initially empty) string per entry
// in state.placeholders; generate_id_block() fills it in, one group of
// placeholders at a time.
// NOTE(review): the enclosing loop, the statement that advances group_end
// and the return statement are not visible in this view - confirm against
// the full file.
36 std::vector
<std::string
> generate_ids(document_state_impl
const& state
, boost::string_ref xml
)
38 std::vector
<std::string
> generated_ids(state
.placeholders
.size());
40 // Get a list of the placeholders in the order that we wish to process them.
42 placeholder_index placeholders
= index_placeholders(state
, xml
);
44 typedef std::vector
<id_placeholder
const*>::iterator iterator
;
45 iterator it
= placeholders
.begin(), end
= placeholders
.end();
48 // We process all the ids that have the same number of dots
49 // together. Note that ids with different parents can clash, e.g.
50 // because of old fashioned id generation or anchors containing
53 // So find the group of placeholders with the same number of dots.
54 iterator group_begin
= it
, group_end
= it
;
// The group ends at the first placeholder whose num_dots differs from
// that of the placeholder at `it` (or at the end of the list).
55 while (group_end
!= end
&& (*group_end
)->num_dots
== (*it
)->num_dots
)
// Resolve and generate the ids for the whole group in one go.
58 generate_id_block(group_begin
, group_end
, generated_ids
);
68 // Create a sorted index of the placeholders, in order
69 // to make numbering duplicates easy. A total order.
//
// Ordering predicate over placeholder pointers. Placeholders are compared by,
// in turn:
//   1. num_dots - fewer dots sort first;
//   2. whether category.c >= id_category::explicit_id - those that are
//      sort before those that are not;
//   3. the value stored in `order` for the placeholder's index - lower first.
72 struct placeholder_compare
// Per-placeholder ordering values, indexed by id_placeholder::index.
// Held by reference: the vector must outlive the comparator.
74 std::vector
<unsigned>& order
;
76 placeholder_compare(std::vector
<unsigned>& order
) : order(order
) {}
// Returns true when x should be placed before y.
78 bool operator()(id_placeholder
const* x
, id_placeholder
const* y
) const
80 bool x_explicit
= x
->category
.c
>= id_category::explicit_id
;
81 bool y_explicit
= y
->category
.c
>= id_category::explicit_id
;
// Chained conditional: each pair of lines decides one key, falling
// through to the next key only when the two values are equal.
84 x
->num_dots
< y
->num_dots
? true :
85 x
->num_dots
> y
->num_dots
? false :
// bool comparison: true > false, so explicit ids come first.
86 x_explicit
> y_explicit
? true :
87 x_explicit
< y_explicit
? false :
88 order
[x
->index
] < order
[y
->index
];
// xml_processor callback that records, in `order`, a sequence number for
// each placeholder reached through id_value(). An entry of 0 in `order`
// means the placeholder has not been numbered yet.
// NOTE(review): the declaration of `count` and the constructor's
// initialiser list are not visible in this view; since 0 marks "unset" and
// numbers are assigned with ++count, count presumably starts at 0 - confirm.
92 struct get_placeholder_order_callback
: xml_processor::callback
94 document_state_impl
const& state
;
// Indexed by id_placeholder::index; held by reference, owned by the caller.
95 std::vector
<unsigned>& order
;
98 get_placeholder_order_callback(document_state_impl
const& state
,
99 std::vector
<unsigned>& order
)
// Looks up the placeholder for `value` and numbers it. get_placeholder()
// may yield a null pointer; set_placeholder_order() ignores that case.
105 void id_value(boost::string_ref value
)
107 set_placeholder_order(state
.get_placeholder(value
));
// Assigns the next sequence number to p unless p is null or already
// numbered. The parent is handled first (recursively), so a parent always
// receives a lower number than its children.
110 void set_placeholder_order(id_placeholder
const* p
)
112 if (p
&& !order
[p
->index
]) {
113 set_placeholder_order(p
->parent
);
114 order
[p
->index
] = ++count
;
// Builds the list of placeholders to generate ids for, in processing order.
//
// state: document state owning the placeholders; the returned pointers point
//        into state.placeholders, so `state` must outlive the result.
// xml:   xml text that is parsed to find out which placeholders are used
//        and in what order.
//
// Returns pointers to the placeholders that were given a non-zero order
// while parsing `xml`, sorted with placeholder_compare.
119 placeholder_index
index_placeholders(
120 document_state_impl
const& state
,
121 boost::string_ref xml
)
123 // The order in which the placeholders appear in the xml source.
// One entry per placeholder, value-initialised to 0 (meaning "not seen").
124 std::vector
<unsigned> order(state
.placeholders
.size());
// Parsing drives get_placeholder_order_callback, which fills in `order`.
126 xml_processor processor
;
127 get_placeholder_order_callback
callback(state
, order
);
128 processor
.parse(xml
, callback
);
130 placeholder_index sorted_placeholders
;
131 sorted_placeholders
.reserve(state
.placeholders
.size());
// Keep only the placeholders that received an order number.
132 BOOST_FOREACH(id_placeholder
const& p
, state
.placeholders
)
133 if (order
[p
.index
]) sorted_placeholders
.push_back(&p
);
134 boost::sort(sorted_placeholders
, placeholder_compare(order
));
136 return sorted_placeholders
;
139 // Resolve and generate ids.
//
// Working state for generating the ids of one block of placeholders:
// the set of ids already reserved within the block, plus a reference to
// the output vector shared with the caller.
141 struct generate_id_block_type
143 // The ids which won't require duplicate handling.
// Maps an id string to the placeholder that currently holds it.
// NOTE(review): the line naming this typedef is not visible in this view;
// the member below uses the name chosen_id_map.
144 typedef boost::unordered_map
<std::string
, id_placeholder
const*>
146 chosen_id_map chosen_ids
;
// Output, indexed by id_placeholder::index. Held by reference; owned by
// the caller.
147 std::vector
<std::string
>& generated_ids
;
149 generate_id_block_type(std::vector
<std::string
>& generated_ids
) :
150 generated_ids(generated_ids
) {}
// Generates and stores the ids for the placeholders in [begin, end).
152 void generate(placeholder_index::iterator begin
,
153 placeholder_index::iterator end
);
// Builds the candidate id for a placeholder and may reserve it in
// chosen_ids.
155 std::string
resolve_id(id_placeholder
const*);
// Turns a resolved id into the id to store for the placeholder.
156 std::string
generate_id(id_placeholder
const*, std::string
const&);
// Generates ids for the placeholders in [begin, end) and writes them into
// generated_ids.
//
// A new generate_id_block_type is constructed on every call, so the set of
// reserved ids (chosen_ids) starts empty for each block.
159 void generate_id_block(placeholder_index::iterator begin
,
160 placeholder_index::iterator end
,
161 std::vector
<std::string
>& generated_ids
)
163 generate_id_block_type
impl(generated_ids
);
164 impl
.generate(begin
, end
);
// Generates ids for the placeholders in [begin, end) in two passes:
//   1. resolve_id() is called for every placeholder, collecting the
//      candidate ids (and making reservations in chosen_ids);
//   2. generate_id() is called for every placeholder with its candidate,
//      and the result is stored in generated_ids at the placeholder's index.
// Doing all of pass 1 first means every reservation exists before any id
// is generated.
// NOTE(review): the declaration of `index` is not visible in this view; it
// is used to walk resolved_ids in step with the iterator, so it presumably
// starts at 0 - confirm.
167 void generate_id_block_type::generate(placeholder_index::iterator begin
,
168 placeholder_index::iterator end
)
// Candidate ids, in the same order as the range [begin, end).
170 std::vector
<std::string
> resolved_ids
;
172 for (placeholder_index::iterator i
= begin
; i
!= end
; ++i
)
173 resolved_ids
.push_back(resolve_id(*i
));
176 for (placeholder_index::iterator i
= begin
; i
!= end
; ++i
, ++index
)
178 generated_ids
[(**i
).index
] =
179 generate_id(*i
, resolved_ids
[index
]);
// Builds the candidate id for placeholder p and, for categories above
// id_category::numbered, reserves it in chosen_ids.
//
// When p has a parent, the candidate is the parent's entry in
// generated_ids followed by "." and p->id, so the parent's id must already
// have been generated.
// NOTE(review): the alternative of the conditional expression (no parent),
// the statement executed when a reservation is overwritten, and the return
// statement are not visible in this view - confirm against the full file.
183 std::string
generate_id_block_type::resolve_id(id_placeholder
const* p
)
185 std::string id
= p
->parent
?
186 generated_ids
[p
->parent
->index
] + "." + p
->id
:
189 if (p
->category
.c
> id_category::numbered
) {
190 // Reserve the id if it isn't already reserved.
// emplace() leaves an existing entry untouched; `.first` refers to the
// entry for `id` whether it was just inserted or was already there.
191 chosen_id_map::iterator pos
= chosen_ids
.emplace(id
, p
).first
;
193 // If it was reserved by a placeholder with a lower category,
194 // then overwrite it.
195 if (p
->category
.c
> pos
->second
->category
.c
)
// Produces the id to use for placeholder p, given its candidate
// (resolved) id.
//
// The visible code first checks whether p has a category above
// id_category::numbered and holds the reservation for resolved_id.
// Otherwise it splits resolved_id at its last '.', normalises the child
// part, and tries parent + base + decimal postfix until an id can be
// inserted into chosen_ids. Throws std::runtime_error("Too many ids") if
// the base has been shortened to nothing and the id is still too long.
//
// NOTE(review): many interior lines of this function (branch bodies, the
// declaration of `count`, the enclosing loop and the return statements)
// are not visible in this view - confirm details against the full file.
// NOTE(review): std::isdigit is called on plain char values below; that is
// undefined behaviour for negative chars - consider casting to
// unsigned char.
202 std::string
generate_id_block_type::generate_id(id_placeholder
const* p
,
203 std::string
const& resolved_id
)
// at() is safe here only because resolve_id() inserts an entry for every
// placeholder whose category is above id_category::numbered.
205 if (p
->category
.c
> id_category::numbered
&&
206 chosen_ids
.at(resolved_id
) == p
)
211 // Split the id into its parent part and child part.
213 // Note: can't just use the placeholder's parent, as the
214 // placeholder id might contain dots.
215 std::size_t child_start
= resolved_id
.rfind('.');
216 std::string parent_id
, base_id
;
// No dot: the whole id is the child part and parent_id stays empty.
218 if (child_start
== std::string::npos
) {
219 base_id
= normalize_id(resolved_id
, max_size
- 1);
// parent_id keeps the trailing '.', so it can be prepended directly.
222 parent_id
= resolved_id
.substr(0, child_start
+ 1);
223 base_id
= normalize_id(resolved_id
.substr(child_start
+ 1),
227 // Since we're adding digits, don't want an id that ends in
// `length` tracks how much of base_id is kept; base_id.erase(length)
// below truncates the string to that many characters.
230 unsigned int length
= base_id
.size();
232 if (length
> 0 && std::isdigit(base_id
[length
- 1])) {
233 if (length
< max_size
- 1) {
238 while (length
> 0 && std::isdigit(base_id
[length
-1]))
240 base_id
.erase(length
);
// Candidate postfix: the decimal text of the current counter value.
248 std::string postfix
=
249 boost::lexical_cast
<std::string
>(count
++);
251 if ((base_id
.size() + postfix
.size()) > max_size
) {
252 // The id is now too long, so reduce the length and
255 // Would need a lot of ids to get this far....
256 if (length
== 0) throw std::runtime_error("Too many ids");
261 // Trim any trailing digits.
262 while (length
> 0 && std::isdigit(base_id
[length
-1]))
265 base_id
.erase(length
);
269 // Try to reserve this id.
270 std::string generated_id
= parent_id
+ base_id
+ postfix
;
// `.second` is true only when the id was newly inserted, i.e. it was not
// already taken by another placeholder in this block.
272 if (chosen_ids
.emplace(generated_id
, p
).second
) {
282 // Return a copy of the xml with all the placeholders replaced by
// either the supplied ids or, when no id list is given, their unresolved ids.
//
// xml_processor callback that builds the output incrementally: text between
// id values is copied through unchanged, and each id value that names a
// placeholder is replaced.
// NOTE(review): the declaration of `result` and the constructor's
// initialiser list are not visible in this view; `result` is appended to
// below and returned as a std::string by replace_ids().
286 struct replace_ids_callback
: xml_processor::callback
288 document_state_impl
const& state
;
// Replacement ids indexed by id_placeholder::index; may be null, in which
// case each placeholder's unresolved_id is used instead.
289 std::vector
<std::string
> const* ids
;
// Position in the source xml up to which text has already been copied.
290 boost::string_ref::const_iterator source_pos
;
293 replace_ids_callback(document_state_impl
const& state
,
294 std::vector
<std::string
> const* ids
)
// Begin copying from the start of the xml.
301 void start(boost::string_ref xml
)
303 source_pos
= xml
.begin();
// Values that do not correspond to a placeholder are left alone: they are
// copied later as part of the surrounding text.
// Assumes `value` is a view into the same buffer passed to start(), since
// its iterators are combined with source_pos.
306 void id_value(boost::string_ref value
)
308 if (id_placeholder
const* p
= state
.get_placeholder(value
))
310 boost::string_ref id
= ids
?
311 (*ids
)[p
->index
] : p
->unresolved_id
;
// Copy the untouched text before the value, then the replacement id,
// and continue after the value.
313 result
.append(source_pos
, value
.begin());
314 result
.append(id
.begin(), id
.end());
315 source_pos
= value
.end();
// Copy whatever remains after the last replaced value.
319 void finish(boost::string_ref xml
)
321 result
.append(source_pos
, xml
.end());
322 source_pos
= xml
.end();
// Returns the text built by running replace_ids_callback over `xml`.
//
// state: document state used to look up placeholders by id value.
// xml:   the xml text to process.
// ids:   replacement ids indexed by placeholder index, or null to use each
//        placeholder's unresolved_id.
326 std::string
replace_ids(document_state_impl
const& state
, boost::string_ref xml
,
327 std::vector
<std::string
> const* ids
)
329 xml_processor processor
;
330 replace_ids_callback
callback(state
, ids
);
331 processor
.parse(xml
, callback
);
332 return callback
.result
;
338 // Normalizes generated ids.
//
// Convenience overload: forwards to normalize_id(src_id, size) with
// max_size as the size limit.
341 std::string
normalize_id(boost::string_ref src_id
)
343 return normalize_id(src_id
, max_size
);
346 std::string
normalize_id(boost::string_ref src_id
, std::size_t size
)
348 std::string
id(src_id
.begin(), src_id
.end());
353 while (src
< id
.length() && id
[src
] == '_') {
357 if (src
== id
.length()) {
361 while (src
< id
.length() && dst
< size
) {
362 if (id
[src
] == '_') {
365 } while(src
< id
.length() && id
[src
] == '_');
367 if (src
< id
.length()) id
[dst
++] = '_';
370 id
[dst
++] = id
[src
++];