]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2005 2006 Joel de Guzman | |
3 | http://spirit.sourceforge.net/ | |
4 | ||
5 | Use, modification and distribution is subject to the Boost Software | |
6 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
7 | http://www.boost.org/LICENSE_1_0.txt) | |
8 | =============================================================================*/ | |
9 | #include "post_process.hpp" | |
10 | #include <boost/spirit/include/classic_core.hpp> | |
11 | #include <boost/bind.hpp> | |
12 | #include <set> | |
13 | #include <stack> | |
14 | #include <cctype> | |
15 | ||
16 | namespace quickbook | |
17 | { | |
18 | namespace cl = boost::spirit::classic; | |
19 | typedef std::string::const_iterator iter_type; | |
20 | ||
21 | struct printer | |
22 | { | |
b32b8144 FG |
23 | printer(std::string& out_, int& current_indent_, int linewidth_) |
24 | : prev(0), out(out_), current_indent(current_indent_) , column(0) | |
25 | , in_string(false), linewidth(linewidth_) {} | |
7c673cae FG |
26 | |
27 | void indent() | |
28 | { | |
29 | BOOST_ASSERT(current_indent >= 0); // this should not happen! | |
30 | for (int i = 0; i < current_indent; ++i) | |
31 | out += ' '; | |
32 | column = current_indent; | |
33 | } | |
34 | ||
35 | void trim_spaces() | |
36 | { | |
37 | out.erase(out.find_last_not_of(' ')+1); // trim trailing spaces | |
38 | } | |
39 | ||
40 | void break_line() | |
41 | { | |
42 | trim_spaces(); | |
43 | out += '\n'; | |
44 | indent(); | |
45 | } | |
46 | ||
47 | bool line_is_empty() const | |
48 | { | |
49 | for (iter_type i = out.end()-(column-current_indent); i != out.end(); ++i) | |
50 | { | |
51 | if (*i != ' ') | |
52 | return false; | |
53 | } | |
54 | return true; | |
55 | } | |
56 | ||
57 | void align_indent() | |
58 | { | |
59 | // make sure we are at the proper indent position | |
60 | if (column != current_indent) | |
61 | { | |
62 | if (column > current_indent) | |
63 | { | |
64 | if (line_is_empty()) | |
65 | { | |
66 | // trim just enough trailing spaces down to current_indent position | |
67 | out.erase(out.end()-(column-current_indent), out.end()); | |
68 | column = current_indent; | |
69 | } | |
70 | else | |
71 | { | |
72 | // nope, line is not empty. do a hard CR | |
73 | break_line(); | |
74 | } | |
75 | } | |
76 | else | |
77 | { | |
78 | // will this happen? (i.e. column <= current_indent) | |
79 | while (column != current_indent) | |
80 | { | |
81 | out += ' '; | |
82 | ++column; | |
83 | } | |
84 | } | |
85 | } | |
86 | } | |
87 | ||
88 | void print(char ch) | |
89 | { | |
90 | // Print a char. Attempt to break the line if we are exceeding | |
91 | // the target linewidth. The linewidth is not an absolute limit. | |
92 | // There are many cases where a line will exceed the linewidth | |
93 | // and there is no way to properly break the line. Preformatted | |
94 | // code that exceeds the linewidth are examples. We cannot break | |
95 | // preformatted code. We shall not attempt to be very strict with | |
96 | // line breaking. What's more important is to have a reproducable | |
97 | // output (i.e. processing two logically equivalent xml files | |
98 | // results in two lexically equivalent xml files). *** pretty | |
99 | // formatting is a secondary goal *** | |
100 | ||
101 | // Strings will occur only in tag attributes. Normal content | |
102 | // will have " instead. We shall deal only with tag | |
103 | // attributes here. | |
104 | if (ch == '"') | |
105 | in_string = !in_string; // don't break strings! | |
106 | ||
107 | if (!in_string && std::isspace(static_cast<unsigned char>(ch))) | |
108 | { | |
109 | // we can break spaces if they are not inside strings | |
110 | if (!std::isspace(static_cast<unsigned char>(prev))) | |
111 | { | |
112 | if (column >= linewidth) | |
113 | { | |
114 | break_line(); | |
115 | if (column == 0 && ch == ' ') | |
116 | { | |
117 | ++column; | |
118 | out += ' '; | |
119 | } | |
120 | } | |
121 | else | |
122 | { | |
123 | ++column; | |
124 | out += ' '; | |
125 | } | |
126 | } | |
127 | } | |
128 | else | |
129 | { | |
130 | // we can break tag boundaries and stuff after | |
131 | // delimiters if they are not inside strings | |
132 | // and *only-if* the preceding char is a space | |
133 | if (!in_string | |
134 | && column >= linewidth | |
135 | && (ch == '<' && std::isspace(static_cast<unsigned char>(prev)))) | |
136 | break_line(); | |
137 | out += ch; | |
138 | ++column; | |
139 | } | |
140 | ||
141 | prev = ch; | |
142 | } | |
143 | ||
144 | void | |
145 | print(iter_type f, iter_type l) | |
146 | { | |
147 | for (iter_type i = f; i != l; ++i) | |
148 | print(*i); | |
149 | } | |
150 | ||
151 | void | |
152 | print_tag(iter_type f, iter_type l, bool is_flow_tag) | |
153 | { | |
154 | if (is_flow_tag) | |
155 | { | |
156 | print(f, l); | |
157 | } | |
158 | else | |
159 | { | |
160 | // This is not a flow tag, so, we're going to do a | |
161 | // carriage return anyway. Let us remove extra right | |
162 | // spaces. | |
163 | std::string str(f, l); | |
164 | BOOST_ASSERT(f != l); // this should not happen | |
165 | iter_type i = str.end(); | |
166 | while (i != str.begin() && std::isspace(static_cast<unsigned char>(*(i-1)))) | |
167 | --i; | |
168 | print(str.begin(), i); | |
169 | } | |
170 | } | |
171 | ||
172 | char prev; | |
173 | std::string& out; | |
174 | int& current_indent; | |
175 | int column; | |
176 | bool in_string; | |
177 | int linewidth; | |
b32b8144 FG |
178 | |
179 | private: | |
180 | printer& operator=(printer const&); | |
7c673cae FG |
181 | }; |
182 | ||
// Element names that tidy_compiler registers as block tags, i.e. tags for
// which is_flow_tag() returns false.
char const* block_tags_[] = {
    "author",
    "blockquote",
    "bridgehead",
    "callout",
    "calloutlist",
    "caution",
    "copyright",
    "entry",
    "important",
    "informaltable",
    "itemizedlist",
    "legalnotice",
    "listitem",
    "note",
    "orderedlist",
    "para",
    "row",
    "section",
    "simpara",
    "table",
    "tbody",
    "textobject",
    "tgroup",
    "thead",
    "tip",
    "variablelist",
    "varlistentry",
    "warning",
    "xml",
    "xi:include"
};
216 | ||
// Document element names.  tidy_compiler registers each name, plus the
// name with "info" and "purpose" appended, as a block tag.
char const* doc_types_[] = {
    "book",
    "article",
    "library",
    "chapter",
    "part",
    "appendix",
    "preface",
    "qandadiv",
    "qandaset",
    "reference",
    "set"
};
231 | ||
232 | struct tidy_compiler | |
233 | { | |
b32b8144 FG |
234 | tidy_compiler(std::string& out_, int linewidth_) |
235 | : out(out_), current_indent(0), printer_(out, current_indent, linewidth_) | |
7c673cae | 236 | { |
b32b8144 FG |
237 | static std::size_t const n_block_tags = sizeof(block_tags_)/sizeof(char const*); |
238 | for (std::size_t i = 0; i != n_block_tags; ++i) | |
7c673cae FG |
239 | { |
240 | block_tags.insert(block_tags_[i]); | |
241 | } | |
242 | ||
b32b8144 FG |
243 | static std::size_t const n_doc_types = sizeof(doc_types_)/sizeof(char const*); |
244 | for (std::size_t i = 0; i != n_doc_types; ++i) | |
7c673cae FG |
245 | { |
246 | block_tags.insert(doc_types_[i]); | |
247 | block_tags.insert(doc_types_[i] + std::string("info")); | |
248 | block_tags.insert(doc_types_[i] + std::string("purpose")); | |
249 | } | |
250 | } | |
251 | ||
252 | bool is_flow_tag(std::string const& tag) | |
253 | { | |
254 | return block_tags.find(tag) == block_tags.end(); | |
255 | } | |
256 | ||
257 | std::set<std::string> block_tags; | |
258 | std::stack<std::string> tags; | |
259 | std::string& out; | |
260 | int current_indent; | |
261 | printer printer_; | |
262 | std::string current_tag; | |
b32b8144 FG |
263 | |
264 | private: | |
265 | tidy_compiler& operator=(tidy_compiler const&); | |
7c673cae FG |
266 | }; |
267 | ||
268 | struct tidy_grammar : cl::grammar<tidy_grammar> | |
269 | { | |
b32b8144 FG |
270 | tidy_grammar(tidy_compiler& state_, int indent_) |
271 | : state(state_), indent(indent_) {} | |
7c673cae FG |
272 | |
273 | template <typename Scanner> | |
274 | struct definition | |
275 | { | |
276 | definition(tidy_grammar const& self) | |
277 | { | |
278 | tag = (cl::lexeme_d[+(cl::alpha_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)]; | |
279 | ||
280 | code = | |
281 | "<programlisting>" | |
282 | >> *(cl::anychar_p - "</programlisting>") | |
283 | >> "</programlisting>" | |
284 | ; | |
285 | ||
286 | // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ? | |
287 | // It is there to preserve the space after the tag that is | |
288 | // otherwise consumed by the cl::space_p skipper. | |
289 | ||
290 | escape = | |
291 | cl::str_p("<!--quickbook-escape-prefix-->") >> | |
292 | (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->"))) | |
293 | [ | |
294 | boost::bind(&tidy_grammar::do_escape, &self, _1, _2) | |
295 | ] | |
296 | >> cl::lexeme_d | |
297 | [ | |
298 | cl::str_p("<!--quickbook-escape-postfix-->") >> | |
299 | (*cl::space_p) | |
300 | [ | |
301 | boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2) | |
302 | ] | |
303 | ] | |
304 | ; | |
305 | ||
306 | start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; | |
307 | start_end_tag = | |
308 | '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p] | |
309 | | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p] | |
310 | | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p] | |
311 | | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p] | |
312 | ; | |
313 | content = cl::lexeme_d[ +(cl::anychar_p - '<') ]; | |
314 | end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; | |
315 | ||
316 | markup = | |
317 | escape | |
318 | | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)] | |
319 | | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)] | |
320 | | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)] | |
321 | | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)] | |
322 | | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)] | |
323 | ; | |
324 | ||
325 | tidy = +markup; | |
326 | } | |
327 | ||
328 | cl::rule<Scanner> const& | |
329 | start() { return tidy; } | |
330 | ||
331 | cl::rule<Scanner> | |
332 | tidy, tag, start_tag, start_end_tag, | |
333 | content, end_tag, markup, code, escape; | |
334 | }; | |
335 | ||
336 | void do_escape_post(iter_type f, iter_type l) const | |
337 | { | |
338 | for (iter_type i = f; i != l; ++i) | |
339 | state.out += *i; | |
340 | } | |
341 | ||
342 | void do_escape(iter_type f, iter_type l) const | |
343 | { | |
344 | while (f != l && std::isspace(*f)) | |
345 | ++f; | |
346 | for (iter_type i = f; i != l; ++i) | |
347 | state.out += *i; | |
348 | } | |
349 | ||
350 | void do_code(iter_type f, iter_type l) const | |
351 | { | |
352 | state.printer_.trim_spaces(); | |
353 | if (state.out[state.out.size() - 1] != '\n') | |
354 | state.out += '\n'; | |
355 | // print the string taking care of line | |
356 | // ending CR/LF platform issues | |
357 | for (iter_type i = f; i != l; ++i) | |
358 | { | |
359 | if (*i == '\n') | |
360 | { | |
361 | state.printer_.trim_spaces(); | |
362 | state.out += '\n'; | |
363 | ++i; | |
364 | if (i != l && *i != '\r') | |
365 | state.out += *i; | |
366 | } | |
367 | else if (*i == '\r') | |
368 | { | |
369 | state.printer_.trim_spaces(); | |
370 | state.out += '\n'; | |
371 | ++i; | |
372 | if (i != l && *i != '\n') | |
373 | state.out += *i; | |
374 | } | |
375 | else | |
376 | { | |
377 | state.out += *i; | |
378 | } | |
379 | } | |
380 | state.out += '\n'; | |
381 | state.printer_.indent(); | |
382 | } | |
383 | ||
384 | void do_tag(iter_type f, iter_type l) const | |
385 | { | |
386 | state.current_tag = std::string(f, l); | |
387 | } | |
388 | ||
389 | void do_start_end_tag(iter_type f, iter_type l) const | |
390 | { | |
391 | bool is_flow_tag = state.is_flow_tag(state.current_tag); | |
392 | if (!is_flow_tag) | |
393 | state.printer_.align_indent(); | |
394 | state.printer_.print_tag(f, l, is_flow_tag); | |
395 | if (!is_flow_tag) | |
396 | state.printer_.break_line(); | |
397 | } | |
398 | ||
399 | void do_start_tag(iter_type f, iter_type l) const | |
400 | { | |
401 | state.tags.push(state.current_tag); | |
402 | bool is_flow_tag = state.is_flow_tag(state.current_tag); | |
403 | if (!is_flow_tag) | |
404 | state.printer_.align_indent(); | |
405 | state.printer_.print_tag(f, l, is_flow_tag); | |
406 | if (!is_flow_tag) | |
407 | { | |
408 | state.current_indent += indent; | |
409 | state.printer_.break_line(); | |
410 | } | |
411 | } | |
412 | ||
413 | void do_content(iter_type f, iter_type l) const | |
414 | { | |
415 | state.printer_.print(f, l); | |
416 | } | |
417 | ||
418 | void do_end_tag(iter_type f, iter_type l) const | |
419 | { | |
420 | if (state.tags.empty()) | |
421 | throw quickbook::post_process_failure("Mismatched tags."); | |
422 | ||
423 | bool is_flow_tag = state.is_flow_tag(state.tags.top()); | |
424 | if (!is_flow_tag) | |
425 | { | |
426 | state.current_indent -= indent; | |
427 | state.printer_.align_indent(); | |
428 | } | |
429 | state.printer_.print_tag(f, l, is_flow_tag); | |
430 | if (!is_flow_tag) | |
431 | state.printer_.break_line(); | |
432 | state.tags.pop(); | |
433 | } | |
434 | ||
435 | tidy_compiler& state; | |
436 | int indent; | |
b32b8144 FG |
437 | |
438 | private: | |
439 | tidy_grammar& operator=(tidy_grammar const&); | |
7c673cae FG |
440 | }; |
441 | ||
442 | std::string post_process( | |
443 | std::string const& in | |
444 | , int indent | |
445 | , int linewidth) | |
446 | { | |
447 | if (indent == -1) | |
448 | indent = 2; // set default to 2 | |
449 | if (linewidth == -1) | |
450 | linewidth = 80; // set default to 80 | |
451 | ||
452 | std::string tidy; | |
453 | tidy_compiler state(tidy, linewidth); | |
454 | tidy_grammar g(state, indent); | |
455 | cl::parse_info<iter_type> r = parse(in.begin(), in.end(), g, cl::space_p); | |
456 | if (r.full) | |
457 | { | |
458 | return tidy; | |
459 | } | |
460 | else | |
461 | { | |
462 | throw quickbook::post_process_failure("Post Processing Failed."); | |
463 | } | |
464 | } | |
465 | } | |
466 |