]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2005 2006 Joel de Guzman | |
3 | http://spirit.sourceforge.net/ | |
4 | ||
5 | Use, modification and distribution is subject to the Boost Software | |
6 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
7 | http://www.boost.org/LICENSE_1_0.txt) | |
8 | =============================================================================*/ | |
9 | #include "post_process.hpp" | |
11fdf7f2 | 10 | #include <cctype> |
7c673cae FG |
11 | #include <set> |
12 | #include <stack> | |
20effc67 | 13 | #include <boost/bind/bind.hpp> |
11fdf7f2 | 14 | #include <boost/spirit/include/classic_core.hpp> |
92f5a8d4 TL |
15 | #include <boost/spirit/include/phoenix1_operators.hpp> |
16 | #include <boost/spirit/include/phoenix1_primitives.hpp> | |
7c673cae | 17 | |
20effc67 TL |
18 | using namespace boost::placeholders; |
19 | ||
7c673cae FG |
20 | namespace quickbook |
21 | { | |
22 | namespace cl = boost::spirit::classic; | |
92f5a8d4 | 23 | namespace ph = phoenix; |
7c673cae FG |
24 | typedef std::string::const_iterator iter_type; |
25 | ||
92f5a8d4 | 26 | struct pretty_printer |
7c673cae | 27 | { |
92f5a8d4 | 28 | pretty_printer(std::string& out_, int& current_indent_, int linewidth_) |
11fdf7f2 TL |
29 | : prev(0) |
30 | , out(out_) | |
31 | , current_indent(current_indent_) | |
32 | , column(0) | |
33 | , in_string(false) | |
34 | , linewidth(linewidth_) | |
35 | { | |
36 | } | |
7c673cae FG |
37 | |
38 | void indent() | |
39 | { | |
40 | BOOST_ASSERT(current_indent >= 0); // this should not happen! | |
41 | for (int i = 0; i < current_indent; ++i) | |
42 | out += ' '; | |
43 | column = current_indent; | |
44 | } | |
45 | ||
46 | void trim_spaces() | |
47 | { | |
11fdf7f2 | 48 | out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces |
7c673cae FG |
49 | } |
50 | ||
51 | void break_line() | |
52 | { | |
53 | trim_spaces(); | |
54 | out += '\n'; | |
55 | indent(); | |
56 | } | |
57 | ||
58 | bool line_is_empty() const | |
59 | { | |
11fdf7f2 TL |
60 | for (iter_type i = out.end() - (column - current_indent); |
61 | i != out.end(); ++i) { | |
62 | if (*i != ' ') return false; | |
7c673cae FG |
63 | } |
64 | return true; | |
65 | } | |
66 | ||
67 | void align_indent() | |
68 | { | |
69 | // make sure we are at the proper indent position | |
11fdf7f2 TL |
70 | if (column != current_indent) { |
71 | if (column > current_indent) { | |
72 | if (line_is_empty()) { | |
73 | // trim just enough trailing spaces down to | |
74 | // current_indent position | |
75 | out.erase( | |
76 | out.end() - (column - current_indent), out.end()); | |
7c673cae FG |
77 | column = current_indent; |
78 | } | |
11fdf7f2 | 79 | else { |
7c673cae FG |
80 | // nope, line is not empty. do a hard CR |
81 | break_line(); | |
82 | } | |
83 | } | |
11fdf7f2 | 84 | else { |
7c673cae | 85 | // will this happen? (i.e. column <= current_indent) |
11fdf7f2 | 86 | while (column != current_indent) { |
7c673cae FG |
87 | out += ' '; |
88 | ++column; | |
89 | } | |
90 | } | |
91 | } | |
92 | } | |
93 | ||
94 | void print(char ch) | |
95 | { | |
96 | // Print a char. Attempt to break the line if we are exceeding | |
97 | // the target linewidth. The linewidth is not an absolute limit. | |
98 | // There are many cases where a line will exceed the linewidth | |
99 | // and there is no way to properly break the line. Preformatted | |
100 | // code that exceeds the linewidth are examples. We cannot break | |
101 | // preformatted code. We shall not attempt to be very strict with | |
102 | // line breaking. What's more important is to have a reproducable | |
103 | // output (i.e. processing two logically equivalent xml files | |
104 | // results in two lexically equivalent xml files). *** pretty | |
105 | // formatting is a secondary goal *** | |
106 | ||
107 | // Strings will occur only in tag attributes. Normal content | |
108 | // will have " instead. We shall deal only with tag | |
109 | // attributes here. | |
11fdf7f2 | 110 | if (ch == '"') in_string = !in_string; // don't break strings! |
7c673cae | 111 | |
11fdf7f2 | 112 | if (!in_string && std::isspace(static_cast<unsigned char>(ch))) { |
7c673cae | 113 | // we can break spaces if they are not inside strings |
11fdf7f2 TL |
114 | if (!std::isspace(static_cast<unsigned char>(prev))) { |
115 | if (column >= linewidth) { | |
7c673cae | 116 | break_line(); |
11fdf7f2 | 117 | if (column == 0 && ch == ' ') { |
7c673cae FG |
118 | ++column; |
119 | out += ' '; | |
120 | } | |
121 | } | |
11fdf7f2 | 122 | else { |
7c673cae FG |
123 | ++column; |
124 | out += ' '; | |
125 | } | |
126 | } | |
127 | } | |
11fdf7f2 | 128 | else { |
7c673cae FG |
129 | // we can break tag boundaries and stuff after |
130 | // delimiters if they are not inside strings | |
131 | // and *only-if* the preceding char is a space | |
11fdf7f2 TL |
132 | if (!in_string && column >= linewidth && |
133 | (ch == '<' && | |
134 | std::isspace(static_cast<unsigned char>(prev)))) | |
7c673cae FG |
135 | break_line(); |
136 | out += ch; | |
137 | ++column; | |
138 | } | |
139 | ||
140 | prev = ch; | |
141 | } | |
142 | ||
11fdf7f2 | 143 | void print(iter_type f, iter_type l) |
7c673cae FG |
144 | { |
145 | for (iter_type i = f; i != l; ++i) | |
146 | print(*i); | |
147 | } | |
148 | ||
11fdf7f2 | 149 | void print_tag(iter_type f, iter_type l, bool is_flow_tag) |
7c673cae | 150 | { |
11fdf7f2 | 151 | if (is_flow_tag) { |
7c673cae FG |
152 | print(f, l); |
153 | } | |
11fdf7f2 | 154 | else { |
7c673cae FG |
155 | // This is not a flow tag, so, we're going to do a |
156 | // carriage return anyway. Let us remove extra right | |
157 | // spaces. | |
158 | std::string str(f, l); | |
159 | BOOST_ASSERT(f != l); // this should not happen | |
160 | iter_type i = str.end(); | |
11fdf7f2 TL |
161 | while (i != str.begin() && |
162 | std::isspace(static_cast<unsigned char>(*(i - 1)))) | |
7c673cae FG |
163 | --i; |
164 | print(str.begin(), i); | |
165 | } | |
166 | } | |
167 | ||
168 | char prev; | |
169 | std::string& out; | |
170 | int& current_indent; | |
171 | int column; | |
172 | bool in_string; | |
173 | int linewidth; | |
b32b8144 | 174 | |
11fdf7f2 | 175 | private: |
92f5a8d4 | 176 | pretty_printer& operator=(pretty_printer const&); |
7c673cae FG |
177 | }; |
178 | ||
92f5a8d4 TL |
// Tag names registered as block tags (i.e. not flow tags) by tidy_compiler
// when the output being post-processed is HTML.
char const* html_block_tags_[] = {
    // generic containers and text blocks
    "div", "p", "blockquote", "address",
    // headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    // lists
    "ul", "ol", "li", "dl", "dt", "dd",
    // tables
    "table", "tr", "th", "td", "tbody", "thead",
    // forms and miscellaneous
    "form", "fieldset", "hr", "noscript",
    // document skeleton
    "html", "body"
};

// Tag names registered as block tags by tidy_compiler when the output is
// not HTML.
char const* block_tags_[] = {
    "author",
    "blockquote",
    "bridgehead",
    "callout",
    "calloutlist",
    "caution",
    "copyright",
    "entry",
    "important",
    "informaltable",
    "itemizedlist",
    "legalnotice",
    "listitem",
    "note",
    "orderedlist",
    "para",
    "row",
    "section",
    "simpara",
    "table",
    "tbody",
    "textobject",
    "tgroup",
    "thead",
    "tip",
    "variablelist",
    "varlistentry",
    "warning",
    "xml",
    "xi:include"
};

// Document type names. For non-HTML output tidy_compiler registers each
// name, plus the name suffixed with "info" and with "purpose", as a block
// tag.
char const* doc_types_[] = {
    "book",
    "article",
    "library",
    "chapter",
    "part",
    "appendix",
    "preface",
    "qandadiv",
    "qandaset",
    "reference",
    "set"
};
7c673cae FG |
198 | |
199 | struct tidy_compiler | |
200 | { | |
92f5a8d4 | 201 | tidy_compiler(std::string& out_, int linewidth_, bool is_html) |
11fdf7f2 TL |
202 | : out(out_) |
203 | , current_indent(0) | |
92f5a8d4 | 204 | , printer(out_, current_indent, linewidth_) |
7c673cae | 205 | { |
92f5a8d4 TL |
206 | if (is_html) { |
207 | static std::size_t const n_block_tags = | |
208 | sizeof(html_block_tags_) / sizeof(char const*); | |
209 | for (std::size_t i = 0; i != n_block_tags; ++i) { | |
210 | block_tags.insert(html_block_tags_[i]); | |
211 | } | |
7c673cae | 212 | } |
92f5a8d4 TL |
213 | else { |
214 | static std::size_t const n_block_tags = | |
215 | sizeof(block_tags_) / sizeof(char const*); | |
216 | for (std::size_t i = 0; i != n_block_tags; ++i) { | |
217 | block_tags.insert(block_tags_[i]); | |
218 | } | |
7c673cae | 219 | |
92f5a8d4 TL |
220 | static std::size_t const n_doc_types = |
221 | sizeof(doc_types_) / sizeof(char const*); | |
222 | for (std::size_t i = 0; i != n_doc_types; ++i) { | |
223 | block_tags.insert(doc_types_[i]); | |
224 | block_tags.insert(doc_types_[i] + std::string("info")); | |
225 | block_tags.insert(doc_types_[i] + std::string("purpose")); | |
226 | } | |
7c673cae FG |
227 | } |
228 | } | |
229 | ||
230 | bool is_flow_tag(std::string const& tag) | |
231 | { | |
232 | return block_tags.find(tag) == block_tags.end(); | |
233 | } | |
234 | ||
235 | std::set<std::string> block_tags; | |
236 | std::stack<std::string> tags; | |
237 | std::string& out; | |
238 | int current_indent; | |
92f5a8d4 | 239 | pretty_printer printer; |
7c673cae | 240 | std::string current_tag; |
b32b8144 | 241 | |
11fdf7f2 | 242 | private: |
b32b8144 | 243 | tidy_compiler& operator=(tidy_compiler const&); |
7c673cae FG |
244 | }; |
245 | ||
246 | struct tidy_grammar : cl::grammar<tidy_grammar> | |
247 | { | |
92f5a8d4 TL |
248 | tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_) |
249 | : state(state_), indent(indent_), is_html(is_html_) | |
11fdf7f2 TL |
250 | { |
251 | } | |
7c673cae | 252 | |
11fdf7f2 | 253 | template <typename Scanner> struct definition |
7c673cae FG |
254 | { |
255 | definition(tidy_grammar const& self) | |
256 | { | |
11fdf7f2 TL |
257 | // clang-format off |
258 | ||
92f5a8d4 TL |
259 | tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)]; |
260 | ||
261 | code = cl::eps_p(ph::var(self.is_html)) | |
262 | >> "<" | |
263 | >> cl::lexeme_d[cl::str_p("pre")] | |
264 | >> *(cl::anychar_p - '>') | |
265 | >> ">" | |
266 | >> *(cl::anychar_p - "</pre>") | |
267 | >> "</pre" | |
268 | >> cl::lexeme_d[">" >> *cl::space_p] | |
269 | | cl::eps_p(!ph::var(self.is_html)) | |
270 | >> "<programlisting>" | |
7c673cae | 271 | >> *(cl::anychar_p - "</programlisting>") |
92f5a8d4 TL |
272 | >> "</programlisting" |
273 | >> cl::lexeme_d[">" >> *cl::space_p] | |
7c673cae FG |
274 | ; |
275 | ||
276 | // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ? | |
277 | // It is there to preserve the space after the tag that is | |
278 | // otherwise consumed by the cl::space_p skipper. | |
279 | ||
280 | escape = | |
281 | cl::str_p("<!--quickbook-escape-prefix-->") >> | |
282 | (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->"))) | |
283 | [ | |
284 | boost::bind(&tidy_grammar::do_escape, &self, _1, _2) | |
285 | ] | |
286 | >> cl::lexeme_d | |
287 | [ | |
288 | cl::str_p("<!--quickbook-escape-postfix-->") >> | |
289 | (*cl::space_p) | |
290 | [ | |
291 | boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2) | |
292 | ] | |
293 | ] | |
294 | ; | |
295 | ||
296 | start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; | |
297 | start_end_tag = | |
298 | '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p] | |
299 | | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p] | |
300 | | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p] | |
301 | | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p] | |
302 | ; | |
303 | content = cl::lexeme_d[ +(cl::anychar_p - '<') ]; | |
304 | end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; | |
305 | ||
306 | markup = | |
307 | escape | |
308 | | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)] | |
309 | | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)] | |
310 | | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)] | |
311 | | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)] | |
312 | | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)] | |
313 | ; | |
314 | ||
92f5a8d4 | 315 | tidy = *markup; |
11fdf7f2 TL |
316 | |
317 | // clang-format on | |
7c673cae FG |
318 | } |
319 | ||
11fdf7f2 | 320 | cl::rule<Scanner> const& start() { return tidy; } |
7c673cae | 321 | |
11fdf7f2 TL |
322 | cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content, |
323 | end_tag, markup, code, escape; | |
7c673cae FG |
324 | }; |
325 | ||
326 | void do_escape_post(iter_type f, iter_type l) const | |
327 | { | |
328 | for (iter_type i = f; i != l; ++i) | |
329 | state.out += *i; | |
330 | } | |
331 | ||
332 | void do_escape(iter_type f, iter_type l) const | |
333 | { | |
11fdf7f2 | 334 | while (f != l && std::isspace(*f)) { |
7c673cae | 335 | ++f; |
11fdf7f2 TL |
336 | } |
337 | while (f != l && std::isspace(*(l - 1))) { | |
338 | --l; | |
339 | } | |
340 | for (iter_type i = f; i != l; ++i) { | |
7c673cae | 341 | state.out += *i; |
11fdf7f2 | 342 | } |
7c673cae FG |
343 | } |
344 | ||
345 | void do_code(iter_type f, iter_type l) const | |
346 | { | |
92f5a8d4 | 347 | state.printer.trim_spaces(); |
11fdf7f2 | 348 | if (state.out[state.out.size() - 1] != '\n') state.out += '\n'; |
92f5a8d4 TL |
349 | |
350 | // trim trailing space from after closing tag | |
351 | while (f != l && std::isspace(*(l - 1))) { | |
352 | --l; | |
353 | } | |
354 | ||
7c673cae FG |
355 | // print the string taking care of line |
356 | // ending CR/LF platform issues | |
92f5a8d4 | 357 | for (iter_type i = f; i != l;) { |
11fdf7f2 | 358 | if (*i == '\n') { |
92f5a8d4 | 359 | state.printer.trim_spaces(); |
7c673cae FG |
360 | state.out += '\n'; |
361 | ++i; | |
92f5a8d4 TL |
362 | if (i != l && *i == '\r') { |
363 | ++i; | |
364 | } | |
7c673cae | 365 | } |
11fdf7f2 | 366 | else if (*i == '\r') { |
92f5a8d4 | 367 | state.printer.trim_spaces(); |
7c673cae FG |
368 | state.out += '\n'; |
369 | ++i; | |
92f5a8d4 TL |
370 | if (i != l && *i == '\n') { |
371 | ++i; | |
372 | } | |
7c673cae | 373 | } |
11fdf7f2 | 374 | else { |
7c673cae | 375 | state.out += *i; |
92f5a8d4 | 376 | ++i; |
7c673cae FG |
377 | } |
378 | } | |
379 | state.out += '\n'; | |
92f5a8d4 | 380 | state.printer.indent(); |
7c673cae FG |
381 | } |
382 | ||
383 | void do_tag(iter_type f, iter_type l) const | |
384 | { | |
385 | state.current_tag = std::string(f, l); | |
386 | } | |
387 | ||
388 | void do_start_end_tag(iter_type f, iter_type l) const | |
389 | { | |
390 | bool is_flow_tag = state.is_flow_tag(state.current_tag); | |
92f5a8d4 TL |
391 | if (!is_flow_tag) state.printer.align_indent(); |
392 | state.printer.print_tag(f, l, is_flow_tag); | |
393 | if (!is_flow_tag) state.printer.break_line(); | |
7c673cae FG |
394 | } |
395 | ||
396 | void do_start_tag(iter_type f, iter_type l) const | |
397 | { | |
398 | state.tags.push(state.current_tag); | |
399 | bool is_flow_tag = state.is_flow_tag(state.current_tag); | |
92f5a8d4 TL |
400 | if (!is_flow_tag) state.printer.align_indent(); |
401 | state.printer.print_tag(f, l, is_flow_tag); | |
11fdf7f2 | 402 | if (!is_flow_tag) { |
7c673cae | 403 | state.current_indent += indent; |
92f5a8d4 | 404 | state.printer.break_line(); |
7c673cae FG |
405 | } |
406 | } | |
407 | ||
408 | void do_content(iter_type f, iter_type l) const | |
409 | { | |
92f5a8d4 | 410 | state.printer.print(f, l); |
7c673cae FG |
411 | } |
412 | ||
413 | void do_end_tag(iter_type f, iter_type l) const | |
414 | { | |
415 | if (state.tags.empty()) | |
416 | throw quickbook::post_process_failure("Mismatched tags."); | |
11fdf7f2 | 417 | |
7c673cae | 418 | bool is_flow_tag = state.is_flow_tag(state.tags.top()); |
11fdf7f2 | 419 | if (!is_flow_tag) { |
7c673cae | 420 | state.current_indent -= indent; |
92f5a8d4 | 421 | state.printer.align_indent(); |
7c673cae | 422 | } |
92f5a8d4 TL |
423 | state.printer.print_tag(f, l, is_flow_tag); |
424 | if (!is_flow_tag) state.printer.break_line(); | |
7c673cae FG |
425 | state.tags.pop(); |
426 | } | |
427 | ||
428 | tidy_compiler& state; | |
429 | int indent; | |
92f5a8d4 | 430 | bool is_html; |
b32b8144 | 431 | |
11fdf7f2 | 432 | private: |
b32b8144 | 433 | tidy_grammar& operator=(tidy_grammar const&); |
7c673cae FG |
434 | }; |
435 | ||
92f5a8d4 TL |
436 | std::string post_process( |
437 | std::string const& in, int indent, int linewidth, bool is_html) | |
7c673cae | 438 | { |
11fdf7f2 TL |
439 | if (indent == -1) indent = 2; // set default to 2 |
440 | if (linewidth == -1) linewidth = 80; // set default to 80 | |
7c673cae FG |
441 | |
442 | std::string tidy; | |
92f5a8d4 TL |
443 | tidy_compiler state(tidy, linewidth, is_html); |
444 | tidy_grammar g(state, indent, is_html); | |
11fdf7f2 TL |
445 | cl::parse_info<iter_type> r = |
446 | parse(in.begin(), in.end(), g, cl::space_p); | |
447 | if (r.full) { | |
7c673cae FG |
448 | return tidy; |
449 | } | |
11fdf7f2 | 450 | else { |
7c673cae FG |
451 | throw quickbook::post_process_failure("Post Processing Failed."); |
452 | } | |
453 | } | |
454 | } |