]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/tools/quickbook/src/post_process.cpp
c40c5e24fbc156f7531e0b02a946e234229a8c97
[ceph.git] / ceph / src / boost / tools / quickbook / src / post_process.cpp
1 /*=============================================================================
2 Copyright (c) 2005 2006 Joel de Guzman
3 http://spirit.sourceforge.net/
4
5 Use, modification and distribution is subject to the Boost Software
6 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 http://www.boost.org/LICENSE_1_0.txt)
8 =============================================================================*/
9 #include "post_process.hpp"
10 #include <cctype>
11 #include <set>
12 #include <stack>
13 #include <boost/bind.hpp>
14 #include <boost/spirit/include/classic_core.hpp>
15 #include <boost/spirit/include/phoenix1_operators.hpp>
16 #include <boost/spirit/include/phoenix1_primitives.hpp>
17
18 namespace quickbook
19 {
20 namespace cl = boost::spirit::classic;
21 namespace ph = phoenix;
22 typedef std::string::const_iterator iter_type;
23
24 struct pretty_printer
25 {
26 pretty_printer(std::string& out_, int& current_indent_, int linewidth_)
27 : prev(0)
28 , out(out_)
29 , current_indent(current_indent_)
30 , column(0)
31 , in_string(false)
32 , linewidth(linewidth_)
33 {
34 }
35
36 void indent()
37 {
38 BOOST_ASSERT(current_indent >= 0); // this should not happen!
39 for (int i = 0; i < current_indent; ++i)
40 out += ' ';
41 column = current_indent;
42 }
43
44 void trim_spaces()
45 {
46 out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces
47 }
48
49 void break_line()
50 {
51 trim_spaces();
52 out += '\n';
53 indent();
54 }
55
56 bool line_is_empty() const
57 {
58 for (iter_type i = out.end() - (column - current_indent);
59 i != out.end(); ++i) {
60 if (*i != ' ') return false;
61 }
62 return true;
63 }
64
65 void align_indent()
66 {
67 // make sure we are at the proper indent position
68 if (column != current_indent) {
69 if (column > current_indent) {
70 if (line_is_empty()) {
71 // trim just enough trailing spaces down to
72 // current_indent position
73 out.erase(
74 out.end() - (column - current_indent), out.end());
75 column = current_indent;
76 }
77 else {
78 // nope, line is not empty. do a hard CR
79 break_line();
80 }
81 }
82 else {
83 // will this happen? (i.e. column <= current_indent)
84 while (column != current_indent) {
85 out += ' ';
86 ++column;
87 }
88 }
89 }
90 }
91
92 void print(char ch)
93 {
94 // Print a char. Attempt to break the line if we are exceeding
95 // the target linewidth. The linewidth is not an absolute limit.
96 // There are many cases where a line will exceed the linewidth
97 // and there is no way to properly break the line. Preformatted
98 // code that exceeds the linewidth are examples. We cannot break
99 // preformatted code. We shall not attempt to be very strict with
100 // line breaking. What's more important is to have a reproducable
101 // output (i.e. processing two logically equivalent xml files
102 // results in two lexically equivalent xml files). *** pretty
103 // formatting is a secondary goal ***
104
105 // Strings will occur only in tag attributes. Normal content
106 // will have &quot; instead. We shall deal only with tag
107 // attributes here.
108 if (ch == '"') in_string = !in_string; // don't break strings!
109
110 if (!in_string && std::isspace(static_cast<unsigned char>(ch))) {
111 // we can break spaces if they are not inside strings
112 if (!std::isspace(static_cast<unsigned char>(prev))) {
113 if (column >= linewidth) {
114 break_line();
115 if (column == 0 && ch == ' ') {
116 ++column;
117 out += ' ';
118 }
119 }
120 else {
121 ++column;
122 out += ' ';
123 }
124 }
125 }
126 else {
127 // we can break tag boundaries and stuff after
128 // delimiters if they are not inside strings
129 // and *only-if* the preceding char is a space
130 if (!in_string && column >= linewidth &&
131 (ch == '<' &&
132 std::isspace(static_cast<unsigned char>(prev))))
133 break_line();
134 out += ch;
135 ++column;
136 }
137
138 prev = ch;
139 }
140
141 void print(iter_type f, iter_type l)
142 {
143 for (iter_type i = f; i != l; ++i)
144 print(*i);
145 }
146
147 void print_tag(iter_type f, iter_type l, bool is_flow_tag)
148 {
149 if (is_flow_tag) {
150 print(f, l);
151 }
152 else {
153 // This is not a flow tag, so, we're going to do a
154 // carriage return anyway. Let us remove extra right
155 // spaces.
156 std::string str(f, l);
157 BOOST_ASSERT(f != l); // this should not happen
158 iter_type i = str.end();
159 while (i != str.begin() &&
160 std::isspace(static_cast<unsigned char>(*(i - 1))))
161 --i;
162 print(str.begin(), i);
163 }
164 }
165
166 char prev;
167 std::string& out;
168 int& current_indent;
169 int column;
170 bool in_string;
171 int linewidth;
172
173 private:
174 pretty_printer& operator=(pretty_printer const&);
175 };
176
// Tag names that tidy_compiler registers as block tags when the output is
// HTML.
char const* html_block_tags_[] = {
    "div", "p", "blockquote", "address",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "ul", "ol", "li",
    "dl", "dt", "dd",
    "table", "tr", "th", "td", "tbody", "thead",
    "form", "fieldset", "hr", "noscript",
    "html", "body"};

// Tag names that tidy_compiler registers as block tags when the output is
// not HTML.
char const* block_tags_[] = {
    "author", "blockquote", "bridgehead",
    "callout", "calloutlist", "caution", "copyright",
    "entry", "important", "informaltable", "itemizedlist",
    "legalnotice", "listitem", "note", "orderedlist",
    "para", "row", "section", "simpara",
    "table", "tbody", "textobject", "tgroup", "thead",
    "tip", "variablelist", "varlistentry", "warning",
    "xml", "xi:include"};

// Document element names. For non-HTML output tidy_compiler registers each
// name, plus the name with "info" and "purpose" appended, as block tags.
char const* doc_types_[] = {
    "book", "article", "library", "chapter",
    "part", "appendix", "preface",
    "qandadiv", "qandaset",
    "reference", "set"};
196
197 struct tidy_compiler
198 {
199 tidy_compiler(std::string& out_, int linewidth_, bool is_html)
200 : out(out_)
201 , current_indent(0)
202 , printer(out_, current_indent, linewidth_)
203 {
204 if (is_html) {
205 static std::size_t const n_block_tags =
206 sizeof(html_block_tags_) / sizeof(char const*);
207 for (std::size_t i = 0; i != n_block_tags; ++i) {
208 block_tags.insert(html_block_tags_[i]);
209 }
210 }
211 else {
212 static std::size_t const n_block_tags =
213 sizeof(block_tags_) / sizeof(char const*);
214 for (std::size_t i = 0; i != n_block_tags; ++i) {
215 block_tags.insert(block_tags_[i]);
216 }
217
218 static std::size_t const n_doc_types =
219 sizeof(doc_types_) / sizeof(char const*);
220 for (std::size_t i = 0; i != n_doc_types; ++i) {
221 block_tags.insert(doc_types_[i]);
222 block_tags.insert(doc_types_[i] + std::string("info"));
223 block_tags.insert(doc_types_[i] + std::string("purpose"));
224 }
225 }
226 }
227
228 bool is_flow_tag(std::string const& tag)
229 {
230 return block_tags.find(tag) == block_tags.end();
231 }
232
233 std::set<std::string> block_tags;
234 std::stack<std::string> tags;
235 std::string& out;
236 int current_indent;
237 pretty_printer printer;
238 std::string current_tag;
239
240 private:
241 tidy_compiler& operator=(tidy_compiler const&);
242 };
243
244 struct tidy_grammar : cl::grammar<tidy_grammar>
245 {
246 tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_)
247 : state(state_), indent(indent_), is_html(is_html_)
248 {
249 }
250
251 template <typename Scanner> struct definition
252 {
253 definition(tidy_grammar const& self)
254 {
255 // clang-format off
256
257 tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)];
258
259 code = cl::eps_p(ph::var(self.is_html))
260 >> "<"
261 >> cl::lexeme_d[cl::str_p("pre")]
262 >> *(cl::anychar_p - '>')
263 >> ">"
264 >> *(cl::anychar_p - "</pre>")
265 >> "</pre"
266 >> cl::lexeme_d[">" >> *cl::space_p]
267 | cl::eps_p(!ph::var(self.is_html))
268 >> "<programlisting>"
269 >> *(cl::anychar_p - "</programlisting>")
270 >> "</programlisting"
271 >> cl::lexeme_d[">" >> *cl::space_p]
272 ;
273
274 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ?
275 // It is there to preserve the space after the tag that is
276 // otherwise consumed by the cl::space_p skipper.
277
278 escape =
279 cl::str_p("<!--quickbook-escape-prefix-->") >>
280 (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->")))
281 [
282 boost::bind(&tidy_grammar::do_escape, &self, _1, _2)
283 ]
284 >> cl::lexeme_d
285 [
286 cl::str_p("<!--quickbook-escape-postfix-->") >>
287 (*cl::space_p)
288 [
289 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2)
290 ]
291 ]
292 ;
293
294 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
295 start_end_tag =
296 '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p]
297 | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p]
298 | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p]
299 | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]
300 ;
301 content = cl::lexeme_d[ +(cl::anychar_p - '<') ];
302 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
303
304 markup =
305 escape
306 | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)]
307 | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)]
308 | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)]
309 | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)]
310 | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)]
311 ;
312
313 tidy = *markup;
314
315 // clang-format on
316 }
317
318 cl::rule<Scanner> const& start() { return tidy; }
319
320 cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content,
321 end_tag, markup, code, escape;
322 };
323
324 void do_escape_post(iter_type f, iter_type l) const
325 {
326 for (iter_type i = f; i != l; ++i)
327 state.out += *i;
328 }
329
330 void do_escape(iter_type f, iter_type l) const
331 {
332 while (f != l && std::isspace(*f)) {
333 ++f;
334 }
335 while (f != l && std::isspace(*(l - 1))) {
336 --l;
337 }
338 for (iter_type i = f; i != l; ++i) {
339 state.out += *i;
340 }
341 }
342
343 void do_code(iter_type f, iter_type l) const
344 {
345 state.printer.trim_spaces();
346 if (state.out[state.out.size() - 1] != '\n') state.out += '\n';
347
348 // trim trailing space from after closing tag
349 while (f != l && std::isspace(*(l - 1))) {
350 --l;
351 }
352
353 // print the string taking care of line
354 // ending CR/LF platform issues
355 for (iter_type i = f; i != l;) {
356 if (*i == '\n') {
357 state.printer.trim_spaces();
358 state.out += '\n';
359 ++i;
360 if (i != l && *i == '\r') {
361 ++i;
362 }
363 }
364 else if (*i == '\r') {
365 state.printer.trim_spaces();
366 state.out += '\n';
367 ++i;
368 if (i != l && *i == '\n') {
369 ++i;
370 }
371 }
372 else {
373 state.out += *i;
374 ++i;
375 }
376 }
377 state.out += '\n';
378 state.printer.indent();
379 }
380
381 void do_tag(iter_type f, iter_type l) const
382 {
383 state.current_tag = std::string(f, l);
384 }
385
386 void do_start_end_tag(iter_type f, iter_type l) const
387 {
388 bool is_flow_tag = state.is_flow_tag(state.current_tag);
389 if (!is_flow_tag) state.printer.align_indent();
390 state.printer.print_tag(f, l, is_flow_tag);
391 if (!is_flow_tag) state.printer.break_line();
392 }
393
394 void do_start_tag(iter_type f, iter_type l) const
395 {
396 state.tags.push(state.current_tag);
397 bool is_flow_tag = state.is_flow_tag(state.current_tag);
398 if (!is_flow_tag) state.printer.align_indent();
399 state.printer.print_tag(f, l, is_flow_tag);
400 if (!is_flow_tag) {
401 state.current_indent += indent;
402 state.printer.break_line();
403 }
404 }
405
406 void do_content(iter_type f, iter_type l) const
407 {
408 state.printer.print(f, l);
409 }
410
411 void do_end_tag(iter_type f, iter_type l) const
412 {
413 if (state.tags.empty())
414 throw quickbook::post_process_failure("Mismatched tags.");
415
416 bool is_flow_tag = state.is_flow_tag(state.tags.top());
417 if (!is_flow_tag) {
418 state.current_indent -= indent;
419 state.printer.align_indent();
420 }
421 state.printer.print_tag(f, l, is_flow_tag);
422 if (!is_flow_tag) state.printer.break_line();
423 state.tags.pop();
424 }
425
426 tidy_compiler& state;
427 int indent;
428 bool is_html;
429
430 private:
431 tidy_grammar& operator=(tidy_grammar const&);
432 };
433
434 std::string post_process(
435 std::string const& in, int indent, int linewidth, bool is_html)
436 {
437 if (indent == -1) indent = 2; // set default to 2
438 if (linewidth == -1) linewidth = 80; // set default to 80
439
440 std::string tidy;
441 tidy_compiler state(tidy, linewidth, is_html);
442 tidy_grammar g(state, indent, is_html);
443 cl::parse_info<iter_type> r =
444 parse(in.begin(), in.end(), g, cl::space_p);
445 if (r.full) {
446 return tidy;
447 }
448 else {
449 throw quickbook::post_process_failure("Post Processing Failed.");
450 }
451 }
452 }