]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/tools/quickbook/src/post_process.cpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / tools / quickbook / src / post_process.cpp
1 /*=============================================================================
2 Copyright (c) 2005 2006 Joel de Guzman
3 http://spirit.sourceforge.net/
4
5 Use, modification and distribution is subject to the Boost Software
6 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 http://www.boost.org/LICENSE_1_0.txt)
8 =============================================================================*/
9 #include "post_process.hpp"
10 #include <cctype>
11 #include <set>
12 #include <stack>
13 #include <boost/bind/bind.hpp>
14 #include <boost/spirit/include/classic_core.hpp>
15 #include <boost/spirit/include/phoenix1_operators.hpp>
16 #include <boost/spirit/include/phoenix1_primitives.hpp>
17
18 using namespace boost::placeholders;
19
20 namespace quickbook
21 {
22 namespace cl = boost::spirit::classic;
23 namespace ph = phoenix;
24 typedef std::string::const_iterator iter_type;
25
26 struct pretty_printer
27 {
28 pretty_printer(std::string& out_, int& current_indent_, int linewidth_)
29 : prev(0)
30 , out(out_)
31 , current_indent(current_indent_)
32 , column(0)
33 , in_string(false)
34 , linewidth(linewidth_)
35 {
36 }
37
38 void indent()
39 {
40 BOOST_ASSERT(current_indent >= 0); // this should not happen!
41 for (int i = 0; i < current_indent; ++i)
42 out += ' ';
43 column = current_indent;
44 }
45
46 void trim_spaces()
47 {
48 out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces
49 }
50
51 void break_line()
52 {
53 trim_spaces();
54 out += '\n';
55 indent();
56 }
57
58 bool line_is_empty() const
59 {
60 for (iter_type i = out.end() - (column - current_indent);
61 i != out.end(); ++i) {
62 if (*i != ' ') return false;
63 }
64 return true;
65 }
66
67 void align_indent()
68 {
69 // make sure we are at the proper indent position
70 if (column != current_indent) {
71 if (column > current_indent) {
72 if (line_is_empty()) {
73 // trim just enough trailing spaces down to
74 // current_indent position
75 out.erase(
76 out.end() - (column - current_indent), out.end());
77 column = current_indent;
78 }
79 else {
80 // nope, line is not empty. do a hard CR
81 break_line();
82 }
83 }
84 else {
85 // will this happen? (i.e. column <= current_indent)
86 while (column != current_indent) {
87 out += ' ';
88 ++column;
89 }
90 }
91 }
92 }
93
94 void print(char ch)
95 {
96 // Print a char. Attempt to break the line if we are exceeding
97 // the target linewidth. The linewidth is not an absolute limit.
98 // There are many cases where a line will exceed the linewidth
99 // and there is no way to properly break the line. Preformatted
100 // code that exceeds the linewidth are examples. We cannot break
101 // preformatted code. We shall not attempt to be very strict with
102 // line breaking. What's more important is to have a reproducable
103 // output (i.e. processing two logically equivalent xml files
104 // results in two lexically equivalent xml files). *** pretty
105 // formatting is a secondary goal ***
106
107 // Strings will occur only in tag attributes. Normal content
108 // will have &quot; instead. We shall deal only with tag
109 // attributes here.
110 if (ch == '"') in_string = !in_string; // don't break strings!
111
112 if (!in_string && std::isspace(static_cast<unsigned char>(ch))) {
113 // we can break spaces if they are not inside strings
114 if (!std::isspace(static_cast<unsigned char>(prev))) {
115 if (column >= linewidth) {
116 break_line();
117 if (column == 0 && ch == ' ') {
118 ++column;
119 out += ' ';
120 }
121 }
122 else {
123 ++column;
124 out += ' ';
125 }
126 }
127 }
128 else {
129 // we can break tag boundaries and stuff after
130 // delimiters if they are not inside strings
131 // and *only-if* the preceding char is a space
132 if (!in_string && column >= linewidth &&
133 (ch == '<' &&
134 std::isspace(static_cast<unsigned char>(prev))))
135 break_line();
136 out += ch;
137 ++column;
138 }
139
140 prev = ch;
141 }
142
143 void print(iter_type f, iter_type l)
144 {
145 for (iter_type i = f; i != l; ++i)
146 print(*i);
147 }
148
149 void print_tag(iter_type f, iter_type l, bool is_flow_tag)
150 {
151 if (is_flow_tag) {
152 print(f, l);
153 }
154 else {
155 // This is not a flow tag, so, we're going to do a
156 // carriage return anyway. Let us remove extra right
157 // spaces.
158 std::string str(f, l);
159 BOOST_ASSERT(f != l); // this should not happen
160 iter_type i = str.end();
161 while (i != str.begin() &&
162 std::isspace(static_cast<unsigned char>(*(i - 1))))
163 --i;
164 print(str.begin(), i);
165 }
166 }
167
168 char prev;
169 std::string& out;
170 int& current_indent;
171 int column;
172 bool in_string;
173 int linewidth;
174
175 private:
176 pretty_printer& operator=(pretty_printer const&);
177 };
178
// Tag names registered as block tags when tidying HTML.
char const* html_block_tags_[] = {
    "div",   "p",    "blockquote", "address",
    "h1",    "h2",   "h3",         "h4",
    "h5",    "h6",   "ul",         "ol",
    "li",    "dl",   "dt",         "dd",
    "table", "tr",   "th",         "td",
    "tbody", "thead", "form",      "fieldset",
    "hr",    "noscript", "html",   "body"};

// Tag names registered as block tags when the output is not HTML.
char const* block_tags_[] = {
    "author",       "blockquote",
    "bridgehead",   "callout",
    "calloutlist",  "caution",
    "copyright",    "entry",
    "important",    "informaltable",
    "itemizedlist", "legalnotice",
    "listitem",     "note",
    "orderedlist",  "para",
    "row",          "section",
    "simpara",      "table",
    "tbody",        "textobject",
    "tgroup",       "thead",
    "tip",          "variablelist",
    "varlistentry", "warning",
    "xml",          "xi:include"};

// Document root element names. For non-HTML output each of these is
// registered as a block tag, together with its "<name>info" and
// "<name>purpose" variants.
char const* doc_types_[] = {
    "book",     "article",  "library",
    "chapter",  "part",     "appendix",
    "preface",  "qandadiv", "qandaset",
    "reference", "set"};
198
199 struct tidy_compiler
200 {
201 tidy_compiler(std::string& out_, int linewidth_, bool is_html)
202 : out(out_)
203 , current_indent(0)
204 , printer(out_, current_indent, linewidth_)
205 {
206 if (is_html) {
207 static std::size_t const n_block_tags =
208 sizeof(html_block_tags_) / sizeof(char const*);
209 for (std::size_t i = 0; i != n_block_tags; ++i) {
210 block_tags.insert(html_block_tags_[i]);
211 }
212 }
213 else {
214 static std::size_t const n_block_tags =
215 sizeof(block_tags_) / sizeof(char const*);
216 for (std::size_t i = 0; i != n_block_tags; ++i) {
217 block_tags.insert(block_tags_[i]);
218 }
219
220 static std::size_t const n_doc_types =
221 sizeof(doc_types_) / sizeof(char const*);
222 for (std::size_t i = 0; i != n_doc_types; ++i) {
223 block_tags.insert(doc_types_[i]);
224 block_tags.insert(doc_types_[i] + std::string("info"));
225 block_tags.insert(doc_types_[i] + std::string("purpose"));
226 }
227 }
228 }
229
230 bool is_flow_tag(std::string const& tag)
231 {
232 return block_tags.find(tag) == block_tags.end();
233 }
234
235 std::set<std::string> block_tags;
236 std::stack<std::string> tags;
237 std::string& out;
238 int current_indent;
239 pretty_printer printer;
240 std::string current_tag;
241
242 private:
243 tidy_compiler& operator=(tidy_compiler const&);
244 };
245
246 struct tidy_grammar : cl::grammar<tidy_grammar>
247 {
248 tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_)
249 : state(state_), indent(indent_), is_html(is_html_)
250 {
251 }
252
253 template <typename Scanner> struct definition
254 {
255 definition(tidy_grammar const& self)
256 {
257 // clang-format off
258
259 tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)];
260
261 code = cl::eps_p(ph::var(self.is_html))
262 >> "<"
263 >> cl::lexeme_d[cl::str_p("pre")]
264 >> *(cl::anychar_p - '>')
265 >> ">"
266 >> *(cl::anychar_p - "</pre>")
267 >> "</pre"
268 >> cl::lexeme_d[">" >> *cl::space_p]
269 | cl::eps_p(!ph::var(self.is_html))
270 >> "<programlisting>"
271 >> *(cl::anychar_p - "</programlisting>")
272 >> "</programlisting"
273 >> cl::lexeme_d[">" >> *cl::space_p]
274 ;
275
276 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ?
277 // It is there to preserve the space after the tag that is
278 // otherwise consumed by the cl::space_p skipper.
279
280 escape =
281 cl::str_p("<!--quickbook-escape-prefix-->") >>
282 (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->")))
283 [
284 boost::bind(&tidy_grammar::do_escape, &self, _1, _2)
285 ]
286 >> cl::lexeme_d
287 [
288 cl::str_p("<!--quickbook-escape-postfix-->") >>
289 (*cl::space_p)
290 [
291 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2)
292 ]
293 ]
294 ;
295
296 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
297 start_end_tag =
298 '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p]
299 | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p]
300 | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p]
301 | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]
302 ;
303 content = cl::lexeme_d[ +(cl::anychar_p - '<') ];
304 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
305
306 markup =
307 escape
308 | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)]
309 | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)]
310 | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)]
311 | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)]
312 | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)]
313 ;
314
315 tidy = *markup;
316
317 // clang-format on
318 }
319
320 cl::rule<Scanner> const& start() { return tidy; }
321
322 cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content,
323 end_tag, markup, code, escape;
324 };
325
326 void do_escape_post(iter_type f, iter_type l) const
327 {
328 for (iter_type i = f; i != l; ++i)
329 state.out += *i;
330 }
331
332 void do_escape(iter_type f, iter_type l) const
333 {
334 while (f != l && std::isspace(*f)) {
335 ++f;
336 }
337 while (f != l && std::isspace(*(l - 1))) {
338 --l;
339 }
340 for (iter_type i = f; i != l; ++i) {
341 state.out += *i;
342 }
343 }
344
345 void do_code(iter_type f, iter_type l) const
346 {
347 state.printer.trim_spaces();
348 if (state.out[state.out.size() - 1] != '\n') state.out += '\n';
349
350 // trim trailing space from after closing tag
351 while (f != l && std::isspace(*(l - 1))) {
352 --l;
353 }
354
355 // print the string taking care of line
356 // ending CR/LF platform issues
357 for (iter_type i = f; i != l;) {
358 if (*i == '\n') {
359 state.printer.trim_spaces();
360 state.out += '\n';
361 ++i;
362 if (i != l && *i == '\r') {
363 ++i;
364 }
365 }
366 else if (*i == '\r') {
367 state.printer.trim_spaces();
368 state.out += '\n';
369 ++i;
370 if (i != l && *i == '\n') {
371 ++i;
372 }
373 }
374 else {
375 state.out += *i;
376 ++i;
377 }
378 }
379 state.out += '\n';
380 state.printer.indent();
381 }
382
383 void do_tag(iter_type f, iter_type l) const
384 {
385 state.current_tag = std::string(f, l);
386 }
387
388 void do_start_end_tag(iter_type f, iter_type l) const
389 {
390 bool is_flow_tag = state.is_flow_tag(state.current_tag);
391 if (!is_flow_tag) state.printer.align_indent();
392 state.printer.print_tag(f, l, is_flow_tag);
393 if (!is_flow_tag) state.printer.break_line();
394 }
395
396 void do_start_tag(iter_type f, iter_type l) const
397 {
398 state.tags.push(state.current_tag);
399 bool is_flow_tag = state.is_flow_tag(state.current_tag);
400 if (!is_flow_tag) state.printer.align_indent();
401 state.printer.print_tag(f, l, is_flow_tag);
402 if (!is_flow_tag) {
403 state.current_indent += indent;
404 state.printer.break_line();
405 }
406 }
407
408 void do_content(iter_type f, iter_type l) const
409 {
410 state.printer.print(f, l);
411 }
412
413 void do_end_tag(iter_type f, iter_type l) const
414 {
415 if (state.tags.empty())
416 throw quickbook::post_process_failure("Mismatched tags.");
417
418 bool is_flow_tag = state.is_flow_tag(state.tags.top());
419 if (!is_flow_tag) {
420 state.current_indent -= indent;
421 state.printer.align_indent();
422 }
423 state.printer.print_tag(f, l, is_flow_tag);
424 if (!is_flow_tag) state.printer.break_line();
425 state.tags.pop();
426 }
427
428 tidy_compiler& state;
429 int indent;
430 bool is_html;
431
432 private:
433 tidy_grammar& operator=(tidy_grammar const&);
434 };
435
436 std::string post_process(
437 std::string const& in, int indent, int linewidth, bool is_html)
438 {
439 if (indent == -1) indent = 2; // set default to 2
440 if (linewidth == -1) linewidth = 80; // set default to 80
441
442 std::string tidy;
443 tidy_compiler state(tidy, linewidth, is_html);
444 tidy_grammar g(state, indent, is_html);
445 cl::parse_info<iter_type> r =
446 parse(in.begin(), in.end(), g, cl::space_p);
447 if (r.full) {
448 return tidy;
449 }
450 else {
451 throw quickbook::post_process_failure("Post Processing Failed.");
452 }
453 }
454 }