]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/tools/quickbook/src/post_process.cpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / tools / quickbook / src / post_process.cpp
CommitLineData
7c673cae
FG
/*=============================================================================
    Copyright (c) 2005 2006 Joel de Guzman
    http://spirit.sourceforge.net/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
9#include "post_process.hpp"
11fdf7f2 10#include <cctype>
7c673cae
FG
11#include <set>
12#include <stack>
20effc67 13#include <boost/bind/bind.hpp>
11fdf7f2 14#include <boost/spirit/include/classic_core.hpp>
92f5a8d4
TL
15#include <boost/spirit/include/phoenix1_operators.hpp>
16#include <boost/spirit/include/phoenix1_primitives.hpp>
7c673cae 17
20effc67
TL
18using namespace boost::placeholders;
19
7c673cae
FG
20namespace quickbook
21{
22 namespace cl = boost::spirit::classic;
92f5a8d4 23 namespace ph = phoenix;
7c673cae
FG
24 typedef std::string::const_iterator iter_type;
25
92f5a8d4 26 struct pretty_printer
7c673cae 27 {
92f5a8d4 28 pretty_printer(std::string& out_, int& current_indent_, int linewidth_)
11fdf7f2
TL
29 : prev(0)
30 , out(out_)
31 , current_indent(current_indent_)
32 , column(0)
33 , in_string(false)
34 , linewidth(linewidth_)
35 {
36 }
7c673cae
FG
37
38 void indent()
39 {
40 BOOST_ASSERT(current_indent >= 0); // this should not happen!
41 for (int i = 0; i < current_indent; ++i)
42 out += ' ';
43 column = current_indent;
44 }
45
46 void trim_spaces()
47 {
11fdf7f2 48 out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces
7c673cae
FG
49 }
50
51 void break_line()
52 {
53 trim_spaces();
54 out += '\n';
55 indent();
56 }
57
58 bool line_is_empty() const
59 {
11fdf7f2
TL
60 for (iter_type i = out.end() - (column - current_indent);
61 i != out.end(); ++i) {
62 if (*i != ' ') return false;
7c673cae
FG
63 }
64 return true;
65 }
66
67 void align_indent()
68 {
69 // make sure we are at the proper indent position
11fdf7f2
TL
70 if (column != current_indent) {
71 if (column > current_indent) {
72 if (line_is_empty()) {
73 // trim just enough trailing spaces down to
74 // current_indent position
75 out.erase(
76 out.end() - (column - current_indent), out.end());
7c673cae
FG
77 column = current_indent;
78 }
11fdf7f2 79 else {
7c673cae
FG
80 // nope, line is not empty. do a hard CR
81 break_line();
82 }
83 }
11fdf7f2 84 else {
7c673cae 85 // will this happen? (i.e. column <= current_indent)
11fdf7f2 86 while (column != current_indent) {
7c673cae
FG
87 out += ' ';
88 ++column;
89 }
90 }
91 }
92 }
93
94 void print(char ch)
95 {
96 // Print a char. Attempt to break the line if we are exceeding
97 // the target linewidth. The linewidth is not an absolute limit.
98 // There are many cases where a line will exceed the linewidth
99 // and there is no way to properly break the line. Preformatted
100 // code that exceeds the linewidth are examples. We cannot break
101 // preformatted code. We shall not attempt to be very strict with
102 // line breaking. What's more important is to have a reproducable
103 // output (i.e. processing two logically equivalent xml files
104 // results in two lexically equivalent xml files). *** pretty
105 // formatting is a secondary goal ***
106
107 // Strings will occur only in tag attributes. Normal content
108 // will have &quot; instead. We shall deal only with tag
109 // attributes here.
11fdf7f2 110 if (ch == '"') in_string = !in_string; // don't break strings!
7c673cae 111
11fdf7f2 112 if (!in_string && std::isspace(static_cast<unsigned char>(ch))) {
7c673cae 113 // we can break spaces if they are not inside strings
11fdf7f2
TL
114 if (!std::isspace(static_cast<unsigned char>(prev))) {
115 if (column >= linewidth) {
7c673cae 116 break_line();
11fdf7f2 117 if (column == 0 && ch == ' ') {
7c673cae
FG
118 ++column;
119 out += ' ';
120 }
121 }
11fdf7f2 122 else {
7c673cae
FG
123 ++column;
124 out += ' ';
125 }
126 }
127 }
11fdf7f2 128 else {
7c673cae
FG
129 // we can break tag boundaries and stuff after
130 // delimiters if they are not inside strings
131 // and *only-if* the preceding char is a space
11fdf7f2
TL
132 if (!in_string && column >= linewidth &&
133 (ch == '<' &&
134 std::isspace(static_cast<unsigned char>(prev))))
7c673cae
FG
135 break_line();
136 out += ch;
137 ++column;
138 }
139
140 prev = ch;
141 }
142
11fdf7f2 143 void print(iter_type f, iter_type l)
7c673cae
FG
144 {
145 for (iter_type i = f; i != l; ++i)
146 print(*i);
147 }
148
11fdf7f2 149 void print_tag(iter_type f, iter_type l, bool is_flow_tag)
7c673cae 150 {
11fdf7f2 151 if (is_flow_tag) {
7c673cae
FG
152 print(f, l);
153 }
11fdf7f2 154 else {
7c673cae
FG
155 // This is not a flow tag, so, we're going to do a
156 // carriage return anyway. Let us remove extra right
157 // spaces.
158 std::string str(f, l);
159 BOOST_ASSERT(f != l); // this should not happen
160 iter_type i = str.end();
11fdf7f2
TL
161 while (i != str.begin() &&
162 std::isspace(static_cast<unsigned char>(*(i - 1))))
7c673cae
FG
163 --i;
164 print(str.begin(), i);
165 }
166 }
167
168 char prev;
169 std::string& out;
170 int& current_indent;
171 int column;
172 bool in_string;
173 int linewidth;
b32b8144 174
11fdf7f2 175 private:
92f5a8d4 176 pretty_printer& operator=(pretty_printer const&);
7c673cae
FG
177 };
178
92f5a8d4
TL
// Tag names that tidy_compiler registers as block tags when producing
// HTML. Any tag that is not registered is treated as a flow tag.
char const* html_block_tags_[] = {
    // containers and text blocks
    "div", "p", "blockquote", "address",
    // headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    // lists
    "ul", "ol", "li", "dl", "dt", "dd",
    // tables
    "table", "tr", "th", "td", "tbody", "thead",
    // forms and miscellaneous
    "form", "fieldset", "hr", "noscript",
    // document skeleton
    "html", "body"
};

// Tag names that tidy_compiler registers as block tags for non-HTML output.
char const* block_tags_[] = {
    "author",       "blockquote",    "bridgehead",
    "callout",      "calloutlist",   "caution",
    "copyright",    "entry",         "important",
    "informaltable", "itemizedlist", "legalnotice",
    "listitem",     "note",          "orderedlist",
    "para",         "row",           "section",
    "simpara",      "table",         "tbody",
    "textobject",   "tgroup",        "thead",
    "tip",          "variablelist",  "varlistentry",
    "warning",      "xml",           "xi:include"
};

// Document type names for non-HTML output. tidy_compiler registers each
// name as a block tag, together with the name suffixed by "info" and by
// "purpose" (e.g. "book", "bookinfo", "bookpurpose").
char const* doc_types_[] = {
    "book",     "article",  "library",
    "chapter",  "part",     "appendix",
    "preface",  "qandadiv", "qandaset",
    "reference", "set"
};
7c673cae
FG
198
199 struct tidy_compiler
200 {
92f5a8d4 201 tidy_compiler(std::string& out_, int linewidth_, bool is_html)
11fdf7f2
TL
202 : out(out_)
203 , current_indent(0)
92f5a8d4 204 , printer(out_, current_indent, linewidth_)
7c673cae 205 {
92f5a8d4
TL
206 if (is_html) {
207 static std::size_t const n_block_tags =
208 sizeof(html_block_tags_) / sizeof(char const*);
209 for (std::size_t i = 0; i != n_block_tags; ++i) {
210 block_tags.insert(html_block_tags_[i]);
211 }
7c673cae 212 }
92f5a8d4
TL
213 else {
214 static std::size_t const n_block_tags =
215 sizeof(block_tags_) / sizeof(char const*);
216 for (std::size_t i = 0; i != n_block_tags; ++i) {
217 block_tags.insert(block_tags_[i]);
218 }
7c673cae 219
92f5a8d4
TL
220 static std::size_t const n_doc_types =
221 sizeof(doc_types_) / sizeof(char const*);
222 for (std::size_t i = 0; i != n_doc_types; ++i) {
223 block_tags.insert(doc_types_[i]);
224 block_tags.insert(doc_types_[i] + std::string("info"));
225 block_tags.insert(doc_types_[i] + std::string("purpose"));
226 }
7c673cae
FG
227 }
228 }
229
230 bool is_flow_tag(std::string const& tag)
231 {
232 return block_tags.find(tag) == block_tags.end();
233 }
234
235 std::set<std::string> block_tags;
236 std::stack<std::string> tags;
237 std::string& out;
238 int current_indent;
92f5a8d4 239 pretty_printer printer;
7c673cae 240 std::string current_tag;
b32b8144 241
11fdf7f2 242 private:
b32b8144 243 tidy_compiler& operator=(tidy_compiler const&);
7c673cae
FG
244 };
245
246 struct tidy_grammar : cl::grammar<tidy_grammar>
247 {
92f5a8d4
TL
248 tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_)
249 : state(state_), indent(indent_), is_html(is_html_)
11fdf7f2
TL
250 {
251 }
7c673cae 252
11fdf7f2 253 template <typename Scanner> struct definition
7c673cae
FG
254 {
255 definition(tidy_grammar const& self)
256 {
11fdf7f2
TL
257 // clang-format off
258
92f5a8d4
TL
259 tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)];
260
261 code = cl::eps_p(ph::var(self.is_html))
262 >> "<"
263 >> cl::lexeme_d[cl::str_p("pre")]
264 >> *(cl::anychar_p - '>')
265 >> ">"
266 >> *(cl::anychar_p - "</pre>")
267 >> "</pre"
268 >> cl::lexeme_d[">" >> *cl::space_p]
269 | cl::eps_p(!ph::var(self.is_html))
270 >> "<programlisting>"
7c673cae 271 >> *(cl::anychar_p - "</programlisting>")
92f5a8d4
TL
272 >> "</programlisting"
273 >> cl::lexeme_d[">" >> *cl::space_p]
7c673cae
FG
274 ;
275
276 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ?
277 // It is there to preserve the space after the tag that is
278 // otherwise consumed by the cl::space_p skipper.
279
280 escape =
281 cl::str_p("<!--quickbook-escape-prefix-->") >>
282 (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->")))
283 [
284 boost::bind(&tidy_grammar::do_escape, &self, _1, _2)
285 ]
286 >> cl::lexeme_d
287 [
288 cl::str_p("<!--quickbook-escape-postfix-->") >>
289 (*cl::space_p)
290 [
291 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2)
292 ]
293 ]
294 ;
295
296 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
297 start_end_tag =
298 '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p]
299 | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p]
300 | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p]
301 | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]
302 ;
303 content = cl::lexeme_d[ +(cl::anychar_p - '<') ];
304 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
305
306 markup =
307 escape
308 | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)]
309 | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)]
310 | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)]
311 | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)]
312 | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)]
313 ;
314
92f5a8d4 315 tidy = *markup;
11fdf7f2
TL
316
317 // clang-format on
7c673cae
FG
318 }
319
11fdf7f2 320 cl::rule<Scanner> const& start() { return tidy; }
7c673cae 321
11fdf7f2
TL
322 cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content,
323 end_tag, markup, code, escape;
7c673cae
FG
324 };
325
326 void do_escape_post(iter_type f, iter_type l) const
327 {
328 for (iter_type i = f; i != l; ++i)
329 state.out += *i;
330 }
331
332 void do_escape(iter_type f, iter_type l) const
333 {
11fdf7f2 334 while (f != l && std::isspace(*f)) {
7c673cae 335 ++f;
11fdf7f2
TL
336 }
337 while (f != l && std::isspace(*(l - 1))) {
338 --l;
339 }
340 for (iter_type i = f; i != l; ++i) {
7c673cae 341 state.out += *i;
11fdf7f2 342 }
7c673cae
FG
343 }
344
345 void do_code(iter_type f, iter_type l) const
346 {
92f5a8d4 347 state.printer.trim_spaces();
11fdf7f2 348 if (state.out[state.out.size() - 1] != '\n') state.out += '\n';
92f5a8d4
TL
349
350 // trim trailing space from after closing tag
351 while (f != l && std::isspace(*(l - 1))) {
352 --l;
353 }
354
7c673cae
FG
355 // print the string taking care of line
356 // ending CR/LF platform issues
92f5a8d4 357 for (iter_type i = f; i != l;) {
11fdf7f2 358 if (*i == '\n') {
92f5a8d4 359 state.printer.trim_spaces();
7c673cae
FG
360 state.out += '\n';
361 ++i;
92f5a8d4
TL
362 if (i != l && *i == '\r') {
363 ++i;
364 }
7c673cae 365 }
11fdf7f2 366 else if (*i == '\r') {
92f5a8d4 367 state.printer.trim_spaces();
7c673cae
FG
368 state.out += '\n';
369 ++i;
92f5a8d4
TL
370 if (i != l && *i == '\n') {
371 ++i;
372 }
7c673cae 373 }
11fdf7f2 374 else {
7c673cae 375 state.out += *i;
92f5a8d4 376 ++i;
7c673cae
FG
377 }
378 }
379 state.out += '\n';
92f5a8d4 380 state.printer.indent();
7c673cae
FG
381 }
382
383 void do_tag(iter_type f, iter_type l) const
384 {
385 state.current_tag = std::string(f, l);
386 }
387
388 void do_start_end_tag(iter_type f, iter_type l) const
389 {
390 bool is_flow_tag = state.is_flow_tag(state.current_tag);
92f5a8d4
TL
391 if (!is_flow_tag) state.printer.align_indent();
392 state.printer.print_tag(f, l, is_flow_tag);
393 if (!is_flow_tag) state.printer.break_line();
7c673cae
FG
394 }
395
396 void do_start_tag(iter_type f, iter_type l) const
397 {
398 state.tags.push(state.current_tag);
399 bool is_flow_tag = state.is_flow_tag(state.current_tag);
92f5a8d4
TL
400 if (!is_flow_tag) state.printer.align_indent();
401 state.printer.print_tag(f, l, is_flow_tag);
11fdf7f2 402 if (!is_flow_tag) {
7c673cae 403 state.current_indent += indent;
92f5a8d4 404 state.printer.break_line();
7c673cae
FG
405 }
406 }
407
408 void do_content(iter_type f, iter_type l) const
409 {
92f5a8d4 410 state.printer.print(f, l);
7c673cae
FG
411 }
412
413 void do_end_tag(iter_type f, iter_type l) const
414 {
415 if (state.tags.empty())
416 throw quickbook::post_process_failure("Mismatched tags.");
11fdf7f2 417
7c673cae 418 bool is_flow_tag = state.is_flow_tag(state.tags.top());
11fdf7f2 419 if (!is_flow_tag) {
7c673cae 420 state.current_indent -= indent;
92f5a8d4 421 state.printer.align_indent();
7c673cae 422 }
92f5a8d4
TL
423 state.printer.print_tag(f, l, is_flow_tag);
424 if (!is_flow_tag) state.printer.break_line();
7c673cae
FG
425 state.tags.pop();
426 }
427
428 tidy_compiler& state;
429 int indent;
92f5a8d4 430 bool is_html;
b32b8144 431
11fdf7f2 432 private:
b32b8144 433 tidy_grammar& operator=(tidy_grammar const&);
7c673cae
FG
434 };
435
92f5a8d4
TL
436 std::string post_process(
437 std::string const& in, int indent, int linewidth, bool is_html)
7c673cae 438 {
11fdf7f2
TL
439 if (indent == -1) indent = 2; // set default to 2
440 if (linewidth == -1) linewidth = 80; // set default to 80
7c673cae
FG
441
442 std::string tidy;
92f5a8d4
TL
443 tidy_compiler state(tidy, linewidth, is_html);
444 tidy_grammar g(state, indent, is_html);
11fdf7f2
TL
445 cl::parse_info<iter_type> r =
446 parse(in.begin(), in.end(), g, cl::space_p);
447 if (r.full) {
7c673cae
FG
448 return tidy;
449 }
11fdf7f2 450 else {
7c673cae
FG
451 throw quickbook::post_process_failure("Post Processing Failed.");
452 }
453 }
454}