]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/tools/quickbook/src/post_process.cpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / tools / quickbook / src / post_process.cpp
1 /*=============================================================================
2 Copyright (c) 2005 2006 Joel de Guzman
3 http://spirit.sourceforge.net/
4
5 Use, modification and distribution is subject to the Boost Software
6 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 http://www.boost.org/LICENSE_1_0.txt)
8 =============================================================================*/
9 #include "post_process.hpp"
10 #include <boost/spirit/include/classic_core.hpp>
11 #include <boost/bind.hpp>
12 #include <set>
13 #include <stack>
14 #include <cctype>
15
16 namespace quickbook
17 {
18 namespace cl = boost::spirit::classic;
19 typedef std::string::const_iterator iter_type;
20
21 struct printer
22 {
23 printer(std::string& out_, int& current_indent_, int linewidth_)
24 : prev(0), out(out_), current_indent(current_indent_) , column(0)
25 , in_string(false), linewidth(linewidth_) {}
26
27 void indent()
28 {
29 BOOST_ASSERT(current_indent >= 0); // this should not happen!
30 for (int i = 0; i < current_indent; ++i)
31 out += ' ';
32 column = current_indent;
33 }
34
35 void trim_spaces()
36 {
37 out.erase(out.find_last_not_of(' ')+1); // trim trailing spaces
38 }
39
40 void break_line()
41 {
42 trim_spaces();
43 out += '\n';
44 indent();
45 }
46
47 bool line_is_empty() const
48 {
49 for (iter_type i = out.end()-(column-current_indent); i != out.end(); ++i)
50 {
51 if (*i != ' ')
52 return false;
53 }
54 return true;
55 }
56
57 void align_indent()
58 {
59 // make sure we are at the proper indent position
60 if (column != current_indent)
61 {
62 if (column > current_indent)
63 {
64 if (line_is_empty())
65 {
66 // trim just enough trailing spaces down to current_indent position
67 out.erase(out.end()-(column-current_indent), out.end());
68 column = current_indent;
69 }
70 else
71 {
72 // nope, line is not empty. do a hard CR
73 break_line();
74 }
75 }
76 else
77 {
78 // will this happen? (i.e. column <= current_indent)
79 while (column != current_indent)
80 {
81 out += ' ';
82 ++column;
83 }
84 }
85 }
86 }
87
88 void print(char ch)
89 {
90 // Print a char. Attempt to break the line if we are exceeding
91 // the target linewidth. The linewidth is not an absolute limit.
92 // There are many cases where a line will exceed the linewidth
93 // and there is no way to properly break the line. Preformatted
94 // code that exceeds the linewidth are examples. We cannot break
95 // preformatted code. We shall not attempt to be very strict with
96 // line breaking. What's more important is to have a reproducable
97 // output (i.e. processing two logically equivalent xml files
98 // results in two lexically equivalent xml files). *** pretty
99 // formatting is a secondary goal ***
100
101 // Strings will occur only in tag attributes. Normal content
102 // will have &quot; instead. We shall deal only with tag
103 // attributes here.
104 if (ch == '"')
105 in_string = !in_string; // don't break strings!
106
107 if (!in_string && std::isspace(static_cast<unsigned char>(ch)))
108 {
109 // we can break spaces if they are not inside strings
110 if (!std::isspace(static_cast<unsigned char>(prev)))
111 {
112 if (column >= linewidth)
113 {
114 break_line();
115 if (column == 0 && ch == ' ')
116 {
117 ++column;
118 out += ' ';
119 }
120 }
121 else
122 {
123 ++column;
124 out += ' ';
125 }
126 }
127 }
128 else
129 {
130 // we can break tag boundaries and stuff after
131 // delimiters if they are not inside strings
132 // and *only-if* the preceding char is a space
133 if (!in_string
134 && column >= linewidth
135 && (ch == '<' && std::isspace(static_cast<unsigned char>(prev))))
136 break_line();
137 out += ch;
138 ++column;
139 }
140
141 prev = ch;
142 }
143
144 void
145 print(iter_type f, iter_type l)
146 {
147 for (iter_type i = f; i != l; ++i)
148 print(*i);
149 }
150
151 void
152 print_tag(iter_type f, iter_type l, bool is_flow_tag)
153 {
154 if (is_flow_tag)
155 {
156 print(f, l);
157 }
158 else
159 {
160 // This is not a flow tag, so, we're going to do a
161 // carriage return anyway. Let us remove extra right
162 // spaces.
163 std::string str(f, l);
164 BOOST_ASSERT(f != l); // this should not happen
165 iter_type i = str.end();
166 while (i != str.begin() && std::isspace(static_cast<unsigned char>(*(i-1))))
167 --i;
168 print(str.begin(), i);
169 }
170 }
171
172 char prev;
173 std::string& out;
174 int& current_indent;
175 int column;
176 bool in_string;
177 int linewidth;
178
179 private:
180 printer& operator=(printer const&);
181 };
182
// Names of the tags that tidy_compiler registers as block tags; any tag
// not registered there is treated as a flow tag.
char const* block_tags_[] = {
    "author",
    "blockquote",
    "bridgehead",
    "callout",
    "calloutlist",
    "caution",
    "copyright",
    "entry",
    "important",
    "informaltable",
    "itemizedlist",
    "legalnotice",
    "listitem",
    "note",
    "orderedlist",
    "para",
    "row",
    "section",
    "simpara",
    "table",
    "tbody",
    "textobject",
    "tgroup",
    "thead",
    "tip",
    "variablelist",
    "varlistentry",
    "warning",
    "xml",
    "xi:include"
};
216
// Document-level element names. tidy_compiler registers each of these,
// plus the same name with an "info" and a "purpose" suffix, as block
// tags.
char const* doc_types_[] = {
    "book",
    "article",
    "library",
    "chapter",
    "part",
    "appendix",
    "preface",
    "qandadiv",
    "qandaset",
    "reference",
    "set"
};
231
232 struct tidy_compiler
233 {
234 tidy_compiler(std::string& out_, int linewidth_)
235 : out(out_), current_indent(0), printer_(out, current_indent, linewidth_)
236 {
237 static std::size_t const n_block_tags = sizeof(block_tags_)/sizeof(char const*);
238 for (std::size_t i = 0; i != n_block_tags; ++i)
239 {
240 block_tags.insert(block_tags_[i]);
241 }
242
243 static std::size_t const n_doc_types = sizeof(doc_types_)/sizeof(char const*);
244 for (std::size_t i = 0; i != n_doc_types; ++i)
245 {
246 block_tags.insert(doc_types_[i]);
247 block_tags.insert(doc_types_[i] + std::string("info"));
248 block_tags.insert(doc_types_[i] + std::string("purpose"));
249 }
250 }
251
252 bool is_flow_tag(std::string const& tag)
253 {
254 return block_tags.find(tag) == block_tags.end();
255 }
256
257 std::set<std::string> block_tags;
258 std::stack<std::string> tags;
259 std::string& out;
260 int current_indent;
261 printer printer_;
262 std::string current_tag;
263
264 private:
265 tidy_compiler& operator=(tidy_compiler const&);
266 };
267
268 struct tidy_grammar : cl::grammar<tidy_grammar>
269 {
270 tidy_grammar(tidy_compiler& state_, int indent_)
271 : state(state_), indent(indent_) {}
272
273 template <typename Scanner>
274 struct definition
275 {
276 definition(tidy_grammar const& self)
277 {
278 tag = (cl::lexeme_d[+(cl::alpha_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)];
279
280 code =
281 "<programlisting>"
282 >> *(cl::anychar_p - "</programlisting>")
283 >> "</programlisting>"
284 ;
285
286 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ?
287 // It is there to preserve the space after the tag that is
288 // otherwise consumed by the cl::space_p skipper.
289
290 escape =
291 cl::str_p("<!--quickbook-escape-prefix-->") >>
292 (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->")))
293 [
294 boost::bind(&tidy_grammar::do_escape, &self, _1, _2)
295 ]
296 >> cl::lexeme_d
297 [
298 cl::str_p("<!--quickbook-escape-postfix-->") >>
299 (*cl::space_p)
300 [
301 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2)
302 ]
303 ]
304 ;
305
306 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
307 start_end_tag =
308 '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p]
309 | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p]
310 | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p]
311 | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]
312 ;
313 content = cl::lexeme_d[ +(cl::anychar_p - '<') ];
314 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
315
316 markup =
317 escape
318 | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)]
319 | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)]
320 | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)]
321 | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)]
322 | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)]
323 ;
324
325 tidy = +markup;
326 }
327
328 cl::rule<Scanner> const&
329 start() { return tidy; }
330
331 cl::rule<Scanner>
332 tidy, tag, start_tag, start_end_tag,
333 content, end_tag, markup, code, escape;
334 };
335
336 void do_escape_post(iter_type f, iter_type l) const
337 {
338 for (iter_type i = f; i != l; ++i)
339 state.out += *i;
340 }
341
342 void do_escape(iter_type f, iter_type l) const
343 {
344 while (f != l && std::isspace(*f))
345 ++f;
346 for (iter_type i = f; i != l; ++i)
347 state.out += *i;
348 }
349
350 void do_code(iter_type f, iter_type l) const
351 {
352 state.printer_.trim_spaces();
353 if (state.out[state.out.size() - 1] != '\n')
354 state.out += '\n';
355 // print the string taking care of line
356 // ending CR/LF platform issues
357 for (iter_type i = f; i != l; ++i)
358 {
359 if (*i == '\n')
360 {
361 state.printer_.trim_spaces();
362 state.out += '\n';
363 ++i;
364 if (i != l && *i != '\r')
365 state.out += *i;
366 }
367 else if (*i == '\r')
368 {
369 state.printer_.trim_spaces();
370 state.out += '\n';
371 ++i;
372 if (i != l && *i != '\n')
373 state.out += *i;
374 }
375 else
376 {
377 state.out += *i;
378 }
379 }
380 state.out += '\n';
381 state.printer_.indent();
382 }
383
384 void do_tag(iter_type f, iter_type l) const
385 {
386 state.current_tag = std::string(f, l);
387 }
388
389 void do_start_end_tag(iter_type f, iter_type l) const
390 {
391 bool is_flow_tag = state.is_flow_tag(state.current_tag);
392 if (!is_flow_tag)
393 state.printer_.align_indent();
394 state.printer_.print_tag(f, l, is_flow_tag);
395 if (!is_flow_tag)
396 state.printer_.break_line();
397 }
398
399 void do_start_tag(iter_type f, iter_type l) const
400 {
401 state.tags.push(state.current_tag);
402 bool is_flow_tag = state.is_flow_tag(state.current_tag);
403 if (!is_flow_tag)
404 state.printer_.align_indent();
405 state.printer_.print_tag(f, l, is_flow_tag);
406 if (!is_flow_tag)
407 {
408 state.current_indent += indent;
409 state.printer_.break_line();
410 }
411 }
412
413 void do_content(iter_type f, iter_type l) const
414 {
415 state.printer_.print(f, l);
416 }
417
418 void do_end_tag(iter_type f, iter_type l) const
419 {
420 if (state.tags.empty())
421 throw quickbook::post_process_failure("Mismatched tags.");
422
423 bool is_flow_tag = state.is_flow_tag(state.tags.top());
424 if (!is_flow_tag)
425 {
426 state.current_indent -= indent;
427 state.printer_.align_indent();
428 }
429 state.printer_.print_tag(f, l, is_flow_tag);
430 if (!is_flow_tag)
431 state.printer_.break_line();
432 state.tags.pop();
433 }
434
435 tidy_compiler& state;
436 int indent;
437
438 private:
439 tidy_grammar& operator=(tidy_grammar const&);
440 };
441
442 std::string post_process(
443 std::string const& in
444 , int indent
445 , int linewidth)
446 {
447 if (indent == -1)
448 indent = 2; // set default to 2
449 if (linewidth == -1)
450 linewidth = 80; // set default to 80
451
452 std::string tidy;
453 tidy_compiler state(tidy, linewidth);
454 tidy_grammar g(state, indent);
455 cl::parse_info<iter_type> r = parse(in.begin(), in.end(), g, cl::space_p);
456 if (r.full)
457 {
458 return tidy;
459 }
460 else
461 {
462 throw quickbook::post_process_failure("Post Processing Failed.");
463 }
464 }
465 }
466