]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*============================================================================= |
2 | Copyright (c) 2005 2006 Joel de Guzman | |
3 | http://spirit.sourceforge.net/ | |
4 | ||
5 | Use, modification and distribution is subject to the Boost Software | |
6 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at | |
7 | http://www.boost.org/LICENSE_1_0.txt) | |
8 | =============================================================================*/ | |
9 | #include "post_process.hpp" | |
10 | #include <boost/spirit/include/classic_core.hpp> | |
11 | #include <boost/bind.hpp> | |
12 | #include <set> | |
13 | #include <stack> | |
14 | #include <cctype> | |
15 | ||
16 | namespace quickbook | |
17 | { | |
18 | namespace cl = boost::spirit::classic; | |
19 | typedef std::string::const_iterator iter_type; | |
20 | ||
21 | struct printer | |
22 | { | |
23 | printer(std::string& out, int& current_indent, int linewidth) | |
24 | : prev(0), out(out), current_indent(current_indent) , column(0) | |
25 | , in_string(false), linewidth(linewidth) {} | |
26 | ||
27 | void indent() | |
28 | { | |
29 | BOOST_ASSERT(current_indent >= 0); // this should not happen! | |
30 | for (int i = 0; i < current_indent; ++i) | |
31 | out += ' '; | |
32 | column = current_indent; | |
33 | } | |
34 | ||
35 | void trim_spaces() | |
36 | { | |
37 | out.erase(out.find_last_not_of(' ')+1); // trim trailing spaces | |
38 | } | |
39 | ||
40 | void break_line() | |
41 | { | |
42 | trim_spaces(); | |
43 | out += '\n'; | |
44 | indent(); | |
45 | } | |
46 | ||
47 | bool line_is_empty() const | |
48 | { | |
49 | for (iter_type i = out.end()-(column-current_indent); i != out.end(); ++i) | |
50 | { | |
51 | if (*i != ' ') | |
52 | return false; | |
53 | } | |
54 | return true; | |
55 | } | |
56 | ||
57 | void align_indent() | |
58 | { | |
59 | // make sure we are at the proper indent position | |
60 | if (column != current_indent) | |
61 | { | |
62 | if (column > current_indent) | |
63 | { | |
64 | if (line_is_empty()) | |
65 | { | |
66 | // trim just enough trailing spaces down to current_indent position | |
67 | out.erase(out.end()-(column-current_indent), out.end()); | |
68 | column = current_indent; | |
69 | } | |
70 | else | |
71 | { | |
72 | // nope, line is not empty. do a hard CR | |
73 | break_line(); | |
74 | } | |
75 | } | |
76 | else | |
77 | { | |
78 | // will this happen? (i.e. column <= current_indent) | |
79 | while (column != current_indent) | |
80 | { | |
81 | out += ' '; | |
82 | ++column; | |
83 | } | |
84 | } | |
85 | } | |
86 | } | |
87 | ||
88 | void print(char ch) | |
89 | { | |
90 | // Print a char. Attempt to break the line if we are exceeding | |
91 | // the target linewidth. The linewidth is not an absolute limit. | |
92 | // There are many cases where a line will exceed the linewidth | |
93 | // and there is no way to properly break the line. Preformatted | |
94 | // code that exceeds the linewidth are examples. We cannot break | |
95 | // preformatted code. We shall not attempt to be very strict with | |
96 | // line breaking. What's more important is to have a reproducable | |
97 | // output (i.e. processing two logically equivalent xml files | |
98 | // results in two lexically equivalent xml files). *** pretty | |
99 | // formatting is a secondary goal *** | |
100 | ||
101 | // Strings will occur only in tag attributes. Normal content | |
102 | // will have " instead. We shall deal only with tag | |
103 | // attributes here. | |
104 | if (ch == '"') | |
105 | in_string = !in_string; // don't break strings! | |
106 | ||
107 | if (!in_string && std::isspace(static_cast<unsigned char>(ch))) | |
108 | { | |
109 | // we can break spaces if they are not inside strings | |
110 | if (!std::isspace(static_cast<unsigned char>(prev))) | |
111 | { | |
112 | if (column >= linewidth) | |
113 | { | |
114 | break_line(); | |
115 | if (column == 0 && ch == ' ') | |
116 | { | |
117 | ++column; | |
118 | out += ' '; | |
119 | } | |
120 | } | |
121 | else | |
122 | { | |
123 | ++column; | |
124 | out += ' '; | |
125 | } | |
126 | } | |
127 | } | |
128 | else | |
129 | { | |
130 | // we can break tag boundaries and stuff after | |
131 | // delimiters if they are not inside strings | |
132 | // and *only-if* the preceding char is a space | |
133 | if (!in_string | |
134 | && column >= linewidth | |
135 | && (ch == '<' && std::isspace(static_cast<unsigned char>(prev)))) | |
136 | break_line(); | |
137 | out += ch; | |
138 | ++column; | |
139 | } | |
140 | ||
141 | prev = ch; | |
142 | } | |
143 | ||
144 | void | |
145 | print(iter_type f, iter_type l) | |
146 | { | |
147 | for (iter_type i = f; i != l; ++i) | |
148 | print(*i); | |
149 | } | |
150 | ||
151 | void | |
152 | print_tag(iter_type f, iter_type l, bool is_flow_tag) | |
153 | { | |
154 | if (is_flow_tag) | |
155 | { | |
156 | print(f, l); | |
157 | } | |
158 | else | |
159 | { | |
160 | // This is not a flow tag, so, we're going to do a | |
161 | // carriage return anyway. Let us remove extra right | |
162 | // spaces. | |
163 | std::string str(f, l); | |
164 | BOOST_ASSERT(f != l); // this should not happen | |
165 | iter_type i = str.end(); | |
166 | while (i != str.begin() && std::isspace(static_cast<unsigned char>(*(i-1)))) | |
167 | --i; | |
168 | print(str.begin(), i); | |
169 | } | |
170 | } | |
171 | ||
172 | char prev; | |
173 | std::string& out; | |
174 | int& current_indent; | |
175 | int column; | |
176 | bool in_string; | |
177 | int linewidth; | |
178 | }; | |
179 | ||
// Element names that tidy_compiler registers in its block tag set.
// A tag found in that set is reported as "not a flow tag".
char const* block_tags_[] =
{
    "author",
    "blockquote",
    "bridgehead",
    "callout",
    "calloutlist",
    "caution",
    "copyright",
    "entry",
    "important",
    "informaltable",
    "itemizedlist",
    "legalnotice",
    "listitem",
    "note",
    "orderedlist",
    "para",
    "row",
    "section",
    "simpara",
    "table",
    "tbody",
    "textobject",
    "tgroup",
    "thead",
    "tip",
    "variablelist",
    "varlistentry",
    "warning",
    "xml",
    "xi:include"
};
213 | ||
// Document type element names. tidy_compiler registers each of these
// as a block tag, together with the same name suffixed with "info"
// and with "purpose".
char const* doc_types_[] =
{
    "book",
    "article",
    "library",
    "chapter",
    "part",
    "appendix",
    "preface",
    "qandadiv",
    "qandaset",
    "reference",
    "set"
};
228 | ||
229 | struct tidy_compiler | |
230 | { | |
231 | tidy_compiler(std::string& out, int linewidth) | |
232 | : out(out), current_indent(0), printer_(out, current_indent, linewidth) | |
233 | { | |
234 | static int const n_block_tags = sizeof(block_tags_)/sizeof(char const*); | |
235 | for (int i = 0; i != n_block_tags; ++i) | |
236 | { | |
237 | block_tags.insert(block_tags_[i]); | |
238 | } | |
239 | ||
240 | static int const n_doc_types = sizeof(doc_types_)/sizeof(char const*); | |
241 | for (int i = 0; i != n_doc_types; ++i) | |
242 | { | |
243 | block_tags.insert(doc_types_[i]); | |
244 | block_tags.insert(doc_types_[i] + std::string("info")); | |
245 | block_tags.insert(doc_types_[i] + std::string("purpose")); | |
246 | } | |
247 | } | |
248 | ||
249 | bool is_flow_tag(std::string const& tag) | |
250 | { | |
251 | return block_tags.find(tag) == block_tags.end(); | |
252 | } | |
253 | ||
254 | std::set<std::string> block_tags; | |
255 | std::stack<std::string> tags; | |
256 | std::string& out; | |
257 | int current_indent; | |
258 | printer printer_; | |
259 | std::string current_tag; | |
260 | }; | |
261 | ||
262 | struct tidy_grammar : cl::grammar<tidy_grammar> | |
263 | { | |
264 | tidy_grammar(tidy_compiler& state, int indent) | |
265 | : state(state), indent(indent) {} | |
266 | ||
267 | template <typename Scanner> | |
268 | struct definition | |
269 | { | |
270 | definition(tidy_grammar const& self) | |
271 | { | |
272 | tag = (cl::lexeme_d[+(cl::alpha_p | '_' | ':')]) [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)]; | |
273 | ||
274 | code = | |
275 | "<programlisting>" | |
276 | >> *(cl::anychar_p - "</programlisting>") | |
277 | >> "</programlisting>" | |
278 | ; | |
279 | ||
280 | // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ? | |
281 | // It is there to preserve the space after the tag that is | |
282 | // otherwise consumed by the cl::space_p skipper. | |
283 | ||
284 | escape = | |
285 | cl::str_p("<!--quickbook-escape-prefix-->") >> | |
286 | (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->"))) | |
287 | [ | |
288 | boost::bind(&tidy_grammar::do_escape, &self, _1, _2) | |
289 | ] | |
290 | >> cl::lexeme_d | |
291 | [ | |
292 | cl::str_p("<!--quickbook-escape-postfix-->") >> | |
293 | (*cl::space_p) | |
294 | [ | |
295 | boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2) | |
296 | ] | |
297 | ] | |
298 | ; | |
299 | ||
300 | start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; | |
301 | start_end_tag = | |
302 | '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p] | |
303 | | "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p] | |
304 | | "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p] | |
305 | | "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p] | |
306 | ; | |
307 | content = cl::lexeme_d[ +(cl::anychar_p - '<') ]; | |
308 | end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]; | |
309 | ||
310 | markup = | |
311 | escape | |
312 | | code [boost::bind(&tidy_grammar::do_code, &self, _1, _2)] | |
313 | | start_end_tag [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)] | |
314 | | start_tag [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)] | |
315 | | end_tag [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)] | |
316 | | content [boost::bind(&tidy_grammar::do_content, &self, _1, _2)] | |
317 | ; | |
318 | ||
319 | tidy = +markup; | |
320 | } | |
321 | ||
322 | cl::rule<Scanner> const& | |
323 | start() { return tidy; } | |
324 | ||
325 | cl::rule<Scanner> | |
326 | tidy, tag, start_tag, start_end_tag, | |
327 | content, end_tag, markup, code, escape; | |
328 | }; | |
329 | ||
330 | void do_escape_post(iter_type f, iter_type l) const | |
331 | { | |
332 | for (iter_type i = f; i != l; ++i) | |
333 | state.out += *i; | |
334 | } | |
335 | ||
336 | void do_escape(iter_type f, iter_type l) const | |
337 | { | |
338 | while (f != l && std::isspace(*f)) | |
339 | ++f; | |
340 | for (iter_type i = f; i != l; ++i) | |
341 | state.out += *i; | |
342 | } | |
343 | ||
344 | void do_code(iter_type f, iter_type l) const | |
345 | { | |
346 | state.printer_.trim_spaces(); | |
347 | if (state.out[state.out.size() - 1] != '\n') | |
348 | state.out += '\n'; | |
349 | // print the string taking care of line | |
350 | // ending CR/LF platform issues | |
351 | for (iter_type i = f; i != l; ++i) | |
352 | { | |
353 | if (*i == '\n') | |
354 | { | |
355 | state.printer_.trim_spaces(); | |
356 | state.out += '\n'; | |
357 | ++i; | |
358 | if (i != l && *i != '\r') | |
359 | state.out += *i; | |
360 | } | |
361 | else if (*i == '\r') | |
362 | { | |
363 | state.printer_.trim_spaces(); | |
364 | state.out += '\n'; | |
365 | ++i; | |
366 | if (i != l && *i != '\n') | |
367 | state.out += *i; | |
368 | } | |
369 | else | |
370 | { | |
371 | state.out += *i; | |
372 | } | |
373 | } | |
374 | state.out += '\n'; | |
375 | state.printer_.indent(); | |
376 | } | |
377 | ||
378 | void do_tag(iter_type f, iter_type l) const | |
379 | { | |
380 | state.current_tag = std::string(f, l); | |
381 | } | |
382 | ||
383 | void do_start_end_tag(iter_type f, iter_type l) const | |
384 | { | |
385 | bool is_flow_tag = state.is_flow_tag(state.current_tag); | |
386 | if (!is_flow_tag) | |
387 | state.printer_.align_indent(); | |
388 | state.printer_.print_tag(f, l, is_flow_tag); | |
389 | if (!is_flow_tag) | |
390 | state.printer_.break_line(); | |
391 | } | |
392 | ||
393 | void do_start_tag(iter_type f, iter_type l) const | |
394 | { | |
395 | state.tags.push(state.current_tag); | |
396 | bool is_flow_tag = state.is_flow_tag(state.current_tag); | |
397 | if (!is_flow_tag) | |
398 | state.printer_.align_indent(); | |
399 | state.printer_.print_tag(f, l, is_flow_tag); | |
400 | if (!is_flow_tag) | |
401 | { | |
402 | state.current_indent += indent; | |
403 | state.printer_.break_line(); | |
404 | } | |
405 | } | |
406 | ||
407 | void do_content(iter_type f, iter_type l) const | |
408 | { | |
409 | state.printer_.print(f, l); | |
410 | } | |
411 | ||
412 | void do_end_tag(iter_type f, iter_type l) const | |
413 | { | |
414 | if (state.tags.empty()) | |
415 | throw quickbook::post_process_failure("Mismatched tags."); | |
416 | ||
417 | bool is_flow_tag = state.is_flow_tag(state.tags.top()); | |
418 | if (!is_flow_tag) | |
419 | { | |
420 | state.current_indent -= indent; | |
421 | state.printer_.align_indent(); | |
422 | } | |
423 | state.printer_.print_tag(f, l, is_flow_tag); | |
424 | if (!is_flow_tag) | |
425 | state.printer_.break_line(); | |
426 | state.tags.pop(); | |
427 | } | |
428 | ||
429 | tidy_compiler& state; | |
430 | int indent; | |
431 | }; | |
432 | ||
433 | std::string post_process( | |
434 | std::string const& in | |
435 | , int indent | |
436 | , int linewidth) | |
437 | { | |
438 | if (indent == -1) | |
439 | indent = 2; // set default to 2 | |
440 | if (linewidth == -1) | |
441 | linewidth = 80; // set default to 80 | |
442 | ||
443 | std::string tidy; | |
444 | tidy_compiler state(tidy, linewidth); | |
445 | tidy_grammar g(state, indent); | |
446 | cl::parse_info<iter_type> r = parse(in.begin(), in.end(), g, cl::space_p); | |
447 | if (r.full) | |
448 | { | |
449 | return tidy; | |
450 | } | |
451 | else | |
452 | { | |
453 | throw quickbook::post_process_failure("Post Processing Failed."); | |
454 | } | |
455 | } | |
456 | } | |
457 |