ceph/src/boost/tools/quickbook/src/post_process.cpp

   1 /*=============================================================================
   2     Copyright (c) 2005 2006 Joel de Guzman
   3     http://spirit.sourceforge.net/
   4
   5     Use, modification and distribution is subject to the Boost Software
   6     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
   7     http://www.boost.org/LICENSE_1_0.txt)
   8 =============================================================================*/
   9 #include "post_process.hpp"
  10 #include <cctype>
  11 #include <set>
  12 #include <stack>
  13 #include <boost/bind.hpp>
  14 #include <boost/spirit/include/classic_core.hpp>
  15 #include <boost/spirit/include/phoenix1_operators.hpp>
  16 #include <boost/spirit/include/phoenix1_primitives.hpp>
  17
  18 namespace quickbook
  19 {
  20     namespace cl = boost::spirit::classic;
  21     namespace ph = phoenix;
  22     typedef std::string::const_iterator iter_type;
  23
  24     struct pretty_printer
  25     {
  26         pretty_printer(std::string& out_, int& current_indent_, int linewidth_)
  27             : prev(0)
  28             , out(out_)
  29             , current_indent(current_indent_)
  30             , column(0)
  31             , in_string(false)
  32             , linewidth(linewidth_)
  33         {
  34         }
  35
  36         void indent()
  37         {
  38             BOOST_ASSERT(current_indent >= 0); // this should not happen!
  39             for (int i = 0; i < current_indent; ++i)
  40                 out += ' ';
  41             column = current_indent;
  42         }
  43
  44         void trim_spaces()
  45         {
  46             out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces
  47         }
  48
  49         void break_line()
  50         {
  51             trim_spaces();
  52             out += '\n';
  53             indent();
  54         }
  55
  56         bool line_is_empty() const
  57         {
  58             for (iter_type i = out.end() - (column - current_indent);
  59                  i != out.end(); ++i) {
  60                 if (*i != ' ') return false;
  61             }
  62             return true;
  63         }
  64
  65         void align_indent()
  66         {
  67             // make sure we are at the proper indent position
  68             if (column != current_indent) {
  69                 if (column > current_indent) {
  70                     if (line_is_empty()) {
  71                         // trim just enough trailing spaces down to
  72                         // current_indent position
  73                         out.erase(
  74                             out.end() - (column - current_indent), out.end());
  75                         column = current_indent;
  76                     }
  77                     else {
  78                         // nope, line is not empty. do a hard CR
  79                         break_line();
  80                     }
  81                 }
  82                 else {
  83                     // will this happen? (i.e. column <= current_indent)
  84                     while (column != current_indent) {
  85                         out += ' ';
  86                         ++column;
  87                     }
  88                 }
  89             }
  90         }
  91
  92         void print(char ch)
  93         {
  94             // Print a char. Attempt to break the line if we are exceeding
  95             // the target linewidth. The linewidth is not an absolute limit.
  96             // There are many cases where a line will exceed the linewidth
  97             // and there is no way to properly break the line. Preformatted
  98             // code that exceeds the linewidth are examples. We cannot break
  99             // preformatted code. We shall not attempt to be very strict with
 100             // line breaking. What's more important is to have a reproducable
 101             // output (i.e. processing two logically equivalent xml files
 102             // results in two lexically equivalent xml files). *** pretty
 103             // formatting is a secondary goal ***
 104
 105             // Strings will occur only in tag attributes. Normal content
 106             // will have &quot; instead. We shall deal only with tag
 107             // attributes here.
 108             if (ch == '"') in_string = !in_string; // don't break strings!
 109
 110             if (!in_string && std::isspace(static_cast<unsigned char>(ch))) {
 111                 // we can break spaces if they are not inside strings
 112                 if (!std::isspace(static_cast<unsigned char>(prev))) {
 113                     if (column >= linewidth) {
 114                         break_line();
 115                         if (column == 0 && ch == ' ') {
 116                             ++column;
 117                             out += ' ';
 118                         }
 119                     }
 120                     else {
 121                         ++column;
 122                         out += ' ';
 123                     }
 124                 }
 125             }
 126             else {
 127                 // we can break tag boundaries and stuff after
 128                 // delimiters if they are not inside strings
 129                 // and *only-if* the preceding char is a space
 130                 if (!in_string && column >= linewidth &&
 131                     (ch == '<' &&
 132                      std::isspace(static_cast<unsigned char>(prev))))
 133                     break_line();
 134                 out += ch;
 135                 ++column;
 136             }
 137
 138             prev = ch;
 139         }
 140
 141         void print(iter_type f, iter_type l)
 142         {
 143             for (iter_type i = f; i != l; ++i)
 144                 print(*i);
 145         }
 146
 147         void print_tag(iter_type f, iter_type l, bool is_flow_tag)
 148         {
 149             if (is_flow_tag) {
 150                 print(f, l);
 151             }
 152             else {
 153                 // This is not a flow tag, so, we're going to do a
 154                 // carriage return anyway. Let us remove extra right
 155                 // spaces.
 156                 std::string str(f, l);
 157                 BOOST_ASSERT(f != l); // this should not happen
 158                 iter_type i = str.end();
 159                 while (i != str.begin() &&
 160                        std::isspace(static_cast<unsigned char>(*(i - 1))))
 161                     --i;
 162                 print(str.begin(), i);
 163             }
 164         }
 165
 166         char prev;
 167         std::string& out;
 168         int& current_indent;
 169         int column;
 170         bool in_string;
 171         int linewidth;
 172
 173       private:
 174         pretty_printer& operator=(pretty_printer const&);
 175     };
 176
 177     char const* html_block_tags_[] = {
 178         "div",   "p",    "blockquote", "address", "h1",       "h2",   "h3",
 179         "h4",    "h5",   "h6",         "ul",      "ol",       "li",   "dl",
 180         "dt",    "dd",   "table",      "tr",      "th",       "td",   "tbody",
 181         "thead", "form", "fieldset",   "hr",      "noscript", "html", "body"};
 182
 183     char const* block_tags_[] = {
 184         "author",      "blockquote",    "bridgehead",   "callout",
 185         "calloutlist", "caution",       "copyright",    "entry",
 186         "important",   "informaltable", "itemizedlist", "legalnotice",
 187         "listitem",    "note",          "orderedlist",  "para",
 188         "row",         "section",       "simpara",      "table",
 189         "tbody",       "textobject",    "tgroup",       "thead",
 190         "tip",         "variablelist",  "varlistentry", "warning",
 191         "xml",         "xi:include"};
 192
 193     char const* doc_types_[] = {"book",     "article",   "library", "chapter",
 194                                 "part",     "appendix",  "preface", "qandadiv",
 195                                 "qandaset", "reference", "set"};
 196
 197     struct tidy_compiler
 198     {
 199         tidy_compiler(std::string& out_, int linewidth_, bool is_html)
 200             : out(out_)
 201             , current_indent(0)
 202             , printer(out_, current_indent, linewidth_)
 203         {
 204             if (is_html) {
 205                 static std::size_t const n_block_tags =
 206                     sizeof(html_block_tags_) / sizeof(char const*);
 207                 for (std::size_t i = 0; i != n_block_tags; ++i) {
 208                     block_tags.insert(html_block_tags_[i]);
 209                 }
 210             }
 211             else {
 212                 static std::size_t const n_block_tags =
 213                     sizeof(block_tags_) / sizeof(char const*);
 214                 for (std::size_t i = 0; i != n_block_tags; ++i) {
 215                     block_tags.insert(block_tags_[i]);
 216                 }
 217
 218                 static std::size_t const n_doc_types =
 219                     sizeof(doc_types_) / sizeof(char const*);
 220                 for (std::size_t i = 0; i != n_doc_types; ++i) {
 221                     block_tags.insert(doc_types_[i]);
 222                     block_tags.insert(doc_types_[i] + std::string("info"));
 223                     block_tags.insert(doc_types_[i] + std::string("purpose"));
 224                 }
 225             }
 226         }
 227
 228         bool is_flow_tag(std::string const& tag)
 229         {
 230             return block_tags.find(tag) == block_tags.end();
 231         }
 232
 233         std::set<std::string> block_tags;
 234         std::stack<std::string> tags;
 235         std::string& out;
 236         int current_indent;
 237         pretty_printer printer;
 238         std::string current_tag;
 239
 240       private:
 241         tidy_compiler& operator=(tidy_compiler const&);
 242     };
 243
 244     struct tidy_grammar : cl::grammar<tidy_grammar>
 245     {
 246         tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_)
 247             : state(state_), indent(indent_), is_html(is_html_)
 248         {
 249         }
 250
 251         template <typename Scanner> struct definition
 252         {
 253             definition(tidy_grammar const& self)
 254             {
 255                 // clang-format off
 256
 257                 tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')])  [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)];
 258
 259                 code =  cl::eps_p(ph::var(self.is_html))
 260                     >>  "<"
 261                     >>  cl::lexeme_d[cl::str_p("pre")]
 262                     >>  *(cl::anychar_p - '>')
 263                     >>  ">"
 264                     >>  *(cl::anychar_p - "</pre>")
 265                     >>  "</pre"
 266                     >>  cl::lexeme_d[">" >> *cl::space_p]
 267                     |   cl::eps_p(!ph::var(self.is_html))
 268                     >>   "<programlisting>"
 269                     >>  *(cl::anychar_p - "</programlisting>")
 270                     >>  "</programlisting"
 271                     >>  cl::lexeme_d[">" >> *cl::space_p]
 272                     ;
 273
 274                 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ?
 275                 // It is there to preserve the space after the tag that is
 276                 // otherwise consumed by the cl::space_p skipper.
 277
 278                 escape =
 279                     cl::str_p("<!--quickbook-escape-prefix-->") >>
 280                     (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->")))
 281                     [
 282                         boost::bind(&tidy_grammar::do_escape, &self, _1, _2)
 283                     ]
 284                     >>  cl::lexeme_d
 285                         [
 286                             cl::str_p("<!--quickbook-escape-postfix-->") >>
 287                             (*cl::space_p)
 288                             [
 289                                 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2)
 290                             ]
 291                         ]
 292                     ;
 293
 294                 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
 295                 start_end_tag =
 296                         '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p]
 297                     |   "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p]
 298                     |   "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p]
 299                     |   "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]
 300                     ;
 301                 content = cl::lexeme_d[ +(cl::anychar_p - '<') ];
 302                 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
 303
 304                 markup =
 305                         escape
 306                     |   code            [boost::bind(&tidy_grammar::do_code, &self, _1, _2)]
 307                     |   start_end_tag   [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)]
 308                     |   start_tag       [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)]
 309                     |   end_tag         [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)]
 310                     |   content         [boost::bind(&tidy_grammar::do_content, &self, _1, _2)]
 311                     ;
 312
 313                 tidy = *markup;
 314
 315                 // clang-format on
 316             }
 317
 318             cl::rule<Scanner> const& start() { return tidy; }
 319
 320             cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content,
 321                 end_tag, markup, code, escape;
 322         };
 323
 324         void do_escape_post(iter_type f, iter_type l) const
 325         {
 326             for (iter_type i = f; i != l; ++i)
 327                 state.out += *i;
 328         }
 329
 330         void do_escape(iter_type f, iter_type l) const
 331         {
 332             while (f != l && std::isspace(*f)) {
 333                 ++f;
 334             }
 335             while (f != l && std::isspace(*(l - 1))) {
 336                 --l;
 337             }
 338             for (iter_type i = f; i != l; ++i) {
 339                 state.out += *i;
 340             }
 341         }
 342
 343         void do_code(iter_type f, iter_type l) const
 344         {
 345             state.printer.trim_spaces();
 346             if (state.out[state.out.size() - 1] != '\n') state.out += '\n';
 347
 348             // trim trailing space from after closing tag
 349             while (f != l && std::isspace(*(l - 1))) {
 350                 --l;
 351             }
 352
 353             // print the string taking care of line
 354             // ending CR/LF platform issues
 355             for (iter_type i = f; i != l;) {
 356                 if (*i == '\n') {
 357                     state.printer.trim_spaces();
 358                     state.out += '\n';
 359                     ++i;
 360                     if (i != l && *i == '\r') {
 361                         ++i;
 362                     }
 363                 }
 364                 else if (*i == '\r') {
 365                     state.printer.trim_spaces();
 366                     state.out += '\n';
 367                     ++i;
 368                     if (i != l && *i == '\n') {
 369                         ++i;
 370                     }
 371                 }
 372                 else {
 373                     state.out += *i;
 374                     ++i;
 375                 }
 376             }
 377             state.out += '\n';
 378             state.printer.indent();
 379         }
 380
 381         void do_tag(iter_type f, iter_type l) const
 382         {
 383             state.current_tag = std::string(f, l);
 384         }
 385
 386         void do_start_end_tag(iter_type f, iter_type l) const
 387         {
 388             bool is_flow_tag = state.is_flow_tag(state.current_tag);
 389             if (!is_flow_tag) state.printer.align_indent();
 390             state.printer.print_tag(f, l, is_flow_tag);
 391             if (!is_flow_tag) state.printer.break_line();
 392         }
 393
 394         void do_start_tag(iter_type f, iter_type l) const
 395         {
 396             state.tags.push(state.current_tag);
 397             bool is_flow_tag = state.is_flow_tag(state.current_tag);
 398             if (!is_flow_tag) state.printer.align_indent();
 399             state.printer.print_tag(f, l, is_flow_tag);
 400             if (!is_flow_tag) {
 401                 state.current_indent += indent;
 402                 state.printer.break_line();
 403             }
 404         }
 405
 406         void do_content(iter_type f, iter_type l) const
 407         {
 408             state.printer.print(f, l);
 409         }
 410
 411         void do_end_tag(iter_type f, iter_type l) const
 412         {
 413             if (state.tags.empty())
 414                 throw quickbook::post_process_failure("Mismatched tags.");
 415
 416             bool is_flow_tag = state.is_flow_tag(state.tags.top());
 417             if (!is_flow_tag) {
 418                 state.current_indent -= indent;
 419                 state.printer.align_indent();
 420             }
 421             state.printer.print_tag(f, l, is_flow_tag);
 422             if (!is_flow_tag) state.printer.break_line();
 423             state.tags.pop();
 424         }
 425
 426         tidy_compiler& state;
 427         int indent;
 428         bool is_html;
 429
 430       private:
 431         tidy_grammar& operator=(tidy_grammar const&);
 432     };
 433
 434     std::string post_process(
 435         std::string const& in, int indent, int linewidth, bool is_html)
 436     {
 437         if (indent == -1) indent = 2;        // set default to 2
 438         if (linewidth == -1) linewidth = 80; // set default to 80
 439
 440         std::string tidy;
 441         tidy_compiler state(tidy, linewidth, is_html);
 442         tidy_grammar g(state, indent, is_html);
 443         cl::parse_info<iter_type> r =
 444             parse(in.begin(), in.end(), g, cl::space_p);
 445         if (r.full) {
 446             return tidy;
 447         }
 448         else {
 449             throw quickbook::post_process_failure("Post Processing Failed.");
 450         }
 451     }
 452 }