ceph/src/boost/tools/quickbook/src/post_process.cpp

   1 /*=============================================================================
   2     Copyright (c) 2005 2006 Joel de Guzman
   3     http://spirit.sourceforge.net/
   4
   5     Use, modification and distribution is subject to the Boost Software
   6     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
   7     http://www.boost.org/LICENSE_1_0.txt)
   8 =============================================================================*/
   9 #include "post_process.hpp"
  10 #include <cctype>
  11 #include <set>
  12 #include <stack>
  13 #include <boost/bind/bind.hpp>
  14 #include <boost/spirit/include/classic_core.hpp>
  15 #include <boost/spirit/include/phoenix1_operators.hpp>
  16 #include <boost/spirit/include/phoenix1_primitives.hpp>
  17
  18 using namespace boost::placeholders;
  19
  20 namespace quickbook
  21 {
  22     namespace cl = boost::spirit::classic;
  23     namespace ph = phoenix;
  24     typedef std::string::const_iterator iter_type;
  25
  26     struct pretty_printer
  27     {
  28         pretty_printer(std::string& out_, int& current_indent_, int linewidth_)
  29             : prev(0)
  30             , out(out_)
  31             , current_indent(current_indent_)
  32             , column(0)
  33             , in_string(false)
  34             , linewidth(linewidth_)
  35         {
  36         }
  37
  38         void indent()
  39         {
  40             BOOST_ASSERT(current_indent >= 0); // this should not happen!
  41             for (int i = 0; i < current_indent; ++i)
  42                 out += ' ';
  43             column = current_indent;
  44         }
  45
  46         void trim_spaces()
  47         {
  48             out.erase(out.find_last_not_of(' ') + 1); // trim trailing spaces
  49         }
  50
  51         void break_line()
  52         {
  53             trim_spaces();
  54             out += '\n';
  55             indent();
  56         }
  57
  58         bool line_is_empty() const
  59         {
  60             for (iter_type i = out.end() - (column - current_indent);
  61                  i != out.end(); ++i) {
  62                 if (*i != ' ') return false;
  63             }
  64             return true;
  65         }
  66
  67         void align_indent()
  68         {
  69             // make sure we are at the proper indent position
  70             if (column != current_indent) {
  71                 if (column > current_indent) {
  72                     if (line_is_empty()) {
  73                         // trim just enough trailing spaces down to
  74                         // current_indent position
  75                         out.erase(
  76                             out.end() - (column - current_indent), out.end());
  77                         column = current_indent;
  78                     }
  79                     else {
  80                         // nope, line is not empty. do a hard CR
  81                         break_line();
  82                     }
  83                 }
  84                 else {
  85                     // will this happen? (i.e. column <= current_indent)
  86                     while (column != current_indent) {
  87                         out += ' ';
  88                         ++column;
  89                     }
  90                 }
  91             }
  92         }
  93
  94         void print(char ch)
  95         {
  96             // Print a char. Attempt to break the line if we are exceeding
  97             // the target linewidth. The linewidth is not an absolute limit.
  98             // There are many cases where a line will exceed the linewidth
  99             // and there is no way to properly break the line. Preformatted
 100             // code that exceeds the linewidth are examples. We cannot break
 101             // preformatted code. We shall not attempt to be very strict with
 102             // line breaking. What's more important is to have a reproducable
 103             // output (i.e. processing two logically equivalent xml files
 104             // results in two lexically equivalent xml files). *** pretty
 105             // formatting is a secondary goal ***
 106
 107             // Strings will occur only in tag attributes. Normal content
 108             // will have &quot; instead. We shall deal only with tag
 109             // attributes here.
 110             if (ch == '"') in_string = !in_string; // don't break strings!
 111
 112             if (!in_string && std::isspace(static_cast<unsigned char>(ch))) {
 113                 // we can break spaces if they are not inside strings
 114                 if (!std::isspace(static_cast<unsigned char>(prev))) {
 115                     if (column >= linewidth) {
 116                         break_line();
 117                         if (column == 0 && ch == ' ') {
 118                             ++column;
 119                             out += ' ';
 120                         }
 121                     }
 122                     else {
 123                         ++column;
 124                         out += ' ';
 125                     }
 126                 }
 127             }
 128             else {
 129                 // we can break tag boundaries and stuff after
 130                 // delimiters if they are not inside strings
 131                 // and *only-if* the preceding char is a space
 132                 if (!in_string && column >= linewidth &&
 133                     (ch == '<' &&
 134                      std::isspace(static_cast<unsigned char>(prev))))
 135                     break_line();
 136                 out += ch;
 137                 ++column;
 138             }
 139
 140             prev = ch;
 141         }
 142
 143         void print(iter_type f, iter_type l)
 144         {
 145             for (iter_type i = f; i != l; ++i)
 146                 print(*i);
 147         }
 148
 149         void print_tag(iter_type f, iter_type l, bool is_flow_tag)
 150         {
 151             if (is_flow_tag) {
 152                 print(f, l);
 153             }
 154             else {
 155                 // This is not a flow tag, so, we're going to do a
 156                 // carriage return anyway. Let us remove extra right
 157                 // spaces.
 158                 std::string str(f, l);
 159                 BOOST_ASSERT(f != l); // this should not happen
 160                 iter_type i = str.end();
 161                 while (i != str.begin() &&
 162                        std::isspace(static_cast<unsigned char>(*(i - 1))))
 163                     --i;
 164                 print(str.begin(), i);
 165             }
 166         }
 167
 168         char prev;
 169         std::string& out;
 170         int& current_indent;
 171         int column;
 172         bool in_string;
 173         int linewidth;
 174
 175       private:
 176         pretty_printer& operator=(pretty_printer const&);
 177     };
 178
 179     char const* html_block_tags_[] = {
 180         "div",   "p",    "blockquote", "address", "h1",       "h2",   "h3",
 181         "h4",    "h5",   "h6",         "ul",      "ol",       "li",   "dl",
 182         "dt",    "dd",   "table",      "tr",      "th",       "td",   "tbody",
 183         "thead", "form", "fieldset",   "hr",      "noscript", "html", "body"};
 184
 185     char const* block_tags_[] = {
 186         "author",      "blockquote",    "bridgehead",   "callout",
 187         "calloutlist", "caution",       "copyright",    "entry",
 188         "important",   "informaltable", "itemizedlist", "legalnotice",
 189         "listitem",    "note",          "orderedlist",  "para",
 190         "row",         "section",       "simpara",      "table",
 191         "tbody",       "textobject",    "tgroup",       "thead",
 192         "tip",         "variablelist",  "varlistentry", "warning",
 193         "xml",         "xi:include"};
 194
 195     char const* doc_types_[] = {"book",     "article",   "library", "chapter",
 196                                 "part",     "appendix",  "preface", "qandadiv",
 197                                 "qandaset", "reference", "set"};
 198
 199     struct tidy_compiler
 200     {
 201         tidy_compiler(std::string& out_, int linewidth_, bool is_html)
 202             : out(out_)
 203             , current_indent(0)
 204             , printer(out_, current_indent, linewidth_)
 205         {
 206             if (is_html) {
 207                 static std::size_t const n_block_tags =
 208                     sizeof(html_block_tags_) / sizeof(char const*);
 209                 for (std::size_t i = 0; i != n_block_tags; ++i) {
 210                     block_tags.insert(html_block_tags_[i]);
 211                 }
 212             }
 213             else {
 214                 static std::size_t const n_block_tags =
 215                     sizeof(block_tags_) / sizeof(char const*);
 216                 for (std::size_t i = 0; i != n_block_tags; ++i) {
 217                     block_tags.insert(block_tags_[i]);
 218                 }
 219
 220                 static std::size_t const n_doc_types =
 221                     sizeof(doc_types_) / sizeof(char const*);
 222                 for (std::size_t i = 0; i != n_doc_types; ++i) {
 223                     block_tags.insert(doc_types_[i]);
 224                     block_tags.insert(doc_types_[i] + std::string("info"));
 225                     block_tags.insert(doc_types_[i] + std::string("purpose"));
 226                 }
 227             }
 228         }
 229
 230         bool is_flow_tag(std::string const& tag)
 231         {
 232             return block_tags.find(tag) == block_tags.end();
 233         }
 234
 235         std::set<std::string> block_tags;
 236         std::stack<std::string> tags;
 237         std::string& out;
 238         int current_indent;
 239         pretty_printer printer;
 240         std::string current_tag;
 241
 242       private:
 243         tidy_compiler& operator=(tidy_compiler const&);
 244     };
 245
 246     struct tidy_grammar : cl::grammar<tidy_grammar>
 247     {
 248         tidy_grammar(tidy_compiler& state_, int indent_, bool is_html_)
 249             : state(state_), indent(indent_), is_html(is_html_)
 250         {
 251         }
 252
 253         template <typename Scanner> struct definition
 254         {
 255             definition(tidy_grammar const& self)
 256             {
 257                 // clang-format off
 258
 259                 tag = (cl::lexeme_d[+(cl::alnum_p | '_' | ':')])  [boost::bind(&tidy_grammar::do_tag, &self, _1, _2)];
 260
 261                 code =  cl::eps_p(ph::var(self.is_html))
 262                     >>  "<"
 263                     >>  cl::lexeme_d[cl::str_p("pre")]
 264                     >>  *(cl::anychar_p - '>')
 265                     >>  ">"
 266                     >>  *(cl::anychar_p - "</pre>")
 267                     >>  "</pre"
 268                     >>  cl::lexeme_d[">" >> *cl::space_p]
 269                     |   cl::eps_p(!ph::var(self.is_html))
 270                     >>   "<programlisting>"
 271                     >>  *(cl::anychar_p - "</programlisting>")
 272                     >>  "</programlisting"
 273                     >>  cl::lexeme_d[">" >> *cl::space_p]
 274                     ;
 275
 276                 // What's the business of cl::lexeme_d['>' >> *cl::space_p]; ?
 277                 // It is there to preserve the space after the tag that is
 278                 // otherwise consumed by the cl::space_p skipper.
 279
 280                 escape =
 281                     cl::str_p("<!--quickbook-escape-prefix-->") >>
 282                     (*(cl::anychar_p - cl::str_p("<!--quickbook-escape-postfix-->")))
 283                     [
 284                         boost::bind(&tidy_grammar::do_escape, &self, _1, _2)
 285                     ]
 286                     >>  cl::lexeme_d
 287                         [
 288                             cl::str_p("<!--quickbook-escape-postfix-->") >>
 289                             (*cl::space_p)
 290                             [
 291                                 boost::bind(&tidy_grammar::do_escape_post, &self, _1, _2)
 292                             ]
 293                         ]
 294                     ;
 295
 296                 start_tag = '<' >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
 297                 start_end_tag =
 298                         '<' >> tag >> *(cl::anychar_p - ("/>" | cl::ch_p('>'))) >> cl::lexeme_d["/>" >> *cl::space_p]
 299                     |   "<?" >> tag >> *(cl::anychar_p - '?') >> cl::lexeme_d["?>" >> *cl::space_p]
 300                     |   "<!--" >> *(cl::anychar_p - "-->") >> cl::lexeme_d["-->" >> *cl::space_p]
 301                     |   "<!" >> tag >> *(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p]
 302                     ;
 303                 content = cl::lexeme_d[ +(cl::anychar_p - '<') ];
 304                 end_tag = "</" >> +(cl::anychar_p - '>') >> cl::lexeme_d['>' >> *cl::space_p];
 305
 306                 markup =
 307                         escape
 308                     |   code            [boost::bind(&tidy_grammar::do_code, &self, _1, _2)]
 309                     |   start_end_tag   [boost::bind(&tidy_grammar::do_start_end_tag, &self, _1, _2)]
 310                     |   start_tag       [boost::bind(&tidy_grammar::do_start_tag, &self, _1, _2)]
 311                     |   end_tag         [boost::bind(&tidy_grammar::do_end_tag, &self, _1, _2)]
 312                     |   content         [boost::bind(&tidy_grammar::do_content, &self, _1, _2)]
 313                     ;
 314
 315                 tidy = *markup;
 316
 317                 // clang-format on
 318             }
 319
 320             cl::rule<Scanner> const& start() { return tidy; }
 321
 322             cl::rule<Scanner> tidy, tag, start_tag, start_end_tag, content,
 323                 end_tag, markup, code, escape;
 324         };
 325
 326         void do_escape_post(iter_type f, iter_type l) const
 327         {
 328             for (iter_type i = f; i != l; ++i)
 329                 state.out += *i;
 330         }
 331
 332         void do_escape(iter_type f, iter_type l) const
 333         {
 334             while (f != l && std::isspace(*f)) {
 335                 ++f;
 336             }
 337             while (f != l && std::isspace(*(l - 1))) {
 338                 --l;
 339             }
 340             for (iter_type i = f; i != l; ++i) {
 341                 state.out += *i;
 342             }
 343         }
 344
 345         void do_code(iter_type f, iter_type l) const
 346         {
 347             state.printer.trim_spaces();
 348             if (state.out[state.out.size() - 1] != '\n') state.out += '\n';
 349
 350             // trim trailing space from after closing tag
 351             while (f != l && std::isspace(*(l - 1))) {
 352                 --l;
 353             }
 354
 355             // print the string taking care of line
 356             // ending CR/LF platform issues
 357             for (iter_type i = f; i != l;) {
 358                 if (*i == '\n') {
 359                     state.printer.trim_spaces();
 360                     state.out += '\n';
 361                     ++i;
 362                     if (i != l && *i == '\r') {
 363                         ++i;
 364                     }
 365                 }
 366                 else if (*i == '\r') {
 367                     state.printer.trim_spaces();
 368                     state.out += '\n';
 369                     ++i;
 370                     if (i != l && *i == '\n') {
 371                         ++i;
 372                     }
 373                 }
 374                 else {
 375                     state.out += *i;
 376                     ++i;
 377                 }
 378             }
 379             state.out += '\n';
 380             state.printer.indent();
 381         }
 382
 383         void do_tag(iter_type f, iter_type l) const
 384         {
 385             state.current_tag = std::string(f, l);
 386         }
 387
 388         void do_start_end_tag(iter_type f, iter_type l) const
 389         {
 390             bool is_flow_tag = state.is_flow_tag(state.current_tag);
 391             if (!is_flow_tag) state.printer.align_indent();
 392             state.printer.print_tag(f, l, is_flow_tag);
 393             if (!is_flow_tag) state.printer.break_line();
 394         }
 395
 396         void do_start_tag(iter_type f, iter_type l) const
 397         {
 398             state.tags.push(state.current_tag);
 399             bool is_flow_tag = state.is_flow_tag(state.current_tag);
 400             if (!is_flow_tag) state.printer.align_indent();
 401             state.printer.print_tag(f, l, is_flow_tag);
 402             if (!is_flow_tag) {
 403                 state.current_indent += indent;
 404                 state.printer.break_line();
 405             }
 406         }
 407
 408         void do_content(iter_type f, iter_type l) const
 409         {
 410             state.printer.print(f, l);
 411         }
 412
 413         void do_end_tag(iter_type f, iter_type l) const
 414         {
 415             if (state.tags.empty())
 416                 throw quickbook::post_process_failure("Mismatched tags.");
 417
 418             bool is_flow_tag = state.is_flow_tag(state.tags.top());
 419             if (!is_flow_tag) {
 420                 state.current_indent -= indent;
 421                 state.printer.align_indent();
 422             }
 423             state.printer.print_tag(f, l, is_flow_tag);
 424             if (!is_flow_tag) state.printer.break_line();
 425             state.tags.pop();
 426         }
 427
 428         tidy_compiler& state;
 429         int indent;
 430         bool is_html;
 431
 432       private:
 433         tidy_grammar& operator=(tidy_grammar const&);
 434     };
 435
 436     std::string post_process(
 437         std::string const& in, int indent, int linewidth, bool is_html)
 438     {
 439         if (indent == -1) indent = 2;        // set default to 2
 440         if (linewidth == -1) linewidth = 80; // set default to 80
 441
 442         std::string tidy;
 443         tidy_compiler state(tidy, linewidth, is_html);
 444         tidy_grammar g(state, indent, is_html);
 445         cl::parse_info<iter_type> r =
 446             parse(in.begin(), in.end(), g, cl::space_p);
 447         if (r.full) {
 448             return tidy;
 449         }
 450         else {
 451             throw quickbook::post_process_failure("Post Processing Failed.");
 452         }
 453     }
 454 }