]> git.proxmox.com Git - rustc.git/blob - src/etc/extract_grammar.py
Imported Upstream version 1.0.0+dfsg1
[rustc.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2 #
3 # Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
4 # file at the top-level directory of this distribution and at
5 # http://rust-lang.org/COPYRIGHT.
6 #
7 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10 # option. This file may not be copied, modified, or distributed
11 # except according to those terms.
12
13 # This script is for extracting the grammar from the rust docs.
14
15 import fileinput
16
# Accumulators for each documentation section we extract.  "gram" keeps raw
# production lines verbatim; the other four collect unique words.
collections = {name: [] for name in ("gram", "keyword", "reserved", "binop", "unop")}


# Scanner state: are we currently inside a fenced block, and which
# collection does that block feed?
in_coll = False
coll = ""
26
# Scan the input documents line by line.  Fenced blocks open and close with
# a line starting in "~~~~"; an opening fence that mentions ".<name>" for
# one of the collections above (presumably a pandoc-style attribute such as
# "{.keyword}" — TODO confirm against the docs) routes the block's contents
# into that collection.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if in_coll:
        if line.startswith("~~~~"):
            # Closing fence: stop collecting.
            in_coll = False
        else:
            if coll in ["keyword", "reserved", "binop", "unop"]:
                # Word collections: accumulate unique whitespace-separated
                # words, preserving first-seen order.
                for word in line.split():
                    if word not in collections[coll]:
                        collections[coll].append(word)
            else:
                # The grammar collection ("gram") keeps raw lines verbatim.
                collections[coll].append(line)

    else:
        if line.startswith("~~~~"):
            # Opening fence: pick whichever collection's ".<name>" tag
            # appears on the fence line; untagged fences are skipped.
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
46
# Define operator symbol-names here

# Seed the token list with grammar token names that are neither keywords
# nor operator symbols, so each gets declared exactly once.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]
51
# Map each operator / punctuation literal used in the grammar to a symbolic
# token name.  Built from an ordered pair list; entry order matches the
# original table so Python 3 iteration order is unchanged.
symnames = dict([
    # arithmetic
    (".", "dot"),
    ("+", "plus"),
    ("-", "minus"),
    ("/", "slash"),
    ("*", "star"),
    ("%", "percent"),
    # sigils
    ("~", "tilde"),
    ("@", "at"),
    # bitwise / logical
    ("!", "not"),
    ("&", "and"),
    ("|", "or"),
    ("^", "xor"),
    # shifts
    ("<<", "lsl"),
    (">>", "lsr"),
    (">>>", "asr"),
    # short-circuit
    ("&&", "andand"),
    ("||", "oror"),
    # comparison
    ("<", "lt"),
    ("<=", "le"),
    ("==", "eqeq"),
    (">=", "ge"),
    (">", "gt"),
    # assignment
    ("=", "eq"),
    # compound assignment
    ("+=", "plusequal"),
    ("-=", "minusequal"),
    ("/=", "divequal"),
    ("*=", "starequal"),
    ("%=", "percentequal"),
    ("&=", "andequal"),
    ("|=", "orequal"),
    ("^=", "xorequal"),
    (">>=", "lsrequal"),
    (">>>=", "asrequal"),
    ("<<=", "lslequal"),
    # paths and arrows
    ("::", "coloncolon"),
    ("->", "rightarrow"),
    ("<-", "leftarrow"),
    ("<->", "swaparrow"),
    # comments and misc
    ("//", "linecomment"),
    ("/*", "openblockcomment"),
    ("*/", "closeblockcomment"),
    ("macro_rules", "macro_rules"),
    # NOTE(review): "eg" looks like a typo (perhaps for "fatarrow"/"eq"),
    # but it is preserved as-is since downstream consumers may rely on it.
    ("=>", "eg"),
    ("..", "dotdot"),
    (",", "comma"),
])
111
# Translate each raw grammar line: replace quoted literals with their
# keyword or symbol token names, registering keywords/reserved words in
# `tokens` the first time they are seen.
lines = []

for raw in collections["gram"]:
    pieces = []
    for word in raw.split():
        if word.startswith('"'):
            # Quoted literal: drop the surrounding quotes.
            word = word[1:-1]
            if word in symnames:
                # Operator / punctuation: use its symbolic name.
                word = symnames[word]
            else:
                # Otherwise it must be a purely alphabetic keyword.
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                            word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)

        pieces.append(word)
    # Rebuild the line with a leading space before every word, exactly as
    # the original string-concatenation produced.
    lines.append("".join(" " + p for p in pieces))
137
138
# Any keywords/reserved words the grammar itself never quoted still need
# token declarations.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise for operator symbols.  dict.keys() returns a non-concatenable
# view object under Python 3, so it must be materialized as a list before
# being added to the unop/binop lists (under Python 2 this is a no-op).
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
147
148
# Emit the extracted grammar: the token declarations first, then one rule
# per word/symbol collection, then the translated productions.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    members = [symnames[sym] for sym in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(members)))
print("\n".join(lines))