]> git.proxmox.com Git - rustc.git/blob - src/etc/extract_grammar.py
Imported Upstream version 1.0.0+dfsg1
[rustc.git] / src / etc / extract_grammar.py
1 #!/usr/bin/env python
2 #
3 # Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
4 # file at the top-level directory of this distribution and at
5 # http://rust-lang.org/COPYRIGHT.
6 #
7 # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8 # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9 # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10 # option. This file may not be copied, modified, or distributed
11 # except according to those terms.
12
13 # This script is for extracting the grammar from the rust docs.
14
15 import fileinput
16
# Accumulators for each documentation section we extract.  "gram" keeps raw
# production lines verbatim; the other four collect unique words.
collections = {name: [] for name in ("gram", "keyword", "reserved", "binop", "unop")}


# Scanner state: are we currently inside a fenced block, and which
# collection does that block feed?
in_coll = False
coll = ""
26
# Scan the input documents line by line.  Fenced blocks open and close with
# a line starting in "~~~~"; an opening fence that mentions ".<name>" for
# one of the collections above (presumably a pandoc-style attribute such as
# "{.keyword}" — TODO confirm against the docs) routes the block's contents
# into that collection.
for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if in_coll:
        if line.startswith("~~~~"):
            # Closing fence: stop collecting.
            in_coll = False
        else:
            if coll in ["keyword", "reserved", "binop", "unop"]:
                # Word collections: accumulate unique whitespace-separated
                # words, preserving first-seen order.
                for word in line.split():
                    if word not in collections[coll]:
                        collections[coll].append(word)
            else:
                # The grammar collection ("gram") keeps raw lines verbatim.
                collections[coll].append(line)

    else:
        if line.startswith("~~~~"):
            # Opening fence: pick whichever collection's ".<name>" tag
            # appears on the fence line; untagged fences are skipped.
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
46
# Define operator symbol-names here

# Seed the token list with grammar token names that are neither keywords
# nor operator symbols, so each gets declared exactly once.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]
51
# Map each operator / punctuation literal used in the grammar to a symbolic
# token name.  Built from an ordered pair list; entry order matches the
# original table so Python 3 iteration order is unchanged.
symnames = dict([
    # arithmetic
    (".", "dot"),
    ("+", "plus"),
    ("-", "minus"),
    ("/", "slash"),
    ("*", "star"),
    ("%", "percent"),
    # sigils
    ("~", "tilde"),
    ("@", "at"),
    # bitwise / logical
    ("!", "not"),
    ("&", "and"),
    ("|", "or"),
    ("^", "xor"),
    # shifts
    ("<<", "lsl"),
    (">>", "lsr"),
    (">>>", "asr"),
    # short-circuit
    ("&&", "andand"),
    ("||", "oror"),
    # comparison
    ("<", "lt"),
    ("<=", "le"),
    ("==", "eqeq"),
    (">=", "ge"),
    (">", "gt"),
    # assignment
    ("=", "eq"),
    # compound assignment
    ("+=", "plusequal"),
    ("-=", "minusequal"),
    ("/=", "divequal"),
    ("*=", "starequal"),
    ("%=", "percentequal"),
    ("&=", "andequal"),
    ("|=", "orequal"),
    ("^=", "xorequal"),
    (">>=", "lsrequal"),
    (">>>=", "asrequal"),
    ("<<=", "lslequal"),
    # paths and arrows
    ("::", "coloncolon"),
    ("->", "rightarrow"),
    ("<-", "leftarrow"),
    ("<->", "swaparrow"),
    # comments and misc
    ("//", "linecomment"),
    ("/*", "openblockcomment"),
    ("*/", "closeblockcomment"),
    ("macro_rules", "macro_rules"),
    # NOTE(review): "eg" looks like a typo (perhaps for "fatarrow"/"eq"),
    # but it is preserved as-is since downstream consumers may rely on it.
    ("=>", "eg"),
    ("..", "dotdot"),
    (",", "comma"),
])
111
# Translate each raw grammar line: replace quoted literals with their
# keyword or symbol token names, registering keywords/reserved words in
# `tokens` the first time they are seen.
lines = []

for raw in collections["gram"]:
    pieces = []
    for word in raw.split():
        if word.startswith('"'):
            # Quoted literal: drop the surrounding quotes.
            word = word[1:-1]
            if word in symnames:
                # Operator / punctuation: use its symbolic name.
                word = symnames[word]
            else:
                # Otherwise it must be a purely alphabetic keyword.
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                            word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)

        pieces.append(word)
    # Rebuild the line with a leading space before every word, exactly as
    # the original string-concatenation produced.
    lines.append("".join(" " + p for p in pieces))
137
138
# Any keywords/reserved words the grammar itself never quoted still need
# token declarations.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Likewise for operator symbols.  dict.keys() returns a non-concatenable
# view object under Python 3, so it must be materialized as a list before
# being added to the unop/binop lists (under Python 2 this is a no-op).
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
147
148
# Emit the extracted grammar: the token declarations first, then one rule
# per word/symbol collection, then the translated productions.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))
for coll in ("keyword", "reserved"):
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))
for coll in ("binop", "unop"):
    members = [symnames[sym] for sym in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(members)))
print("\n".join(lines))