]>
Commit | Line | Data |
---|---|---|
223e47cc | 1 | #!/usr/bin/env python |
1a4d82fc JJ |
2 | # |
3 | # Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT | |
4 | # file at the top-level directory of this distribution and at | |
5 | # http://rust-lang.org/COPYRIGHT. | |
6 | # | |
7 | # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
8 | # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
9 | # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
10 | # option. This file may not be copied, modified, or distributed | |
11 | # except according to those terms. | |
223e47cc LB |
12 | |
13 | # This script is for extracting the grammar from the rust docs. | |
14 | ||
15 | import fileinput | |
16 | ||
85aaf69f SL |
# Buckets of text extracted from the docs, keyed by the fenced-block tag
# that introduces them ("~~~~ {.gram}", "~~~~ {.keyword}", ...).
collections = {"gram": [],
               "keyword": [],
               "reserved": [],
               "binop": [],
               "unop": []}

# Scanner state: are we inside a fenced block, and which bucket does it fill.
in_coll = False
coll = ""

# Walk the input documents; "~~~~" lines open and close fenced blocks.
for doc_line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if not in_coll:
        # Outside a block: an opening fence may name one of our collections.
        if doc_line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in doc_line:
                    coll = cname
                    in_coll = True
                    break
        continue
    if doc_line.startswith("~~~~"):
        # Closing fence ends the current block.
        in_coll = False
    elif coll in ("keyword", "reserved", "binop", "unop"):
        # Word-oriented buckets: collect unique whitespace-separated words.
        for w in doc_line.split():
            if w not in collections[coll]:
                collections[coll].append(w)
    else:
        # The grammar bucket keeps whole lines verbatim.
        collections[coll].append(doc_line)
# Define operator symbol-names here

# Token names that have no literal spelling of their own in the grammar.
tokens = ["non_star", "non_slash", "non_eol",
          "non_single_quote", "non_double_quote", "ident"]

# Operator / punctuation literal -> identifier used as its token name in
# the emitted grammar.  Insertion order matters: it fixes the order in
# which symbol tokens are appended to `tokens` later on.
symnames = {
    # arithmetic
    ".": "dot",
    "+": "plus",
    "-": "minus",
    "/": "slash",
    "*": "star",
    "%": "percent",

    # unary sigils
    "~": "tilde",
    "@": "at",

    # bitwise / logical
    "!": "not",
    "&": "and",
    "|": "or",
    "^": "xor",

    # shifts
    "<<": "lsl",
    ">>": "lsr",
    ">>>": "asr",

    # short-circuit
    "&&": "andand",
    "||": "oror",

    # comparisons
    "<": "lt",
    "<=": "le",
    "==": "eqeq",
    ">=": "ge",
    ">": "gt",

    "=": "eq",

    # compound assignment
    "+=": "plusequal",
    "-=": "minusequal",
    "/=": "divequal",
    "*=": "starequal",
    "%=": "percentequal",

    "&=": "andequal",
    "|=": "orequal",
    "^=": "xorequal",

    ">>=": "lsrequal",
    ">>>=": "asrequal",
    "<<=": "lslequal",

    "::": "coloncolon",

    # arrows
    "->": "rightarrow",
    "<-": "leftarrow",
    "<->": "swaparrow",

    # comments and miscellany
    "//": "linecomment",
    "/*": "openblockcomment",
    "*/": "closeblockcomment",
    "macro_rules": "macro_rules",
    "=>": "eg",
    "..": "dotdot",
    ",": "comma"
}
# Grammar lines rewritten with quoted terminals replaced by token names.
lines = []

for gram_line in collections["gram"]:
    rebuilt = ""
    for word in gram_line.split():
        # replace strings with keyword-names or symbol-names from table
        if word.startswith("\""):
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                # Not an operator: it must be a purely alphabetic keyword.
                # (Checked char-by-char so an empty literal falls through to
                # the unknown-keyword error below rather than failing here.)
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    # Keywords/reserved words become tokens on first sight.
                    if (word in collections["keyword"]
                            or word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        rebuilt += " " + word
    lines.append(rebuilt)
138 | ||
# Make sure every keyword and reserved word has a token.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Make sure every operator symbol has a token.  `symnames.keys()` must be
# wrapped in list(): under Python 3 a dict view cannot be concatenated to a
# list, so the original `... + symnames.keys()` raised TypeError there.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
147 | ||
148 | ||
# Emit the collected grammar in yacc-like form on stdout.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))

# Keyword-like alternatives are the words themselves.
for coll in ("keyword", "reserved"):
    alts = "\n\t| ".join(collections[coll])
    print("%s: %s ; " % (coll, alts))

# Operator alternatives are the symbol-names looked up in the table.
for coll in ("binop", "unop"):
    names = [symnames[s] for s in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(names)))

print("\n".join(lines))