git.proxmox.com Git - rustc.git/blame - src/etc/extract_grammar.py
Imported Upstream version 1.0.0+dfsg1
[rustc.git] / src / etc / extract_grammar.py
CommitLineData
223e47cc 1#!/usr/bin/env python
1a4d82fc
JJ
2#
3# Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
4# file at the top-level directory of this distribution and at
5# http://rust-lang.org/COPYRIGHT.
6#
7# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
8# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
9# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
10# option. This file may not be copied, modified, or distributed
11# except according to those terms.
223e47cc
LB
12
# This script extracts the grammar from the Rust docs.
14
15import fileinput
16
85aaf69f
SL
# Text gathered from the tagged "~~~~" blocks of the docs, keyed by the
# class name that appears (with a leading dot) on the opening fence line.
# "gram" receives whole lines in document order; the other four receive
# unique whitespace-separated words.
collections = {
    "gram": [],
    "keyword": [],
    "reserved": [],
    "binop": [],
    "unop": [],
}
223e47cc
LB
22
23
# Scan the input and gather the body of every "~~~~"-fenced block whose
# opening line mentions one of the collection names as ".<name>".
in_coll = False  # True while inside a block that is being collected
coll = ""        # name of the collection receiving the current block

for line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    is_fence = line.startswith("~~~~")

    if not in_coll:
        # Only a fence line naming a known collection starts collecting;
        # every other line outside a collected block is ignored.
        if is_fence:
            for cname in collections:
                if ("." + cname) in line:
                    coll = cname
                    in_coll = True
                    break
        continue

    if is_fence:
        # Closing fence of the block being collected.
        in_coll = False
    elif coll in ["keyword", "reserved", "binop", "unop"]:
        # Word collections keep each distinct word once, in first-seen order.
        for word in line.split():
            if word not in collections[coll]:
                collections[coll].append(word)
    else:
        # Grammar lines are stored verbatim (line ending included).
        collections[coll].append(line)
46
# Define operator symbol-names here
48
# Token names that are always declared. Keyword, reserved-word and
# operator names are appended to this list later in the script.
tokens = [
    "non_star",
    "non_slash",
    "non_eol",
    "non_single_quote",
    "non_double_quote",
    "ident",
]
223e47cc
LB
51
# Maps the literal spelling of each operator/punctuation symbol to the
# alphabetic token name used for it in the generated grammar.
symnames = {
    # arithmetic and member access
    ".": "dot",
    "+": "plus",
    "-": "minus",
    "/": "slash",
    "*": "star",
    "%": "percent",

    # sigils
    "~": "tilde",
    "@": "at",

    # logical / bitwise
    "!": "not",
    "&": "and",
    "|": "or",
    "^": "xor",

    # shifts
    "<<": "lsl",
    ">>": "lsr",
    ">>>": "asr",

    # short-circuit boolean
    "&&": "andand",
    "||": "oror",

    # comparison
    "<": "lt",
    "<=": "le",
    "==": "eqeq",
    ">=": "ge",
    ">": "gt",

    # assignment
    "=": "eq",

    # compound assignment
    "+=": "plusequal",
    "-=": "minusequal",
    "/=": "divequal",
    "*=": "starequal",
    "%=": "percentequal",

    "&=": "andequal",
    "|=": "orequal",
    "^=": "xorequal",

    ">>=": "lsrequal",
    ">>>=": "asrequal",
    "<<=": "lslequal",

    # paths
    "::": "coloncolon",

    # arrows
    "->": "rightarrow",
    "<-": "leftarrow",
    "<->": "swaparrow",

    # comments, macros and remaining punctuation
    "//": "linecomment",
    "/*": "openblockcomment",
    "*/": "closeblockcomment",
    "macro_rules": "macro_rules",
    # NOTE(review): "eg" may be a typo; it is emitted verbatim as a token
    # name, so confirm against consumers of the output before changing it.
    "=>": "eg",
    "..": "dotdot",
    ",": "comma",
}
111
# Rewritten grammar lines, one per collected "gram" line.
lines = []

for line in collections["gram"]:
    pieces = []
    for word in line.split():
        # replace strings with keyword-names or symbol-names from table
        if word.startswith("\""):
            # Drop the first and last characters (the surrounding quotes).
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                # Anything that is not a known symbol must be a purely
                # alphabetic keyword or reserved word.
                if any(not ch.isalpha() for ch in word):
                    raise Exception("non-alpha apparent keyword: "
                                    + word)
                if word not in tokens:
                    if (word in collections["keyword"] or
                            word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)

        # Every word is emitted with a single leading space, as before.
        pieces.append(" " + word)
    lines.append("".join(pieces))
137
138
# Declare every keyword and reserved word as a token, including those that
# no grammar line referred to. Duplicates are skipped so each name appears
# in the token list once.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)
142
# Declare the token name of every operator symbol: those collected from the
# docs first, then every entry of the symbol table.
# list(symnames) is used instead of symnames.keys() because on Python 3
# keys() returns a view, and list + view raises TypeError.
for sym in collections["unop"] + collections["binop"] + list(symnames):
    # A collected operator that is missing from symnames raises KeyError.
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
147
148
# Emit the result on stdout: the start declaration, the token declaration,
# one rule per word/operator collection, then the rewritten grammar lines.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))

# Keywords and reserved words are listed by their own spelling.
for coll in ["keyword", "reserved"]:
    print("%s: %s ; " % (coll, "\n\t| ".join(collections[coll])))

# Operators are listed by token name rather than by literal symbol.
for coll in ["binop", "unop"]:
    op_names = [symnames[x] for x in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(op_names)))

print("\n".join(lines))