]>
Commit | Line | Data |
---|---|---|
223e47cc | 1 | #!/usr/bin/env python |
1a4d82fc JJ |
2 | # |
3 | # Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT | |
4 | # file at the top-level directory of this distribution and at | |
5 | # http://rust-lang.org/COPYRIGHT. | |
6 | # | |
7 | # Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
8 | # http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
9 | # <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
10 | # option. This file may not be copied, modified, or distributed | |
11 | # except according to those terms. | |
223e47cc LB |
12 | |
13 | # This script is for extracting the grammar from the rust docs. | |
14 | ||
15 | import fileinput | |
16 | ||
85aaf69f SL |
# Buckets of text extracted from the docs, keyed by the fenced-block tag
# that introduces them ("~~~~ {.gram}", "~~~~ {.keyword}", ...).
collections = {"gram": [],
               "keyword": [],
               "reserved": [],
               "binop": [],
               "unop": []}

# Scanner state: are we inside a fenced block, and which bucket does it fill.
in_coll = False
coll = ""

# Walk the input documents; "~~~~" lines open and close fenced blocks.
for doc_line in fileinput.input(openhook=fileinput.hook_encoded("utf-8")):
    if not in_coll:
        # Outside a block: an opening fence may name one of our collections.
        if doc_line.startswith("~~~~"):
            for cname in collections:
                if ("." + cname) in doc_line:
                    coll = cname
                    in_coll = True
                    break
        continue
    if doc_line.startswith("~~~~"):
        # Closing fence ends the current block.
        in_coll = False
    elif coll in ("keyword", "reserved", "binop", "unop"):
        # Word-oriented buckets: collect unique whitespace-separated words.
        for w in doc_line.split():
            if w not in collections[coll]:
                collections[coll].append(w)
    else:
        # The grammar bucket keeps whole lines verbatim.
        collections[coll].append(doc_line)
# Define operator symbol-names here

# Token names that have no literal spelling of their own in the grammar.
tokens = ["non_star", "non_slash", "non_eol",
          "non_single_quote", "non_double_quote", "ident"]

# Operator / punctuation literal -> identifier used as its token name in
# the emitted grammar.  Insertion order matters: it fixes the order in
# which symbol tokens are appended to `tokens` later on.
symnames = {
    # arithmetic
    ".": "dot",
    "+": "plus",
    "-": "minus",
    "/": "slash",
    "*": "star",
    "%": "percent",

    # unary sigils
    "~": "tilde",
    "@": "at",

    # bitwise / logical
    "!": "not",
    "&": "and",
    "|": "or",
    "^": "xor",

    # shifts
    "<<": "lsl",
    ">>": "lsr",
    ">>>": "asr",

    # short-circuit
    "&&": "andand",
    "||": "oror",

    # comparisons
    "<": "lt",
    "<=": "le",
    "==": "eqeq",
    ">=": "ge",
    ">": "gt",

    "=": "eq",

    # compound assignment
    "+=": "plusequal",
    "-=": "minusequal",
    "/=": "divequal",
    "*=": "starequal",
    "%=": "percentequal",

    "&=": "andequal",
    "|=": "orequal",
    "^=": "xorequal",

    ">>=": "lsrequal",
    ">>>=": "asrequal",
    "<<=": "lslequal",

    "::": "coloncolon",

    # arrows
    "->": "rightarrow",
    "<-": "leftarrow",
    "<->": "swaparrow",

    # comments and miscellany
    "//": "linecomment",
    "/*": "openblockcomment",
    "*/": "closeblockcomment",
    "macro_rules": "macro_rules",
    "=>": "eg",
    "..": "dotdot",
    ",": "comma"
}
# Grammar lines rewritten with quoted terminals replaced by token names.
lines = []

for gram_line in collections["gram"]:
    rebuilt = ""
    for word in gram_line.split():
        # replace strings with keyword-names or symbol-names from table
        if word.startswith("\""):
            word = word[1:-1]
            if word in symnames:
                word = symnames[word]
            else:
                # Not an operator: it must be a purely alphabetic keyword.
                # (Checked char-by-char so an empty literal falls through to
                # the unknown-keyword error below rather than failing here.)
                for ch in word:
                    if not ch.isalpha():
                        raise Exception("non-alpha apparent keyword: "
                                        + word)
                if word not in tokens:
                    # Keywords/reserved words become tokens on first sight.
                    if (word in collections["keyword"]
                            or word in collections["reserved"]):
                        tokens.append(word)
                    else:
                        raise Exception("unknown keyword/reserved word: "
                                        + word)
        rebuilt += " " + word
    lines.append(rebuilt)
138 | ||
# Make sure every keyword and reserved word has a token.
for word in collections["keyword"] + collections["reserved"]:
    if word not in tokens:
        tokens.append(word)

# Make sure every operator symbol has a token.  `symnames.keys()` must be
# wrapped in list(): under Python 3 a dict view cannot be concatenated to a
# list, so the original `... + symnames.keys()` raised TypeError there.
for sym in collections["unop"] + collections["binop"] + list(symnames.keys()):
    word = symnames[sym]
    if word not in tokens:
        tokens.append(word)
147 | ||
148 | ||
# Emit the collected grammar in yacc-like form on stdout.
print("%start parser, token;")
print("%%token %s ;" % ("\n\t, ".join(tokens)))

# Keyword-like alternatives are the words themselves.
for coll in ("keyword", "reserved"):
    alts = "\n\t| ".join(collections[coll])
    print("%s: %s ; " % (coll, alts))

# Operator alternatives are the symbol-names looked up in the table.
for coll in ("binop", "unop"):
    names = [symnames[s] for s in collections[coll]]
    print("%s: %s ; " % (coll, "\n\t| ".join(names)))

print("\n".join(lines))