]>
Commit | Line | Data |
---|---|---|
20effc67 | 1 | /* Copyright 2002, 2020 Rene Rivera. |
7c673cae FG |
2 | ** Distributed under the Boost Software License, Version 1.0. |
3 | ** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) | |
4 | */ | |
5 | ||
20effc67 TL |
6 | #include <cstdio> |
7 | #include <string> | |
8 | #include <algorithm> | |
9 | #include <cctype> | |
10 | #include <set> | |
11 | #include <cstring> | |
7c673cae FG |
12 | |
13 | /* | |
14 | # yyacc - yacc wrapper | |
15 | # | |
16 | # Allows tokens to be written as `literal` and then automatically | |
17 | # substituted with #defined tokens. | |
18 | # | |
19 | # Usage: | |
20 | # yyacc file.y filetab.h file.yy | |
21 | # | |
22 | # inputs: | |
23 | # file.yy yacc grammar with ` literals | |
24 | # | |
25 | # outputs: | |
26 | # file.y yacc grammar | |
27 | # filetab.h array of string <-> token mappings | |
28 | # | |
29 | # 3-13-93 | |
30 | # Documented and p moved in sed command (for some reason, | |
31 | # s/x/y/p doesn't work). | |
32 | # 10-12-93 | |
33 | # Take basename as second argument. | |
34 | # 12-31-96 | |
35 | # reversed order of args to be compatible with GenFile rule | |
36 | # 11-20-2002 | |
37 | # Reimplemented as a C program for portability. (Rene Rivera) | |
20effc67 TL |
38 | # 05-xx-2020 |
39 | # Reimplement yet again, in C++. (Rene Rivera) | |
7c673cae FG |
40 | */ |
41 | ||
20effc67 TL |
/* Command-line synopsis; print_usage() emits one array entry per output line. */
static const std::string usage[] =
{
    "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
};
7c673cae | 45 | |
20effc67 TL |
46 | void print_usage() |
47 | { | |
48 | for (auto u: usage) | |
49 | { | |
50 | std::printf("%s\n", u.c_str()); | |
51 | } | |
52 | } | |
53 | ||
/*
 * Maps a grammar literal to the name of its token macro.
 *
 * Punctuation literals listed in the table below are replaced by a spelled-out
 * name (":" becomes "_colon"); any other literal is used as-is. The result is
 * then upper-cased and suffixed with "_t", e.g. ":" -> "_COLON_t" and
 * "actions" -> "ACTIONS_t".
 */
std::string tokenize_string(std::string s)
{
    struct spelling
    {
        const char * symbol;
        const char * name;
    };
    static const spelling punctuation[] =
    {
        { ":",  "_colon" },
        { "!",  "_bang" },
        { "!=", "_bang_equals" },
        { "&&", "_amperamper" },
        { "&",  "_amper" },
        { "+",  "_plus" },
        { "+=", "_plus_equals" },
        { "||", "_barbar" },
        { "|",  "_bar" },
        { ";",  "_semic" },
        { "-",  "_minus" },
        { "<",  "_langle" },
        { "<=", "_langle_equals" },
        { ">",  "_rangle" },
        { ">=", "_rangle_equals" },
        { ".",  "_period" },
        { "?",  "_question" },
        { "?=", "_question_equals" },
        { "=",  "_equals" },
        { ",",  "_comma" },
        { "[",  "_lbracket" },
        { "]",  "_rbracket" },
        { "{",  "_lbrace" },
        { "}",  "_rbrace" },
        { "(",  "_lparen" },
        { ")",  "_rparen" },
    };

    // Literals without a table entry keep their own spelling.
    std::string name = s;
    for (const spelling & entry : punctuation)
    {
        if (s == entry.symbol)
        {
            name = entry.name;
            break;
        }
    }

    // Go through unsigned char so std::toupper never sees a negative value.
    for (char & ch : name)
    {
        ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
    }

    name += "_t";
    return name;
}
89 | ||
/*
 * A grammar literal together with the token name generated for it.
 * Ordering (and therefore uniqueness inside a std::set) is decided by the
 * literal text alone; the token field does not take part in comparisons.
 */
struct literal
{
    std::string string; // text found between the backquotes
    std::string token;  // token macro name substituted for that text

    bool operator<(const literal & other) const
    {
        return string.compare(other.string) < 0;
    }
};
7c673cae FG |
100 | |
101 | int main(int argc, char ** argv) | |
102 | { | |
103 | int result = 0; | |
104 | if (argc != 4) | |
105 | { | |
106 | print_usage(); | |
107 | result = 1; | |
108 | } | |
109 | else | |
110 | { | |
111 | FILE * token_output_f = 0; | |
112 | FILE * grammar_output_f = 0; | |
113 | FILE * grammar_source_f = 0; | |
114 | ||
115 | grammar_source_f = fopen(argv[3],"r"); | |
116 | if (grammar_source_f == 0) { result = 1; } | |
117 | if (result == 0) | |
118 | { | |
20effc67 | 119 | std::set<literal> literals; |
7c673cae FG |
120 | char l[2048]; |
121 | while (1) | |
122 | { | |
123 | if (fgets(l,2048,grammar_source_f) != 0) | |
124 | { | |
125 | char * c = l; | |
126 | while (1) | |
127 | { | |
20effc67 | 128 | char * c1 = std::strchr(c,'`'); |
7c673cae FG |
129 | if (c1 != 0) |
130 | { | |
20effc67 | 131 | char * c2 = std::strchr(c1+1,'`'); |
7c673cae FG |
132 | if (c2 != 0) |
133 | { | |
20effc67 TL |
134 | auto l = std::string(c1+1,c2-c1-1); |
135 | literals.insert({ l, tokenize_string(l) }); | |
7c673cae FG |
136 | c = c2+1; |
137 | } | |
138 | else | |
139 | break; | |
140 | } | |
141 | else | |
142 | break; | |
143 | } | |
144 | } | |
145 | else | |
146 | { | |
147 | break; | |
148 | } | |
149 | } | |
20effc67 | 150 | token_output_f = std::fopen(argv[2],"w"); |
7c673cae FG |
151 | if (token_output_f != 0) |
152 | { | |
20effc67 | 153 | for (const literal & l: literals) |
7c673cae | 154 | { |
20effc67 | 155 | std::fprintf(token_output_f," { \"%s\", %s },\n",l.string.c_str(), l.token.c_str()); |
7c673cae | 156 | } |
20effc67 | 157 | std::fclose(token_output_f); |
7c673cae FG |
158 | } |
159 | else | |
160 | result = 1; | |
161 | if (result == 0) | |
162 | { | |
20effc67 | 163 | grammar_output_f = std::fopen(argv[1],"w"); |
7c673cae FG |
164 | if (grammar_output_f != 0) |
165 | { | |
20effc67 | 166 | for (const literal & l: literals) |
7c673cae | 167 | { |
20effc67 | 168 | fprintf(grammar_output_f,"%%token %s\n",l.token.c_str()); |
7c673cae FG |
169 | } |
170 | rewind(grammar_source_f); | |
171 | while (1) | |
172 | { | |
173 | if (fgets(l,2048,grammar_source_f) != 0) | |
174 | { | |
175 | char * c = l; | |
176 | while (1) | |
177 | { | |
178 | char * c1 = strchr(c,'`'); | |
179 | if (c1 != 0) | |
180 | { | |
181 | char * c2 = strchr(c1+1,'`'); | |
182 | if (c2 != 0) | |
183 | { | |
20effc67 | 184 | auto replacement = literals.find({std::string(c1+1,c2-c1-1), ""}); |
7c673cae | 185 | *c1 = 0; |
20effc67 | 186 | std::fprintf(grammar_output_f,"%s%s",c,replacement->token.c_str()); |
7c673cae FG |
187 | c = c2+1; |
188 | } | |
189 | else | |
190 | { | |
20effc67 | 191 | std::fprintf(grammar_output_f,"%s",c); |
7c673cae FG |
192 | break; |
193 | } | |
194 | } | |
195 | else | |
196 | { | |
20effc67 | 197 | std::fprintf(grammar_output_f,"%s",c); |
7c673cae FG |
198 | break; |
199 | } | |
200 | } | |
201 | } | |
202 | else | |
203 | { | |
204 | break; | |
205 | } | |
206 | } | |
20effc67 | 207 | std::fclose(grammar_output_f); |
7c673cae FG |
208 | } |
209 | else | |
210 | result = 1; | |
211 | } | |
212 | } | |
213 | if (result != 0) | |
214 | { | |
215 | perror("yyacc"); | |
216 | } | |
217 | } | |
218 | return result; | |
219 | } |