]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* Copyright 2002 Rene Rivera. |
2 | ** Distributed under the Boost Software License, Version 1.0. | |
3 | ** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) | |
4 | */ | |
5 | ||
6 | #include <stdio.h> | |
7 | #include <string.h> | |
8 | #include <ctype.h> | |
9 | #include <stdlib.h> | |
10 | ||
11 | /* | |
12 | # yyacc - yacc wrapper | |
13 | # | |
14 | # Allows tokens to be written as `literal` and then automatically | |
15 | # substituted with #defined tokens. | |
16 | # | |
17 | # Usage: | |
18 | # yyacc file.y filetab.h file.yy | |
19 | # | |
20 | # inputs: | |
21 | # file.yy yacc grammar with ` literals | |
22 | # | |
23 | # outputs: | |
24 | # file.y yacc grammar | |
25 | # filetab.h array of string <-> token mappings | |
26 | # | |
27 | # 3-13-93 | |
28 | # Documented and p moved in sed command (for some reason, | |
29 | # s/x/y/p doesn't work). | |
30 | # 10-12-93 | |
31 | # Take basename as second argument. | |
32 | # 12-31-96 | |
33 | # reversed order of args to be compatible with GenFile rule | |
34 | # 11-20-2002 | |
35 | # Reimplemented as a C program for portability. (Rene Rivera) | |
36 | */ | |
37 | ||
/* Forward declarations for the helpers defined after main(). */

/* Writes the command-line synopsis to stderr. */
void print_usage(void);
/* Returns a newly allocated, NUL-terminated copy of the first l chars of s. */
char * copy_string(char * s, int l);
/* Returns a newly allocated token name derived from the literal text s. */
char * tokenize_string(char * s);
/* qsort/bsearch comparator ordering two `literal` records by their string. */
int cmp_literal(const void * a, const void * b);
42 | ||
/* A grammar literal paired with the token name generated for it. */
typedef struct
{
    char * string;  /* text found between a pair of backquotes */
    char * token;   /* name produced from `string` by tokenize_string() */
} literal;
48 | ||
49 | int main(int argc, char ** argv) | |
50 | { | |
51 | int result = 0; | |
52 | if (argc != 4) | |
53 | { | |
54 | print_usage(); | |
55 | result = 1; | |
56 | } | |
57 | else | |
58 | { | |
59 | FILE * token_output_f = 0; | |
60 | FILE * grammar_output_f = 0; | |
61 | FILE * grammar_source_f = 0; | |
62 | ||
63 | grammar_source_f = fopen(argv[3],"r"); | |
64 | if (grammar_source_f == 0) { result = 1; } | |
65 | if (result == 0) | |
66 | { | |
67 | literal literals[1024]; | |
68 | int t = 0; | |
69 | char l[2048]; | |
70 | while (1) | |
71 | { | |
72 | if (fgets(l,2048,grammar_source_f) != 0) | |
73 | { | |
74 | char * c = l; | |
75 | while (1) | |
76 | { | |
77 | char * c1 = strchr(c,'`'); | |
78 | if (c1 != 0) | |
79 | { | |
80 | char * c2 = strchr(c1+1,'`'); | |
81 | if (c2 != 0) | |
82 | { | |
83 | literals[t].string = copy_string(c1+1,c2-c1-1); | |
84 | literals[t].token = tokenize_string(literals[t].string); | |
85 | t += 1; | |
86 | c = c2+1; | |
87 | } | |
88 | else | |
89 | break; | |
90 | } | |
91 | else | |
92 | break; | |
93 | } | |
94 | } | |
95 | else | |
96 | { | |
97 | break; | |
98 | } | |
99 | } | |
100 | literals[t].string = 0; | |
101 | literals[t].token = 0; | |
102 | qsort(literals,t,sizeof(literal),cmp_literal); | |
103 | { | |
104 | int p = 1; | |
105 | int i = 1; | |
106 | while (literals[i].string != 0) | |
107 | { | |
108 | if (strcmp(literals[p-1].string,literals[i].string) != 0) | |
109 | { | |
110 | literals[p] = literals[i]; | |
111 | p += 1; | |
112 | } | |
113 | i += 1; | |
114 | } | |
115 | literals[p].string = 0; | |
116 | literals[p].token = 0; | |
117 | t = p; | |
118 | } | |
119 | token_output_f = fopen(argv[2],"w"); | |
120 | if (token_output_f != 0) | |
121 | { | |
122 | int i = 0; | |
123 | while (literals[i].string != 0) | |
124 | { | |
125 | fprintf(token_output_f," { \"%s\", %s },\n",literals[i].string,literals[i].token); | |
126 | i += 1; | |
127 | } | |
128 | fclose(token_output_f); | |
129 | } | |
130 | else | |
131 | result = 1; | |
132 | if (result == 0) | |
133 | { | |
134 | grammar_output_f = fopen(argv[1],"w"); | |
135 | if (grammar_output_f != 0) | |
136 | { | |
137 | int i = 0; | |
138 | while (literals[i].string != 0) | |
139 | { | |
140 | fprintf(grammar_output_f,"%%token %s\n",literals[i].token); | |
141 | i += 1; | |
142 | } | |
143 | rewind(grammar_source_f); | |
144 | while (1) | |
145 | { | |
146 | if (fgets(l,2048,grammar_source_f) != 0) | |
147 | { | |
148 | char * c = l; | |
149 | while (1) | |
150 | { | |
151 | char * c1 = strchr(c,'`'); | |
152 | if (c1 != 0) | |
153 | { | |
154 | char * c2 = strchr(c1+1,'`'); | |
155 | if (c2 != 0) | |
156 | { | |
157 | literal key; | |
158 | literal * replacement = 0; | |
159 | key.string = copy_string(c1+1,c2-c1-1); | |
160 | key.token = 0; | |
161 | replacement = (literal*)bsearch( | |
162 | &key,literals,t,sizeof(literal),cmp_literal); | |
163 | *c1 = 0; | |
164 | fprintf(grammar_output_f,"%s%s",c,replacement->token); | |
165 | c = c2+1; | |
166 | } | |
167 | else | |
168 | { | |
169 | fprintf(grammar_output_f,"%s",c); | |
170 | break; | |
171 | } | |
172 | } | |
173 | else | |
174 | { | |
175 | fprintf(grammar_output_f,"%s",c); | |
176 | break; | |
177 | } | |
178 | } | |
179 | } | |
180 | else | |
181 | { | |
182 | break; | |
183 | } | |
184 | } | |
185 | fclose(grammar_output_f); | |
186 | } | |
187 | else | |
188 | result = 1; | |
189 | } | |
190 | } | |
191 | if (result != 0) | |
192 | { | |
193 | perror("yyacc"); | |
194 | } | |
195 | } | |
196 | return result; | |
197 | } | |
198 | ||
/* Command-line synopsis: one text line per entry, ended by a null pointer. */
static char * usage[] = {
    "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
    0 };

/* Writes every synopsis line, each followed by a newline, to stderr. */
void print_usage()
{
    int index = 0;
    while (usage[index] != 0)
    {
        fprintf(stderr, "%s\n", usage[index]);
        index += 1;
    }
}
211 | ||
/*
** Returns a newly allocated, NUL-terminated copy of at most the first `l`
** characters of `s` (copying stops early at a NUL inside `s`).
**
** The caller owns the result and releases it with free(). Returns a null
** pointer when `l` is negative or the allocation fails.
*/
char * copy_string(char * s, int l)
{
    char * result;

    if (l < 0)
        return NULL;
    result = malloc((size_t)l + 1);
    if (result == NULL)
        return NULL;
    /* strncpy does not terminate when it copies exactly l characters, so the
    ** terminator is always written explicitly afterwards. */
    strncpy(result, s, (size_t)l);
    result[l] = 0;
    return result;
}
219 | ||
/*
** Builds the token name for the literal text `s`.
**
** Punctuation literals listed in the table below are first mapped to a
** spelled-out name (":" becomes "_colon"); any other literal is used as is.
** The name is then upper-cased and suffixed with "_t", so ":" yields
** "_COLON_t" and "actions" yields "ACTIONS_t".
**
** The caller owns the result and releases it with free(). Returns a null
** pointer when the allocation fails.
*/
char * tokenize_string(char * s)
{
    static const struct
    {
        const char * symbol;
        const char * name;
    } punctuation[] = {
        { ":",  "_colon" },
        { "!",  "_bang" },
        { "!=", "_bang_equals" },
        { "&&", "_amperamper" },
        { "&",  "_amper" },
        { "+",  "_plus" },
        { "+=", "_plus_equals" },
        { "||", "_barbar" },
        { "|",  "_bar" },
        { ";",  "_semic" },
        { "-",  "_minus" },
        { "<",  "_langle" },
        { "<=", "_langle_equals" },
        { ">",  "_rangle" },
        { ">=", "_rangle_equals" },
        { ".",  "_period" },
        { "?",  "_question" },
        { "?=", "_question_equals" },
        { "=",  "_equals" },
        { ",",  "_comma" },
        { "[",  "_lbracket" },
        { "]",  "_rbracket" },
        { "{",  "_lbrace" },
        { "}",  "_rbrace" },
        { "(",  "_lparen" },
        { ")",  "_rparen" }
    };
    const char * name = s;
    char * result;
    size_t length;
    size_t i;

    for (i = 0; i < sizeof punctuation / sizeof punctuation[0]; ++i)
    {
        if (strcmp(s, punctuation[i].symbol) == 0)
        {
            name = punctuation[i].name;
            break;
        }
    }

    length = strlen(name);
    result = malloc(length + 3);    /* name + "_t" + terminator */
    if (result == NULL)
        return NULL;
    for (i = 0; i < length; ++i)
    {
        /* toupper() is only defined for unsigned char values and EOF. */
        result[i] = (char)toupper((unsigned char)name[i]);
    }
    memcpy(result + length, "_t", 3);
    return result;
}
264 | ||
265 | int cmp_literal(const void * a, const void * b) | |
266 | { | |
267 | return strcmp(((const literal *)a)->string,((const literal *)b)->string); | |
268 | } |