]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. | |
3 | * | |
4 | * This file is part of Jam - see jam.c for Copyright information. | |
5 | */ | |
6 | ||
7 | /* | |
8 | * scan.c - the jam yacc scanner | |
9 | * | |
10 | */ | |
11 | ||
12 | #include "jam.h" | |
13 | #include "scan.h" | |
14 | #include "output.h" | |
15 | ||
16 | #include "constants.h" | |
17 | #include "jambase.h" | |
18 | #include "jamgram.h" | |
19 | ||
20 | ||
21 | struct keyword | |
22 | { | |
23 | char * word; | |
24 | int type; | |
25 | } keywords[] = | |
26 | { | |
27 | #include "jamgramtab.h" | |
28 | { 0, 0 } | |
29 | }; | |
30 | ||
31 | typedef struct include include; | |
32 | struct include | |
33 | { | |
34 | include * next; /* next serial include file */ | |
35 | char * string; /* pointer into current line */ | |
36 | char * * strings; /* for yyfparse() -- text to parse */ | |
37 | FILE * file; /* for yyfparse() -- file being read */ | |
38 | OBJECT * fname; /* for yyfparse() -- file name */ | |
39 | int line; /* line counter for error messages */ | |
40 | char buf[ 512 ]; /* for yyfparse() -- line buffer */ | |
41 | }; | |
42 | ||
43 | static include * incp = 0; /* current file; head of chain */ | |
44 | ||
45 | static int scanmode = SCAN_NORMAL; | |
46 | static int anyerrors = 0; | |
47 | ||
48 | ||
49 | static char * symdump( YYSTYPE * ); | |
50 | ||
51 | #define BIGGEST_TOKEN 10240 /* no single token can be larger */ | |
52 | ||
53 | ||
54 | /* | |
55 | * Set parser mode: normal, string, or keyword. | |
56 | */ | |
57 | ||
58 | void yymode( int n ) | |
59 | { | |
60 | scanmode = n; | |
61 | } | |
62 | ||
63 | ||
64 | void yyerror( char const * s ) | |
65 | { | |
66 | /* We use yylval instead of incp to access the error location information as | |
67 | * the incp pointer will already be reset to 0 in case the error occurred at | |
68 | * EOF. | |
69 | * | |
70 | * The two may differ only if ran into an unexpected EOF or we get an error | |
71 | * while reading a lexical token spanning multiple lines, e.g. a multi-line | |
72 | * string literal or action body, in which case yylval location information | |
73 | * will hold the information about where the token started while incp will | |
74 | * hold the information about where reading it broke. | |
75 | */ | |
76 | out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s, | |
77 | symdump( &yylval ) ); | |
78 | ++anyerrors; | |
79 | } | |
80 | ||
81 | ||
82 | int yyanyerrors() | |
83 | { | |
84 | return anyerrors != 0; | |
85 | } | |
86 | ||
87 | ||
88 | void yyfparse( OBJECT * s ) | |
89 | { | |
90 | include * i = (include *)BJAM_MALLOC( sizeof( *i ) ); | |
91 | ||
92 | /* Push this onto the incp chain. */ | |
93 | i->string = ""; | |
94 | i->strings = 0; | |
95 | i->file = 0; | |
96 | i->fname = object_copy( s ); | |
97 | i->line = 0; | |
98 | i->next = incp; | |
99 | incp = i; | |
100 | ||
101 | /* If the filename is "+", it means use the internal jambase. */ | |
102 | if ( !strcmp( object_str( s ), "+" ) ) | |
103 | i->strings = jambase; | |
104 | } | |
105 | ||
106 | ||
107 | /* | |
108 | * yyfdone() - cleanup after we're done parsing a file. | |
109 | */ | |
110 | void yyfdone( void ) | |
111 | { | |
112 | include * const i = incp; | |
113 | incp = i->next; | |
114 | ||
115 | /* Close file, free name. */ | |
116 | if(i->file && (i->file != stdin)) | |
117 | fclose(i->file); | |
118 | object_free(i->fname); | |
119 | BJAM_FREE((char *)i); | |
120 | } | |
121 | ||
122 | ||
123 | /* | |
124 | * yyline() - read new line and return first character. | |
125 | * | |
126 | * Fabricates a continuous stream of characters across include files, returning | |
127 | * EOF at the bitter end. | |
128 | */ | |
129 | ||
130 | int yyline() | |
131 | { | |
132 | include * const i = incp; | |
133 | ||
134 | if ( !incp ) | |
135 | return EOF; | |
136 | ||
137 | /* Once we start reading from the input stream, we reset the include | |
138 | * insertion point so that the next include file becomes the head of the | |
139 | * list. | |
140 | */ | |
141 | ||
142 | /* If there is more data in this line, return it. */ | |
143 | if ( *i->string ) | |
144 | return *i->string++; | |
145 | ||
146 | /* If we are reading from an internal string list, go to the next string. */ | |
147 | if ( i->strings ) | |
148 | { | |
149 | if ( *i->strings ) | |
150 | { | |
151 | ++i->line; | |
152 | i->string = *(i->strings++); | |
153 | return *i->string++; | |
154 | } | |
155 | } | |
156 | else | |
157 | { | |
158 | /* If necessary, open the file. */ | |
159 | if ( !i->file ) | |
160 | { | |
161 | FILE * f = stdin; | |
162 | if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) ) | |
163 | perror( object_str( i->fname ) ); | |
164 | i->file = f; | |
165 | } | |
166 | ||
167 | /* If there is another line in this file, start it. */ | |
168 | if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) | |
169 | { | |
170 | ++i->line; | |
171 | i->string = i->buf; | |
172 | return *i->string++; | |
173 | } | |
174 | } | |
175 | ||
176 | /* This include is done. Return EOF so yyparse() returns to | |
177 | * parse_file(). | |
178 | */ | |
179 | ||
180 | return EOF; | |
181 | } | |
182 | ||
183 | ||
184 | /* | |
185 | * yylex() - set yylval to current token; return its type. | |
186 | * | |
187 | * Macros to move things along: | |
188 | * | |
189 | * yychar() - return and advance character; invalid after EOF. | |
190 | * yyprev() - back up one character; invalid before yychar(). | |
191 | * | |
192 | * yychar() returns a continuous stream of characters, until it hits the EOF of | |
193 | * the current include file. | |
194 | */ | |
195 | ||
196 | #define yychar() ( *incp->string ? *incp->string++ : yyline() ) | |
197 | #define yyprev() ( incp->string-- ) | |
198 | ||
199 | int yylex() | |
200 | { | |
201 | int c; | |
202 | char buf[ BIGGEST_TOKEN ]; | |
203 | char * b = buf; | |
204 | ||
205 | if ( !incp ) | |
206 | goto eof; | |
207 | ||
208 | /* Get first character (whitespace or of token). */ | |
209 | c = yychar(); | |
210 | ||
211 | if ( scanmode == SCAN_STRING ) | |
212 | { | |
213 | /* If scanning for a string (action's {}'s), look for the closing brace. | |
214 | * We handle matching braces, if they match. | |
215 | */ | |
216 | ||
217 | int nest = 1; | |
218 | ||
219 | while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) ) | |
220 | { | |
221 | if ( c == '{' ) | |
222 | ++nest; | |
223 | ||
224 | if ( ( c == '}' ) && !--nest ) | |
225 | break; | |
226 | ||
227 | *b++ = c; | |
228 | ||
229 | c = yychar(); | |
230 | ||
231 | /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */ | |
232 | if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) ) | |
233 | --b; | |
234 | } | |
235 | ||
236 | /* We ate the ending brace -- regurgitate it. */ | |
237 | if ( c != EOF ) | |
238 | yyprev(); | |
239 | ||
240 | /* Check for obvious errors. */ | |
241 | if ( b == buf + sizeof( buf ) ) | |
242 | { | |
243 | yyerror( "action block too big" ); | |
244 | goto eof; | |
245 | } | |
246 | ||
247 | if ( nest ) | |
248 | { | |
249 | yyerror( "unmatched {} in action block" ); | |
250 | goto eof; | |
251 | } | |
252 | ||
253 | *b = 0; | |
254 | yylval.type = STRING; | |
255 | yylval.string = object_new( buf ); | |
256 | yylval.file = incp->fname; | |
257 | yylval.line = incp->line; | |
258 | } | |
259 | else | |
260 | { | |
261 | char * b = buf; | |
262 | struct keyword * k; | |
263 | int inquote = 0; | |
264 | int notkeyword; | |
265 | ||
266 | /* Eat white space. */ | |
267 | for ( ; ; ) | |
268 | { | |
269 | /* Skip past white space. */ | |
270 | while ( ( c != EOF ) && isspace( c ) ) | |
271 | c = yychar(); | |
272 | ||
273 | /* Not a comment? */ | |
274 | if ( c != '#' ) | |
275 | break; | |
276 | ||
277 | /* Swallow up comment line. */ | |
278 | while ( ( ( c = yychar() ) != EOF ) && ( c != '\n' ) ) ; | |
279 | } | |
280 | ||
281 | /* c now points to the first character of a token. */ | |
282 | if ( c == EOF ) | |
283 | goto eof; | |
284 | ||
285 | yylval.file = incp->fname; | |
286 | yylval.line = incp->line; | |
287 | ||
288 | /* While scanning the word, disqualify it for (expensive) keyword lookup | |
289 | * when we can: $anything, "anything", \anything | |
290 | */ | |
291 | notkeyword = c == '$'; | |
292 | ||
293 | /* Look for white space to delimit word. "'s get stripped but preserve | |
294 | * white space. \ protects next character. | |
295 | */ | |
296 | while | |
297 | ( | |
298 | ( c != EOF ) && | |
299 | ( b < buf + sizeof( buf ) ) && | |
300 | ( inquote || !isspace( c ) ) | |
301 | ) | |
302 | { | |
303 | if ( c == '"' ) | |
304 | { | |
305 | /* begin or end " */ | |
306 | inquote = !inquote; | |
307 | notkeyword = 1; | |
308 | } | |
309 | else if ( c != '\\' ) | |
310 | { | |
311 | /* normal char */ | |
312 | *b++ = c; | |
313 | } | |
314 | else if ( ( c = yychar() ) != EOF ) | |
315 | { | |
316 | /* \c */ | |
317 | if (c == 'n') | |
318 | c = '\n'; | |
319 | else if (c == 'r') | |
320 | c = '\r'; | |
321 | else if (c == 't') | |
322 | c = '\t'; | |
323 | *b++ = c; | |
324 | notkeyword = 1; | |
325 | } | |
326 | else | |
327 | { | |
328 | /* \EOF */ | |
329 | break; | |
330 | } | |
331 | ||
332 | c = yychar(); | |
333 | } | |
334 | ||
335 | /* Check obvious errors. */ | |
336 | if ( b == buf + sizeof( buf ) ) | |
337 | { | |
338 | yyerror( "string too big" ); | |
339 | goto eof; | |
340 | } | |
341 | ||
342 | if ( inquote ) | |
343 | { | |
344 | yyerror( "unmatched \" in string" ); | |
345 | goto eof; | |
346 | } | |
347 | ||
348 | /* We looked ahead a character - back up. */ | |
349 | if ( c != EOF ) | |
350 | yyprev(); | |
351 | ||
352 | /* Scan token table. Do not scan if it is obviously not a keyword or if | |
353 | * it is an alphabetic when were looking for punctuation. | |
354 | */ | |
355 | ||
356 | *b = 0; | |
357 | yylval.type = ARG; | |
358 | ||
359 | if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) ) | |
360 | for ( k = keywords; k->word; ++k ) | |
361 | if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) ) | |
362 | { | |
363 | yylval.type = k->type; | |
364 | yylval.keyword = k->word; /* used by symdump */ | |
365 | break; | |
366 | } | |
367 | ||
368 | if ( yylval.type == ARG ) | |
369 | yylval.string = object_new( buf ); | |
370 | } | |
371 | ||
372 | if ( DEBUG_SCAN ) | |
373 | out_printf( "scan %s\n", symdump( &yylval ) ); | |
374 | ||
375 | return yylval.type; | |
376 | ||
377 | eof: | |
378 | /* We do not reset yylval.file & yylval.line here so unexpected EOF error | |
379 | * messages would include correct error location information. | |
380 | */ | |
381 | yylval.type = EOF; | |
382 | return yylval.type; | |
383 | } | |
384 | ||
385 | ||
386 | static char * symdump( YYSTYPE * s ) | |
387 | { | |
388 | static char buf[ BIGGEST_TOKEN + 20 ]; | |
389 | switch ( s->type ) | |
390 | { | |
391 | case EOF : sprintf( buf, "EOF" ); break; | |
392 | case 0 : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break; | |
393 | case ARG : sprintf( buf, "argument %s" , object_str( s->string ) ); break; | |
394 | case STRING: sprintf( buf, "string \"%s\"" , object_str( s->string ) ); break; | |
395 | default : sprintf( buf, "keyword %s" , s->keyword ); break; | |
396 | } | |
397 | return buf; | |
398 | } | |
399 | ||
400 | ||
401 | /* | |
402 | * Get information about the current file and line, for those epsilon | |
403 | * transitions that produce a parse. | |
404 | */ | |
405 | ||
406 | void yyinput_last_read_token( OBJECT * * name, int * line ) | |
407 | { | |
408 | /* TODO: Consider whether and when we might want to report where the last | |
409 | * read token ended, e.g. EOF errors inside string literals. | |
410 | */ | |
411 | *name = yylval.file; | |
412 | *line = yylval.line; | |
413 | } |