]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc. | |
3 | * | |
4 | * This file is part of Jam - see jam.c for Copyright information. | |
5 | */ | |
6 | ||
7 | /* | |
8 | * scan.c - the jam yacc scanner | |
9 | * | |
10 | */ | |
11 | ||
12 | #include "jam.h" | |
13 | #include "scan.h" | |
14 | #include "output.h" | |
15 | ||
16 | #include "constants.h" | |
17 | #include "jambase.h" | |
18 | #include "jamgram.h" | |
19 | ||
20 | ||
21 | struct keyword | |
22 | { | |
23 | char * word; | |
24 | int type; | |
25 | } keywords[] = | |
26 | { | |
27 | #include "jamgramtab.h" | |
28 | { 0, 0 } | |
29 | }; | |
30 | ||
31 | typedef struct include include; | |
32 | struct include | |
33 | { | |
34 | include * next; /* next serial include file */ | |
35 | char * string; /* pointer into current line */ | |
36 | char * * strings; /* for yyfparse() -- text to parse */ | |
b32b8144 FG |
37 | LISTITER pos; /* for yysparse() -- text to parse */ |
38 | LIST * list; /* for yysparse() -- text to parse */ | |
7c673cae FG |
39 | FILE * file; /* for yyfparse() -- file being read */ |
40 | OBJECT * fname; /* for yyfparse() -- file name */ | |
41 | int line; /* line counter for error messages */ | |
42 | char buf[ 512 ]; /* for yyfparse() -- line buffer */ | |
43 | }; | |
44 | ||
45 | static include * incp = 0; /* current file; head of chain */ | |
46 | ||
47 | static int scanmode = SCAN_NORMAL; | |
48 | static int anyerrors = 0; | |
49 | ||
50 | ||
51 | static char * symdump( YYSTYPE * ); | |
52 | ||
53 | #define BIGGEST_TOKEN 10240 /* no single token can be larger */ | |
54 | ||
55 | ||
56 | /* | |
57 | * Set parser mode: normal, string, or keyword. | |
58 | */ | |
59 | ||
60 | void yymode( int n ) | |
61 | { | |
62 | scanmode = n; | |
63 | } | |
64 | ||
65 | ||
66 | void yyerror( char const * s ) | |
67 | { | |
68 | /* We use yylval instead of incp to access the error location information as | |
69 | * the incp pointer will already be reset to 0 in case the error occurred at | |
70 | * EOF. | |
71 | * | |
72 | * The two may differ only if ran into an unexpected EOF or we get an error | |
73 | * while reading a lexical token spanning multiple lines, e.g. a multi-line | |
74 | * string literal or action body, in which case yylval location information | |
75 | * will hold the information about where the token started while incp will | |
76 | * hold the information about where reading it broke. | |
77 | */ | |
78 | out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s, | |
79 | symdump( &yylval ) ); | |
80 | ++anyerrors; | |
81 | } | |
82 | ||
83 | ||
84 | int yyanyerrors() | |
85 | { | |
86 | return anyerrors != 0; | |
87 | } | |
88 | ||
89 | ||
90 | void yyfparse( OBJECT * s ) | |
91 | { | |
92 | include * i = (include *)BJAM_MALLOC( sizeof( *i ) ); | |
93 | ||
94 | /* Push this onto the incp chain. */ | |
95 | i->string = ""; | |
96 | i->strings = 0; | |
97 | i->file = 0; | |
98 | i->fname = object_copy( s ); | |
99 | i->line = 0; | |
100 | i->next = incp; | |
101 | incp = i; | |
102 | ||
103 | /* If the filename is "+", it means use the internal jambase. */ | |
104 | if ( !strcmp( object_str( s ), "+" ) ) | |
105 | i->strings = jambase; | |
106 | } | |
107 | ||
108 | ||
b32b8144 FG |
109 | void yysparse( OBJECT * name, const char * * lines ) |
110 | { | |
111 | yyfparse( name ); | |
112 | incp->strings = (char * *)lines; | |
113 | } | |
114 | ||
115 | ||
7c673cae FG |
116 | /* |
117 | * yyfdone() - cleanup after we're done parsing a file. | |
118 | */ | |
119 | void yyfdone( void ) | |
120 | { | |
121 | include * const i = incp; | |
122 | incp = i->next; | |
123 | ||
124 | /* Close file, free name. */ | |
125 | if(i->file && (i->file != stdin)) | |
126 | fclose(i->file); | |
127 | object_free(i->fname); | |
128 | BJAM_FREE((char *)i); | |
129 | } | |
130 | ||
131 | ||
132 | /* | |
133 | * yyline() - read new line and return first character. | |
134 | * | |
135 | * Fabricates a continuous stream of characters across include files, returning | |
136 | * EOF at the bitter end. | |
137 | */ | |
138 | ||
139 | int yyline() | |
140 | { | |
141 | include * const i = incp; | |
142 | ||
143 | if ( !incp ) | |
144 | return EOF; | |
145 | ||
146 | /* Once we start reading from the input stream, we reset the include | |
147 | * insertion point so that the next include file becomes the head of the | |
148 | * list. | |
149 | */ | |
150 | ||
151 | /* If there is more data in this line, return it. */ | |
152 | if ( *i->string ) | |
153 | return *i->string++; | |
154 | ||
155 | /* If we are reading from an internal string list, go to the next string. */ | |
156 | if ( i->strings ) | |
157 | { | |
158 | if ( *i->strings ) | |
159 | { | |
160 | ++i->line; | |
161 | i->string = *(i->strings++); | |
162 | return *i->string++; | |
163 | } | |
164 | } | |
165 | else | |
166 | { | |
167 | /* If necessary, open the file. */ | |
168 | if ( !i->file ) | |
169 | { | |
170 | FILE * f = stdin; | |
171 | if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) ) | |
172 | perror( object_str( i->fname ) ); | |
173 | i->file = f; | |
174 | } | |
175 | ||
176 | /* If there is another line in this file, start it. */ | |
177 | if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) ) | |
178 | { | |
179 | ++i->line; | |
180 | i->string = i->buf; | |
181 | return *i->string++; | |
182 | } | |
183 | } | |
184 | ||
185 | /* This include is done. Return EOF so yyparse() returns to | |
186 | * parse_file(). | |
187 | */ | |
188 | ||
189 | return EOF; | |
190 | } | |
191 | ||
192 | ||
193 | /* | |
194 | * yylex() - set yylval to current token; return its type. | |
195 | * | |
196 | * Macros to move things along: | |
197 | * | |
198 | * yychar() - return and advance character; invalid after EOF. | |
199 | * yyprev() - back up one character; invalid before yychar(). | |
200 | * | |
201 | * yychar() returns a continuous stream of characters, until it hits the EOF of | |
202 | * the current include file. | |
203 | */ | |
204 | ||
205 | #define yychar() ( *incp->string ? *incp->string++ : yyline() ) | |
206 | #define yyprev() ( incp->string-- ) | |
207 | ||
208 | int yylex() | |
209 | { | |
210 | int c; | |
211 | char buf[ BIGGEST_TOKEN ]; | |
212 | char * b = buf; | |
213 | ||
214 | if ( !incp ) | |
215 | goto eof; | |
216 | ||
217 | /* Get first character (whitespace or of token). */ | |
218 | c = yychar(); | |
219 | ||
220 | if ( scanmode == SCAN_STRING ) | |
221 | { | |
222 | /* If scanning for a string (action's {}'s), look for the closing brace. | |
223 | * We handle matching braces, if they match. | |
224 | */ | |
225 | ||
226 | int nest = 1; | |
227 | ||
228 | while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) ) | |
229 | { | |
230 | if ( c == '{' ) | |
231 | ++nest; | |
232 | ||
233 | if ( ( c == '}' ) && !--nest ) | |
234 | break; | |
235 | ||
236 | *b++ = c; | |
237 | ||
238 | c = yychar(); | |
239 | ||
240 | /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */ | |
241 | if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) ) | |
242 | --b; | |
243 | } | |
244 | ||
245 | /* We ate the ending brace -- regurgitate it. */ | |
246 | if ( c != EOF ) | |
247 | yyprev(); | |
248 | ||
249 | /* Check for obvious errors. */ | |
250 | if ( b == buf + sizeof( buf ) ) | |
251 | { | |
252 | yyerror( "action block too big" ); | |
253 | goto eof; | |
254 | } | |
255 | ||
256 | if ( nest ) | |
257 | { | |
258 | yyerror( "unmatched {} in action block" ); | |
259 | goto eof; | |
260 | } | |
261 | ||
262 | *b = 0; | |
263 | yylval.type = STRING; | |
264 | yylval.string = object_new( buf ); | |
265 | yylval.file = incp->fname; | |
266 | yylval.line = incp->line; | |
267 | } | |
268 | else | |
269 | { | |
270 | char * b = buf; | |
271 | struct keyword * k; | |
272 | int inquote = 0; | |
273 | int notkeyword; | |
274 | ||
275 | /* Eat white space. */ | |
276 | for ( ; ; ) | |
277 | { | |
278 | /* Skip past white space. */ | |
279 | while ( ( c != EOF ) && isspace( c ) ) | |
280 | c = yychar(); | |
281 | ||
282 | /* Not a comment? */ | |
283 | if ( c != '#' ) | |
284 | break; | |
285 | ||
b32b8144 FG |
286 | c = yychar(); |
287 | if ( ( c != EOF ) && c == '|' ) | |
288 | { | |
289 | /* Swallow up block comment. */ | |
290 | int c0 = yychar(); | |
291 | int c1 = yychar(); | |
292 | while ( ! ( c0 == '|' && c1 == '#' ) && ( c0 != EOF && c1 != EOF ) ) | |
293 | { | |
294 | c0 = c1; | |
295 | c1 = yychar(); | |
296 | } | |
297 | c = c1; | |
298 | } | |
299 | else | |
300 | { | |
301 | /* Swallow up comment line. */ | |
302 | while ( ( c != EOF ) && ( c != '\n' ) ) c = yychar(); | |
303 | } | |
7c673cae FG |
304 | } |
305 | ||
306 | /* c now points to the first character of a token. */ | |
307 | if ( c == EOF ) | |
308 | goto eof; | |
309 | ||
310 | yylval.file = incp->fname; | |
311 | yylval.line = incp->line; | |
312 | ||
313 | /* While scanning the word, disqualify it for (expensive) keyword lookup | |
314 | * when we can: $anything, "anything", \anything | |
315 | */ | |
316 | notkeyword = c == '$'; | |
317 | ||
318 | /* Look for white space to delimit word. "'s get stripped but preserve | |
319 | * white space. \ protects next character. | |
320 | */ | |
321 | while | |
322 | ( | |
323 | ( c != EOF ) && | |
324 | ( b < buf + sizeof( buf ) ) && | |
325 | ( inquote || !isspace( c ) ) | |
326 | ) | |
327 | { | |
328 | if ( c == '"' ) | |
329 | { | |
330 | /* begin or end " */ | |
331 | inquote = !inquote; | |
332 | notkeyword = 1; | |
333 | } | |
334 | else if ( c != '\\' ) | |
335 | { | |
336 | /* normal char */ | |
337 | *b++ = c; | |
338 | } | |
339 | else if ( ( c = yychar() ) != EOF ) | |
340 | { | |
341 | /* \c */ | |
342 | if (c == 'n') | |
343 | c = '\n'; | |
344 | else if (c == 'r') | |
345 | c = '\r'; | |
346 | else if (c == 't') | |
347 | c = '\t'; | |
348 | *b++ = c; | |
349 | notkeyword = 1; | |
350 | } | |
351 | else | |
352 | { | |
353 | /* \EOF */ | |
354 | break; | |
355 | } | |
356 | ||
357 | c = yychar(); | |
358 | } | |
359 | ||
360 | /* Check obvious errors. */ | |
361 | if ( b == buf + sizeof( buf ) ) | |
362 | { | |
363 | yyerror( "string too big" ); | |
364 | goto eof; | |
365 | } | |
366 | ||
367 | if ( inquote ) | |
368 | { | |
369 | yyerror( "unmatched \" in string" ); | |
370 | goto eof; | |
371 | } | |
372 | ||
373 | /* We looked ahead a character - back up. */ | |
374 | if ( c != EOF ) | |
375 | yyprev(); | |
376 | ||
377 | /* Scan token table. Do not scan if it is obviously not a keyword or if | |
378 | * it is an alphabetic when were looking for punctuation. | |
379 | */ | |
380 | ||
381 | *b = 0; | |
382 | yylval.type = ARG; | |
383 | ||
384 | if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) ) | |
385 | for ( k = keywords; k->word; ++k ) | |
386 | if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) ) | |
387 | { | |
388 | yylval.type = k->type; | |
389 | yylval.keyword = k->word; /* used by symdump */ | |
390 | break; | |
391 | } | |
392 | ||
393 | if ( yylval.type == ARG ) | |
394 | yylval.string = object_new( buf ); | |
395 | } | |
396 | ||
397 | if ( DEBUG_SCAN ) | |
398 | out_printf( "scan %s\n", symdump( &yylval ) ); | |
399 | ||
400 | return yylval.type; | |
401 | ||
402 | eof: | |
403 | /* We do not reset yylval.file & yylval.line here so unexpected EOF error | |
404 | * messages would include correct error location information. | |
405 | */ | |
406 | yylval.type = EOF; | |
407 | return yylval.type; | |
408 | } | |
409 | ||
410 | ||
411 | static char * symdump( YYSTYPE * s ) | |
412 | { | |
413 | static char buf[ BIGGEST_TOKEN + 20 ]; | |
414 | switch ( s->type ) | |
415 | { | |
416 | case EOF : sprintf( buf, "EOF" ); break; | |
417 | case 0 : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break; | |
418 | case ARG : sprintf( buf, "argument %s" , object_str( s->string ) ); break; | |
419 | case STRING: sprintf( buf, "string \"%s\"" , object_str( s->string ) ); break; | |
420 | default : sprintf( buf, "keyword %s" , s->keyword ); break; | |
421 | } | |
422 | return buf; | |
423 | } | |
424 | ||
425 | ||
426 | /* | |
427 | * Get information about the current file and line, for those epsilon | |
428 | * transitions that produce a parse. | |
429 | */ | |
430 | ||
431 | void yyinput_last_read_token( OBJECT * * name, int * line ) | |
432 | { | |
433 | /* TODO: Consider whether and when we might want to report where the last | |
434 | * read token ended, e.g. EOF errors inside string literals. | |
435 | */ | |
436 | *name = yylval.file; | |
437 | *line = yylval.line; | |
438 | } |