ceph/src/boost/tools/build/src/engine/scan.c

   1 /*
   2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
   3  *
   4  * This file is part of Jam - see jam.c for Copyright information.
   5  */
   6
   7 /*
   8  * scan.c - the jam yacc scanner
   9  *
  10  */
  11
  12 #include "jam.h"
  13 #include "scan.h"
  14 #include "output.h"
  15
  16 #include "constants.h"
  17 #include "jambase.h"
  18 #include "jamgram.h"
  19
  20
  21 struct keyword
  22 {
  23     char * word;
  24     int    type;
  25 } keywords[] =
  26 {
  27 #include "jamgramtab.h"
  28     { 0, 0 }
  29 };
  30
  31 typedef struct include include;
  32 struct include
  33 {
  34     include   * next;        /* next serial include file */
  35     char      * string;      /* pointer into current line */
  36     char    * * strings;     /* for yyfparse() -- text to parse */
  37     LISTITER    pos;         /* for yysparse() -- text to parse */
  38     LIST      * list;        /* for yysparse() -- text to parse */
  39     FILE      * file;        /* for yyfparse() -- file being read */
  40     OBJECT    * fname;       /* for yyfparse() -- file name */
  41     int         line;        /* line counter for error messages */
  42     char        buf[ 512 ];  /* for yyfparse() -- line buffer */
  43 };
  44
/* Input currently being scanned (head of the include chain); 0 when there is
 * nothing left to read.
 */
static include * incp = 0;

/* Current scanner mode; changed only through yymode(). */
static int scanmode = SCAN_NORMAL;
/* Number of errors reported through yyerror() so far. */
static int anyerrors = 0;


/* Formats a token for diagnostics; defined near the end of this file. */
static char * symdump( YYSTYPE * );

/* Size of the token buffer in yylex(): no single token can be larger. */
#define BIGGEST_TOKEN 10240
  54
  55
  56 /*
  57  * Set parser mode: normal, string, or keyword.
  58  */
  59
  60 void yymode( int n )
  61 {
  62     scanmode = n;
  63 }
  64
  65
  66 void yyerror( char const * s )
  67 {
  68     /* We use yylval instead of incp to access the error location information as
  69      * the incp pointer will already be reset to 0 in case the error occurred at
  70      * EOF.
  71      *
  72      * The two may differ only if ran into an unexpected EOF or we get an error
  73      * while reading a lexical token spanning multiple lines, e.g. a multi-line
  74      * string literal or action body, in which case yylval location information
  75      * will hold the information about where the token started while incp will
  76      * hold the information about where reading it broke.
  77      */
  78     out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
  79             symdump( &yylval ) );
  80     ++anyerrors;
  81 }
  82
  83
  84 int yyanyerrors()
  85 {
  86     return anyerrors != 0;
  87 }
  88
  89
  90 void yyfparse( OBJECT * s )
  91 {
  92     include * i = (include *)BJAM_MALLOC( sizeof( *i ) );
  93
  94     /* Push this onto the incp chain. */
  95     i->string = "";
  96     i->strings = 0;
  97     i->file = 0;
  98     i->fname = object_copy( s );
  99     i->line = 0;
 100     i->next = incp;
 101     incp = i;
 102
 103     /* If the filename is "+", it means use the internal jambase. */
 104     if ( !strcmp( object_str( s ), "+" ) )
 105         i->strings = jambase;
 106 }
 107
 108
 109 void yysparse( OBJECT * name, const char * * lines )
 110 {
 111     yyfparse( name );
 112     incp->strings = (char * *)lines;
 113 }
 114
 115
 116 /*
 117  * yyfdone() - cleanup after we're done parsing a file.
 118  */
 119 void yyfdone( void )
 120 {
 121     include * const i = incp;
 122     incp = i->next;
 123
 124     /* Close file, free name. */
 125     if(i->file && (i->file != stdin))
 126         fclose(i->file);
 127     object_free(i->fname);
 128     BJAM_FREE((char *)i);
 129 }
 130
 131
 132 /*
 133  * yyline() - read new line and return first character.
 134  *
 135  * Fabricates a continuous stream of characters across include files, returning
 136  * EOF at the bitter end.
 137  */
 138
 139 int yyline()
 140 {
 141     include * const i = incp;
 142
 143     if ( !incp )
 144         return EOF;
 145
 146     /* Once we start reading from the input stream, we reset the include
 147      * insertion point so that the next include file becomes the head of the
 148      * list.
 149      */
 150
 151     /* If there is more data in this line, return it. */
 152     if ( *i->string )
 153         return *i->string++;
 154
 155     /* If we are reading from an internal string list, go to the next string. */
 156     if ( i->strings )
 157     {
 158         if ( *i->strings )
 159         {
 160             ++i->line;
 161             i->string = *(i->strings++);
 162             return *i->string++;
 163         }
 164     }
 165     else
 166     {
 167         /* If necessary, open the file. */
 168         if ( !i->file )
 169         {
 170             FILE * f = stdin;
 171             if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
 172                 perror( object_str( i->fname ) );
 173             i->file = f;
 174         }
 175
 176         /* If there is another line in this file, start it. */
 177         if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
 178         {
 179             ++i->line;
 180             i->string = i->buf;
 181             return *i->string++;
 182         }
 183     }
 184
 185     /* This include is done. Return EOF so yyparse() returns to
 186      * parse_file().
 187      */
 188
 189     return EOF;
 190 }
 191
 192
 193 /*
 194  * yylex() - set yylval to current token; return its type.
 195  *
 196  * Macros to move things along:
 197  *
 198  *  yychar() - return and advance character; invalid after EOF.
 199  *  yyprev() - back up one character; invalid before yychar().
 200  *
 201  * yychar() returns a continuous stream of characters, until it hits the EOF of
 202  * the current include file.
 203  */
 204
 205 #define yychar() ( *incp->string ? *incp->string++ : yyline() )
 206 #define yyprev() ( incp->string-- )
 207
 208 int yylex()
 209 {
 210     int c;
 211     char buf[ BIGGEST_TOKEN ];
 212     char * b = buf;
 213
 214     if ( !incp )
 215         goto eof;
 216
 217     /* Get first character (whitespace or of token). */
 218     c = yychar();
 219
 220     if ( scanmode == SCAN_STRING )
 221     {
 222         /* If scanning for a string (action's {}'s), look for the closing brace.
 223          * We handle matching braces, if they match.
 224          */
 225
 226         int nest = 1;
 227
 228         while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
 229         {
 230             if ( c == '{' )
 231                 ++nest;
 232
 233             if ( ( c == '}' ) && !--nest )
 234                 break;
 235
 236             *b++ = c;
 237
 238             c = yychar();
 239
 240             /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
 241             if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
 242                 --b;
 243         }
 244
 245         /* We ate the ending brace -- regurgitate it. */
 246         if ( c != EOF )
 247             yyprev();
 248
 249         /* Check for obvious errors. */
 250         if ( b == buf + sizeof( buf ) )
 251         {
 252             yyerror( "action block too big" );
 253             goto eof;
 254         }
 255
 256         if ( nest )
 257         {
 258             yyerror( "unmatched {} in action block" );
 259             goto eof;
 260         }
 261
 262         *b = 0;
 263         yylval.type = STRING;
 264         yylval.string = object_new( buf );
 265         yylval.file = incp->fname;
 266         yylval.line = incp->line;
 267     }
 268     else
 269     {
 270         char * b = buf;
 271         struct keyword * k;
 272         int inquote = 0;
 273         int notkeyword;
 274
 275         /* Eat white space. */
 276         for ( ; ; )
 277         {
 278             /* Skip past white space. */
 279             while ( ( c != EOF ) && isspace( c ) )
 280                 c = yychar();
 281
 282             /* Not a comment? */
 283             if ( c != '#' )
 284                 break;
 285
 286             c = yychar();
 287             if ( ( c != EOF ) && c == '|' )
 288             {
 289                 /* Swallow up block comment. */
 290                 int c0 = yychar();
 291                 int c1 = yychar();
 292                 while ( ! ( c0 == '|' && c1 == '#' ) && ( c0 != EOF && c1 != EOF ) )
 293                 {
 294                     c0 = c1;
 295                     c1 = yychar();
 296                 }
 297                 c = c1;
 298             }
 299             else
 300             {
 301                 /* Swallow up comment line. */
 302                 while ( ( c != EOF ) && ( c != '\n' ) ) c = yychar();
 303             }
 304         }
 305
 306         /* c now points to the first character of a token. */
 307         if ( c == EOF )
 308             goto eof;
 309
 310         yylval.file = incp->fname;
 311         yylval.line = incp->line;
 312
 313         /* While scanning the word, disqualify it for (expensive) keyword lookup
 314          * when we can: $anything, "anything", \anything
 315          */
 316         notkeyword = c == '$';
 317
 318         /* Look for white space to delimit word. "'s get stripped but preserve
 319          * white space. \ protects next character.
 320          */
 321         while
 322         (
 323             ( c != EOF ) &&
 324             ( b < buf + sizeof( buf ) ) &&
 325             ( inquote || !isspace( c ) )
 326         )
 327         {
 328             if ( c == '"' )
 329             {
 330                 /* begin or end " */
 331                 inquote = !inquote;
 332                 notkeyword = 1;
 333             }
 334             else if ( c != '\\' )
 335             {
 336                 /* normal char */
 337                 *b++ = c;
 338             }
 339             else if ( ( c = yychar() ) != EOF )
 340             {
 341                 /* \c */
 342                 if (c == 'n')
 343                     c = '\n';
 344                 else if (c == 'r')
 345                     c = '\r';
 346                 else if (c == 't')
 347                     c = '\t';
 348                 *b++ = c;
 349                 notkeyword = 1;
 350             }
 351             else
 352             {
 353                 /* \EOF */
 354                 break;
 355             }
 356
 357             c = yychar();
 358         }
 359
 360         /* Check obvious errors. */
 361         if ( b == buf + sizeof( buf ) )
 362         {
 363             yyerror( "string too big" );
 364             goto eof;
 365         }
 366
 367         if ( inquote )
 368         {
 369             yyerror( "unmatched \" in string" );
 370             goto eof;
 371         }
 372
 373         /* We looked ahead a character - back up. */
 374         if ( c != EOF )
 375             yyprev();
 376
 377         /* Scan token table. Do not scan if it is obviously not a keyword or if
 378          * it is an alphabetic when were looking for punctuation.
 379          */
 380
 381         *b = 0;
 382         yylval.type = ARG;
 383
 384         if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT ) ) )
 385             for ( k = keywords; k->word; ++k )
 386                 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
 387                 {
 388                     yylval.type = k->type;
 389                     yylval.keyword = k->word;  /* used by symdump */
 390                     break;
 391                 }
 392
 393         if ( yylval.type == ARG )
 394             yylval.string = object_new( buf );
 395     }
 396
 397     if ( DEBUG_SCAN )
 398         out_printf( "scan %s\n", symdump( &yylval ) );
 399
 400     return yylval.type;
 401
 402 eof:
 403     /* We do not reset yylval.file & yylval.line here so unexpected EOF error
 404      * messages would include correct error location information.
 405      */
 406     yylval.type = EOF;
 407     return yylval.type;
 408 }
 409
 410
 411 static char * symdump( YYSTYPE * s )
 412 {
 413     static char buf[ BIGGEST_TOKEN + 20 ];
 414     switch ( s->type )
 415     {
 416         case EOF   : sprintf( buf, "EOF"                                        ); break;
 417         case 0     : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break;
 418         case ARG   : sprintf( buf, "argument %s"      , object_str( s->string ) ); break;
 419         case STRING: sprintf( buf, "string \"%s\""    , object_str( s->string ) ); break;
 420         default    : sprintf( buf, "keyword %s"       , s->keyword              ); break;
 421     }
 422     return buf;
 423 }
 424
 425
 426 /*
 427  * Get information about the current file and line, for those epsilon
 428  * transitions that produce a parse.
 429  */
 430
 431 void yyinput_last_read_token( OBJECT * * name, int * line )
 432 {
 433     /* TODO: Consider whether and when we might want to report where the last
 434      * read token ended, e.g. EOF errors inside string literals.
 435      */
 436     *name = yylval.file;
 437     *line = yylval.line;
 438 }