ceph/src/boost/tools/build/src/engine/scan.c

   1 /*
   2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
   3  *
   4  * This file is part of Jam - see jam.c for Copyright information.
   5  */
   6
   7 /*
   8  * scan.c - the jam yacc scanner
   9  *
  10  */
  11
  12 #include "jam.h"
  13 #include "scan.h"
  14 #include "output.h"
  15
  16 #include "constants.h"
  17 #include "jambase.h"
  18 #include "jamgram.h"
  19
  20
  21 struct keyword
  22 {
  23     char * word;
  24     int    type;
  25 } keywords[] =
  26 {
  27 #include "jamgramtab.h"
  28     { 0, 0 }
  29 };
  30
  31 typedef struct include include;
  32 struct include
  33 {
  34     include   * next;        /* next serial include file */
  35     char      * string;      /* pointer into current line */
  36     char    * * strings;     /* for yyfparse() -- text to parse */
  37     LISTITER    pos;         /* for yysparse() -- text to parse */
  38     LIST      * list;        /* for yysparse() -- text to parse */
  39     FILE      * file;        /* for yyfparse() -- file being read */
  40     OBJECT    * fname;       /* for yyfparse() -- file name */
  41     int         line;        /* line counter for error messages */
  42     char        buf[ 512 ];  /* for yyfparse() -- line buffer */
  43 };
  44
/* Head of the chain of inputs being scanned; null when there is no input.
 * yyfparse() pushes entries here and yyfdone() pops them.
 */
static include * incp = 0;  /* current file; head of chain */

/* Current scanner mode; replaced through yymode() and adjusted by yylex(). */
static int scanmode = SCAN_NORMAL;
/* Number of times yyerror() has been called. */
static int anyerrors = 0;


/* Formats a description of a token for diagnostics; defined later in this
 * file.
 */
static char * symdump( YYSTYPE * );

/* Size basis for the token buffer in yylex() and the text buffer in
 * symdump().
 */
#define BIGGEST_TOKEN 10240  /* no single token can be larger */
  54
  55
  56 /*
  57  * Set parser mode: normal, string, or keyword.
  58  */
  59
  60 int yymode( int n )
  61 {
  62     int result = scanmode;
  63     scanmode = n;
  64     return result;
  65 }
  66
  67
  68 void yyerror( char const * s )
  69 {
  70     /* We use yylval instead of incp to access the error location information as
  71      * the incp pointer will already be reset to 0 in case the error occurred at
  72      * EOF.
  73      *
  74      * The two may differ only if ran into an unexpected EOF or we get an error
  75      * while reading a lexical token spanning multiple lines, e.g. a multi-line
  76      * string literal or action body, in which case yylval location information
  77      * will hold the information about where the token started while incp will
  78      * hold the information about where reading it broke.
  79      */
  80     out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
  81             symdump( &yylval ) );
  82     ++anyerrors;
  83 }
  84
  85
  86 int yyanyerrors()
  87 {
  88     return anyerrors != 0;
  89 }
  90
  91
  92 void yyfparse( OBJECT * s )
  93 {
  94     include * i = (include *)BJAM_MALLOC( sizeof( *i ) );
  95
  96     /* Push this onto the incp chain. */
  97     i->string = "";
  98     i->strings = 0;
  99     i->file = 0;
 100     i->fname = object_copy( s );
 101     i->line = 0;
 102     i->next = incp;
 103     incp = i;
 104
 105     /* If the filename is "+", it means use the internal jambase. */
 106     if ( !strcmp( object_str( s ), "+" ) )
 107         i->strings = jambase;
 108 }
 109
 110
 111 void yysparse( OBJECT * name, const char * * lines )
 112 {
 113     yyfparse( name );
 114     incp->strings = (char * *)lines;
 115 }
 116
 117
 118 /*
 119  * yyfdone() - cleanup after we're done parsing a file.
 120  */
 121 void yyfdone( void )
 122 {
 123     include * const i = incp;
 124     incp = i->next;
 125
 126     /* Close file, free name. */
 127     if(i->file && (i->file != stdin))
 128         fclose(i->file);
 129     object_free(i->fname);
 130     BJAM_FREE((char *)i);
 131 }
 132
 133
 134 /*
 135  * yyline() - read new line and return first character.
 136  *
 137  * Fabricates a continuous stream of characters across include files, returning
 138  * EOF at the bitter end.
 139  */
 140
 141 int yyline()
 142 {
 143     include * const i = incp;
 144
 145     if ( !incp )
 146         return EOF;
 147
 148     /* Once we start reading from the input stream, we reset the include
 149      * insertion point so that the next include file becomes the head of the
 150      * list.
 151      */
 152
 153     /* If there is more data in this line, return it. */
 154     if ( *i->string )
 155         return *i->string++;
 156
 157     /* If we are reading from an internal string list, go to the next string. */
 158     if ( i->strings )
 159     {
 160         if ( *i->strings )
 161         {
 162             ++i->line;
 163             i->string = *(i->strings++);
 164             return *i->string++;
 165         }
 166     }
 167     else
 168     {
 169         /* If necessary, open the file. */
 170         if ( !i->file )
 171         {
 172             FILE * f = stdin;
 173             if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
 174                 perror( object_str( i->fname ) );
 175             i->file = f;
 176         }
 177
 178         /* If there is another line in this file, start it. */
 179         if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
 180         {
 181             ++i->line;
 182             i->string = i->buf;
 183             return *i->string++;
 184         }
 185     }
 186
 187     /* This include is done. Return EOF so yyparse() returns to
 188      * parse_file().
 189      */
 190
 191     return EOF;
 192 }
 193
 194 /* This allows us to get an extra character of lookahead.
 195  * There are a few places where we need to look ahead two
 196  * characters and yyprev only guarantees a single character
 197  * of putback.
 198  */
 199 int yypeek()
 200 {
 201     if ( *incp->string )
 202     {
 203         return *incp->string;
 204     }
 205     else if ( incp->strings )
 206     {
 207         if ( *incp->strings )
 208             return **incp->strings;
 209     }
 210     else if ( incp->file )
 211     {
 212         /* Don't bother opening the file.  yypeek is
 213          * only used in special cases and never at the
 214          * beginning of a file.
 215          */
 216         int ch = fgetc( incp->file );
 217         if ( ch != EOF )
 218             ungetc( ch, incp->file );
 219         return ch;
 220     }
 221     return EOF;
 222 }
 223
 224 /*
 225  * yylex() - set yylval to current token; return its type.
 226  *
 227  * Macros to move things along:
 228  *
 229  *  yychar() - return and advance character; invalid after EOF.
 230  *  yyprev() - back up one character; invalid before yychar().
 231  *
 232  * yychar() returns a continuous stream of characters, until it hits the EOF of
 233  * the current include file.
 234  */
 235
 236 #define yychar() ( *incp->string ? *incp->string++ : yyline() )
 237 #define yyprev() ( incp->string-- )
 238
 239 static int use_new_scanner = 0;
 240 static int expect_whitespace;
 241
 242 #define yystartkeyword() if(use_new_scanner) break; else token_warning()
 243 #define yyendkeyword() if(use_new_scanner) break; else if ( 1 ) { expect_whitespace = 1; continue; } else (void)0
 244
 245 void do_token_warning()
 246 {
 247     out_printf( "%s:%d: %s %s\n", object_str( yylval.file ), yylval.line, "Unescaped special character in",
 248             symdump( &yylval ) );
 249 }
 250
 251 #define token_warning() has_token_warning = 1
 252
 253 int yylex()
 254 {
 255     int c;
 256     char buf[ BIGGEST_TOKEN ];
 257     char * b = buf;
 258
 259     if ( !incp )
 260         goto eof;
 261
 262     /* Get first character (whitespace or of token). */
 263     c = yychar();
 264
 265     if ( scanmode == SCAN_STRING )
 266     {
 267         /* If scanning for a string (action's {}'s), look for the closing brace.
 268          * We handle matching braces, if they match.
 269          */
 270
 271         int nest = 1;
 272
 273         while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
 274         {
 275             if ( c == '{' )
 276                 ++nest;
 277
 278             if ( ( c == '}' ) && !--nest )
 279                 break;
 280
 281             *b++ = c;
 282
 283             c = yychar();
 284
 285             /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
 286             if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
 287                 --b;
 288         }
 289
 290         /* We ate the ending brace -- regurgitate it. */
 291         if ( c != EOF )
 292             yyprev();
 293
 294         /* Check for obvious errors. */
 295         if ( b == buf + sizeof( buf ) )
 296         {
 297             yyerror( "action block too big" );
 298             goto eof;
 299         }
 300
 301         if ( nest )
 302         {
 303             yyerror( "unmatched {} in action block" );
 304             goto eof;
 305         }
 306
 307         *b = 0;
 308         yylval.type = STRING;
 309         yylval.string = object_new( buf );
 310         yylval.file = incp->fname;
 311         yylval.line = incp->line;
 312     }
 313     else
 314     {
 315         char * b = buf;
 316         struct keyword * k;
 317         int inquote = 0;
 318         int notkeyword;
 319         int hastoken = 0;
 320         int hasquote = 0;
 321         int ingrist = 0;
 322         int invarexpand = 0;
 323         int expect_whitespace = 0;
 324         int has_token_warning = 0;
 325
 326         /* Eat white space. */
 327         for ( ; ; )
 328         {
 329             /* Skip past white space. */
 330             while ( ( c != EOF ) && isspace( c ) )
 331                 c = yychar();
 332
 333             /* Not a comment? */
 334             if ( c != '#' )
 335                 break;
 336
 337             c = yychar();
 338             if ( ( c != EOF ) && c == '|' )
 339             {
 340                 /* Swallow up block comment. */
 341                 int c0 = yychar();
 342                 int c1 = yychar();
 343                 while ( ! ( c0 == '|' && c1 == '#' ) && ( c0 != EOF && c1 != EOF ) )
 344                 {
 345                     c0 = c1;
 346                     c1 = yychar();
 347                 }
 348                 c = yychar();
 349             }
 350             else
 351             {
 352                 /* Swallow up comment line. */
 353                 while ( ( c != EOF ) && ( c != '\n' ) ) c = yychar();
 354             }
 355         }
 356
 357         /* c now points to the first character of a token. */
 358         if ( c == EOF )
 359             goto eof;
 360
 361         yylval.file = incp->fname;
 362         yylval.line = incp->line;
 363
 364         /* While scanning the word, disqualify it for (expensive) keyword lookup
 365          * when we can: $anything, "anything", \anything
 366          */
 367         notkeyword = c == '$';
 368
 369         /* Look for white space to delimit word. "'s get stripped but preserve
 370          * white space. \ protects next character.
 371          */
 372         while
 373         (
 374             ( c != EOF ) &&
 375             ( b < buf + sizeof( buf ) ) &&
 376             ( inquote || invarexpand || !isspace( c ) )
 377         )
 378         {
 379             if ( expect_whitespace || ( isspace( c ) && ! inquote ) )
 380             {
 381                 token_warning();
 382                 expect_whitespace = 0;
 383             }
 384             if ( !inquote && !invarexpand )
 385             {
 386                 if ( scanmode == SCAN_COND || scanmode == SCAN_CONDB )
 387                 {
 388                     if ( hastoken && ( c == '=' || c == '<' || c == '>' || c == '!' || c == '(' || c == ')' || c == '&' || c == '|' ) )
 389                     {
 390                         /* Don't treat > as special if we started with a grist. */
 391                         if ( ! ( scanmode == SCAN_CONDB && ingrist == 1 && c == '>' ) )
 392                         {
 393                             yystartkeyword();
 394                         }
 395                     }
 396                     else if ( c == '=' || c == '(' || c == ')' )
 397                     {
 398                         *b++ = c;
 399                         c = yychar();
 400                         yyendkeyword();
 401                     }
 402                     else if ( c == '!' || ( scanmode == SCAN_COND && ( c == '<' || c == '>' ) ) )
 403                     {
 404                         *b++ = c;
 405                         if ( ( c = yychar() ) == '=' )
 406                         {
 407                             *b++ = c;
 408                             c = yychar();
 409                         }
 410                         yyendkeyword();
 411                     }
 412                     else if ( c == '&' || c == '|' )
 413                     {
 414                         *b++ = c;
 415                         if ( yychar() == c )
 416                         {
 417                             *b++ = c;
 418                             c = yychar();
 419                         }
 420                         yyendkeyword();
 421                     }
 422                 }
 423                 else if ( scanmode == SCAN_PARAMS )
 424                 {
 425                     if ( c == '*' || c == '+' || c == '?' || c == '(' || c == ')' )
 426                     {
 427                         if ( !hastoken )
 428                         {
 429                             *b++ = c;
 430                             c = yychar();
 431                             yyendkeyword();
 432                         }
 433                         else
 434                         {
 435                             yystartkeyword();
 436                         }
 437                     }
 438                 }
 439                 else if ( scanmode == SCAN_XASSIGN && ! hastoken )
 440                 {
 441                     if ( c == '=' )
 442                     {
 443                         *b++ = c;
 444                         c = yychar();
 445                         yyendkeyword();
 446                     }
 447                     else if ( c == '+' || c == '?' )
 448                     {
 449                         if ( yypeek() == '=' )
 450                         {
 451                             *b++ = c;
 452                             *b++ = yychar();
 453                             c = yychar();
 454                             yyendkeyword();
 455                         }
 456                     }
 457                 }
 458                 else if ( scanmode == SCAN_NORMAL || scanmode == SCAN_ASSIGN )
 459                 {
 460                     if ( c == '=' )
 461                     {
 462                         if ( !hastoken )
 463                         {
 464                             *b++ = c;
 465                             c = yychar();
 466                             yyendkeyword();
 467                         }
 468                         else
 469                         {
 470                             yystartkeyword();
 471                         }
 472                     }
 473                     else if ( c == '+' || c == '?' )
 474                     {
 475                         if ( yypeek() == '=' )
 476                         {
 477                             if ( hastoken )
 478                             {
 479                                 yystartkeyword();
 480                             }
 481                             else
 482                             {
 483                                 *b++ = c;
 484                                 *b++ = yychar();
 485                                 c = yychar();
 486                                 yyendkeyword();
 487                             }
 488                         }
 489                     }
 490                 }
 491                 if ( scanmode != SCAN_CASE && ( c == ';' || c == '{' || c == '}' ||
 492                     ( scanmode != SCAN_PARAMS && ( c == '[' || c == ']' ) ) ) )
 493                 {
 494                     if ( ! hastoken )
 495                     {
 496                         *b++ = c;
 497                         c = yychar();
 498                         yyendkeyword();
 499                     }
 500                     else
 501                     {
 502                         yystartkeyword();
 503                     }
 504                 }
 505                 else if ( c == ':' )
 506                 {
 507                     if ( ! hastoken )
 508                     {
 509                         *b++ = c;
 510                         c = yychar();
 511                         yyendkeyword();
 512                         break;
 513                     }
 514                     else if ( hasquote )
 515                     {
 516                         /* Special rules for ':' do not apply after we quote anything. */
 517                         yystartkeyword();
 518                     }
 519                     else if ( ingrist == 0 )
 520                     {
 521                         int next = yychar();
 522                         int is_win_path = 0;
 523                         int is_conditional = 0;
 524                         if ( next == '\\' )
 525                         {
 526                             if( yypeek() == '\\' )
 527                             {
 528                                 is_win_path = 1;
 529                             }
 530                         }
 531                         else if ( next == '/' )
 532                         {
 533                             is_win_path = 1;
 534                         }
 535                         yyprev();
 536                         if ( is_win_path )
 537                         {
 538                             /* Accept windows paths iff they are at the start or immediately follow a grist. */
 539                             if ( b > buf && isalpha( b[ -1 ] ) && ( b == buf + 1 || b[ -2 ] == '>' ) )
 540                             {
 541                                 is_win_path = 1;
 542                             }
 543                             else
 544                             {
 545                                 is_win_path = 0;
 546                             }
 547                         }
 548                         if ( next == '<' )
 549                         {
 550                             /* Accept conditionals only for tokens that start with "<" or "!<" */
 551                             if ( b > buf && buf[ 0 ] == '<' ||
 552                                 ( b > buf + 1 && buf[ 0 ] == '!' && buf[ 1 ] == '<' ))
 553                             {
 554                                 is_conditional = 1;
 555                             }
 556                         }
 557                         if ( !is_conditional && !is_win_path )
 558                         {
 559                             yystartkeyword();
 560                         }
 561                     }
 562                 }
 563             }
 564             hastoken = 1;
 565             if ( c == '"' )
 566             {
 567                 /* begin or end " */
 568                 inquote = !inquote;
 569                 hasquote = 1;
 570                 notkeyword = 1;
 571             }
 572             else if ( c != '\\' )
 573             {
 574                 if ( !invarexpand && c == '<' )
 575                 {
 576                     if ( ingrist == 0 ) ingrist = 1;
 577                     else ingrist = -1;
 578                 }
 579                 else if ( !invarexpand && c == '>' )
 580                 {
 581                     if ( ingrist == 1 ) ingrist = 0;
 582                     else ingrist = -1;
 583                 }
 584                 else if ( c == '$' )
 585                 {
 586                     if ( ( c = yychar() ) == EOF )
 587                     {
 588                         *b++ = '$';
 589                         break;
 590                     }
 591                     else if ( c == '(' )
 592                     {
 593                         /* inside $(), we only care about quotes */
 594                         *b++ = '$';
 595                         c = '(';
 596                         ++invarexpand;
 597                     }
 598                     else
 599                     {
 600                         c = '$';
 601                         yyprev();
 602                     }
 603                 }
 604                 else if ( c == '@' )
 605                 {
 606                     if ( ( c = yychar() ) == EOF )
 607                     {
 608                         *b++ = '@';
 609                         break;
 610                     }
 611                     else if ( c == '(' )
 612                     {
 613                         /* inside @(), we only care about quotes */
 614                         *b++ = '@';
 615                         c = '(';
 616                         ++invarexpand;
 617                     }
 618                     else
 619                     {
 620                         c = '@';
 621                         yyprev();
 622                     }
 623                 }
 624                 else if ( invarexpand && c == '(' )
 625                 {
 626                     ++invarexpand;
 627                 }
 628                 else if ( invarexpand && c == ')' )
 629                 {
 630                     --invarexpand;
 631                 }
 632                 /* normal char */
 633                 *b++ = c;
 634             }
 635             else if ( ( c = yychar() ) != EOF )
 636             {
 637                 /* \c */
 638                 if (c == 'n')
 639                     c = '\n';
 640                 else if (c == 'r')
 641                     c = '\r';
 642                 else if (c == 't')
 643                     c = '\t';
 644                 *b++ = c;
 645                 notkeyword = 1;
 646             }
 647             else
 648             {
 649                 /* \EOF */
 650                 break;
 651             }
 652
 653             c = yychar();
 654         }
 655
 656         /* Automatically switch modes after reading the token. */
 657         if ( scanmode == SCAN_CONDB )
 658             scanmode = SCAN_COND;
 659
 660         /* Check obvious errors. */
 661         if ( b == buf + sizeof( buf ) )
 662         {
 663             yyerror( "string too big" );
 664             goto eof;
 665         }
 666
 667         if ( inquote )
 668         {
 669             yyerror( "unmatched \" in string" );
 670             goto eof;
 671         }
 672
 673         /* We looked ahead a character - back up. */
 674         if ( c != EOF )
 675             yyprev();
 676
 677         /* Scan token table. Do not scan if it is obviously not a keyword or if
 678          * it is an alphabetic when were looking for punctuation.
 679          */
 680
 681         *b = 0;
 682         yylval.type = ARG;
 683
 684         if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT || scanmode == SCAN_PARAMS || scanmode == SCAN_ASSIGN ) ) )
 685             for ( k = keywords; k->word; ++k )
 686                 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
 687                 {
 688                     yylval.type = k->type;
 689                     yylval.keyword = k->word;  /* used by symdump */
 690                     break;
 691                 }
 692
 693         if ( yylval.type == ARG )
 694             yylval.string = object_new( buf );
 695
 696         if ( scanmode == SCAN_NORMAL && yylval.type == ARG )
 697             scanmode = SCAN_XASSIGN;
 698
 699         if ( has_token_warning )
 700             do_token_warning();
 701     }
 702
 703     if ( DEBUG_SCAN )
 704         out_printf( "scan %s\n", symdump( &yylval ) );
 705
 706     return yylval.type;
 707
 708 eof:
 709     /* We do not reset yylval.file & yylval.line here so unexpected EOF error
 710      * messages would include correct error location information.
 711      */
 712     yylval.type = EOF;
 713     return yylval.type;
 714 }
 715
 716
 717 static char * symdump( YYSTYPE * s )
 718 {
 719     static char buf[ BIGGEST_TOKEN + 20 ];
 720     switch ( s->type )
 721     {
 722         case EOF   : sprintf( buf, "EOF"                                        ); break;
 723         case 0     : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break;
 724         case ARG   : sprintf( buf, "argument %s"      , object_str( s->string ) ); break;
 725         case STRING: sprintf( buf, "string \"%s\""    , object_str( s->string ) ); break;
 726         default    : sprintf( buf, "keyword %s"       , s->keyword              ); break;
 727     }
 728     return buf;
 729 }
 730
 731
 732 /*
 733  * Get information about the current file and line, for those epsilon
 734  * transitions that produce a parse.
 735  */
 736
 737 void yyinput_last_read_token( OBJECT * * name, int * line )
 738 {
 739     /* TODO: Consider whether and when we might want to report where the last
 740      * read token ended, e.g. EOF errors inside string literals.
 741      */
 742     *name = yylval.file;
 743     *line = yylval.line;
 744 }