ceph/src/boost/tools/build/src/engine/scan.cpp

   1 /*
   2  * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
   3  *
   4  * This file is part of Jam - see jam.c for Copyright information.
   5  */
   6
   7 /*
   8  * scan.c - the jam yacc scanner
   9  *
  10  */
  11
  12 #include "jam.h"
  13 #include "scan.h"
  14 #include "output.h"
  15
  16 #include "constants.h"
  17 #include "jamgram.hpp"
  18
  19
  20 struct keyword
  21 {
  22     const char * word;
  23     int    type;
  24 } keywords[] =
  25 {
  26 #include "jamgramtab.h"
  27     { 0, 0 }
  28 };
  29
  30 typedef struct include include;
  31 struct include
  32 {
  33     include   * next;        /* next serial include file */
  34     char      * string;      /* pointer into current line */
  35     char    * * strings;     /* for yyfparse() -- text to parse */
  36     LISTITER    pos;         /* for yysparse() -- text to parse */
  37     LIST      * list;        /* for yysparse() -- text to parse */
  38     FILE      * file;        /* for yyfparse() -- file being read */
  39     OBJECT    * fname;       /* for yyfparse() -- file name */
  40     int         line;        /* line counter for error messages */
  41     char        buf[ 512 ];  /* for yyfparse() -- line buffer */
  42 };
  43
  44 static include * incp = 0;  /* current file; head of chain */
  45
  46 static int scanmode = SCAN_NORMAL;
  47 static int anyerrors = 0;
  48
  49
  50 static char * symdump( YYSTYPE * );
  51
  52 #define BIGGEST_TOKEN 10240  /* no single token can be larger */
  53
  54
  55 /*
  56  * Set parser mode: normal, string, or keyword.
  57  */
  58
  59 int yymode( int n )
  60 {
  61     int result = scanmode;
  62     scanmode = n;
  63     return result;
  64 }
  65
  66
  67 void yyerror( char const * s )
  68 {
  69     /* We use yylval instead of incp to access the error location information as
  70      * the incp pointer will already be reset to 0 in case the error occurred at
  71      * EOF.
  72      *
  73      * The two may differ only if ran into an unexpected EOF or we get an error
  74      * while reading a lexical token spanning multiple lines, e.g. a multi-line
  75      * string literal or action body, in which case yylval location information
  76      * will hold the information about where the token started while incp will
  77      * hold the information about where reading it broke.
  78      */
  79     out_printf( "%s:%d: %s at %s\n", object_str( yylval.file ), yylval.line, s,
  80             symdump( &yylval ) );
  81     ++anyerrors;
  82 }
  83
  84
  85 int yyanyerrors()
  86 {
  87     return anyerrors != 0;
  88 }
  89
  90
  91 void yyfparse( OBJECT * s )
  92 {
  93     include * i = (include *)BJAM_MALLOC( sizeof( *i ) );
  94
  95     /* Push this onto the incp chain. */
  96     i->string = (char*)"";
  97     i->strings = 0;
  98     i->file = 0;
  99     i->fname = object_copy( s );
 100     i->line = 0;
 101     i->next = incp;
 102     incp = i;
 103 }
 104
 105
 106 void yysparse( OBJECT * name, const char * * lines )
 107 {
 108     yyfparse( name );
 109     incp->strings = (char * *)lines;
 110 }
 111
 112
 113 /*
 114  * yyfdone() - cleanup after we're done parsing a file.
 115  */
 116 void yyfdone( void )
 117 {
 118     include * const i = incp;
 119     incp = i->next;
 120
 121     /* Close file, free name. */
 122     if(i->file && (i->file != stdin))
 123         fclose(i->file);
 124     object_free(i->fname);
 125     BJAM_FREE((char *)i);
 126 }
 127
 128
 129 /*
 130  * yyline() - read new line and return first character.
 131  *
 132  * Fabricates a continuous stream of characters across include files, returning
 133  * EOF at the bitter end.
 134  */
 135
 136 int yyline()
 137 {
 138     include * const i = incp;
 139
 140     if ( !incp )
 141         return EOF;
 142
 143     /* Once we start reading from the input stream, we reset the include
 144      * insertion point so that the next include file becomes the head of the
 145      * list.
 146      */
 147
 148     /* If there is more data in this line, return it. */
 149     if ( *i->string )
 150         return *i->string++;
 151
 152     /* If we are reading from an internal string list, go to the next string. */
 153     if ( i->strings )
 154     {
 155         if ( *i->strings )
 156         {
 157             ++i->line;
 158             i->string = *(i->strings++);
 159             return *i->string++;
 160         }
 161     }
 162     else
 163     {
 164         /* If necessary, open the file. */
 165         if ( !i->file )
 166         {
 167             FILE * f = stdin;
 168             if ( strcmp( object_str( i->fname ), "-" ) && !( f = fopen( object_str( i->fname ), "r" ) ) )
 169                 perror( object_str( i->fname ) );
 170             i->file = f;
 171         }
 172
 173         /* If there is another line in this file, start it. */
 174         if ( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
 175         {
 176             ++i->line;
 177             i->string = i->buf;
 178             return *i->string++;
 179         }
 180     }
 181
 182     /* This include is done. Return EOF so yyparse() returns to
 183      * parse_file().
 184      */
 185
 186     return EOF;
 187 }
 188
 189 /* This allows us to get an extra character of lookahead.
 190  * There are a few places where we need to look ahead two
 191  * characters and yyprev only guarantees a single character
 192  * of putback.
 193  */
 194 int yypeek()
 195 {
 196     if ( *incp->string )
 197     {
 198         return *incp->string;
 199     }
 200     else if ( incp->strings )
 201     {
 202         if ( *incp->strings )
 203             return **incp->strings;
 204     }
 205     else if ( incp->file )
 206     {
 207         /* Don't bother opening the file.  yypeek is
 208          * only used in special cases and never at the
 209          * beginning of a file.
 210          */
 211         int ch = fgetc( incp->file );
 212         if ( ch != EOF )
 213             ungetc( ch, incp->file );
 214         return ch;
 215     }
 216     return EOF;
 217 }
 218
 219 /*
 220  * yylex() - set yylval to current token; return its type.
 221  *
 222  * Macros to move things along:
 223  *
 224  *  yychar() - return and advance character; invalid after EOF.
 225  *  yyprev() - back up one character; invalid before yychar().
 226  *
 227  * yychar() returns a continuous stream of characters, until it hits the EOF of
 228  * the current include file.
 229  */
 230
 231 #define yychar() ( *incp->string ? *incp->string++ : yyline() )
 232 #define yyprev() ( incp->string-- )
 233
 234 static int use_new_scanner = 0;
 235
 236 #define yystartkeyword() if(use_new_scanner) break; else token_warning()
 237 #define yyendkeyword() if(use_new_scanner) break; else if ( 1 ) { expect_whitespace = 1; continue; } else (void)0
 238
 239 void do_token_warning()
 240 {
 241     out_printf( "%s:%d: %s %s\n", object_str( yylval.file ), yylval.line, "Unescaped special character in",
 242             symdump( &yylval ) );
 243 }
 244
 245 #define token_warning() has_token_warning = 1
 246
 247 int yylex()
 248 {
 249     int c;
 250     char buf[ BIGGEST_TOKEN ];
 251     char * b = buf;
 252
 253     if ( !incp )
 254         goto eof;
 255
 256     /* Get first character (whitespace or of token). */
 257     c = yychar();
 258
 259     if ( scanmode == SCAN_STRING )
 260     {
 261         /* If scanning for a string (action's {}'s), look for the closing brace.
 262          * We handle matching braces, if they match.
 263          */
 264
 265         int nest = 1;
 266
 267         while ( ( c != EOF ) && ( b < buf + sizeof( buf ) ) )
 268         {
 269             if ( c == '{' )
 270                 ++nest;
 271
 272             if ( ( c == '}' ) && !--nest )
 273                 break;
 274
 275             *b++ = c;
 276
 277             c = yychar();
 278
 279             /* Turn trailing "\r\n" sequences into plain "\n" for Cygwin. */
 280             if ( ( c == '\n' ) && ( b[ -1 ] == '\r' ) )
 281                 --b;
 282         }
 283
 284         /* We ate the ending brace -- regurgitate it. */
 285         if ( c != EOF )
 286             yyprev();
 287
 288         /* Check for obvious errors. */
 289         if ( b == buf + sizeof( buf ) )
 290         {
 291             yyerror( "action block too big" );
 292             goto eof;
 293         }
 294
 295         if ( nest )
 296         {
 297             yyerror( "unmatched {} in action block" );
 298             goto eof;
 299         }
 300
 301         *b = 0;
 302         yylval.type = STRING;
 303         yylval.string = object_new( buf );
 304         yylval.file = incp->fname;
 305         yylval.line = incp->line;
 306     }
 307     else
 308     {
 309         char * b = buf;
 310         struct keyword * k;
 311         int inquote = 0;
 312         int notkeyword;
 313         int hastoken = 0;
 314         int hasquote = 0;
 315         int ingrist = 0;
 316         int invarexpand = 0;
 317         int expect_whitespace = 0;
 318         int has_token_warning = 0;
 319
 320         /* Eat white space. */
 321         for ( ; ; )
 322         {
 323             /* Skip past white space. */
 324             while ( ( c != EOF ) && isspace( c ) )
 325                 c = yychar();
 326
 327             /* Not a comment? */
 328             if ( c != '#' )
 329                 break;
 330
 331             c = yychar();
 332             if ( ( c != EOF ) && c == '|' )
 333             {
 334                 /* Swallow up block comment. */
 335                 int c0 = yychar();
 336                 int c1 = yychar();
 337                 while ( ! ( c0 == '|' && c1 == '#' ) && ( c0 != EOF && c1 != EOF ) )
 338                 {
 339                     c0 = c1;
 340                     c1 = yychar();
 341                 }
 342                 c = yychar();
 343             }
 344             else
 345             {
 346                 /* Swallow up comment line. */
 347                 while ( ( c != EOF ) && ( c != '\n' ) ) c = yychar();
 348             }
 349         }
 350
 351         /* c now points to the first character of a token. */
 352         if ( c == EOF )
 353             goto eof;
 354
 355         yylval.file = incp->fname;
 356         yylval.line = incp->line;
 357
 358         /* While scanning the word, disqualify it for (expensive) keyword lookup
 359          * when we can: $anything, "anything", \anything
 360          */
 361         notkeyword = c == '$';
 362
 363         /* Look for white space to delimit word. "'s get stripped but preserve
 364          * white space. \ protects next character.
 365          */
 366         while
 367         (
 368             ( c != EOF ) &&
 369             ( b < buf + sizeof( buf ) ) &&
 370             ( inquote || invarexpand || !isspace( c ) )
 371         )
 372         {
 373             if ( expect_whitespace || ( isspace( c ) && ! inquote ) )
 374             {
 375                 token_warning();
 376                 expect_whitespace = 0;
 377             }
 378             if ( !inquote && !invarexpand )
 379             {
 380                 if ( scanmode == SCAN_COND || scanmode == SCAN_CONDB )
 381                 {
 382                     if ( hastoken && ( c == '=' || c == '<' || c == '>' || c == '!' || c == '(' || c == ')' || c == '&' || c == '|' ) )
 383                     {
 384                         /* Don't treat > as special if we started with a grist. */
 385                         if ( ! ( scanmode == SCAN_CONDB && ingrist == 1 && c == '>' ) )
 386                         {
 387                             yystartkeyword();
 388                         }
 389                     }
 390                     else if ( c == '=' || c == '(' || c == ')' )
 391                     {
 392                         *b++ = c;
 393                         c = yychar();
 394                         yyendkeyword();
 395                     }
 396                     else if ( c == '!' || ( scanmode == SCAN_COND && ( c == '<' || c == '>' ) ) )
 397                     {
 398                         *b++ = c;
 399                         if ( ( c = yychar() ) == '=' )
 400                         {
 401                             *b++ = c;
 402                             c = yychar();
 403                         }
 404                         yyendkeyword();
 405                     }
 406                     else if ( c == '&' || c == '|' )
 407                     {
 408                         *b++ = c;
 409                         if ( yychar() == c )
 410                         {
 411                             *b++ = c;
 412                             c = yychar();
 413                         }
 414                         yyendkeyword();
 415                     }
 416                 }
 417                 else if ( scanmode == SCAN_PARAMS )
 418                 {
 419                     if ( c == '*' || c == '+' || c == '?' || c == '(' || c == ')' )
 420                     {
 421                         if ( !hastoken )
 422                         {
 423                             *b++ = c;
 424                             c = yychar();
 425                             yyendkeyword();
 426                         }
 427                         else
 428                         {
 429                             yystartkeyword();
 430                         }
 431                     }
 432                 }
 433                 else if ( scanmode == SCAN_XASSIGN && ! hastoken )
 434                 {
 435                     if ( c == '=' )
 436                     {
 437                         *b++ = c;
 438                         c = yychar();
 439                         yyendkeyword();
 440                     }
 441                     else if ( c == '+' || c == '?' )
 442                     {
 443                         if ( yypeek() == '=' )
 444                         {
 445                             *b++ = c;
 446                             *b++ = yychar();
 447                             c = yychar();
 448                             yyendkeyword();
 449                         }
 450                     }
 451                 }
 452                 else if ( scanmode == SCAN_NORMAL || scanmode == SCAN_ASSIGN )
 453                 {
 454                     if ( c == '=' )
 455                     {
 456                         if ( !hastoken )
 457                         {
 458                             *b++ = c;
 459                             c = yychar();
 460                             yyendkeyword();
 461                         }
 462                         else
 463                         {
 464                             yystartkeyword();
 465                         }
 466                     }
 467                     else if ( c == '+' || c == '?' )
 468                     {
 469                         if ( yypeek() == '=' )
 470                         {
 471                             if ( hastoken )
 472                             {
 473                                 yystartkeyword();
 474                             }
 475                             else
 476                             {
 477                                 *b++ = c;
 478                                 *b++ = yychar();
 479                                 c = yychar();
 480                                 yyendkeyword();
 481                             }
 482                         }
 483                     }
 484                 }
 485                 if ( scanmode != SCAN_CASE && ( c == ';' || c == '{' || c == '}' ||
 486                     ( scanmode != SCAN_PARAMS && ( c == '[' || c == ']' ) ) ) )
 487                 {
 488                     if ( ! hastoken )
 489                     {
 490                         *b++ = c;
 491                         c = yychar();
 492                         yyendkeyword();
 493                     }
 494                     else
 495                     {
 496                         yystartkeyword();
 497                     }
 498                 }
 499                 else if ( c == ':' )
 500                 {
 501                     if ( ! hastoken )
 502                     {
 503                         *b++ = c;
 504                         c = yychar();
 505                         yyendkeyword();
 506                         break;
 507                     }
 508                     else if ( hasquote )
 509                     {
 510                         /* Special rules for ':' do not apply after we quote anything. */
 511                         yystartkeyword();
 512                     }
 513                     else if ( ingrist == 0 )
 514                     {
 515                         int next = yychar();
 516                         int is_win_path = 0;
 517                         int is_conditional = 0;
 518                         if ( next == '\\' )
 519                         {
 520                             if( yypeek() == '\\' )
 521                             {
 522                                 is_win_path = 1;
 523                             }
 524                         }
 525                         else if ( next == '/' )
 526                         {
 527                             is_win_path = 1;
 528                         }
 529                         yyprev();
 530                         if ( is_win_path )
 531                         {
 532                             /* Accept windows paths iff they are at the start or immediately follow a grist. */
 533                             if ( b > buf && isalpha( b[ -1 ] ) && ( b == buf + 1 || b[ -2 ] == '>' ) )
 534                             {
 535                                 is_win_path = 1;
 536                             }
 537                             else
 538                             {
 539                                 is_win_path = 0;
 540                             }
 541                         }
 542                         if ( next == '<' )
 543                         {
 544                             /* Accept conditionals only for tokens that start with "<" or "!<" */
 545                             if ( ( (b > buf) && (buf[ 0 ] == '<') ) ||
 546                                 ( (b > (buf + 1)) && (buf[ 0 ] == '!') && (buf[ 1 ] == '<') ))
 547                             {
 548                                 is_conditional = 1;
 549                             }
 550                         }
 551                         if ( !is_conditional && !is_win_path )
 552                         {
 553                             yystartkeyword();
 554                         }
 555                     }
 556                 }
 557             }
 558             hastoken = 1;
 559             if ( c == '"' )
 560             {
 561                 /* begin or end " */
 562                 inquote = !inquote;
 563                 hasquote = 1;
 564                 notkeyword = 1;
 565             }
 566             else if ( c != '\\' )
 567             {
 568                 if ( !invarexpand && c == '<' )
 569                 {
 570                     if ( ingrist == 0 ) ingrist = 1;
 571                     else ingrist = -1;
 572                 }
 573                 else if ( !invarexpand && c == '>' )
 574                 {
 575                     if ( ingrist == 1 ) ingrist = 0;
 576                     else ingrist = -1;
 577                 }
 578                 else if ( c == '$' )
 579                 {
 580                     if ( ( c = yychar() ) == EOF )
 581                     {
 582                         *b++ = '$';
 583                         break;
 584                     }
 585                     else if ( c == '(' )
 586                     {
 587                         /* inside $(), we only care about quotes */
 588                         *b++ = '$';
 589                         c = '(';
 590                         ++invarexpand;
 591                     }
 592                     else
 593                     {
 594                         c = '$';
 595                         yyprev();
 596                     }
 597                 }
 598                 else if ( c == '@' )
 599                 {
 600                     if ( ( c = yychar() ) == EOF )
 601                     {
 602                         *b++ = '@';
 603                         break;
 604                     }
 605                     else if ( c == '(' )
 606                     {
 607                         /* inside @(), we only care about quotes */
 608                         *b++ = '@';
 609                         c = '(';
 610                         ++invarexpand;
 611                     }
 612                     else
 613                     {
 614                         c = '@';
 615                         yyprev();
 616                     }
 617                 }
 618                 else if ( invarexpand && c == '(' )
 619                 {
 620                     ++invarexpand;
 621                 }
 622                 else if ( invarexpand && c == ')' )
 623                 {
 624                     --invarexpand;
 625                 }
 626                 /* normal char */
 627                 *b++ = c;
 628             }
 629             else if ( ( c = yychar() ) != EOF )
 630             {
 631                 /* \c */
 632                 if (c == 'n')
 633                     c = '\n';
 634                 else if (c == 'r')
 635                     c = '\r';
 636                 else if (c == 't')
 637                     c = '\t';
 638                 *b++ = c;
 639                 notkeyword = 1;
 640             }
 641             else
 642             {
 643                 /* \EOF */
 644                 break;
 645             }
 646
 647             c = yychar();
 648         }
 649
 650         /* Automatically switch modes after reading the token. */
 651         if ( scanmode == SCAN_CONDB )
 652             scanmode = SCAN_COND;
 653
 654         /* Check obvious errors. */
 655         if ( b == buf + sizeof( buf ) )
 656         {
 657             yyerror( "string too big" );
 658             goto eof;
 659         }
 660
 661         if ( inquote )
 662         {
 663             yyerror( "unmatched \" in string" );
 664             goto eof;
 665         }
 666
 667         /* We looked ahead a character - back up. */
 668         if ( c != EOF )
 669             yyprev();
 670
 671         /* Scan token table. Do not scan if it is obviously not a keyword or if
 672          * it is an alphabetic when were looking for punctuation.
 673          */
 674
 675         *b = 0;
 676         yylval.type = ARG;
 677
 678         if ( !notkeyword && !( isalpha( *buf ) && ( scanmode == SCAN_PUNCT || scanmode == SCAN_PARAMS || scanmode == SCAN_ASSIGN ) ) )
 679             for ( k = keywords; k->word; ++k )
 680                 if ( ( *buf == *k->word ) && !strcmp( k->word, buf ) )
 681                 {
 682                     yylval.type = k->type;
 683                     yylval.keyword = k->word;  /* used by symdump */
 684                     break;
 685                 }
 686
 687         if ( yylval.type == ARG )
 688             yylval.string = object_new( buf );
 689
 690         if ( scanmode == SCAN_NORMAL && yylval.type == ARG )
 691             scanmode = SCAN_XASSIGN;
 692
 693         if ( has_token_warning )
 694             do_token_warning();
 695     }
 696
 697     if ( DEBUG_SCAN )
 698         out_printf( "scan %s\n", symdump( &yylval ) );
 699
 700     return yylval.type;
 701
 702 eof:
 703     /* We do not reset yylval.file & yylval.line here so unexpected EOF error
 704      * messages would include correct error location information.
 705      */
 706     yylval.type = EOF;
 707     return yylval.type;
 708 }
 709
 710
 711 static char * symdump( YYSTYPE * s )
 712 {
 713     static char buf[ BIGGEST_TOKEN + 20 ];
 714     switch ( s->type )
 715     {
 716         case EOF   : sprintf( buf, "EOF"                                        ); break;
 717         case 0     : sprintf( buf, "unknown symbol %s", object_str( s->string ) ); break;
 718         case ARG   : sprintf( buf, "argument %s"      , object_str( s->string ) ); break;
 719         case STRING: sprintf( buf, "string \"%s\""    , object_str( s->string ) ); break;
 720         default    : sprintf( buf, "keyword %s"       , s->keyword              ); break;
 721     }
 722     return buf;
 723 }
 724
 725
 726 /*
 727  * Get information about the current file and line, for those epsilon
 728  * transitions that produce a parse.
 729  */
 730
 731 void yyinput_last_read_token( OBJECT * * name, int * line )
 732 {
 733     /* TODO: Consider whether and when we might want to report where the last
 734      * read token ended, e.g. EOF errors inside string literals.
 735      */
 736     *name = yylval.file;
 737     *line = yylval.line;
 738 }