EdkCompatibilityPkg/Sample/Tools/Source/StrGather/StrGather.c

   1 /*++
   2
   3 Copyright (c) 2004 - 2010, Intel Corporation. All rights reserved.<BR>
   4 This program and the accompanying materials
   5 are licensed and made available under the terms and conditions of the BSD License
   6 which accompanies this distribution.  The full text of the license may be found at
   7 http://opensource.org/licenses/bsd-license.php
   8
   9 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  10 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  11
  12 Module Name:
  13
  14   StrGather.c
  15
  16 Abstract:
  17
  18   Parse a strings file and create or add to a string database file.
  19
  20 --*/
  21
  22 #include <stdio.h>
  23 #include <string.h>
  24 #include <stdlib.h>
  25 #include <ctype.h>
  26
  27 #include "Tiano.h"
  28 #include "EfiUtilityMsgs.h"
  29 #include "StrGather.h"
  30 #include "StringDB.h"
  31
//
// Name and version of this utility. UTILITY_NAME is handed to the message
// library with SetUtilityName() at the top of main().
//
#define UTILITY_NAME     "StrGather"
#define UTILITY_VERSION  "v1.0"

//
// 16-bit character type used for the contents of the strings files, which
// are read into memory as arrays of WCHAR.
//
typedef UINT16  WCHAR;

//
// Sizing limits. MAX_PATH is the size, in chars, of the file name buffers
// declared in this file. MAX_NEST_DEPTH caps include nesting in
// ProcessIncludeFile(). MAX_STRING_IDENTIFIER_NAME sizes the identifier
// buffer used by ProcessTokenString().
//
#define MAX_PATH                    1024
#define MAX_NEST_DEPTH              20  // just in case we get in an endless loop.
#define MAX_STRING_IDENTIFIER_NAME  100 // number of wchars
#define MAX_LINE_LEN                400
#define STRING_TOKEN                "STRING_TOKEN"
#define DEFAULT_BASE_NAME           "BaseName"
//
// Operational modes for this utility. The selected mode is stored in
// mGlobals.Mode and tested in main().
//
#define MODE_UNKNOWN  0
#define MODE_PARSE    1
#define MODE_SCAN     2
#define MODE_DUMP     3
  50
//
// We keep a linked list of these for the source files we process.
// Each instance holds the complete parse state of one strings file: the open
// stream, the in-memory copy of the file and the current position within it.
//
typedef struct _SOURCE_FILE {
  FILE                *Fptr;              // stream opened (and closed) by ProcessIncludeFile()
  WCHAR               *FileBuffer;        // whole file contents, allocated and freed by ProcessFile()
  WCHAR               *FileBufferPtr;     // current read position inside FileBuffer
  UINT32              FileSize;           // file size in bytes, as reported by ftell()
  INT8                FileName[MAX_PATH]; // name used to open the file and to report errors
  UINT32              LineNum;            // current line, advanced on every line feed
  BOOLEAN             EndOfFile;          // latched TRUE by EndOfFile() once the buffer is exhausted
  BOOLEAN             SkipToHash;         // error recovery: ParseFile() skips input up to the next control character
  struct _SOURCE_FILE *Previous;          // list links; not referenced in this part of the file
  struct _SOURCE_FILE *Next;
  WCHAR               ControlCharacter;   // character that introduces a directive; changed by "<ctrl>=<char>"
} SOURCE_FILE;

//
// Control character assigned to the top-level source file in main() before
// parsing starts.
//
#define DEFAULT_CONTROL_CHARACTER UNICODE_SLASH
  69
//
// Here's all our globals. We need a linked list of include paths, a linked
// list of source files, a linked list of subdirectories (appended to each
// include path when searching), and a couple other fields.
//
// Each list is kept as a head pointer plus a "Last*" pointer.
// NOTE(review): the Last* pointers look like tail pointers used while
// appending (the code that fills the lists is not in this part of the file);
// main() also reuses LastDatabaseFileName as a loop cursor.
//
static struct {
  SOURCE_FILE                 SourceFiles;                      // top-level strings file handed to ProcessIncludeFile() by main()
  TEXT_STRING_LIST            *IncludePaths;                    // all include paths to search
  TEXT_STRING_LIST            *LastIncludePath;
  TEXT_STRING_LIST            *ScanFileName;                    // files handed to ScanFiles() in MODE_SCAN
  TEXT_STRING_LIST            *LastScanFileName;
  TEXT_STRING_LIST            *SkipExt;                         // if -skipext .uni
  TEXT_STRING_LIST            *LastSkipExt;
  TEXT_STRING_LIST            *IndirectionFileName;             // files handed to ParseIndirectionFiles()
  TEXT_STRING_LIST            *LastIndirectionFileName;
  TEXT_STRING_LIST            *DatabaseFileName;                // databases read at startup; the first is the default output name
  TEXT_STRING_LIST            *LastDatabaseFileName;
  WCHAR_STRING_LIST           *Language;                        // passed to StringDBDumpCStrings() and StringDBCreateHiiExportPack()
  WCHAR_STRING_LIST           *LastLanguage;
  WCHAR_MATCHING_STRING_LIST  *IndirectionList;                 // from indirection file(s)
  WCHAR_MATCHING_STRING_LIST  *LastIndirectionList;
  BOOLEAN                     Verbose;                          // for more detailed output
  BOOLEAN                     VerboseDatabaseWrite;             // for more detailed output when writing database
  BOOLEAN                     VerboseDatabaseRead;              // for more detailed output when reading database
  BOOLEAN                     NewDatabase;                      // to start from scratch
  BOOLEAN                     IgnoreNotFound;                   // when scanning
  BOOLEAN                     VerboseScan;
  BOOLEAN                     UnquotedStrings;                  // -uqs option
  INT8                        OutputDatabaseFileName[MAX_PATH]; // if non-empty, the database is written here instead of the first -db name
  INT8                        StringHFileName[MAX_PATH];        // output string defines header, written by StringDBDumpStringDefines()
  INT8                        StringCFileName[MAX_PATH];        // output .C filename
  INT8                        DumpUFileName[MAX_PATH];          // output unicode dump file name
  INT8                        HiiExportPackFileName[MAX_PATH];  // HII export pack file name
  INT8                        BaseName[MAX_PATH];               // base filename of the strings file
  INT8                        OutputDependencyFileName[MAX_PATH]; // if non-empty, make-style dependency rules are written here
  FILE                        *OutputDependencyFptr;            // open stream for OutputDependencyFileName while parsing
  UINT32                      Mode;                             // one of the MODE_xxx values
} mGlobals;
 108
 109 static
 110 BOOLEAN
 111 IsValidIdentifierChar (
 112   INT8      Char,
 113   BOOLEAN   FirstChar
 114   );
 115
 116 static
 117 void
 118 RewindFile (
 119   SOURCE_FILE *SourceFile
 120   );
 121
 122 static
 123 BOOLEAN
 124 SkipTo (
 125   SOURCE_FILE *SourceFile,
 126   WCHAR       WChar,
 127   BOOLEAN     StopAfterNewline
 128   );
 129
 130 static
 131 UINT32
 132 SkipWhiteSpace (
 133   SOURCE_FILE *SourceFile
 134   );
 135
 136 static
 137 BOOLEAN
 138 IsWhiteSpace (
 139   SOURCE_FILE *SourceFile
 140   );
 141
 142 static
 143 BOOLEAN
 144 EndOfFile (
 145   SOURCE_FILE *SourceFile
 146   );
 147
 148 static
 149 void
 150 PreprocessFile (
 151   SOURCE_FILE *SourceFile
 152   );
 153
 154 static
 155 UINT32
 156 GetStringIdentifierName (
 157   IN SOURCE_FILE  *SourceFile,
 158   IN OUT WCHAR    *StringIdentifierName,
 159   IN UINT32       StringIdentifierNameLen
 160   );
 161
 162 static
 163 UINT32
 164 GetLanguageIdentifierName (
 165   IN SOURCE_FILE  *SourceFile,
 166   IN OUT WCHAR    *LanguageIdentifierName,
 167   IN UINT32       LanguageIdentifierNameLen,
 168   IN BOOLEAN      Optional
 169   );
 170
 171 static
 172 WCHAR *
 173 GetPrintableLanguageName (
 174   IN SOURCE_FILE  *SourceFile
 175   );
 176
 177 static
 178 STATUS
 179 AddCommandLineLanguage (
 180   IN INT8          *Language
 181   );
 182
 183 static
 184 WCHAR *
 185 GetQuotedString (
 186   SOURCE_FILE *SourceFile,
 187   BOOLEAN     Optional
 188   );
 189
 190 static
 191 STATUS
 192 ProcessIncludeFile (
 193   SOURCE_FILE *SourceFile,
 194   SOURCE_FILE *ParentSourceFile
 195   );
 196
 197 static
 198 STATUS
 199 ParseFile (
 200   SOURCE_FILE *SourceFile
 201   );
 202
 203 static
 204 FILE  *
 205 FindFile (
 206   IN INT8     *FileName,
 207   OUT INT8    *FoundFileName,
 208   IN UINT32   FoundFileNameLen
 209   );
 210
 211 static
 212 STATUS
 213 ProcessArgs (
 214   int   Argc,
 215   char  *Argv[]
 216   );
 217
 218 static
 219 STATUS
 220 ProcessFile (
 221   SOURCE_FILE *SourceFile
 222   );
 223
 224 static
 225 UINT32
 226 wstrcmp (
 227   WCHAR *Buffer,
 228   WCHAR *Str
 229   );
 230
 231 static
 232 void
 233 Usage (
 234   VOID
 235   );
 236
 237 static
 238 void
 239 FreeLists (
 240   VOID
 241   );
 242
 243 static
 244 void
 245 ProcessTokenString (
 246   SOURCE_FILE *SourceFile
 247   );
 248
 249 static
 250 void
 251 ProcessTokenInclude (
 252   SOURCE_FILE *SourceFile
 253   );
 254
 255 static
 256 void
 257 ProcessTokenScope (
 258   SOURCE_FILE *SourceFile
 259   );
 260
 261 static
 262 void
 263 ProcessTokenLanguage (
 264   SOURCE_FILE *SourceFile
 265   );
 266
 267 static
 268 void
 269 ProcessTokenLangDef (
 270   SOURCE_FILE *SourceFile
 271   );
 272
 273 static
 274 STATUS
 275 ScanFiles (
 276   TEXT_STRING_LIST *ScanFiles
 277   );
 278
 279 static
 280 STATUS
 281 ParseIndirectionFiles (
 282   TEXT_STRING_LIST    *Files
 283   );
 284
 285 int
 286 main (
 287   int   Argc,
 288   char  *Argv[]
 289   )
 290 /*++
 291
 292 Routine Description:
 293
 294   Call the routine to parse the command-line options, then process the file.
 295
 296 Arguments:
 297
 298   Argc - Standard C main() argc and argv.
 299   Argv - Standard C main() argc and argv.
 300
 301 Returns:
 302
 303   0       if successful
 304   nonzero otherwise
 305
 306 --*/
 307 {
 308   STATUS  Status;
 309
 310   SetUtilityName (UTILITY_NAME);
 311   //
 312   // Process the command-line arguments
 313   //
 314   Status = ProcessArgs (Argc, Argv);
 315   if (Status != STATUS_SUCCESS) {
 316     return Status;
 317   }
 318   //
 319   // Initialize the database manager
 320   //
 321   StringDBConstructor ();
 322   //
 323   // We always try to read in an existing database file. It may not
 324   // exist, which is ok usually.
 325   //
 326   if (mGlobals.NewDatabase == 0) {
 327     //
 328     // Read all databases specified.
 329     //
 330     for (mGlobals.LastDatabaseFileName = mGlobals.DatabaseFileName;
 331          mGlobals.LastDatabaseFileName != NULL;
 332          mGlobals.LastDatabaseFileName = mGlobals.LastDatabaseFileName->Next
 333         ) {
 334       Status = StringDBReadDatabase (mGlobals.LastDatabaseFileName->Str, TRUE, mGlobals.VerboseDatabaseRead);
 335       if (Status != STATUS_SUCCESS) {
 336         return Status;
 337       }
 338     }
 339   }
 340   //
 341   // Read indirection file(s) if specified
 342   //
 343   if (ParseIndirectionFiles (mGlobals.IndirectionFileName) != STATUS_SUCCESS) {
 344     goto Finish;
 345   }
 346   //
 347   // If scanning source files, do that now
 348   //
 349   if (mGlobals.Mode == MODE_SCAN) {
 350     ScanFiles (mGlobals.ScanFileName);
 351   } else if (mGlobals.Mode == MODE_PARSE) {
 352     //
 353     // Parsing a unicode strings file
 354     //
 355     mGlobals.SourceFiles.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
 356     if (mGlobals.OutputDependencyFileName[0] != 0) {
 357       if ((mGlobals.OutputDependencyFptr = fopen (mGlobals.OutputDependencyFileName, "w")) == NULL) {
 358         Error (NULL, 0, 0, mGlobals.OutputDependencyFileName, "failed to open output dependency file");
 359         goto Finish;
 360       }
 361     }
 362     Status = ProcessIncludeFile (&mGlobals.SourceFiles, NULL);
 363     if (mGlobals.OutputDependencyFptr != NULL) {
 364       fclose (mGlobals.OutputDependencyFptr);
 365     }
 366     if (Status != STATUS_SUCCESS) {
 367       goto Finish;
 368     }
 369   }
 370   //
 371   // Create the string defines header file if there have been no errors.
 372   //
 373   ParserSetPosition (NULL, 0);
 374   if ((mGlobals.StringHFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 375     Status = StringDBDumpStringDefines (mGlobals.StringHFileName, mGlobals.BaseName);
 376     if (Status != EFI_SUCCESS) {
 377       goto Finish;
 378     }
 379   }
 380   //
 381   // Dump the strings to a .c file if there have still been no errors.
 382   //
 383   if ((mGlobals.StringCFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 384     Status = StringDBDumpCStrings (
 385               mGlobals.StringCFileName,
 386               mGlobals.BaseName,
 387               mGlobals.Language,
 388               mGlobals.IndirectionList
 389               );
 390     if (Status != EFI_SUCCESS) {
 391       goto Finish;
 392     }
 393   }
 394   //
 395   // Dump the database if requested
 396   //
 397   if ((mGlobals.DumpUFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 398     StringDBDumpDatabase (NULL, mGlobals.DumpUFileName, FALSE);
 399   }
 400   //
 401   // Dump the string data as HII binary string pack if requested
 402   //
 403   if ((mGlobals.HiiExportPackFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 404     StringDBCreateHiiExportPack (mGlobals.HiiExportPackFileName, mGlobals.Language);
 405   }
 406   //
 407   // Always update the database if no errors and not in dump mode. If they specified -od
 408   // for an output database file name, then use that name. Otherwise use the name of
 409   // the first database file specified with -db
 410   //
 411   if ((mGlobals.Mode != MODE_DUMP) && (GetUtilityStatus () < STATUS_ERROR)) {
 412     if (mGlobals.OutputDatabaseFileName[0]) {
 413       Status = StringDBWriteDatabase (mGlobals.OutputDatabaseFileName, mGlobals.VerboseDatabaseWrite);
 414     } else {
 415       Status = StringDBWriteDatabase (mGlobals.DatabaseFileName->Str, mGlobals.VerboseDatabaseWrite);
 416     }
 417
 418     if (Status != EFI_SUCCESS) {
 419       goto Finish;
 420     }
 421   }
 422
 423 Finish:
 424   //
 425   // Free up memory
 426   //
 427   FreeLists ();
 428   StringDBDestructor ();
 429   return GetUtilityStatus ();
 430 }
 431
 432 static
 433 STATUS
 434 ProcessIncludeFile (
 435   SOURCE_FILE *SourceFile,
 436   SOURCE_FILE *ParentSourceFile
 437   )
 438 /*++
 439
 440 Routine Description:
 441
 442   Given a source file, open the file and parse it
 443
 444 Arguments:
 445
 446   SourceFile        - name of file to parse
 447   ParentSourceFile  - for error reporting purposes, the file that #included SourceFile.
 448
 449 Returns:
 450
 451   Standard status.
 452
 453 --*/
 454 {
 455   static UINT32 NestDepth = 0;
 456   INT8          FoundFileName[MAX_PATH];
 457   STATUS        Status;
 458
 459   Status = STATUS_SUCCESS;
 460   NestDepth++;
 461   //
 462   // Print the file being processed. Indent so you can tell the include nesting
 463   // depth.
 464   //
 465   if (mGlobals.Verbose) {
 466     fprintf (stdout, "%*cProcessing file '%s'\n", NestDepth * 2, ' ', SourceFile->FileName);
 467   }
 468
 469   //
 470   // Make sure we didn't exceed our maximum nesting depth
 471   //
 472   if (NestDepth > MAX_NEST_DEPTH) {
 473     Error (NULL, 0, 0, SourceFile->FileName, "max nesting depth (%d) exceeded", NestDepth);
 474     Status = STATUS_ERROR;
 475     goto Finish;
 476   }
 477   //
 478   // Try to open the file locally, and if that fails try along our include paths.
 479   //
 480   strcpy (FoundFileName, SourceFile->FileName);
 481   if ((SourceFile->Fptr = fopen (FoundFileName, "rb")) == NULL) {
 482     //
 483     // Try to find it among the paths if it has a parent (that is, it is included
 484     // by someone else).
 485     //
 486     if (ParentSourceFile == NULL) {
 487       Error (NULL, 0, 0, SourceFile->FileName, "file not found");
 488       Status = STATUS_ERROR;
 489       goto Finish;
 490     }
 491
 492     SourceFile->Fptr = FindFile (SourceFile->FileName, FoundFileName, sizeof (FoundFileName));
 493     if (SourceFile->Fptr == NULL) {
 494       Error (ParentSourceFile->FileName, ParentSourceFile->LineNum, 0, SourceFile->FileName, "include file not found");
 495       Status = STATUS_ERROR;
 496       goto Finish;
 497     }
 498   }
 499
 500   //
 501   // Output the dependency
 502   //
 503   if (mGlobals.OutputDependencyFptr != NULL) {
 504     fprintf (mGlobals.OutputDependencyFptr, "%s : %s\n", mGlobals.DatabaseFileName->Str, FoundFileName);
 505     //
 506     // Add pseudo target to avoid incremental build failure when the file is deleted
 507     //
 508     fprintf (mGlobals.OutputDependencyFptr, "%s : \n", FoundFileName);
 509   }
 510
 511   //
 512   // Process the file found
 513   //
 514   ProcessFile (SourceFile);
 515
 516 Finish:
 517   NestDepth--;
 518   //
 519   // Close open files and return status
 520   //
 521   if (SourceFile->Fptr != NULL) {
 522     fclose (SourceFile->Fptr);
 523   }
 524
 525   return Status;
 526 }
 527
 528 static
 529 STATUS
 530 ProcessFile (
 531   SOURCE_FILE *SourceFile
 532   )
 533 {
 534   //
 535   // Get the file size, and then read the entire thing into memory.
 536   // Allocate space for a terminator character.
 537   //
 538   fseek (SourceFile->Fptr, 0, SEEK_END);
 539   SourceFile->FileSize = ftell (SourceFile->Fptr);
 540   fseek (SourceFile->Fptr, 0, SEEK_SET);
 541   SourceFile->FileBuffer = (WCHAR *) malloc (SourceFile->FileSize + sizeof (WCHAR));
 542   if (SourceFile->FileBuffer == NULL) {
 543     Error (NULL, 0, 0, "memory allocation failure", NULL);
 544     return STATUS_ERROR;
 545   }
 546
 547   fread ((VOID *) SourceFile->FileBuffer, SourceFile->FileSize, 1, SourceFile->Fptr);
 548   SourceFile->FileBuffer[(SourceFile->FileSize / sizeof (WCHAR))] = UNICODE_NULL;
 549   //
 550   // Pre-process the file to replace comments with spaces
 551   //
 552   PreprocessFile (SourceFile);
 553   //
 554   // Parse the file
 555   //
 556   ParseFile (SourceFile);
 557   free (SourceFile->FileBuffer);
 558   return STATUS_SUCCESS;
 559 }
 560
 561 static
 562 STATUS
 563 ParseFile (
 564   SOURCE_FILE *SourceFile
 565   )
 566 {
 567   BOOLEAN InComment;
 568   UINT32  Len;
 569
 570   //
 571   // First character of a unicode file is special. Make sure
 572   //
 573   if (SourceFile->FileBufferPtr[0] != UNICODE_FILE_START) {
 574     Error (SourceFile->FileName, 1, 0, SourceFile->FileName, "file does not appear to be a unicode file");
 575     return STATUS_ERROR;
 576   }
 577
 578   SourceFile->FileBufferPtr++;
 579   InComment = FALSE;
 580   //
 581   // Print the first line if in verbose mode
 582   //
 583   if (mGlobals.Verbose) {
 584     printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 585   }
 586   //
 587   // Since the syntax is relatively straightforward, just switch on the next char
 588   //
 589   while (!EndOfFile (SourceFile)) {
 590     //
 591     // Check for whitespace
 592     //
 593     if (SourceFile->FileBufferPtr[0] == UNICODE_SPACE) {
 594       SourceFile->FileBufferPtr++;
 595     } else if (SourceFile->FileBufferPtr[0] == UNICODE_TAB) {
 596       SourceFile->FileBufferPtr++;
 597     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 598       SourceFile->FileBufferPtr++;
 599     } else if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 600       SourceFile->FileBufferPtr++;
 601       SourceFile->LineNum++;
 602       if (mGlobals.Verbose) {
 603         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 604       }
 605
 606       InComment = FALSE;
 607     } else if (SourceFile->FileBufferPtr[0] == 0) {
 608       SourceFile->FileBufferPtr++;
 609     } else if (InComment) {
 610       SourceFile->FileBufferPtr++;
 611     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 612       SourceFile->FileBufferPtr += 2;
 613       InComment = TRUE;
 614     } else if (SourceFile->SkipToHash && (SourceFile->FileBufferPtr[0] != SourceFile->ControlCharacter)) {
 615       SourceFile->FileBufferPtr++;
 616     } else {
 617       SourceFile->SkipToHash = FALSE;
 618       if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 619           ((Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"include")) > 0)
 620           ) {
 621         SourceFile->FileBufferPtr += Len + 1;
 622         ProcessTokenInclude (SourceFile);
 623       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 624                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"scope")) > 0
 625               ) {
 626         SourceFile->FileBufferPtr += Len + 1;
 627         ProcessTokenScope (SourceFile);
 628       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 629                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"language")) > 0
 630               ) {
 631         SourceFile->FileBufferPtr += Len + 1;
 632         ProcessTokenLanguage (SourceFile);
 633       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 634                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"langdef")) > 0
 635               ) {
 636         SourceFile->FileBufferPtr += Len + 1;
 637         ProcessTokenLangDef (SourceFile);
 638       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 639                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"string")) > 0
 640               ) {
 641         SourceFile->FileBufferPtr += Len + 1;
 642         ProcessTokenString (SourceFile);
 643       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 644                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"EFI_BREAKPOINT()")) > 0
 645               ) {
 646         SourceFile->FileBufferPtr += Len;
 647         EFI_BREAKPOINT ();
 648       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 649                (SourceFile->FileBufferPtr[1] == UNICODE_EQUAL_SIGN)
 650               ) {
 651         SourceFile->ControlCharacter = SourceFile->FileBufferPtr[2];
 652         SourceFile->FileBufferPtr += 3;
 653       } else {
 654         Error (SourceFile->FileName, SourceFile->LineNum, 0, "unrecognized token", "%S", SourceFile->FileBufferPtr);
 655         //
 656         // Treat rest of line as a comment.
 657         //
 658         InComment = TRUE;
 659       }
 660     }
 661   }
 662
 663   return STATUS_SUCCESS;
 664 }
 665
 666 static
 667 void
 668 PreprocessFile (
 669   SOURCE_FILE *SourceFile
 670   )
 671 /*++
 672
 673 Routine Description:
 674   Preprocess a file to replace all carriage returns with NULLs so
 675   we can print lines from the file to the screen.
 676
 677 Arguments:
 678   SourceFile - structure that we use to keep track of an input file.
 679
 680 Returns:
 681   Nothing.
 682
 683 --*/
 684 {
 685   BOOLEAN InComment;
 686
 687   RewindFile (SourceFile);
 688   InComment = FALSE;
 689   while (!EndOfFile (SourceFile)) {
 690     //
 691     // If a line-feed, then no longer in a comment
 692     //
 693     if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 694       SourceFile->FileBufferPtr++;
 695       SourceFile->LineNum++;
 696       InComment = 0;
 697     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 698       //
 699       // Replace all carriage returns with a NULL so we can print stuff
 700       //
 701       SourceFile->FileBufferPtr[0] = 0;
 702       SourceFile->FileBufferPtr++;
 703     } else if (InComment) {
 704       SourceFile->FileBufferPtr[0] = UNICODE_SPACE;
 705       SourceFile->FileBufferPtr++;
 706     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 707       SourceFile->FileBufferPtr += 2;
 708       InComment = TRUE;
 709     } else {
 710       SourceFile->FileBufferPtr++;
 711     }
 712   }
 713   //
 714   // Could check for end-of-file and still in a comment, but
 715   // should not be necessary. So just restore the file pointers.
 716   //
 717   RewindFile (SourceFile);
 718 }
 719
 720 static
 721 WCHAR *
 722 GetPrintableLanguageName (
 723   IN SOURCE_FILE  *SourceFile
 724   )
 725 {
 726   WCHAR   *String;
 727   WCHAR   *Start;
 728   WCHAR   *Ptr;
 729   UINT32  Len;
 730
 731   SkipWhiteSpace (SourceFile);
 732   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 733     Error (
 734       SourceFile->FileName,
 735       SourceFile->LineNum,
 736       0,
 737       "expected quoted printable language name",
 738       "%S",
 739       SourceFile->FileBufferPtr
 740       );
 741     SourceFile->SkipToHash = TRUE;
 742     return NULL;
 743   }
 744
 745   Len = 0;
 746   SourceFile->FileBufferPtr++;
 747   Start = Ptr = SourceFile->FileBufferPtr;
 748   while (!EndOfFile (SourceFile)) {
 749     if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 750       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 751       break;
 752     } else if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
 753       break;
 754     }
 755
 756     SourceFile->FileBufferPtr++;
 757     Len++;
 758   }
 759
 760   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 761     Warning (
 762       SourceFile->FileName,
 763       SourceFile->LineNum,
 764       0,
 765       "missing closing quote on printable language name string",
 766       "%S",
 767       Start
 768       );
 769   } else {
 770     SourceFile->FileBufferPtr++;
 771   }
 772   //
 773   // Now allocate memory for the string and save it off
 774   //
 775   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 776   if (String == NULL) {
 777     Error (NULL, 0, 0, "memory allocation failed", NULL);
 778     return NULL;
 779   }
 780   //
 781   // Copy the string from the file buffer to the local copy.
 782   // We do no reformatting of it whatsoever at this point.
 783   //
 784   Ptr = String;
 785   while (Len > 0) {
 786     *Ptr = *Start;
 787     Start++;
 788     Ptr++;
 789     Len--;
 790   }
 791
 792   *Ptr = 0;
 793   //
 794   // Now format the string to convert \wide and \narrow controls
 795   //
 796   StringDBFormatString (String);
 797   return String;
 798 }
 799
 800 static
 801 WCHAR *
 802 GetQuotedString (
 803   SOURCE_FILE *SourceFile,
 804   BOOLEAN     Optional
 805   )
 806 {
 807   WCHAR   *String;
 808   WCHAR   *Start;
 809   WCHAR   *Ptr;
 810   UINT32  Len;
 811   BOOLEAN PreviousBackslash;
 812
 813   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 814     if (!Optional) {
 815       Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted string", "%S", SourceFile->FileBufferPtr);
 816     }
 817
 818     return NULL;
 819   }
 820
 821   Len = 0;
 822   SourceFile->FileBufferPtr++;
 823   Start             = Ptr = SourceFile->FileBufferPtr;
 824   PreviousBackslash = FALSE;
 825   while (!EndOfFile (SourceFile)) {
 826     if ((SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) && (!PreviousBackslash)) {
 827       break;
 828     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 829       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 830       PreviousBackslash = FALSE;
 831     } else if (SourceFile->FileBufferPtr[0] == UNICODE_BACKSLASH) {
 832       PreviousBackslash = TRUE;
 833     } else {
 834       PreviousBackslash = FALSE;
 835     }
 836
 837     SourceFile->FileBufferPtr++;
 838     Len++;
 839   }
 840
 841   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 842     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "missing closing quote on string", "%S", Start);
 843   } else {
 844     SourceFile->FileBufferPtr++;
 845   }
 846   //
 847   // Now allocate memory for the string and save it off
 848   //
 849   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 850   if (String == NULL) {
 851     Error (NULL, 0, 0, "memory allocation failed", NULL);
 852     return NULL;
 853   }
 854   //
 855   // Copy the string from the file buffer to the local copy.
 856   // We do no reformatting of it whatsoever at this point.
 857   //
 858   Ptr = String;
 859   while (Len > 0) {
 860     *Ptr = *Start;
 861     Start++;
 862     Ptr++;
 863     Len--;
 864   }
 865
 866   *Ptr = 0;
 867   return String;
 868 }
 869 //
 870 // Parse:
 871 //    #string STR_ID_NAME
 872 //
 873 // All we can do is call the string database to add the string identifier. Unfortunately
 874 // he'll have to keep track of the last identifier we added.
 875 //
 876 static
 877 void
 878 ProcessTokenString (
 879   SOURCE_FILE *SourceFile
 880   )
 881 {
 882   WCHAR   StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
 883   UINT16  StringId;
 884   //
 885   // Extract the string identifier name and add it to the database.
 886   //
 887   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
 888     StringId = STRING_ID_INVALID;
 889     StringDBAddStringIdentifier (StringIdentifier, &StringId, 0);
 890   } else {
 891     //
 892     // Error recovery -- skip to the next #
 893     //
 894     SourceFile->SkipToHash = TRUE;
 895   }
 896 }
 897
 898 static
 899 BOOLEAN
 900 EndOfFile (
 901   SOURCE_FILE *SourceFile
 902   )
 903 {
 904   //
 905   // The file buffer pointer will typically get updated before the End-of-file flag in the
 906   // source file structure, so check it first.
 907   //
 908   if (SourceFile->FileBufferPtr >= SourceFile->FileBuffer + SourceFile->FileSize / sizeof (WCHAR)) {
 909     SourceFile->EndOfFile = TRUE;
 910     return TRUE;
 911   }
 912
 913   if (SourceFile->EndOfFile) {
 914     return TRUE;
 915   }
 916
 917   return FALSE;
 918 }
 919
 920 static
 921 UINT32
 922 GetStringIdentifierName (
 923   IN SOURCE_FILE  *SourceFile,
 924   IN OUT WCHAR    *StringIdentifierName,
 925   IN UINT32       StringIdentifierNameLen
 926   )
 927 {
 928   UINT32  Len;
 929   WCHAR   *From;
 930   WCHAR   *Start;
 931
 932   //
 933   // Skip whitespace
 934   //
 935   SkipWhiteSpace (SourceFile);
 936   if (SourceFile->EndOfFile) {
 937     Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-file encountered", "expected string identifier");
 938     return 0;
 939   }
 940   //
 941   // Verify first character of name is [A-Za-z]
 942   //
 943   Len = 0;
 944   StringIdentifierNameLen /= 2;
 945   From  = SourceFile->FileBufferPtr;
 946   Start = SourceFile->FileBufferPtr;
 947   if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 948       ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))
 949       ) {
 950     //
 951     // Do nothing
 952     //
 953   } else {
 954     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid character in string identifier name", "%S", Start);
 955     return 0;
 956   }
 957
 958   while (!EndOfFile (SourceFile)) {
 959     if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 960         ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z)) ||
 961         ((SourceFile->FileBufferPtr[0] >= UNICODE_0) && (SourceFile->FileBufferPtr[0] <= UNICODE_9)) ||
 962         (SourceFile->FileBufferPtr[0] == UNICODE_UNDERSCORE)
 963         ) {
 964       Len++;
 965       if (Len >= StringIdentifierNameLen) {
 966         Error (SourceFile->FileName, SourceFile->LineNum, 0, "string identifier name too long", "%S", Start);
 967         return 0;
 968       }
 969
 970       *StringIdentifierName = SourceFile->FileBufferPtr[0];
 971       StringIdentifierName++;
 972       SourceFile->FileBufferPtr++;
 973     } else if (SkipWhiteSpace (SourceFile) == 0) {
 974       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid string identifier name", "%S", Start);
 975       return 0;
 976     } else {
 977       break;
 978     }
 979   }
 980   //
 981   // Terminate the copy of the string.
 982   //
 983   *StringIdentifierName = 0;
 984   return Len;
 985 }
 986
 987 static
 988 UINT32
 989 GetLanguageIdentifierName (
 990   IN SOURCE_FILE  *SourceFile,
 991   IN OUT WCHAR    *LanguageIdentifierName,
 992   IN UINT32       LanguageIdentifierNameLen,
 993   IN BOOLEAN      Optional
 994   )
 995 {
 996   UINT32  Len;
 997   WCHAR   *From;
 998   WCHAR   *Start;
 999   //
1000   // Skip whitespace
1001   //
1002   SkipWhiteSpace (SourceFile);
1003   if (SourceFile->EndOfFile) {
1004     if (!Optional) {
1005       Error (
1006         SourceFile->FileName,
1007         SourceFile->LineNum,
1008         0,
1009         "end-of-file encountered",
1010         "expected language identifier"
1011         );
1012     }
1013
1014     return 0;
1015   }
1016   //
1017   // This function is called to optionally get a language identifier name in:
1018   //   #string STR_ID eng "the string"
1019   // If it's optional, and we find a double-quote, then return now.
1020   //
1021   if (Optional) {
1022     if (*SourceFile->FileBufferPtr == UNICODE_DOUBLE_QUOTE) {
1023       return 0;
1024     }
1025   }
1026
1027   Len = 0;
1028   LanguageIdentifierNameLen /= 2;
1029   //
1030   // Internal error if we weren't given at least 4 WCHAR's to work with.
1031   //
1032   if (LanguageIdentifierNameLen < LANGUAGE_IDENTIFIER_NAME_LEN + 1) {
1033     Error (
1034       SourceFile->FileName,
1035       SourceFile->LineNum,
1036       0,
1037       "app error -- language identifier name length is invalid",
1038       NULL
1039       );
1040   }
1041
1042   From  = SourceFile->FileBufferPtr;
1043   Start = SourceFile->FileBufferPtr;
1044   while (!EndOfFile (SourceFile)) {
1045     if (((SourceFile->FileBufferPtr[0] >= UNICODE_a) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))) {
1046       Len++;
1047       if (Len > LANGUAGE_IDENTIFIER_NAME_LEN) {
1048         Error (SourceFile->FileName, SourceFile->LineNum, 0, "language identifier name too long", "%S", Start);
1049         return 0;
1050       }
1051
1052       *LanguageIdentifierName = SourceFile->FileBufferPtr[0];
1053       SourceFile->FileBufferPtr++;
1054       LanguageIdentifierName++;
1055     } else if (!IsWhiteSpace (SourceFile)) {
1056       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid language identifier name", "%S", Start);
1057       return 0;
1058     } else {
1059       break;
1060     }
1061   }
1062   //
1063   // Terminate the copy of the string.
1064   //
1065   *LanguageIdentifierName = 0;
1066   return Len;
1067 }
1068
1069 static
1070 void
1071 ProcessTokenInclude (
1072   SOURCE_FILE *SourceFile
1073   )
1074 {
1075   INT8        IncludeFileName[MAX_PATH];
1076   INT8        *To;
1077   UINT32      Len;
1078   BOOLEAN     ReportedError;
1079   SOURCE_FILE IncludedSourceFile;
1080
1081   ReportedError = FALSE;
1082   if (SkipWhiteSpace (SourceFile) == 0) {
1083     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "expected whitespace following #include keyword", NULL);
1084   }
1085   //
1086   // Should be quoted file name
1087   //
1088   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
1089     Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted include file name", NULL);
1090     goto FailDone;
1091   }
1092
1093   SourceFile->FileBufferPtr++;
1094   //
1095   // Copy the filename as ascii to our local string
1096   //
1097   To  = IncludeFileName;
1098   Len = 0;
1099   while (!EndOfFile (SourceFile)) {
1100     if ((SourceFile->FileBufferPtr[0] == UNICODE_CR) || (SourceFile->FileBufferPtr[0] == UNICODE_LF)) {
1101       Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-line found in quoted include file name", NULL);
1102       goto FailDone;
1103     }
1104
1105     if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
1106       SourceFile->FileBufferPtr++;
1107       break;
1108     }
1109     //
1110     // If too long, then report the error once and process until the closing quote
1111     //
1112     Len++;
1113     if (!ReportedError && (Len >= sizeof (IncludeFileName))) {
1114       Error (SourceFile->FileName, SourceFile->LineNum, 0, "length of include file name exceeds limit", NULL);
1115       ReportedError = TRUE;
1116     }
1117
1118     if (!ReportedError) {
1119       *To = UNICODE_TO_ASCII (SourceFile->FileBufferPtr[0]);
1120       To++;
1121     }
1122
1123     SourceFile->FileBufferPtr++;
1124   }
1125
1126   if (!ReportedError) {
1127     *To = 0;
1128     memset ((char *) &IncludedSourceFile, 0, sizeof (SOURCE_FILE));
1129     strcpy (IncludedSourceFile.FileName, IncludeFileName);
1130     IncludedSourceFile.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
1131     ProcessIncludeFile (&IncludedSourceFile, SourceFile);
1132     //
1133     // printf ("including file '%s'\n", IncludeFileName);
1134     //
1135   }
1136
1137   return ;
1138 FailDone:
1139   //
1140   // Error recovery -- skip to next #
1141   //
1142   SourceFile->SkipToHash = TRUE;
1143 }
1144
1145 static
1146 void
1147 ProcessTokenScope (
1148   SOURCE_FILE *SourceFile
1149   )
1150 {
1151   WCHAR StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
1152   //
1153   // Extract the scope name
1154   //
1155   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
1156     StringDBSetScope (StringIdentifier);
1157   }
1158 }
1159 //
1160 // Parse:  #langdef eng "English"
1161 //         #langdef chn "\wideChinese"
1162 //
1163 static
1164 void
1165 ProcessTokenLangDef (
1166   SOURCE_FILE *SourceFile
1167   )
1168 {
1169   WCHAR   LanguageIdentifier[MAX_STRING_IDENTIFIER_NAME];
1170   UINT32  Len;
1171   WCHAR   *PrintableName;
1172   //
1173   // Extract the 3-character language identifier
1174   //
1175   Len = GetLanguageIdentifierName (SourceFile, LanguageIdentifier, sizeof (LanguageIdentifier), FALSE);
1176   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1177     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", NULL);
1178   } else {
1179     //
1180     // Extract the printable name
1181     //
1182     PrintableName = GetPrintableLanguageName (SourceFile);
1183     if (PrintableName != NULL) {
1184       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1185       StringDBAddLanguage (LanguageIdentifier, PrintableName);
1186       free (PrintableName);
1187       return ;
1188     }
1189   }
1190   //
1191   // Error recovery -- skip to next #
1192   //
1193   SourceFile->SkipToHash = TRUE;
1194 }
1195
1196 static
1197 BOOLEAN
1198 ApparentQuotedString (
1199   SOURCE_FILE *SourceFile
1200   )
1201 {
1202   WCHAR *Ptr;
1203   //
1204   // See if the first and last nonblank characters on the line are double quotes
1205   //
1206   for (Ptr = SourceFile->FileBufferPtr; *Ptr && (*Ptr == UNICODE_SPACE); Ptr++)
1207     ;
1208   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1209     return FALSE;
1210   }
1211
1212   while (*Ptr) {
1213     Ptr++;
1214   }
1215
1216   Ptr--;
1217   for (; *Ptr && (*Ptr == UNICODE_SPACE); Ptr--)
1218     ;
1219   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1220     return FALSE;
1221   }
1222
1223   return TRUE;
1224 }
1225 //
1226 // Parse:
1227 //   #language eng "some string " "more string"
1228 //
1229 static
1230 void
1231 ProcessTokenLanguage (
1232   SOURCE_FILE *SourceFile
1233   )
1234 {
1235   WCHAR   *String;
1236   WCHAR   *SecondString;
1237   WCHAR   *TempString;
1238   WCHAR   *From;
1239   WCHAR   *To;
1240   WCHAR   Language[LANGUAGE_IDENTIFIER_NAME_LEN + 1];
1241   UINT32  Len;
1242   BOOLEAN PreviousNewline;
1243   //
1244   // Get the language identifier
1245   //
1246   Language[0] = 0;
1247   Len         = GetLanguageIdentifierName (SourceFile, Language, sizeof (Language), TRUE);
1248   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1249     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", "%S", Language);
1250     SourceFile->SkipToHash = TRUE;
1251     return ;
1252   }
1253   //
1254   // Extract the string value. It's either a quoted string that starts on the current line, or
1255   // an unquoted string that starts on the following line and continues until the next control
1256   // character in column 1.
1257   // Look ahead to find a quote or a newline
1258   //
1259   if (SkipTo (SourceFile, UNICODE_DOUBLE_QUOTE, TRUE)) {
1260     String = GetQuotedString (SourceFile, FALSE);
1261     if (String != NULL) {
1262       //
1263       // Set the position in the file of where we are parsing for error
1264       // reporting purposes. Then start looking ahead for additional
1265       // quoted strings, and concatenate them until we get a failure
1266       // back from the string parser.
1267       //
1268       Len = wcslen (String) + 1;
1269       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1270       do {
1271         SkipWhiteSpace (SourceFile);
1272         SecondString = GetQuotedString (SourceFile, TRUE);
1273         if (SecondString != NULL) {
1274           Len += wcslen (SecondString);
1275           TempString = (WCHAR *) malloc (Len * sizeof (WCHAR));
1276           if (TempString == NULL) {
1277             Error (NULL, 0, 0, "application error", "failed to allocate memory");
1278             return ;
1279           }
1280
1281           wcscpy (TempString, String);
1282           wcscat (TempString, SecondString);
1283           free (String);
1284           free (SecondString);
1285           String = TempString;
1286         }
1287       } while (SecondString != NULL);
1288       StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1289       free (String);
1290     } else {
1291       //
1292       // Error was reported at lower level. Error recovery mode.
1293       //
1294       SourceFile->SkipToHash = TRUE;
1295     }
1296   } else {
1297     if (!mGlobals.UnquotedStrings) {
1298       //
1299       // They're using unquoted strings. If the next non-blank character is a double quote, and the
1300       // last non-blank character on the line is a double quote, then more than likely they're using
1301       // quotes, so they need to put the quoted string on the end of the previous line
1302       //
1303       if (ApparentQuotedString (SourceFile)) {
1304         Warning (
1305           SourceFile->FileName,
1306           SourceFile->LineNum,
1307           0,
1308           "unexpected quoted string on line",
1309           "specify -uqs option if necessary"
1310           );
1311       }
1312     }
1313     //
1314     // Found end-of-line (hopefully). Skip over it and start taking in characters
1315     // until we find a control character at the start of a line.
1316     //
1317     Len             = 0;
1318     From            = SourceFile->FileBufferPtr;
1319     PreviousNewline = FALSE;
1320     while (!EndOfFile (SourceFile)) {
1321       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
1322         PreviousNewline = TRUE;
1323         SourceFile->LineNum++;
1324       } else {
1325         Len++;
1326         if (PreviousNewline && (SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter)) {
1327           break;
1328         }
1329
1330         PreviousNewline = FALSE;
1331       }
1332
1333       SourceFile->FileBufferPtr++;
1334     }
1335
1336     if ((Len == 0) && EndOfFile (SourceFile)) {
1337       Error (SourceFile->FileName, SourceFile->LineNum, 0, "unexpected end of file", NULL);
1338       SourceFile->SkipToHash = TRUE;
1339       return ;
1340     }
1341     //
1342     // Now allocate a buffer, copy the characters, and add the string.
1343     //
1344     String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
1345     if (String == NULL) {
1346       Error (NULL, 0, 0, "application error", "failed to allocate memory");
1347       return ;
1348     }
1349
1350     To = String;
1351     while (From < SourceFile->FileBufferPtr) {
1352       switch (*From) {
1353       case UNICODE_LF:
1354       case 0:
1355         break;
1356
1357       default:
1358         *To = *From;
1359         To++;
1360         break;
1361       }
1362
1363       From++;
1364     }
1365
1366     //
1367     // String[Len] = 0;
1368     //
1369     *To = 0;
1370     StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1371   }
1372 }
1373
1374 static
1375 BOOLEAN
1376 IsWhiteSpace (
1377   SOURCE_FILE *SourceFile
1378   )
1379 {
1380   switch (SourceFile->FileBufferPtr[0]) {
1381   case UNICODE_NULL:
1382   case UNICODE_CR:
1383   case UNICODE_SPACE:
1384   case UNICODE_TAB:
1385   case UNICODE_LF:
1386     return TRUE;
1387
1388   default:
1389     return FALSE;
1390   }
1391 }
1392
1393 static
1394 UINT32
1395 SkipWhiteSpace (
1396   SOURCE_FILE *SourceFile
1397   )
1398 {
1399   UINT32  Count;
1400
1401   Count = 0;
1402   while (!EndOfFile (SourceFile)) {
1403     Count++;
1404     switch (*SourceFile->FileBufferPtr) {
1405     case UNICODE_NULL:
1406     case UNICODE_CR:
1407     case UNICODE_SPACE:
1408     case UNICODE_TAB:
1409       SourceFile->FileBufferPtr++;
1410       break;
1411
1412     case UNICODE_LF:
1413       SourceFile->FileBufferPtr++;
1414       SourceFile->LineNum++;
1415       if (mGlobals.Verbose) {
1416         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
1417       }
1418       break;
1419
1420     default:
1421       return Count - 1;
1422     }
1423   }
1424   //
1425   // Some tokens require trailing whitespace. If we're at the end of the
1426   // file, then we count that as well.
1427   //
1428   if ((Count == 0) && (EndOfFile (SourceFile))) {
1429     Count++;
1430   }
1431
1432   return Count;
1433 }
1434
1435 static
1436 UINT32
1437 wstrcmp (
1438   WCHAR *Buffer,
1439   WCHAR *Str
1440   )
1441 {
1442   UINT32  Len;
1443
1444   Len = 0;
1445   while (*Str == *Buffer) {
1446     Buffer++;
1447     Str++;
1448     Len++;
1449   }
1450
1451   if (*Str) {
1452     return 0;
1453   }
1454
1455   return Len;
1456 }
1457 //
1458 // Given a filename, try to find it along the include paths.
1459 //
1460 static
1461 FILE *
1462 FindFile (
1463   IN INT8    *FileName,
1464   OUT INT8   *FoundFileName,
1465   IN UINT32  FoundFileNameLen
1466   )
1467 {
1468   FILE              *Fptr;
1469   TEXT_STRING_LIST  *List;
1470
1471   //
1472   // Traverse the list of paths and try to find the file
1473   //
1474   List = mGlobals.IncludePaths;
1475   while (List != NULL) {
1476     //
1477     // Put the path and filename together
1478     //
1479     if (strlen (List->Str) + strlen (FileName) + 1 > FoundFileNameLen) {
1480       Error (UTILITY_NAME, 0, 0, NULL, "internal error - cannot concatenate path+filename");
1481       return NULL;
1482     }
1483     //
1484     // Append the filename to this include path and try to open the file.
1485     //
1486     strcpy (FoundFileName, List->Str);
1487     strcat (FoundFileName, FileName);
1488     if ((Fptr = fopen (FoundFileName, "rb")) != NULL) {
1489       //
1490       // Return the file pointer
1491       //
1492       return Fptr;
1493     }
1494
1495     List = List->Next;
1496   }
1497   //
1498   // Not found
1499   //
1500   FoundFileName[0] = 0;
1501   return NULL;
1502 }
1503 //
1504 // Process the command-line arguments
1505 //
1506 static
1507 STATUS
1508 ProcessArgs (
1509   int   Argc,
1510   char  *Argv[]
1511   )
1512 {
1513   TEXT_STRING_LIST  *NewList;
1514   //
1515   // Clear our globals
1516   //
1517   memset ((char *) &mGlobals, 0, sizeof (mGlobals));
1518   strcpy (mGlobals.BaseName, DEFAULT_BASE_NAME);
1519   //
1520   // Skip program name
1521   //
1522   Argc--;
1523   Argv++;
1524
1525   if (Argc == 0) {
1526     Usage ();
1527     return STATUS_ERROR;
1528   }
1529
1530   mGlobals.Mode = MODE_UNKNOWN;
1531   //
1532   // Process until no more -args.
1533   //
1534   while ((Argc > 0) && (Argv[0][0] == '-')) {
1535     //
1536     // -parse option
1537     //
1538     if (_stricmp (Argv[0], "-parse") == 0) {
1539       if (mGlobals.Mode != MODE_UNKNOWN) {
1540         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1541         return STATUS_ERROR;
1542       }
1543
1544       mGlobals.Mode = MODE_PARSE;
1545       //
1546       // -scan option
1547       //
1548     } else if (_stricmp (Argv[0], "-scan") == 0) {
1549       if (mGlobals.Mode != MODE_UNKNOWN) {
1550         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1551         return STATUS_ERROR;
1552       }
1553
1554       mGlobals.Mode = MODE_SCAN;
1555       //
1556       // -vscan verbose scanning option
1557       //
1558     } else if (_stricmp (Argv[0], "-vscan") == 0) {
1559       mGlobals.VerboseScan = TRUE;
1560       //
1561       // -dump option
1562       //
1563     } else if (_stricmp (Argv[0], "-dump") == 0) {
1564       if (mGlobals.Mode != MODE_UNKNOWN) {
1565         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1566         return STATUS_ERROR;
1567       }
1568
1569       mGlobals.Mode = MODE_DUMP;
1570     } else if (_stricmp (Argv[0], "-uqs") == 0) {
1571       mGlobals.UnquotedStrings = TRUE;
1572       //
1573       // -i path    add include search path when parsing
1574       //
1575     } else if (_stricmp (Argv[0], "-i") == 0) {
1576       //
1577       // check for one more arg
1578       //
1579       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1580         Error (UTILITY_NAME, 0, 0, Argv[0], "missing include path");
1581         return STATUS_ERROR;
1582       }
1583       //
1584       // Allocate memory for a new list element, fill it in, and
1585       // add it to our list of include paths. Always make sure it
1586       // has a "\" on the end of it.
1587       //
1588       NewList = malloc (sizeof (TEXT_STRING_LIST));
1589       if (NewList == NULL) {
1590         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1591         return STATUS_ERROR;
1592       }
1593
1594       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1595       NewList->Str = malloc (strlen (Argv[1]) + 2);
1596       if (NewList->Str == NULL) {
1597         free (NewList);
1598         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1599         return STATUS_ERROR;
1600       }
1601
1602       strcpy (NewList->Str, Argv[1]);
1603       if (NewList->Str[strlen (NewList->Str) - 1] != '\\') {
1604         strcat (NewList->Str, "\\");
1605       }
1606       //
1607       // Add it to our linked list
1608       //
1609       if (mGlobals.IncludePaths == NULL) {
1610         mGlobals.IncludePaths = NewList;
1611       } else {
1612         mGlobals.LastIncludePath->Next = NewList;
1613       }
1614
1615       mGlobals.LastIncludePath = NewList;
1616       Argc--;
1617       Argv++;
1618     } else if (_stricmp (Argv[0], "-if") == 0) {
1619       //
1620       // Indirection file -- check for one more arg
1621       //
1622       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1623         Error (UTILITY_NAME, 0, 0, Argv[0], "missing indirection file name");
1624         return STATUS_ERROR;
1625       }
1626       //
1627       // Allocate memory for a new list element, fill it in, and
1628       // add it to our list of include paths. Always make sure it
1629       // has a "\" on the end of it.
1630       //
1631       NewList = malloc (sizeof (TEXT_STRING_LIST));
1632       if (NewList == NULL) {
1633         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1634         return STATUS_ERROR;
1635       }
1636
1637       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1638       NewList->Str = malloc (strlen (Argv[1]) + 1);
1639       if (NewList->Str == NULL) {
1640         free (NewList);
1641         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1642         return STATUS_ERROR;
1643       }
1644
1645       strcpy (NewList->Str, Argv[1]);
1646       //
1647       // Add it to our linked list
1648       //
1649       if (mGlobals.IndirectionFileName == NULL) {
1650         mGlobals.IndirectionFileName = NewList;
1651       } else {
1652         mGlobals.LastIndirectionFileName->Next = NewList;
1653       }
1654
1655       mGlobals.LastIndirectionFileName = NewList;
1656       Argc--;
1657       Argv++;
1658     } else if (_stricmp (Argv[0], "-db") == 0) {
1659       //
1660       // -db option to specify a database file.
1661       // Check for one more arg (the database file name)
1662       //
1663       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1664         Error (UTILITY_NAME, 0, 0, Argv[0], "missing database file name");
1665         return STATUS_ERROR;
1666       }
1667
1668       NewList = malloc (sizeof (TEXT_STRING_LIST));
1669       if (NewList == NULL) {
1670         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1671         return STATUS_ERROR;
1672       }
1673
1674       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1675       NewList->Str = malloc (strlen (Argv[1]) + 1);
1676       if (NewList->Str == NULL) {
1677         free (NewList);
1678         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1679         return STATUS_ERROR;
1680       }
1681
1682       strcpy (NewList->Str, Argv[1]);
1683       //
1684       // Add it to our linked list
1685       //
1686       if (mGlobals.DatabaseFileName == NULL) {
1687         mGlobals.DatabaseFileName = NewList;
1688       } else {
1689         mGlobals.LastDatabaseFileName->Next = NewList;
1690       }
1691
1692       mGlobals.LastDatabaseFileName = NewList;
1693       Argc--;
1694       Argv++;
1695     } else if (_stricmp (Argv[0], "-ou") == 0) {
1696       //
1697       // -ou option to specify an output unicode file to
1698       // which we can dump our database.
1699       //
1700       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1701         Error (UTILITY_NAME, 0, 0, Argv[0], "missing database dump output file name");
1702         return STATUS_ERROR;
1703       }
1704
1705       if (mGlobals.DumpUFileName[0] == 0) {
1706         strcpy (mGlobals.DumpUFileName, Argv[1]);
1707       } else {
1708         Error (UTILITY_NAME, 0, 0, Argv[1], "-ou option already specified with '%s'", mGlobals.DumpUFileName);
1709         return STATUS_ERROR;
1710       }
1711
1712       Argc--;
1713       Argv++;
1714     } else if (_stricmp (Argv[0], "-hpk") == 0) {
1715       //
1716       // -hpk option to create an HII export pack of the input database file
1717       //
1718       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1719         Error (UTILITY_NAME, 0, 0, Argv[0], "missing raw string data dump output file name");
1720         return STATUS_ERROR;
1721       }
1722
1723       if (mGlobals.HiiExportPackFileName[0] == 0) {
1724         strcpy (mGlobals.HiiExportPackFileName, Argv[1]);
1725       } else {
1726         Error (UTILITY_NAME, 0, 0, Argv[1], "-or option already specified with '%s'", mGlobals.HiiExportPackFileName);
1727         return STATUS_ERROR;
1728       }
1729
1730       Argc--;
1731       Argv++;
1732     } else if ((_stricmp (Argv[0], "-?") == 0) || (_stricmp (Argv[0], "-h") == 0)) {
1733       Usage ();
1734       return STATUS_ERROR;
1735     } else if (_stricmp (Argv[0], "-v") == 0) {
1736       mGlobals.Verbose = 1;
1737     } else if (_stricmp (Argv[0], "-vdbw") == 0) {
1738       mGlobals.VerboseDatabaseWrite = 1;
1739     } else if (_stricmp (Argv[0], "-vdbr") == 0) {
1740       mGlobals.VerboseDatabaseRead = 1;
1741     } else if (_stricmp (Argv[0], "-newdb") == 0) {
1742       mGlobals.NewDatabase = 1;
1743     } else if (_stricmp (Argv[0], "-ignorenotfound") == 0) {
1744       mGlobals.IgnoreNotFound = 1;
1745     } else if (_stricmp (Argv[0], "-oc") == 0) {
1746       //
1747       // check for one more arg
1748       //
1749       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1750         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output C filename");
1751         return STATUS_ERROR;
1752       }
1753
1754       strcpy (mGlobals.StringCFileName, Argv[1]);
1755       Argc--;
1756       Argv++;
1757     } else if (_stricmp (Argv[0], "-bn") == 0) {
1758       //
1759       // check for one more arg
1760       //
1761       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1762         Error (UTILITY_NAME, 0, 0, Argv[0], "missing base name");
1763         Usage ();
1764         return STATUS_ERROR;
1765       }
1766
1767       strcpy (mGlobals.BaseName, Argv[1]);
1768       Argc--;
1769       Argv++;
1770     } else if (_stricmp (Argv[0], "-oh") == 0) {
1771       //
1772       // -oh to specify output .h defines file name
1773       //
1774       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1775         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output .h filename");
1776         return STATUS_ERROR;
1777       }
1778
1779       strcpy (mGlobals.StringHFileName, Argv[1]);
1780       Argc--;
1781       Argv++;
1782     } else if (_stricmp (Argv[0], "-dep") == 0) {
1783       //
1784       // -dep to specify output dependency file name
1785       //
1786       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1787         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output dependency filename");
1788         return STATUS_ERROR;
1789       }
1790
1791       strcpy (mGlobals.OutputDependencyFileName, Argv[1]);
1792       Argc--;
1793       Argv++;
1794     } else if (_stricmp (Argv[0], "-skipext") == 0) {
1795       //
1796       // -skipext to skip scanning of files with certain filename extensions
1797       //
1798       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1799         Error (UTILITY_NAME, 0, 0, Argv[0], "missing filename extension");
1800         return STATUS_ERROR;
1801       }
1802       //
1803       // Allocate memory for a new list element, fill it in, and
1804       // add it to our list of excluded extensions. Always make sure it
1805       // has a "." as the first character.
1806       //
1807       NewList = malloc (sizeof (TEXT_STRING_LIST));
1808       if (NewList == NULL) {
1809         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1810         return STATUS_ERROR;
1811       }
1812
1813       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1814       NewList->Str = malloc (strlen (Argv[1]) + 2);
1815       if (NewList->Str == NULL) {
1816         free (NewList);
1817         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1818         return STATUS_ERROR;
1819       }
1820
1821       if (Argv[1][0] == '.') {
1822         strcpy (NewList->Str, Argv[1]);
1823       } else {
1824         NewList->Str[0] = '.';
1825         strcpy (NewList->Str + 1, Argv[1]);
1826       }
1827       //
1828       // Add it to our linked list
1829       //
1830       if (mGlobals.SkipExt == NULL) {
1831         mGlobals.SkipExt = NewList;
1832       } else {
1833         mGlobals.LastSkipExt->Next = NewList;
1834       }
1835
1836       mGlobals.LastSkipExt = NewList;
1837       Argc--;
1838       Argv++;
1839     } else if (_stricmp (Argv[0], "-lang") == 0) {
1840       //
1841       // "-lang eng" or "-lang spa+cat" to only output certain languages
1842       //
1843       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1844         Error (UTILITY_NAME, 0, 0, Argv[0], "missing language name");
1845         Usage ();
1846         return STATUS_ERROR;
1847       }
1848
1849       if (AddCommandLineLanguage (Argv[1]) != STATUS_SUCCESS) {
1850         return STATUS_ERROR;
1851       }
1852
1853       Argc--;
1854       Argv++;
1855     } else if (_stricmp (Argv[0], "-od") == 0) {
1856       //
1857       // Output database file name -- check for another arg
1858       //
1859       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1860         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output database file name");
1861         return STATUS_ERROR;
1862       }
1863
1864       strcpy (mGlobals.OutputDatabaseFileName, Argv[1]);
1865       Argv++;
1866       Argc--;
1867     } else {
1868       //
1869       // Unrecognized arg
1870       //
1871       Error (UTILITY_NAME, 0, 0, Argv[0], "unrecognized option");
1872       Usage ();
1873       return STATUS_ERROR;
1874     }
1875
1876     Argv++;
1877     Argc--;
1878   }
1879   //
1880   // Make sure they specified the mode parse/scan/dump
1881   //
1882   if (mGlobals.Mode == MODE_UNKNOWN) {
1883     Error (NULL, 0, 0, "must specify one of -parse/-scan/-dump", NULL);
1884     return STATUS_ERROR;
1885   }
1886   //
1887   // All modes require a database filename
1888   //
1889   if (mGlobals.DatabaseFileName == 0) {
1890     Error (NULL, 0, 0, "must specify a database filename using -db DbFileName", NULL);
1891     Usage ();
1892     return STATUS_ERROR;
1893   }
1894   //
1895   // If dumping the database file, then return immediately if all
1896   // parameters check out.
1897   //
1898   if (mGlobals.Mode == MODE_DUMP) {
1899     //
1900     // Not much use if they didn't specify -oh or -oc or -ou or -hpk
1901     //
1902     if ((mGlobals.DumpUFileName[0] == 0) &&
1903         (mGlobals.StringHFileName[0] == 0) &&
1904         (mGlobals.StringCFileName[0] == 0) &&
1905         (mGlobals.HiiExportPackFileName[0] == 0)
1906         ) {
1907       Error (NULL, 0, 0, "-dump without -oc/-oh/-ou/-hpk is a NOP", NULL);
1908       return STATUS_ERROR;
1909     }
1910
1911     return STATUS_SUCCESS;
1912   }
1913   //
1914   // Had to specify source string file and output string defines header filename.
1915   //
1916   if (mGlobals.Mode == MODE_SCAN) {
1917     if (Argc < 1) {
1918       Error (UTILITY_NAME, 0, 0, NULL, "must specify at least one source file to scan with -scan");
1919       Usage ();
1920       return STATUS_ERROR;
1921     }
1922     //
1923     // Get the list of filenames
1924     //
1925     while (Argc > 0) {
1926       NewList = malloc (sizeof (TEXT_STRING_LIST));
1927       if (NewList == NULL) {
1928         Error (UTILITY_NAME, 0, 0, "memory allocation failure", NULL);
1929         return STATUS_ERROR;
1930       }
1931
1932       memset (NewList, 0, sizeof (TEXT_STRING_LIST));
1933       NewList->Str = (UINT8 *) malloc (strlen (Argv[0]) + 1);
1934       if (NewList->Str == NULL) {
1935         Error (UTILITY_NAME, 0, 0, "memory allocation failure", NULL);
1936         return STATUS_ERROR;
1937       }
1938
1939       strcpy (NewList->Str, Argv[0]);
1940       if (mGlobals.ScanFileName == NULL) {
1941         mGlobals.ScanFileName = NewList;
1942       } else {
1943         mGlobals.LastScanFileName->Next = NewList;
1944       }
1945
1946       mGlobals.LastScanFileName = NewList;
1947       Argc--;
1948       Argv++;
1949     }
1950   } else {
1951     //
1952     // Parse mode -- must specify an input unicode file name
1953     //
1954     if (Argc < 1) {
1955       Error (UTILITY_NAME, 0, 0, NULL, "must specify input unicode string file name with -parse");
1956       Usage ();
1957       return STATUS_ERROR;
1958     }
1959
1960     strcpy (mGlobals.SourceFiles.FileName, Argv[0]);
1961   }
1962
1963   return STATUS_SUCCESS;
1964 }
1965 //
1966 // Found "-lang eng,spa+cat" on the command line. Parse the
1967 // language list and save the setting for later processing.
1968 //
1969 static
1970 STATUS
1971 AddCommandLineLanguage (
1972   IN INT8          *Language
1973   )
1974 {
1975   WCHAR_STRING_LIST *WNewList;
1976   WCHAR             *From;
1977   WCHAR             *To;
1978   //
1979   // Keep processing the input string until we find the end.
1980   //
1981   while (*Language) {
1982     //
1983     // Allocate memory for a new list element, fill it in, and
1984     // add it to our list.
1985     //
1986     WNewList = MALLOC (sizeof (WCHAR_STRING_LIST));
1987     if (WNewList == NULL) {
1988       Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1989       return STATUS_ERROR;
1990     }
1991
1992     memset ((char *) WNewList, 0, sizeof (WCHAR_STRING_LIST));
1993     WNewList->Str = malloc ((strlen (Language) + 1) * sizeof (WCHAR));
1994     if (WNewList->Str == NULL) {
1995       free (WNewList);
1996       Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1997       return STATUS_ERROR;
1998     }
1999     //
2000     // Copy it as unicode to our new structure. Then remove the
2001     // plus signs in it, and verify each language name is 3 characters
2002     // long. If we find a comma, then we're done with this group, so
2003     // break out.
2004     //
2005 #ifdef USE_VC8
2006     swprintf (WNewList->Str, (strlen (Language) + 1) * sizeof (WCHAR), L"%S", Language);
2007 #else
2008     swprintf (WNewList->Str, L"%S", Language);
2009 #endif
2010     From = To = WNewList->Str;
2011     while (*From) {
2012       if (*From == L',') {
2013         break;
2014       }
2015
2016       if ((wcslen (From) < LANGUAGE_IDENTIFIER_NAME_LEN) ||
2017             (
2018               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != 0) &&
2019               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != UNICODE_PLUS_SIGN) &&
2020               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != L',')
2021             )
2022           ) {
2023         Error (UTILITY_NAME, 0, 0, Language, "invalid format for language name on command line");
2024         FREE (WNewList->Str);
2025         FREE (WNewList);
2026         return STATUS_ERROR;
2027       }
2028
2029       wcsncpy (To, From, LANGUAGE_IDENTIFIER_NAME_LEN);
2030       To += LANGUAGE_IDENTIFIER_NAME_LEN;
2031       From += LANGUAGE_IDENTIFIER_NAME_LEN;
2032       if (*From == L'+') {
2033         From++;
2034       }
2035     }
2036
2037     *To = 0;
2038     //
2039     // Add it to our linked list
2040     //
2041     if (mGlobals.Language == NULL) {
2042       mGlobals.Language = WNewList;
2043     } else {
2044       mGlobals.LastLanguage->Next = WNewList;
2045     }
2046
2047     mGlobals.LastLanguage = WNewList;
2048     //
2049     // Skip to next entry (comma-separated list)
2050     //
2051     while (*Language) {
2052       if (*Language == L',') {
2053         Language++;
2054         break;
2055       }
2056
2057       Language++;
2058     }
2059   }
2060
2061   return STATUS_SUCCESS;
2062 }
2063 //
2064 // The contents of the text file are expected to be (one per line)
2065 //   STRING_IDENTIFIER_NAME   ScopeName
2066 // For example:
2067 //   STR_ID_MY_FAVORITE_STRING   IBM
2068 //
2069 static
2070 STATUS
2071 ParseIndirectionFiles (
2072   TEXT_STRING_LIST    *Files
2073   )
2074 {
2075   FILE                        *Fptr;
2076   INT8                        Line[200];
2077   INT8                        *StringName;
2078   INT8                        *ScopeName;
2079   INT8                        *End;
2080   UINT32                      LineCount;
2081   WCHAR_MATCHING_STRING_LIST  *NewList;
2082
2083   Line[sizeof (Line) - 1] = 0;
2084   Fptr                    = NULL;
2085   while (Files != NULL) {
2086     Fptr      = fopen (Files->Str, "r");
2087     LineCount = 0;
2088     if (Fptr == NULL) {
2089       Error (NULL, 0, 0, Files->Str, "failed to open input indirection file for reading");
2090       return STATUS_ERROR;
2091     }
2092
2093     while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2094       //
2095       // remove terminating newline for error printing purposes.
2096       //
2097       if (Line[strlen (Line) - 1] == '\n') {
2098         Line[strlen (Line) - 1] = 0;
2099       }
2100
2101       LineCount++;
2102       if (Line[sizeof (Line) - 1] != 0) {
2103         Error (Files->Str, LineCount, 0, "line length exceeds maximum supported", NULL);
2104         goto Done;
2105       }
2106
2107       StringName = Line;
2108       while (*StringName && (isspace (*StringName))) {
2109         StringName++;
2110       }
2111
2112       if (*StringName) {
2113         if ((*StringName == '_') || isalpha (*StringName)) {
2114           End = StringName;
2115           while ((*End) && (*End == '_') || (isalnum (*End))) {
2116             End++;
2117           }
2118
2119           if (isspace (*End)) {
2120             *End = 0;
2121             End++;
2122             while (isspace (*End)) {
2123               End++;
2124             }
2125
2126             if (*End) {
2127               ScopeName = End;
2128               while (*End && !isspace (*End)) {
2129                 End++;
2130               }
2131
2132               *End = 0;
2133               //
2134               // Add the string name/scope pair
2135               //
2136               NewList = malloc (sizeof (WCHAR_MATCHING_STRING_LIST));
2137               if (NewList == NULL) {
2138                 Error (NULL, 0, 0, "memory allocation error", NULL);
2139                 goto Done;
2140               }
2141
2142               memset (NewList, 0, sizeof (WCHAR_MATCHING_STRING_LIST));
2143               NewList->Str1 = (WCHAR *) malloc ((strlen (StringName) + 1) * sizeof (WCHAR));
2144               NewList->Str2 = (WCHAR *) malloc ((strlen (ScopeName) + 1) * sizeof (WCHAR));
2145               if ((NewList->Str1 == NULL) || (NewList->Str2 == NULL)) {
2146                 Error (NULL, 0, 0, "memory allocation error", NULL);
2147                 goto Done;
2148               }
2149
2150 #ifdef USE_VC8
2151               swprintf (NewList->Str1, (strlen (StringName) + 1) * sizeof (WCHAR), L"%S", StringName);
2152               swprintf (NewList->Str2, (strlen (ScopeName) + 1) * sizeof (WCHAR), L"%S", ScopeName);
2153 #else
2154               swprintf (NewList->Str1, L"%S", StringName);
2155               swprintf (NewList->Str2, L"%S", ScopeName);
2156 #endif
2157               if (mGlobals.IndirectionList == NULL) {
2158                 mGlobals.IndirectionList = NewList;
2159               } else {
2160                 mGlobals.LastIndirectionList->Next = NewList;
2161               }
2162
2163               mGlobals.LastIndirectionList = NewList;
2164             } else {
2165               Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2166               goto Done;
2167             }
2168           } else {
2169             Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2170             goto Done;
2171           }
2172         } else {
2173           Error (Files->Str, LineCount, 0, StringName, "invalid string identifier");
2174           goto Done;
2175         }
2176       }
2177     }
2178
2179     fclose (Fptr);
2180     Fptr  = NULL;
2181     Files = Files->Next;
2182   }
2183
2184 Done:
2185   if (Fptr != NULL) {
2186     fclose (Fptr);
2187     return STATUS_ERROR;
2188   }
2189
2190   return STATUS_SUCCESS;
2191 }
2192
2193 static
2194 STATUS
2195 ScanFiles (
2196   TEXT_STRING_LIST *ScanFiles
2197   )
2198 {
2199   char              Line[MAX_LINE_LEN];
2200   FILE              *Fptr;
2201   UINT32            LineNum;
2202   char              *Cptr;
2203   char              *SavePtr;
2204   char              *TermPtr;
2205   char              *StringTokenPos;
2206   TEXT_STRING_LIST  *SList;
2207   BOOLEAN           SkipIt;
2208
2209   //
2210   // Put a null-terminator at the end of the line. If we read in
2211   // a line longer than we support, then we can catch it.
2212   //
2213   Line[MAX_LINE_LEN - 1] = 0;
2214   //
2215   // Process each file. If they gave us a skip extension list, then
2216   // skip it if the extension matches.
2217   //
2218   while (ScanFiles != NULL) {
2219     SkipIt = FALSE;
2220     for (SList = mGlobals.SkipExt; SList != NULL; SList = SList->Next) {
2221       if ((strlen (ScanFiles->Str) > strlen (SList->Str)) &&
2222           (strcmp (ScanFiles->Str + strlen (ScanFiles->Str) - strlen (SList->Str), SList->Str) == 0)
2223           ) {
2224         SkipIt = TRUE;
2225         //
2226         // printf ("Match: %s : %s\n", ScanFiles->Str, SList->Str);
2227         //
2228         break;
2229       }
2230     }
2231
2232     if (!SkipIt) {
2233       if (mGlobals.VerboseScan) {
2234         printf ("Scanning %s\n", ScanFiles->Str);
2235       }
2236
2237       Fptr = fopen (ScanFiles->Str, "r");
2238       if (Fptr == NULL) {
2239         Error (NULL, 0, 0, ScanFiles->Str, "failed to open input file for scanning");
2240         return STATUS_ERROR;
2241       }
2242
2243       LineNum = 0;
2244       while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2245         LineNum++;
2246         if (Line[MAX_LINE_LEN - 1] != 0) {
2247           Error (ScanFiles->Str, LineNum, 0, "line length exceeds maximum supported by tool", NULL);
2248           fclose (Fptr);
2249           return STATUS_ERROR;
2250         }
2251         //
2252         // Remove the newline from the input line so we can print a warning message
2253         //
2254         if (Line[strlen (Line) - 1] == '\n') {
2255           Line[strlen (Line) - 1] = 0;
2256         }
2257         //
2258         // Terminate the line at // comments
2259         //
2260         Cptr = strstr (Line, "//");
2261         if (Cptr != NULL) {
2262           *Cptr = 0;
2263         }
2264
2265         Cptr = Line;
2266         while ((Cptr = strstr (Cptr, STRING_TOKEN)) != NULL) {
2267           //
2268           // Found "STRING_TOKEN". Make sure we don't have NUM_STRING_TOKENS or
2269           // something like that. Then make sure it's followed by
2270           // an open parenthesis, a string identifier, and then a closing
2271           // parenthesis.
2272           //
2273           if (mGlobals.VerboseScan) {
2274             printf (" %d: %s", LineNum, Cptr);
2275           }
2276
2277           if (((Cptr == Line) || (!IsValidIdentifierChar (*(Cptr - 1), FALSE))) &&
2278               (!IsValidIdentifierChar (*(Cptr + sizeof (STRING_TOKEN) - 1), FALSE))
2279               ) {
2280             StringTokenPos  = Cptr;
2281             SavePtr         = Cptr;
2282             Cptr += strlen (STRING_TOKEN);
2283             while (*Cptr && isspace (*Cptr) && (*Cptr != '(')) {
2284               Cptr++;
2285             }
2286
2287             if (*Cptr != '(') {
2288               Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2289             } else {
2290               //
2291               // Skip over the open-parenthesis and find the next non-blank character
2292               //
2293               Cptr++;
2294               while (isspace (*Cptr)) {
2295                 Cptr++;
2296               }
2297
2298               SavePtr = Cptr;
2299               if ((*Cptr == '_') || isalpha (*Cptr)) {
2300                 while ((*Cptr == '_') || (isalnum (*Cptr))) {
2301                   Cptr++;
2302                 }
2303
2304                 TermPtr = Cptr;
2305                 while (*Cptr && isspace (*Cptr)) {
2306                   Cptr++;
2307                 }
2308
2309                 if (*Cptr != ')') {
2310                   Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2311                 }
2312
2313                 if (*TermPtr) {
2314                   *TermPtr  = 0;
2315                   Cptr      = TermPtr + 1;
2316                 } else {
2317                   Cptr = TermPtr;
2318                 }
2319                 //
2320                 // Add the string identifier to the list of used strings
2321                 //
2322                 ParserSetPosition (ScanFiles->Str, LineNum);
2323                 StringDBSetStringReferenced (SavePtr, mGlobals.IgnoreNotFound);
2324                 if (mGlobals.VerboseScan) {
2325                   printf ("...referenced %s", SavePtr);
2326                 }
2327               } else {
2328                 Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected valid string identifier name");
2329               }
2330             }
2331           } else {
2332             //
2333             // Found it, but it's a substring of something else. Advance our pointer.
2334             //
2335             Cptr++;
2336           }
2337
2338           if (mGlobals.VerboseScan) {
2339             printf ("\n");
2340           }
2341         }
2342       }
2343
2344       fclose (Fptr);
2345     } else {
2346       //
2347       // Skipping this file type
2348       //
2349       if (mGlobals.VerboseScan) {
2350         printf ("Skip scanning of %s\n", ScanFiles->Str);
2351       }
2352     }
2353
2354     ScanFiles = ScanFiles->Next;
2355   }
2356
2357   return STATUS_SUCCESS;
2358 }
2359 //
2360 // Free the global string lists we allocated memory for
2361 //
2362 static
2363 void
2364 FreeLists (
2365   VOID
2366   )
2367 {
2368   TEXT_STRING_LIST  *Temp;
2369   WCHAR_STRING_LIST *WTemp;
2370
2371   //
2372   // Traverse the include paths, freeing each
2373   //
2374   while (mGlobals.IncludePaths != NULL) {
2375     Temp = mGlobals.IncludePaths->Next;
2376     free (mGlobals.IncludePaths->Str);
2377     free (mGlobals.IncludePaths);
2378     mGlobals.IncludePaths = Temp;
2379   }
2380   //
2381   // If we did a scan, then free up our
2382   // list of files to scan.
2383   //
2384   while (mGlobals.ScanFileName != NULL) {
2385     Temp = mGlobals.ScanFileName->Next;
2386     free (mGlobals.ScanFileName->Str);
2387     free (mGlobals.ScanFileName);
2388     mGlobals.ScanFileName = Temp;
2389   }
2390   //
2391   // If they gave us a list of filename extensions to
2392   // skip on scan, then free them up.
2393   //
2394   while (mGlobals.SkipExt != NULL) {
2395     Temp = mGlobals.SkipExt->Next;
2396     free (mGlobals.SkipExt->Str);
2397     free (mGlobals.SkipExt);
2398     mGlobals.SkipExt = Temp;
2399   }
2400   //
2401   // Free up any languages specified
2402   //
2403   while (mGlobals.Language != NULL) {
2404     WTemp = mGlobals.Language->Next;
2405     free (mGlobals.Language->Str);
2406     free (mGlobals.Language);
2407     mGlobals.Language = WTemp;
2408   }
2409   //
2410   // Free up our indirection list
2411   //
2412   while (mGlobals.IndirectionList != NULL) {
2413     mGlobals.LastIndirectionList = mGlobals.IndirectionList->Next;
2414     free (mGlobals.IndirectionList->Str1);
2415     free (mGlobals.IndirectionList->Str2);
2416     free (mGlobals.IndirectionList);
2417     mGlobals.IndirectionList = mGlobals.LastIndirectionList;
2418   }
2419
2420   while (mGlobals.IndirectionFileName != NULL) {
2421     mGlobals.LastIndirectionFileName = mGlobals.IndirectionFileName->Next;
2422     free (mGlobals.IndirectionFileName->Str);
2423     free (mGlobals.IndirectionFileName);
2424     mGlobals.IndirectionFileName = mGlobals.LastIndirectionFileName;
2425   }
2426 }
2427
2428 static
2429 BOOLEAN
2430 IsValidIdentifierChar (
2431   INT8      Char,
2432   BOOLEAN   FirstChar
2433   )
2434 {
2435   //
2436   // If it's the first character of an identifier, then
2437   // it must be one of [A-Za-z_].
2438   //
2439   if (FirstChar) {
2440     if (isalpha (Char) || (Char == '_')) {
2441       return TRUE;
2442     }
2443   } else {
2444     //
2445     // If it's not the first character, then it can
2446     // be one of [A-Za-z_0-9]
2447     //
2448     if (isalnum (Char) || (Char == '_')) {
2449       return TRUE;
2450     }
2451   }
2452
2453   return FALSE;
2454 }
2455
2456 static
2457 void
2458 RewindFile (
2459   SOURCE_FILE *SourceFile
2460   )
2461 {
2462   SourceFile->LineNum       = 1;
2463   SourceFile->FileBufferPtr = SourceFile->FileBuffer;
2464   SourceFile->EndOfFile     = 0;
2465 }
2466
2467 static
2468 BOOLEAN
2469 SkipTo (
2470   SOURCE_FILE *SourceFile,
2471   WCHAR       WChar,
2472   BOOLEAN     StopAfterNewline
2473   )
2474 {
2475   while (!EndOfFile (SourceFile)) {
2476     //
2477     // Check for the character of interest
2478     //
2479     if (SourceFile->FileBufferPtr[0] == WChar) {
2480       return TRUE;
2481     } else {
2482       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
2483         SourceFile->LineNum++;
2484         if (StopAfterNewline) {
2485           SourceFile->FileBufferPtr++;
2486           if (SourceFile->FileBufferPtr[0] == 0) {
2487             SourceFile->FileBufferPtr++;
2488           }
2489
2490           return FALSE;
2491         }
2492       }
2493
2494       SourceFile->FileBufferPtr++;
2495     }
2496   }
2497
2498   return FALSE;
2499 }
2500
2501 static
2502 void
2503 Usage (
2504   VOID
2505   )
2506 /*++
2507
2508 Routine Description:
2509
2510   Print usage information for this utility.
2511
2512 Arguments:
2513
2514   None.
2515
2516 Returns:
2517
2518   Nothing.
2519
2520 --*/
2521 {
2522   int         Index;
2523   const char  *Str[] = {
2524     UTILITY_NAME" "UTILITY_VERSION" - Intel String Gather Utility",
2525     "  Copyright (C), 2004 - 2008 Intel Corporation",
2526
2527 #if ( defined(UTILITY_BUILD) && defined(UTILITY_VENDOR) )
2528     "  Built from "UTILITY_BUILD", project of "UTILITY_VENDOR,
2529 #endif
2530     "",
2531     "Usage:",
2532     "  "UTILITY_NAME" -parse [OPTION] FILE",
2533     "  "UTILITY_NAME" -scan  [OPTION] FILE",
2534     "  "UTILITY_NAME" -dump  [OPTION]",
2535     "Description:",
2536     "  Process unicode strings file.",
2537     "Common options include:",
2538     "  -h or -?         for this help information",
2539     "  -db Database     required name of output/input database file",
2540     "  -bn BaseName     for use in the .h and .c output files",
2541     "                   Default = "DEFAULT_BASE_NAME,
2542     "  -v               for verbose output",
2543     "  -vdbw            for verbose output when writing database",
2544     "  -vdbr            for verbose output when reading database",
2545     "  -od FileName     to specify an output database file name",
2546     "Parse options include:",
2547     "  -i IncludePath   add IncludePath to list of search paths",
2548     "  -dep FileName    to specify an output dependency file name",
2549     "  -newdb           to not read in existing database file",
2550     "  -uqs             to indicate that unquoted strings are used",
2551     "  FileNames        name of one or more unicode files to parse",
2552     "Scan options include:",
2553     "  -scan            scan text file(s) for STRING_TOKEN() usage",
2554     "  -skipext .ext    to skip scan of files with .ext filename extension",
2555     "  -ignorenotfound  ignore if a given STRING_TOKEN(STR) is not ",
2556     "                   found in the database",
2557     "  FileNames        one or more files to scan",
2558     "Dump options include:",
2559     "  -oc FileName     write string data to FileName",
2560     "  -oh FileName     write string defines to FileName",
2561     "  -ou FileName     dump database to unicode file FileName",
2562     "  -lang Lang       only dump for the language 'Lang'",
2563     "  -if FileName     to specify an indirection file",
2564     "  -hpk FileName    to create an HII export pack of the strings",
2565     "",
2566     "The expected process is to parse a unicode string file to create an initial",
2567     "database of string identifier names and string definitions. Then text files",
2568     "should be scanned for STRING_TOKEN() usages, and the referenced",
2569     "strings will be tagged as used in the database. After all files have been",
2570     "scanned, then the database should be dumped to create the necessary output",
2571     "files.",
2572     "",
2573     NULL
2574   };
2575   for (Index = 0; Str[Index] != NULL; Index++) {
2576     fprintf (stdout, "%s\n", Str[Index]);
2577   }
2578 }