Tools/CCode/Source/StrGather/StrGather.c

   1 /*++
   2
   3 Copyright (c) 2004, Intel Corporation
   4 All rights reserved. This program and the accompanying materials
   5 are licensed and made available under the terms and conditions of the BSD License
   6 which accompanies this distribution.  The full text of the license may be found at
   7 http://opensource.org/licenses/bsd-license.php
   8
   9 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  10 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  11
  12 Module Name:
  13
  14   StrGather.c
  15
  16 Abstract:
  17
  18   Parse a strings file and create or add to a string database file.
  19
  20 --*/
  21
  22 #include <stdio.h>
  23 #include <string.h>
  24 #include <stdlib.h>
  25 #include <ctype.h>
  26
  27 #include <Common/UefiBaseTypes.h>
  28
  29 #include "CommonLib.h"
  30 #include "EfiUtilityMsgs.h"
  31 #include "StrGather.h"
  32 #include "StringDB.h"
  33
//
// Version string for this tool.
//
#define TOOL_VERSION  "0.31"

//
// Size of the fixed file-name buffers used throughout this file. Only
// defined here if the build environment has not already supplied it.
//
#ifndef MAX_PATH
#define MAX_PATH                    255
#endif
#define MAX_NEST_DEPTH              20  // just in case we get in an endless loop.
#define MAX_STRING_IDENTIFIER_NAME  100 // number of wchars
#define MAX_LINE_LEN                200
#define STRING_TOKEN                "STRING_TOKEN"
#define DEFAULT_BASE_NAME           "BaseName"
//
// Operational modes for this utility (stored in mGlobals.Mode)
//
#define MODE_UNKNOWN  0
#define MODE_PARSE    1
#define MODE_SCAN     2
#define MODE_DUMP     3
//
// Different file separator for Linux and Windows. The choice is made on the
// compiler (__GNUC__), which is used here as a stand-in for the host OS.
//
#ifdef __GNUC__
#define FILE_SEP_CHAR '/'
#define FILE_SEP_STRING "/"
#else
#define FILE_SEP_CHAR '\\'
#define FILE_SEP_STRING "\\"
#endif
  61
  62 //
  63 // We keep a linked list of these for the source files we process
  64 //
typedef struct _SOURCE_FILE {
  FILE                *Fptr;              // stream opened by ProcessIncludeFile(), closed there too
  WCHAR               *FileBuffer;        // whole file contents plus one terminating UNICODE_NULL
  WCHAR               *FileBufferPtr;     // current parse position inside FileBuffer
  UINT32              FileSize;           // file size in bytes (not WCHARs)
  CHAR8               FileName[MAX_PATH]; // name used to open the file and in diagnostics
  UINT32              LineNum;            // current line number, used in diagnostics
  BOOLEAN             EndOfFile;          // latched TRUE by EndOfFile() once the buffer end is reached
  BOOLEAN             SkipToHash;         // error recovery: ParseFile() skips text until the next control character
  struct _SOURCE_FILE *Previous;          // list links -- NOTE(review): not used in the code visible here
  struct _SOURCE_FILE *Next;
  WCHAR               ControlCharacter;   // character that introduces a directive; a file can change it with <ctrl>=<char>
} SOURCE_FILE;

//
// Control character a source file starts out with (see ParseFile()).
//
#define DEFAULT_CONTROL_CHARACTER UNICODE_SLASH
  80
  81 //
// Here's all our globals. We need a linked list of include paths, the root
// of the source files we process, lists of the file names and languages given
// on the command line, and the option flags and output file names.
  85 //
static struct {
  SOURCE_FILE                 SourceFiles;                      // root strings file; main() hands it to ProcessIncludeFile()
  TEXT_STRING_LIST            *IncludePaths;                    // all include paths to search
  TEXT_STRING_LIST            *LastIncludePath;
  TEXT_STRING_LIST            *ScanFileName;                    // files handed to ScanFiles() in MODE_SCAN
  TEXT_STRING_LIST            *LastScanFileName;
  TEXT_STRING_LIST            *SkipExt;                         // if -skipext .uni
  TEXT_STRING_LIST            *LastSkipExt;
  TEXT_STRING_LIST            *IndirectionFileName;             // files handed to ParseIndirectionFiles()
  TEXT_STRING_LIST            *LastIndirectionFileName;
  TEXT_STRING_LIST            *DatabaseFileName;                // input databases; the first is the default output database
  TEXT_STRING_LIST            *LastDatabaseFileName;
  WCHAR_STRING_LIST           *Language;                        // passed to StringDBDumpCStrings()
  WCHAR_STRING_LIST           *LastLanguage;
  WCHAR_MATCHING_STRING_LIST  *IndirectionList;                 // from indirection file(s)
  WCHAR_MATCHING_STRING_LIST  *LastIndirectionList;
  BOOLEAN                     Verbose;                          // for more detailed output
  BOOLEAN                     VerboseDatabaseWrite;             // for more detailed output when writing database
  BOOLEAN                     VerboseDatabaseRead;              // for more detailed output when reading database
  BOOLEAN                     NewDatabase;                      // to start from scratch
  BOOLEAN                     IgnoreNotFound;                   // when scanning
  BOOLEAN                     VerboseScan;
  BOOLEAN                     UnquotedStrings;                  // -uqs option
  CHAR8                       OutputDatabaseFileName[MAX_PATH]; // -od output database; empty means use DatabaseFileName
  CHAR8                       StringHFileName[MAX_PATH];        // output string-defines header; empty means do not write
  CHAR8                       StringCFileName[MAX_PATH];        // output .C filename
  CHAR8                       DumpUFileName[MAX_PATH];          // output unicode dump file name
  CHAR8                       HiiExportPackFileName[MAX_PATH];  // HII export pack file name
  CHAR8                       BaseName[MAX_PATH];               // base filename of the strings file
  UINT32                      Mode;                             // one of the MODE_xxx values
} mGlobals;
 117
//
// Forward declarations of the file-local (static) helpers.
//
static
BOOLEAN
IsValidIdentifierChar (
  CHAR8     Char,
  BOOLEAN   FirstChar
  );

//
// Low-level cursor helpers operating on SOURCE_FILE.FileBufferPtr.
//
static
void
RewindFile (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
SkipTo (
  SOURCE_FILE *SourceFile,
  WCHAR       WChar,
  BOOLEAN     StopAfterNewline
  );

static
UINT32
SkipWhiteSpace (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
IsWhiteSpace (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
EndOfFile (
  SOURCE_FILE *SourceFile
  );

static
void
PreprocessFile (
  SOURCE_FILE *SourceFile
  );

//
// Token extraction helpers. The length arguments are given in bytes.
//
static
UINT32
GetStringIdentifierName (
  IN SOURCE_FILE  *SourceFile,
  IN OUT WCHAR    *StringIdentifierName,
  IN UINT32       StringIdentifierNameLen
  );

static
UINT32
GetLanguageIdentifierName (
  IN SOURCE_FILE  *SourceFile,
  IN OUT WCHAR    *LanguageIdentifierName,
  IN UINT32       LanguageIdentifierNameLen,
  IN BOOLEAN      Optional
  );

static
WCHAR *
GetPrintableLanguageName (
  IN SOURCE_FILE  *SourceFile
  );

static
STATUS
AddCommandLineLanguage (
  IN CHAR8         *Language
  );

static
WCHAR *
GetQuotedString (
  SOURCE_FILE *SourceFile,
  BOOLEAN     Optional
  );

//
// File-level processing: open, load, parse.
//
static
STATUS
ProcessIncludeFile (
  SOURCE_FILE *SourceFile,
  SOURCE_FILE *ParentSourceFile
  );

static
STATUS
ParseFile (
  SOURCE_FILE *SourceFile
  );

static
FILE  *
FindFile (
  IN CHAR8    *FileName,
  OUT CHAR8   *FoundFileName,
  IN UINT32   FoundFileNameLen
  );

static
STATUS
ProcessArgs (
  int   Argc,
  char  *Argv[]
  );

static
STATUS
ProcessFile (
  SOURCE_FILE *SourceFile
  );

//
// ParseFile() treats a non-zero return as the number of characters matched.
//
static
UINT32
wstrcmp (
  WCHAR *Buffer,
  WCHAR *Str
  );

static
void
Usage (
  VOID
  );

static
void
FreeLists (
  VOID
  );

//
// Handlers for the individual directives recognized by ParseFile().
//
static
void
ProcessTokenString (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenInclude (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenScope (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenLanguage (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenLangDef (
  SOURCE_FILE *SourceFile
  );

static
STATUS
ScanFiles (
  TEXT_STRING_LIST *ScanFiles
  );

static
STATUS
ParseIndirectionFiles (
  TEXT_STRING_LIST    *Files
  );

//
// Not static, so this is an external function.
// NOTE(review): presumably implemented by the string database module; if so
// this prototype would be better placed in StringDB.h -- confirm.
//
STATUS
StringDBCreateHiiExportPack (
  CHAR8               *OutputFileName
  );
 298
 299 int
 300 main (
 301   int   Argc,
 302   char  *Argv[]
 303   )
 304 /*++
 305
 306 Routine Description:
 307
 308   Call the routine to parse the command-line options, then process the file.
 309
 310 Arguments:
 311
 312   Argc - Standard C main() argc and argv.
 313   Argv - Standard C main() argc and argv.
 314
 315 Returns:
 316
 317   0       if successful
 318   nonzero otherwise
 319
 320 --*/
 321 {
 322   STATUS  Status;
 323
 324   SetUtilityName (PROGRAM_NAME);
 325   //
 326   // Process the command-line arguments
 327   //
 328   Status = ProcessArgs (Argc, Argv);
 329   if (Status != STATUS_SUCCESS) {
 330     return Status;
 331   }
 332   //
 333   // Initialize the database manager
 334   //
 335   StringDBConstructor ();
 336   //
 337   // We always try to read in an existing database file. It may not
 338   // exist, which is ok usually.
 339   //
 340   if (mGlobals.NewDatabase == 0) {
 341     //
 342     // Read all databases specified.
 343     //
 344     for (mGlobals.LastDatabaseFileName = mGlobals.DatabaseFileName;
 345          mGlobals.LastDatabaseFileName != NULL;
 346          mGlobals.LastDatabaseFileName = mGlobals.LastDatabaseFileName->Next
 347         ) {
 348       Status = StringDBReadDatabase (mGlobals.LastDatabaseFileName->Str, TRUE, mGlobals.VerboseDatabaseRead);
 349       if (Status != STATUS_SUCCESS) {
 350         return Status;
 351       }
 352     }
 353   }
 354   //
 355   // Read indirection file(s) if specified
 356   //
 357   if (ParseIndirectionFiles (mGlobals.IndirectionFileName) != STATUS_SUCCESS) {
 358     goto Finish;
 359   }
 360   //
 361   // If scanning source files, do that now
 362   //
 363   if (mGlobals.Mode == MODE_SCAN) {
 364     ScanFiles (mGlobals.ScanFileName);
 365   } else if (mGlobals.Mode == MODE_PARSE) {
 366     //
 367     // Parsing a unicode strings file
 368     //
 369     mGlobals.SourceFiles.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
 370     Status = ProcessIncludeFile (&mGlobals.SourceFiles, NULL);
 371     if (Status != STATUS_SUCCESS) {
 372       goto Finish;
 373     }
 374   }
 375   //
 376   // Create the string defines header file if there have been no errors.
 377   //
 378   ParserSetPosition (NULL, 0);
 379   if ((mGlobals.StringHFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 380     Status = StringDBDumpStringDefines (mGlobals.StringHFileName, mGlobals.BaseName);
 381     if (Status != EFI_SUCCESS) {
 382       goto Finish;
 383     }
 384   }
 385   //
 386   // Dump the strings to a .c file if there have still been no errors.
 387   //
 388   if ((mGlobals.StringCFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 389     Status = StringDBDumpCStrings (
 390               mGlobals.StringCFileName,
 391               mGlobals.BaseName,
 392               mGlobals.Language,
 393               mGlobals.IndirectionList
 394               );
 395     if (Status != EFI_SUCCESS) {
 396       goto Finish;
 397     }
 398   }
 399   //
 400   // Dump the database if requested
 401   //
 402   if ((mGlobals.DumpUFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 403     StringDBDumpDatabase (NULL, mGlobals.DumpUFileName, FALSE);
 404   }
 405   //
 406   // Dump the string data as HII binary string pack if requested
 407   //
 408   if ((mGlobals.HiiExportPackFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 409     StringDBCreateHiiExportPack (mGlobals.HiiExportPackFileName);
 410   }
 411   //
 412   // Always update the database if no errors and not in dump mode. If they specified -od
 413   // for an output database file name, then use that name. Otherwise use the name of
 414   // the first database file specified with -db
 415   //
 416   if ((mGlobals.Mode != MODE_DUMP) && (GetUtilityStatus () < STATUS_ERROR)) {
 417     if (mGlobals.OutputDatabaseFileName[0]) {
 418       Status = StringDBWriteDatabase (mGlobals.OutputDatabaseFileName, mGlobals.VerboseDatabaseWrite);
 419     } else {
 420       Status = StringDBWriteDatabase (mGlobals.DatabaseFileName->Str, mGlobals.VerboseDatabaseWrite);
 421     }
 422
 423     if (Status != EFI_SUCCESS) {
 424       goto Finish;
 425     }
 426   }
 427
 428 Finish:
 429   //
 430   // Free up memory
 431   //
 432   FreeLists ();
 433   StringDBDestructor ();
 434   return GetUtilityStatus ();
 435 }
 436
 437 static
 438 STATUS
 439 ProcessIncludeFile (
 440   SOURCE_FILE *SourceFile,
 441   SOURCE_FILE *ParentSourceFile
 442   )
 443 /*++
 444
 445 Routine Description:
 446
 447   Given a source file, open the file and parse it
 448
 449 Arguments:
 450
 451   SourceFile        - name of file to parse
 452   ParentSourceFile  - for error reporting purposes, the file that #included SourceFile.
 453
 454 Returns:
 455
 456   Standard status.
 457
 458 --*/
 459 {
 460   static UINT32 NestDepth = 0;
 461   CHAR8         FoundFileName[MAX_PATH];
 462   STATUS        Status;
 463
 464   Status = STATUS_SUCCESS;
 465   NestDepth++;
 466   //
 467   // Print the file being processed. Indent so you can tell the include nesting
 468   // depth.
 469   //
 470   if (mGlobals.Verbose) {
 471     fprintf (stdout, "%*cProcessing file '%s'\n", NestDepth * 2, ' ', SourceFile->FileName);
 472   }
 473
 474   //
 475   // Make sure we didn't exceed our maximum nesting depth
 476   //
 477   if (NestDepth > MAX_NEST_DEPTH) {
 478     Error (NULL, 0, 0, SourceFile->FileName, "max nesting depth (%d) exceeded", NestDepth);
 479     Status = STATUS_ERROR;
 480     goto Finish;
 481   }
 482   //
 483   // Try to open the file locally, and if that fails try along our include paths.
 484   //
 485   strcpy (FoundFileName, SourceFile->FileName);
 486   if ((SourceFile->Fptr = fopen (FoundFileName, "rb")) == NULL) {
 487     //
 488     // Try to find it among the paths if it has a parent (that is, it is included
 489     // by someone else).
 490     //
 491     if (ParentSourceFile == NULL) {
 492       Error (NULL, 0, 0, SourceFile->FileName, "file not found");
 493       return STATUS_ERROR;
 494     }
 495
 496     SourceFile->Fptr = FindFile (SourceFile->FileName, FoundFileName, sizeof (FoundFileName));
 497     if (SourceFile->Fptr == NULL) {
 498       Error (ParentSourceFile->FileName, ParentSourceFile->LineNum, 0, SourceFile->FileName, "include file not found");
 499       return STATUS_ERROR;
 500     }
 501   }
 502   //
 503   // Process the file found
 504   //
 505   ProcessFile (SourceFile);
 506 Finish:
 507   //
 508   // Close open files and return status
 509   //
 510   if (SourceFile->Fptr != NULL) {
 511     fclose (SourceFile->Fptr);
 512   }
 513
 514   return Status;
 515 }
 516
 517 static
 518 STATUS
 519 ProcessFile (
 520   SOURCE_FILE *SourceFile
 521   )
 522 {
 523   //
 524   // Get the file size, and then read the entire thing into memory.
 525   // Allocate space for a terminator character.
 526   //
 527   fseek (SourceFile->Fptr, 0, SEEK_END);
 528   SourceFile->FileSize = ftell (SourceFile->Fptr);
 529   fseek (SourceFile->Fptr, 0, SEEK_SET);
 530   SourceFile->FileBuffer = (WCHAR *) malloc (SourceFile->FileSize + sizeof (WCHAR));
 531   if (SourceFile->FileBuffer == NULL) {
 532     Error (NULL, 0, 0, "memory allocation failure", NULL);
 533     return STATUS_ERROR;
 534   }
 535
 536   fread ((VOID *) SourceFile->FileBuffer, SourceFile->FileSize, 1, SourceFile->Fptr);
 537   SourceFile->FileBuffer[(SourceFile->FileSize / sizeof (WCHAR))] = UNICODE_NULL;
 538   //
 539   // Pre-process the file to replace comments with spaces
 540   //
 541   PreprocessFile (SourceFile);
 542   //
 543   // Parse the file
 544   //
 545   ParseFile (SourceFile);
 546   free (SourceFile->FileBuffer);
 547   return STATUS_SUCCESS;
 548 }
 549
 550 static
 551 STATUS
 552 ParseFile (
 553   SOURCE_FILE *SourceFile
 554   )
 555 {
 556   BOOLEAN InComment;
 557   UINT32  Len;
 558
 559   //
 560   // First character of a unicode file is special. Make sure
 561   //
 562   if (SourceFile->FileBufferPtr[0] != UNICODE_FILE_START) {
 563     Error (SourceFile->FileName, 1, 0, SourceFile->FileName, "file does not appear to be a unicode file");
 564     return STATUS_ERROR;
 565   }
 566
 567   SourceFile->FileBufferPtr++;
 568   InComment = FALSE;
 569   //
 570   // Print the first line if in verbose mode
 571   //
 572   if (mGlobals.Verbose) {
 573     printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 574   }
 575   //
 576   // Since the syntax is relatively straightforward, just switch on the next char
 577   //
 578   while (!EndOfFile (SourceFile)) {
 579     //
 580     // Check for whitespace
 581     //
 582     if (SourceFile->FileBufferPtr[0] == UNICODE_SPACE) {
 583       SourceFile->FileBufferPtr++;
 584     } else if (SourceFile->FileBufferPtr[0] == UNICODE_TAB) {
 585       SourceFile->FileBufferPtr++;
 586     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 587       SourceFile->FileBufferPtr++;
 588     } else if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 589       SourceFile->FileBufferPtr++;
 590       SourceFile->LineNum++;
 591       if (mGlobals.Verbose) {
 592         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 593       }
 594
 595       InComment = FALSE;
 596     } else if (SourceFile->FileBufferPtr[0] == 0) {
 597       SourceFile->FileBufferPtr++;
 598     } else if (InComment) {
 599       SourceFile->FileBufferPtr++;
 600     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 601       SourceFile->FileBufferPtr += 2;
 602       InComment = TRUE;
 603     } else if (SourceFile->SkipToHash && (SourceFile->FileBufferPtr[0] != SourceFile->ControlCharacter)) {
 604       SourceFile->FileBufferPtr++;
 605     } else {
 606       SourceFile->SkipToHash = FALSE;
 607       if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 608           ((Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"include")) > 0)
 609           ) {
 610         SourceFile->FileBufferPtr += Len + 1;
 611         ProcessTokenInclude (SourceFile);
 612       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 613                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"scope")) > 0
 614               ) {
 615         SourceFile->FileBufferPtr += Len + 1;
 616         ProcessTokenScope (SourceFile);
 617       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 618                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"language")) > 0
 619               ) {
 620         SourceFile->FileBufferPtr += Len + 1;
 621         ProcessTokenLanguage (SourceFile);
 622       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 623                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"langdef")) > 0
 624               ) {
 625         SourceFile->FileBufferPtr += Len + 1;
 626         ProcessTokenLangDef (SourceFile);
 627       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 628                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"string")) > 0
 629               ) {
 630         SourceFile->FileBufferPtr += Len + 1;
 631         ProcessTokenString (SourceFile);
 632       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 633                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"EFI_BREAKPOINT()")) > 0
 634               ) {
 635         SourceFile->FileBufferPtr += Len;
 636         //
 637         // BUGBUG: Caling EFI_BREAKOINT() is breaking the link.  What is the proper action for this tool
 638         // in this condition?
 639         //
 640 //        EFI_BREAKPOINT ();
 641       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 642                (SourceFile->FileBufferPtr[1] == UNICODE_EQUAL_SIGN)
 643               ) {
 644         SourceFile->ControlCharacter = SourceFile->FileBufferPtr[2];
 645         SourceFile->FileBufferPtr += 3;
 646       } else {
 647         Error (SourceFile->FileName, SourceFile->LineNum, 0, "unrecognized token", "%S", SourceFile->FileBufferPtr);
 648         //
 649         // Treat rest of line as a comment.
 650         //
 651         InComment = TRUE;
 652       }
 653     }
 654   }
 655
 656   return STATUS_SUCCESS;
 657 }
 658
 659 static
 660 void
 661 PreprocessFile (
 662   SOURCE_FILE *SourceFile
 663   )
 664 /*++
 665
 666 Routine Description:
 667   Preprocess a file to replace all carriage returns with NULLs so
 668   we can print lines from the file to the screen.
 669
 670 Arguments:
 671   SourceFile - structure that we use to keep track of an input file.
 672
 673 Returns:
 674   Nothing.
 675
 676 --*/
 677 {
 678   BOOLEAN InComment;
 679
 680   RewindFile (SourceFile);
 681   InComment = FALSE;
 682   while (!EndOfFile (SourceFile)) {
 683     //
 684     // If a line-feed, then no longer in a comment
 685     //
 686     if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 687       SourceFile->FileBufferPtr++;
 688       SourceFile->LineNum++;
 689       InComment = 0;
 690     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 691       //
 692       // Replace all carriage returns with a NULL so we can print stuff
 693       //
 694       SourceFile->FileBufferPtr[0] = 0;
 695       SourceFile->FileBufferPtr++;
 696     } else if (InComment) {
 697       SourceFile->FileBufferPtr[0] = UNICODE_SPACE;
 698       SourceFile->FileBufferPtr++;
 699     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 700       SourceFile->FileBufferPtr += 2;
 701       InComment = TRUE;
 702     } else {
 703       SourceFile->FileBufferPtr++;
 704     }
 705   }
 706   //
 707   // Could check for end-of-file and still in a comment, but
 708   // should not be necessary. So just restore the file pointers.
 709   //
 710   RewindFile (SourceFile);
 711 }
 712
 713 static
 714 WCHAR *
 715 GetPrintableLanguageName (
 716   IN SOURCE_FILE  *SourceFile
 717   )
 718 {
 719   WCHAR   *String;
 720   WCHAR   *Start;
 721   WCHAR   *Ptr;
 722   UINT32  Len;
 723
 724   SkipWhiteSpace (SourceFile);
 725   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 726     Error (
 727       SourceFile->FileName,
 728       SourceFile->LineNum,
 729       0,
 730       "expected quoted printable language name",
 731       "%S",
 732       SourceFile->FileBufferPtr
 733       );
 734     SourceFile->SkipToHash = TRUE;
 735     return NULL;
 736   }
 737
 738   Len = 0;
 739   SourceFile->FileBufferPtr++;
 740   Start = Ptr = SourceFile->FileBufferPtr;
 741   while (!EndOfFile (SourceFile)) {
 742     if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 743       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 744       break;
 745     } else if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
 746       break;
 747     }
 748
 749     SourceFile->FileBufferPtr++;
 750     Len++;
 751   }
 752
 753   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 754     Warning (
 755       SourceFile->FileName,
 756       SourceFile->LineNum,
 757       0,
 758       "missing closing quote on printable language name string",
 759       "%S",
 760       Start
 761       );
 762   } else {
 763     SourceFile->FileBufferPtr++;
 764   }
 765   //
 766   // Now allocate memory for the string and save it off
 767   //
 768   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 769   if (String == NULL) {
 770     Error (NULL, 0, 0, "memory allocation failed", NULL);
 771     return NULL;
 772   }
 773   //
 774   // Copy the string from the file buffer to the local copy.
 775   // We do no reformatting of it whatsoever at this point.
 776   //
 777   Ptr = String;
 778   while (Len > 0) {
 779     *Ptr = *Start;
 780     Start++;
 781     Ptr++;
 782     Len--;
 783   }
 784
 785   *Ptr = 0;
 786   //
 787   // Now format the string to convert \wide and \narrow controls
 788   //
 789   StringDBFormatString (String);
 790   return String;
 791 }
 792
 793 static
 794 WCHAR *
 795 GetQuotedString (
 796   SOURCE_FILE *SourceFile,
 797   BOOLEAN     Optional
 798   )
 799 {
 800   WCHAR   *String;
 801   WCHAR   *Start;
 802   WCHAR   *Ptr;
 803   UINT32  Len;
 804   BOOLEAN PreviousBackslash;
 805
 806   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 807     if (!Optional) {
 808       Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted string", "%S", SourceFile->FileBufferPtr);
 809     }
 810
 811     return NULL;
 812   }
 813
 814   Len = 0;
 815   SourceFile->FileBufferPtr++;
 816   Start             = Ptr = SourceFile->FileBufferPtr;
 817   PreviousBackslash = FALSE;
 818   while (!EndOfFile (SourceFile)) {
 819     if ((SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) && (!PreviousBackslash)) {
 820       break;
 821     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 822       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 823       PreviousBackslash = FALSE;
 824     } else if (SourceFile->FileBufferPtr[0] == UNICODE_BACKSLASH) {
 825       PreviousBackslash = TRUE;
 826     } else {
 827       PreviousBackslash = FALSE;
 828     }
 829
 830     SourceFile->FileBufferPtr++;
 831     Len++;
 832   }
 833
 834   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 835     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "missing closing quote on string", "%S", Start);
 836   } else {
 837     SourceFile->FileBufferPtr++;
 838   }
 839   //
 840   // Now allocate memory for the string and save it off
 841   //
 842   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 843   if (String == NULL) {
 844     Error (NULL, 0, 0, "memory allocation failed", NULL);
 845     return NULL;
 846   }
 847   //
 848   // Copy the string from the file buffer to the local copy.
 849   // We do no reformatting of it whatsoever at this point.
 850   //
 851   Ptr = String;
 852   while (Len > 0) {
 853     *Ptr = *Start;
 854     Start++;
 855     Ptr++;
 856     Len--;
 857   }
 858
 859   *Ptr = 0;
 860   return String;
 861 }
 862 //
 863 // Parse:
 864 //    #string STR_ID_NAME
 865 //
 866 // All we can do is call the string database to add the string identifier. Unfortunately
 867 // he'll have to keep track of the last identifier we added.
 868 //
 869 static
 870 void
 871 ProcessTokenString (
 872   SOURCE_FILE *SourceFile
 873   )
 874 {
 875   WCHAR   StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
 876   UINT16  StringId;
 877   //
 878   // Extract the string identifier name and add it to the database.
 879   //
 880   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
 881     StringId = STRING_ID_INVALID;
 882     StringDBAddStringIdentifier (StringIdentifier, &StringId, 0);
 883   } else {
 884     //
 885     // Error recovery -- skip to the next #
 886     //
 887     SourceFile->SkipToHash = TRUE;
 888   }
 889 }
 890
 891 static
 892 BOOLEAN
 893 EndOfFile (
 894   SOURCE_FILE *SourceFile
 895   )
 896 {
 897   //
 898   // The file buffer pointer will typically get updated before the End-of-file flag in the
 899   // source file structure, so check it first.
 900   //
 901   if (SourceFile->FileBufferPtr >= SourceFile->FileBuffer + SourceFile->FileSize / sizeof (WCHAR)) {
 902     SourceFile->EndOfFile = TRUE;
 903     return TRUE;
 904   }
 905
 906   if (SourceFile->EndOfFile) {
 907     return TRUE;
 908   }
 909
 910   return FALSE;
 911 }
 912
 913 static
 914 UINT32
 915 GetStringIdentifierName (
 916   IN SOURCE_FILE  *SourceFile,
 917   IN OUT WCHAR    *StringIdentifierName,
 918   IN UINT32       StringIdentifierNameLen
 919   )
 920 {
 921   UINT32  Len;
 922   WCHAR   *From;
 923   WCHAR   *Start;
 924
 925   //
 926   // Skip whitespace
 927   //
 928   SkipWhiteSpace (SourceFile);
 929   if (SourceFile->EndOfFile) {
 930     Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-file encountered", "expected string identifier");
 931     return 0;
 932   }
 933   //
 934   // Verify first character of name is [A-Za-z]
 935   //
 936   Len = 0;
 937   StringIdentifierNameLen /= 2;
 938   From  = SourceFile->FileBufferPtr;
 939   Start = SourceFile->FileBufferPtr;
 940   if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 941       ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))
 942       ) {
 943     //
 944     // Do nothing
 945     //
 946   } else {
 947     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid character in string identifier name", "%S", Start);
 948     return 0;
 949   }
 950
 951   while (!EndOfFile (SourceFile)) {
 952     if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 953         ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z)) ||
 954         ((SourceFile->FileBufferPtr[0] >= UNICODE_0) && (SourceFile->FileBufferPtr[0] <= UNICODE_9)) ||
 955         (SourceFile->FileBufferPtr[0] == UNICODE_UNDERSCORE)
 956         ) {
 957       Len++;
 958       if (Len >= StringIdentifierNameLen) {
 959         Error (SourceFile->FileName, SourceFile->LineNum, 0, "string identifier name too long", "%S", Start);
 960         return 0;
 961       }
 962
 963       *StringIdentifierName = SourceFile->FileBufferPtr[0];
 964       StringIdentifierName++;
 965       SourceFile->FileBufferPtr++;
 966     } else if (SkipWhiteSpace (SourceFile) == 0) {
 967       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid string identifier name", "%S", Start);
 968       return 0;
 969     } else {
 970       break;
 971     }
 972   }
 973   //
 974   // Terminate the copy of the string.
 975   //
 976   *StringIdentifierName = 0;
 977   return Len;
 978 }
 979
 980 static
 981 UINT32
 982 GetLanguageIdentifierName (
 983   IN SOURCE_FILE  *SourceFile,
 984   IN OUT WCHAR    *LanguageIdentifierName,
 985   IN UINT32       LanguageIdentifierNameLen,
 986   IN BOOLEAN      Optional
 987   )
 988 {
 989   UINT32  Len;
 990   WCHAR   *From;
 991   WCHAR   *Start;
 992   //
 993   // Skip whitespace
 994   //
 995   SkipWhiteSpace (SourceFile);
 996   if (SourceFile->EndOfFile) {
 997     if (!Optional) {
 998       Error (
 999         SourceFile->FileName,
1000         SourceFile->LineNum,
1001         0,
1002         "end-of-file encountered",
1003         "expected language identifier"
1004         );
1005     }
1006
1007     return 0;
1008   }
1009   //
1010   // This function is called to optionally get a language identifier name in:
1011   //   #string STR_ID eng "the string"
1012   // If it's optional, and we find a double-quote, then return now.
1013   //
1014   if (Optional) {
1015     if (*SourceFile->FileBufferPtr == UNICODE_DOUBLE_QUOTE) {
1016       return 0;
1017     }
1018   }
1019
1020   Len = 0;
1021   LanguageIdentifierNameLen /= 2;
1022   //
1023   // Internal error if we weren't given at least 4 WCHAR's to work with.
1024   //
1025   if (LanguageIdentifierNameLen < LANGUAGE_IDENTIFIER_NAME_LEN + 1) {
1026     Error (
1027       SourceFile->FileName,
1028       SourceFile->LineNum,
1029       0,
1030       "app error -- language identifier name length is invalid",
1031       NULL
1032       );
1033   }
1034
1035   From  = SourceFile->FileBufferPtr;
1036   Start = SourceFile->FileBufferPtr;
1037   while (!EndOfFile (SourceFile)) {
1038     if (((SourceFile->FileBufferPtr[0] >= UNICODE_a) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))) {
1039       Len++;
1040       if (Len > LANGUAGE_IDENTIFIER_NAME_LEN) {
1041         Error (SourceFile->FileName, SourceFile->LineNum, 0, "language identifier name too long", "%S", Start);
1042         return 0;
1043       }
1044
1045       *LanguageIdentifierName = SourceFile->FileBufferPtr[0];
1046       SourceFile->FileBufferPtr++;
1047       LanguageIdentifierName++;
1048     } else if (!IsWhiteSpace (SourceFile)) {
1049       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid language identifier name", "%S", Start);
1050       return 0;
1051     } else {
1052       break;
1053     }
1054   }
1055   //
1056   // Terminate the copy of the string.
1057   //
1058   *LanguageIdentifierName = 0;
1059   return Len;
1060 }
1061
1062 static
1063 void
1064 ProcessTokenInclude (
1065   SOURCE_FILE *SourceFile
1066   )
1067 {
1068   CHAR8       IncludeFileName[MAX_PATH];
1069   CHAR8       *To;
1070   UINT32      Len;
1071   BOOLEAN     ReportedError;
1072   SOURCE_FILE IncludedSourceFile;
1073
1074   ReportedError = FALSE;
1075   if (SkipWhiteSpace (SourceFile) == 0) {
1076     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "expected whitespace following #include keyword", NULL);
1077   }
1078   //
1079   // Should be quoted file name
1080   //
1081   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
1082     Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted include file name", NULL);
1083     goto FailDone;
1084   }
1085
1086   SourceFile->FileBufferPtr++;
1087   //
1088   // Copy the filename as ascii to our local string
1089   //
1090   To  = IncludeFileName;
1091   Len = 0;
1092   while (!EndOfFile (SourceFile)) {
1093     if ((SourceFile->FileBufferPtr[0] == UNICODE_CR) || (SourceFile->FileBufferPtr[0] == UNICODE_LF)) {
1094       Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-line found in quoted include file name", NULL);
1095       goto FailDone;
1096     }
1097
1098     if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
1099       SourceFile->FileBufferPtr++;
1100       break;
1101     }
1102     //
1103     // If too long, then report the error once and process until the closing quote
1104     //
1105     Len++;
1106     if (!ReportedError && (Len >= sizeof (IncludeFileName))) {
1107       Error (SourceFile->FileName, SourceFile->LineNum, 0, "length of include file name exceeds limit", NULL);
1108       ReportedError = TRUE;
1109     }
1110
1111     if (!ReportedError) {
1112       *To = UNICODE_TO_ASCII (SourceFile->FileBufferPtr[0]);
1113       To++;
1114     }
1115
1116     SourceFile->FileBufferPtr++;
1117   }
1118
1119   if (!ReportedError) {
1120     *To = 0;
1121     memset ((char *) &IncludedSourceFile, 0, sizeof (SOURCE_FILE));
1122     strcpy (IncludedSourceFile.FileName, IncludeFileName);
1123     IncludedSourceFile.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
1124     ProcessIncludeFile (&IncludedSourceFile, SourceFile);
1125     //
1126     // printf ("including file '%s'\n", IncludeFileName);
1127     //
1128   }
1129
1130   return ;
1131 FailDone:
1132   //
1133   // Error recovery -- skip to next #
1134   //
1135   SourceFile->SkipToHash = TRUE;
1136 }
1137
1138 static
1139 void
1140 ProcessTokenScope (
1141   SOURCE_FILE *SourceFile
1142   )
1143 {
1144   WCHAR StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
1145   //
1146   // Extract the scope name
1147   //
1148   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
1149     StringDBSetScope (StringIdentifier);
1150   }
1151 }
1152 //
1153 // Parse:  #langdef eng "English"
1154 //         #langdef chn "\wideChinese"
1155 //
1156 static
1157 void
1158 ProcessTokenLangDef (
1159   SOURCE_FILE *SourceFile
1160   )
1161 {
1162   WCHAR   LanguageIdentifier[MAX_STRING_IDENTIFIER_NAME];
1163   UINT32  Len;
1164   WCHAR   *PrintableName;
1165   //
1166   // Extract the 3-character language identifier
1167   //
1168   Len = GetLanguageIdentifierName (SourceFile, LanguageIdentifier, sizeof (LanguageIdentifier), FALSE);
1169   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1170     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", NULL);
1171   } else {
1172     //
1173     // Extract the printable name
1174     //
1175     PrintableName = GetPrintableLanguageName (SourceFile);
1176     if (PrintableName != NULL) {
1177       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1178       StringDBAddLanguage (LanguageIdentifier, PrintableName);
1179       free (PrintableName);
1180       return ;
1181     }
1182   }
1183   //
1184   // Error recovery -- skip to next #
1185   //
1186   SourceFile->SkipToHash = TRUE;
1187 }
1188
1189 static
1190 BOOLEAN
1191 ApparentQuotedString (
1192   SOURCE_FILE *SourceFile
1193   )
1194 {
1195   WCHAR *Ptr;
1196   //
1197   // See if the first and last nonblank characters on the line are double quotes
1198   //
1199   for (Ptr = SourceFile->FileBufferPtr; *Ptr && (*Ptr == UNICODE_SPACE); Ptr++)
1200     ;
1201   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1202     return FALSE;
1203   }
1204
1205   while (*Ptr) {
1206     Ptr++;
1207   }
1208
1209   Ptr--;
1210   for (; *Ptr && (*Ptr == UNICODE_SPACE); Ptr--)
1211     ;
1212   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1213     return FALSE;
1214   }
1215
1216   return TRUE;
1217 }
1218 //
1219 // Parse:
1220 //   #language eng "some string " "more string"
1221 //
1222 static
1223 void
1224 ProcessTokenLanguage (
1225   SOURCE_FILE *SourceFile
1226   )
1227 {
1228   WCHAR   *String;
1229   WCHAR   *SecondString;
1230   WCHAR   *TempString;
1231   WCHAR   *From;
1232   WCHAR   *To;
1233   WCHAR   Language[LANGUAGE_IDENTIFIER_NAME_LEN + 1];
1234   UINT32  Len;
1235   BOOLEAN PreviousNewline;
1236   //
1237   // Get the language identifier
1238   //
1239   Language[0] = 0;
1240   Len         = GetLanguageIdentifierName (SourceFile, Language, sizeof (Language), TRUE);
1241   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1242     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", "%S", Language);
1243     SourceFile->SkipToHash = TRUE;
1244     return ;
1245   }
1246   //
1247   // Extract the string value. It's either a quoted string that starts on the current line, or
1248   // an unquoted string that starts on the following line and continues until the next control
1249   // character in column 1.
1250   // Look ahead to find a quote or a newline
1251   //
1252   if (SkipTo (SourceFile, UNICODE_DOUBLE_QUOTE, TRUE)) {
1253     String = GetQuotedString (SourceFile, FALSE);
1254     if (String != NULL) {
1255       //
1256       // Set the position in the file of where we are parsing for error
1257       // reporting purposes. Then start looking ahead for additional
1258       // quoted strings, and concatenate them until we get a failure
1259       // back from the string parser.
1260       //
1261       Len = StrLen (String) + 1;
1262       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1263       do {
1264         SkipWhiteSpace (SourceFile);
1265         SecondString = GetQuotedString (SourceFile, TRUE);
1266         if (SecondString != NULL) {
1267           Len += StrLen (SecondString);
1268           TempString = (WCHAR *) malloc (Len * sizeof (WCHAR));
1269           if (TempString == NULL) {
1270             Error (NULL, 0, 0, "application error", "failed to allocate memory");
1271             return ;
1272           }
1273
1274           StrCpy (TempString, String);
1275           StrCat (TempString, SecondString);
1276           free (String);
1277           free (SecondString);
1278           String = TempString;
1279         }
1280       } while (SecondString != NULL);
1281       StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1282       free (String);
1283     } else {
1284       //
1285       // Error was reported at lower level. Error recovery mode.
1286       //
1287       SourceFile->SkipToHash = TRUE;
1288     }
1289   } else {
1290     if (!mGlobals.UnquotedStrings) {
1291       //
1292       // They're using unquoted strings. If the next non-blank character is a double quote, and the
1293       // last non-blank character on the line is a double quote, then more than likely they're using
1294       // quotes, so they need to put the quoted string on the end of the previous line
1295       //
1296       if (ApparentQuotedString (SourceFile)) {
1297         Warning (
1298           SourceFile->FileName,
1299           SourceFile->LineNum,
1300           0,
1301           "unexpected quoted string on line",
1302           "specify -uqs option if necessary"
1303           );
1304       }
1305     }
1306     //
1307     // Found end-of-line (hopefully). Skip over it and start taking in characters
1308     // until we find a control character at the start of a line.
1309     //
1310     Len             = 0;
1311     From            = SourceFile->FileBufferPtr;
1312     PreviousNewline = FALSE;
1313     while (!EndOfFile (SourceFile)) {
1314       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
1315         PreviousNewline = TRUE;
1316         SourceFile->LineNum++;
1317       } else {
1318         Len++;
1319         if (PreviousNewline && (SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter)) {
1320           break;
1321         }
1322
1323         PreviousNewline = FALSE;
1324       }
1325
1326       SourceFile->FileBufferPtr++;
1327     }
1328
1329     if ((Len == 0) && EndOfFile (SourceFile)) {
1330       Error (SourceFile->FileName, SourceFile->LineNum, 0, "unexpected end of file", NULL);
1331       SourceFile->SkipToHash = TRUE;
1332       return ;
1333     }
1334     //
1335     // Now allocate a buffer, copy the characters, and add the string.
1336     //
1337     String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
1338     if (String == NULL) {
1339       Error (NULL, 0, 0, "application error", "failed to allocate memory");
1340       return ;
1341     }
1342
1343     To = String;
1344     while (From < SourceFile->FileBufferPtr) {
1345       switch (*From) {
1346       case UNICODE_LF:
1347       case 0:
1348         break;
1349
1350       default:
1351         *To = *From;
1352         To++;
1353         break;
1354       }
1355
1356       From++;
1357     }
1358
1359     //
1360     // String[Len] = 0;
1361     //
1362     *To = 0;
1363     StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1364   }
1365 }
1366
1367 static
1368 BOOLEAN
1369 IsWhiteSpace (
1370   SOURCE_FILE *SourceFile
1371   )
1372 {
1373   switch (SourceFile->FileBufferPtr[0]) {
1374   case UNICODE_NULL:
1375   case UNICODE_CR:
1376   case UNICODE_SPACE:
1377   case UNICODE_TAB:
1378   case UNICODE_LF:
1379     return TRUE;
1380
1381   default:
1382     return FALSE;
1383   }
1384 }
1385
1386 static
1387 UINT32
1388 SkipWhiteSpace (
1389   SOURCE_FILE *SourceFile
1390   )
1391 {
1392   UINT32  Count;
1393
1394   Count = 0;
1395   while (!EndOfFile (SourceFile)) {
1396     Count++;
1397     switch (*SourceFile->FileBufferPtr) {
1398     case UNICODE_NULL:
1399     case UNICODE_CR:
1400     case UNICODE_SPACE:
1401     case UNICODE_TAB:
1402       SourceFile->FileBufferPtr++;
1403       break;
1404
1405     case UNICODE_LF:
1406       SourceFile->FileBufferPtr++;
1407       SourceFile->LineNum++;
1408       if (mGlobals.Verbose) {
1409         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
1410       }
1411       break;
1412
1413     default:
1414       return Count - 1;
1415     }
1416   }
1417   //
1418   // Some tokens require trailing whitespace. If we're at the end of the
1419   // file, then we count that as well.
1420   //
1421   if ((Count == 0) && (EndOfFile (SourceFile))) {
1422     Count++;
1423   }
1424
1425   return Count;
1426 }
1427
1428 static
1429 UINT32
1430 wstrcmp (
1431   WCHAR *Buffer,
1432   WCHAR *Str
1433   )
1434 {
1435   UINT32  Len;
1436
1437   Len = 0;
1438   while (*Str == *Buffer) {
1439     Buffer++;
1440     Str++;
1441     Len++;
1442   }
1443
1444   if (*Str) {
1445     return 0;
1446   }
1447
1448   return Len;
1449 }
1450 //
1451 // Given a filename, try to find it along the include paths.
1452 //
1453 static
1454 FILE *
1455 FindFile (
1456   IN CHAR8   *FileName,
1457   OUT CHAR8  *FoundFileName,
1458   IN UINT32  FoundFileNameLen
1459   )
1460 {
1461   FILE              *Fptr;
1462   TEXT_STRING_LIST  *List;
1463
1464   //
1465   // Traverse the list of paths and try to find the file
1466   //
1467   List = mGlobals.IncludePaths;
1468   while (List != NULL) {
1469     //
1470     // Put the path and filename together
1471     //
1472     if (strlen (List->Str) + strlen (FileName) + 1 > FoundFileNameLen) {
1473       Error (PROGRAM_NAME, 0, 0, NULL, "internal error - cannot concatenate path+filename");
1474       return NULL;
1475     }
1476     //
1477     // Append the filename to this include path and try to open the file.
1478     //
1479     strcpy (FoundFileName, List->Str);
1480     strcat (FoundFileName, FileName);
1481     if ((Fptr = fopen (FoundFileName, "rb")) != NULL) {
1482       //
1483       // Return the file pointer
1484       //
1485       return Fptr;
1486     }
1487
1488     List = List->Next;
1489   }
1490   //
1491   // Not found
1492   //
1493   FoundFileName[0] = 0;
1494   return NULL;
1495 }
1496 //
1497 // Process the command-line arguments
1498 //
1499 static
1500 STATUS
1501 ProcessArgs (
1502   int   Argc,
1503   char  *Argv[]
1504   )
1505 {
1506   TEXT_STRING_LIST  *NewList;
1507   //
1508   // Clear our globals
1509   //
1510   memset ((char *) &mGlobals, 0, sizeof (mGlobals));
1511   strcpy (mGlobals.BaseName, DEFAULT_BASE_NAME);
1512   //
1513   // Skip program name
1514   //
1515   Argc--;
1516   Argv++;
1517
1518   if (Argc == 0) {
1519     Usage ();
1520     return STATUS_ERROR;
1521   }
1522
1523   mGlobals.Mode = MODE_UNKNOWN;
1524   //
1525   // Process until no more -args.
1526   //
1527   while ((Argc > 0) && (Argv[0][0] == '-')) {
1528     //
1529     // -parse option
1530     //
1531     if (stricmp (Argv[0], "-parse") == 0) {
1532       if (mGlobals.Mode != MODE_UNKNOWN) {
1533         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1534         return STATUS_ERROR;
1535       }
1536
1537       mGlobals.Mode = MODE_PARSE;
1538       //
1539       // -scan option
1540       //
1541     } else if (stricmp (Argv[0], "-scan") == 0) {
1542       if (mGlobals.Mode != MODE_UNKNOWN) {
1543         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1544         return STATUS_ERROR;
1545       }
1546
1547       mGlobals.Mode = MODE_SCAN;
1548       //
1549       // -vscan verbose scanning option
1550       //
1551     } else if (stricmp (Argv[0], "-vscan") == 0) {
1552       mGlobals.VerboseScan = TRUE;
1553       //
1554       // -dump option
1555       //
1556     } else if (stricmp (Argv[0], "-dump") == 0) {
1557       if (mGlobals.Mode != MODE_UNKNOWN) {
1558         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1559         return STATUS_ERROR;
1560       }
1561
1562       mGlobals.Mode = MODE_DUMP;
1563     } else if (stricmp (Argv[0], "-uqs") == 0) {
1564       mGlobals.UnquotedStrings = TRUE;
1565       //
1566       // -i path    add include search path when parsing
1567       //
1568     } else if (stricmp (Argv[0], "-i") == 0) {
1569       //
1570       // check for one more arg
1571       //
1572       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1573         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing include path");
1574         return STATUS_ERROR;
1575       }
1576       //
1577       // Allocate memory for a new list element, fill it in, and
1578       // add it to our list of include paths. Always make sure it
1579       // has a "\" on the end of it.
1580       //
1581       NewList = malloc (sizeof (TEXT_STRING_LIST));
1582       if (NewList == NULL) {
1583         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1584         return STATUS_ERROR;
1585       }
1586
1587       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1588       NewList->Str = malloc (strlen (Argv[1]) + 2);
1589       if (NewList->Str == NULL) {
1590         free (NewList);
1591         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1592         return STATUS_ERROR;
1593       }
1594
1595       strcpy (NewList->Str, Argv[1]);
1596       if (NewList->Str[strlen (NewList->Str) - 1] != FILE_SEP_CHAR) {
1597         strcat (NewList->Str, FILE_SEP_STRING);
1598       }
1599       //
1600       // Add it to our linked list
1601       //
1602       if (mGlobals.IncludePaths == NULL) {
1603         mGlobals.IncludePaths = NewList;
1604       } else {
1605         mGlobals.LastIncludePath->Next = NewList;
1606       }
1607
1608       mGlobals.LastIncludePath = NewList;
1609       Argc--;
1610       Argv++;
1611     } else if (stricmp (Argv[0], "-if") == 0) {
1612       //
1613       // Indirection file -- check for one more arg
1614       //
1615       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1616         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing indirection file name");
1617         return STATUS_ERROR;
1618       }
1619       //
1620       // Allocate memory for a new list element, fill it in, and
1621       // add it to our list of include paths. Always make sure it
1622       // has a "\" on the end of it.
1623       //
1624       NewList = malloc (sizeof (TEXT_STRING_LIST));
1625       if (NewList == NULL) {
1626         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1627         return STATUS_ERROR;
1628       }
1629
1630       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1631       NewList->Str = malloc (strlen (Argv[1]) + 1);
1632       if (NewList->Str == NULL) {
1633         free (NewList);
1634         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1635         return STATUS_ERROR;
1636       }
1637
1638       strcpy (NewList->Str, Argv[1]);
1639       //
1640       // Add it to our linked list
1641       //
1642       if (mGlobals.IndirectionFileName == NULL) {
1643         mGlobals.IndirectionFileName = NewList;
1644       } else {
1645         mGlobals.LastIndirectionFileName->Next = NewList;
1646       }
1647
1648       mGlobals.LastIndirectionFileName = NewList;
1649       Argc--;
1650       Argv++;
1651     } else if (stricmp (Argv[0], "-db") == 0) {
1652       //
1653       // -db option to specify a database file.
1654       // Check for one more arg (the database file name)
1655       //
1656       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1657         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing database file name");
1658         return STATUS_ERROR;
1659       }
1660
1661       NewList = malloc (sizeof (TEXT_STRING_LIST));
1662       if (NewList == NULL) {
1663         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1664         return STATUS_ERROR;
1665       }
1666
1667       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1668       NewList->Str = malloc (strlen (Argv[1]) + 1);
1669       if (NewList->Str == NULL) {
1670         free (NewList);
1671         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1672         return STATUS_ERROR;
1673       }
1674
1675       strcpy (NewList->Str, Argv[1]);
1676       //
1677       // Add it to our linked list
1678       //
1679       if (mGlobals.DatabaseFileName == NULL) {
1680         mGlobals.DatabaseFileName = NewList;
1681       } else {
1682         mGlobals.LastDatabaseFileName->Next = NewList;
1683       }
1684
1685       mGlobals.LastDatabaseFileName = NewList;
1686       Argc--;
1687       Argv++;
1688     } else if (stricmp (Argv[0], "-ou") == 0) {
1689       //
1690       // -ou option to specify an output unicode file to
1691       // which we can dump our database.
1692       //
1693       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1694         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing database dump output file name");
1695         return STATUS_ERROR;
1696       }
1697
1698       if (mGlobals.DumpUFileName[0] == 0) {
1699         strcpy (mGlobals.DumpUFileName, Argv[1]);
1700       } else {
1701         Error (PROGRAM_NAME, 0, 0, Argv[1], "-ou option already specified with '%s'", mGlobals.DumpUFileName);
1702         return STATUS_ERROR;
1703       }
1704
1705       Argc--;
1706       Argv++;
1707     } else if (stricmp (Argv[0], "-hpk") == 0) {
1708       //
1709       // -hpk option to create an HII export pack of the input database file
1710       //
1711       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1712         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing raw string data dump output file name");
1713         return STATUS_ERROR;
1714       }
1715
1716       if (mGlobals.HiiExportPackFileName[0] == 0) {
1717         strcpy (mGlobals.HiiExportPackFileName, Argv[1]);
1718       } else {
1719         Error (PROGRAM_NAME, 0, 0, Argv[1], "-or option already specified with '%s'", mGlobals.HiiExportPackFileName);
1720         return STATUS_ERROR;
1721       }
1722
1723       Argc--;
1724       Argv++;
1725     } else if ((stricmp (Argv[0], "-?") == 0) || (stricmp (Argv[0], "-h") == 0)) {
1726       Usage ();
1727       return STATUS_ERROR;
1728     } else if (stricmp (Argv[0], "-v") == 0) {
1729       mGlobals.Verbose = 1;
1730     } else if (stricmp (Argv[0], "-vdbw") == 0) {
1731       mGlobals.VerboseDatabaseWrite = 1;
1732     } else if (stricmp (Argv[0], "-vdbr") == 0) {
1733       mGlobals.VerboseDatabaseRead = 1;
1734     } else if (stricmp (Argv[0], "-newdb") == 0) {
1735       mGlobals.NewDatabase = 1;
1736     } else if (stricmp (Argv[0], "-ignorenotfound") == 0) {
1737       mGlobals.IgnoreNotFound = 1;
1738     } else if (stricmp (Argv[0], "-oc") == 0) {
1739       //
1740       // check for one more arg
1741       //
1742       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1743         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing output C filename");
1744         return STATUS_ERROR;
1745       }
1746
1747       strcpy (mGlobals.StringCFileName, Argv[1]);
1748       Argc--;
1749       Argv++;
1750     } else if (stricmp (Argv[0], "-bn") == 0) {
1751       //
1752       // check for one more arg
1753       //
1754       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1755         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing base name");
1756         Usage ();
1757         return STATUS_ERROR;
1758       }
1759
1760       strcpy (mGlobals.BaseName, Argv[1]);
1761       Argc--;
1762       Argv++;
1763     } else if (stricmp (Argv[0], "-oh") == 0) {
1764       //
1765       // -oh to specify output .h defines file name
1766       //
1767       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1768         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing output .h filename");
1769         return STATUS_ERROR;
1770       }
1771
1772       strcpy (mGlobals.StringHFileName, Argv[1]);
1773       Argc--;
1774       Argv++;
1775     } else if (stricmp (Argv[0], "-skipext") == 0) {
1776       //
1777       // -skipext to skip scanning of files with certain filename extensions
1778       //
1779       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1780         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing filename extension");
1781         return STATUS_ERROR;
1782       }
1783       //
1784       // Allocate memory for a new list element, fill it in, and
1785       // add it to our list of excluded extensions. Always make sure it
1786       // has a "." as the first character.
1787       //
1788       NewList = malloc (sizeof (TEXT_STRING_LIST));
1789       if (NewList == NULL) {
1790         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1791         return STATUS_ERROR;
1792       }
1793
1794       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1795       NewList->Str = malloc (strlen (Argv[1]) + 2);
1796       if (NewList->Str == NULL) {
1797         free (NewList);
1798         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1799         return STATUS_ERROR;
1800       }
1801
1802       if (Argv[1][0] == '.') {
1803         strcpy (NewList->Str, Argv[1]);
1804       } else {
1805         NewList->Str[0] = '.';
1806         strcpy (NewList->Str + 1, Argv[1]);
1807       }
1808       //
1809       // Add it to our linked list
1810       //
1811       if (mGlobals.SkipExt == NULL) {
1812         mGlobals.SkipExt = NewList;
1813       } else {
1814         mGlobals.LastSkipExt->Next = NewList;
1815       }
1816
1817       mGlobals.LastSkipExt = NewList;
1818       Argc--;
1819       Argv++;
1820     } else if (stricmp (Argv[0], "-lang") == 0) {
1821       //
1822       // "-lang eng" or "-lang spa+cat" to only output certain languages
1823       //
1824       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1825         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing language name");
1826         Usage ();
1827         return STATUS_ERROR;
1828       }
1829
1830       if (AddCommandLineLanguage (Argv[1]) != STATUS_SUCCESS) {
1831         return STATUS_ERROR;
1832       }
1833
1834       Argc--;
1835       Argv++;
1836     } else if (stricmp (Argv[0], "-od") == 0) {
1837       //
1838       // Output database file name -- check for another arg
1839       //
1840       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1841         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing output database file name");
1842         return STATUS_ERROR;
1843       }
1844
1845       strcpy (mGlobals.OutputDatabaseFileName, Argv[1]);
1846       Argv++;
1847       Argc--;
1848     } else {
1849       //
1850       // Unrecognized arg
1851       //
1852       Error (PROGRAM_NAME, 0, 0, Argv[0], "unrecognized option");
1853       Usage ();
1854       return STATUS_ERROR;
1855     }
1856
1857     Argv++;
1858     Argc--;
1859   }
1860   //
1861   // Make sure they specified the mode parse/scan/dump
1862   //
1863   if (mGlobals.Mode == MODE_UNKNOWN) {
1864     Error (NULL, 0, 0, "must specify one of -parse/-scan/-dump", NULL);
1865     return STATUS_ERROR;
1866   }
1867   //
1868   // All modes require a database filename
1869   //
1870   if (mGlobals.DatabaseFileName == 0) {
1871     Error (NULL, 0, 0, "must specify a database filename using -db DbFileName", NULL);
1872     Usage ();
1873     return STATUS_ERROR;
1874   }
1875   //
1876   // If dumping the database file, then return immediately if all
1877   // parameters check out.
1878   //
1879   if (mGlobals.Mode == MODE_DUMP) {
1880     //
1881     // Not much use if they didn't specify -oh or -oc or -ou or -hpk
1882     //
1883     if ((mGlobals.DumpUFileName[0] == 0) &&
1884         (mGlobals.StringHFileName[0] == 0) &&
1885         (mGlobals.StringCFileName[0] == 0) &&
1886         (mGlobals.HiiExportPackFileName[0] == 0)
1887         ) {
1888       Error (NULL, 0, 0, "-dump without -oc/-oh/-ou/-hpk is a NOP", NULL);
1889       return STATUS_ERROR;
1890     }
1891
1892     return STATUS_SUCCESS;
1893   }
1894   //
1895   // Had to specify source string file and output string defines header filename.
1896   //
1897   if (mGlobals.Mode == MODE_SCAN) {
1898     if (Argc < 1) {
1899       Error (PROGRAM_NAME, 0, 0, NULL, "must specify at least one source file to scan with -scan");
1900       Usage ();
1901       return STATUS_ERROR;
1902     }
1903     //
1904     // Get the list of filenames
1905     //
1906     while (Argc > 0) {
1907       NewList = malloc (sizeof (TEXT_STRING_LIST));
1908       if (NewList == NULL) {
1909         Error (PROGRAM_NAME, 0, 0, "memory allocation failure", NULL);
1910         return STATUS_ERROR;
1911       }
1912
1913       memset (NewList, 0, sizeof (TEXT_STRING_LIST));
1914       NewList->Str = (CHAR8 *) malloc (strlen (Argv[0]) + 1);
1915       if (NewList->Str == NULL) {
1916         Error (PROGRAM_NAME, 0, 0, "memory allocation failure", NULL);
1917         return STATUS_ERROR;
1918       }
1919
1920       strcpy (NewList->Str, Argv[0]);
1921       if (mGlobals.ScanFileName == NULL) {
1922         mGlobals.ScanFileName = NewList;
1923       } else {
1924         mGlobals.LastScanFileName->Next = NewList;
1925       }
1926
1927       mGlobals.LastScanFileName = NewList;
1928       Argc--;
1929       Argv++;
1930     }
1931   } else {
1932     //
1933     // Parse mode -- must specify an input unicode file name
1934     //
1935     if (Argc < 1) {
1936       Error (PROGRAM_NAME, 0, 0, NULL, "must specify input unicode string file name with -parse");
1937       Usage ();
1938       return STATUS_ERROR;
1939     }
1940
1941     strcpy (mGlobals.SourceFiles.FileName, Argv[0]);
1942   }
1943
1944   return STATUS_SUCCESS;
1945 }
1946 //
1947 // Found "-lang eng,spa+cat" on the command line. Parse the
1948 // language list and save the setting for later processing.
1949 //
1950 static
1951 STATUS
1952 AddCommandLineLanguage (
1953   IN CHAR8         *Language
1954   )
1955 {
1956   WCHAR_STRING_LIST *WNewList;
1957   WCHAR             *From;
1958   WCHAR             *To;
1959   //
1960   // Keep processing the input string until we find the end.
1961   //
1962   while (*Language) {
1963     //
1964     // Allocate memory for a new list element, fill it in, and
1965     // add it to our list.
1966     //
1967     WNewList = MALLOC (sizeof (WCHAR_STRING_LIST));
1968     if (WNewList == NULL) {
1969       Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1970       return STATUS_ERROR;
1971     }
1972
1973     memset ((char *) WNewList, 0, sizeof (WCHAR_STRING_LIST));
1974     WNewList->Str = malloc ((strlen (Language) + 1) * sizeof (WCHAR));
1975     if (WNewList->Str == NULL) {
1976       free (WNewList);
1977       Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1978       return STATUS_ERROR;
1979     }
1980     //
1981     // Copy it as unicode to our new structure. Then remove the
1982     // plus signs in it, and verify each language name is 3 characters
1983     // long. If we find a comma, then we're done with this group, so
1984     // break out.
1985     //
1986     UnicodeSPrint (WNewList->Str, (strlen (Language) + 1) * sizeof (WCHAR), L"%a", Language);
1987     From = To = WNewList->Str;
1988     while (*From) {
1989       if (*From == L',') {
1990         break;
1991       }
1992
1993       if ((StrLen (From) < LANGUAGE_IDENTIFIER_NAME_LEN) ||
1994             (
1995               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != 0) &&
1996               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != UNICODE_PLUS_SIGN) &&
1997               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != L',')
1998             )
1999           ) {
2000         Error (PROGRAM_NAME, 0, 0, Language, "invalid format for language name on command line");
2001         FREE (WNewList->Str);
2002         FREE (WNewList);
2003         return STATUS_ERROR;
2004       }
2005
2006       StrnCpy (To, From, LANGUAGE_IDENTIFIER_NAME_LEN);
2007       To += LANGUAGE_IDENTIFIER_NAME_LEN;
2008       From += LANGUAGE_IDENTIFIER_NAME_LEN;
2009       if (*From == L'+') {
2010         From++;
2011       }
2012     }
2013
2014     *To = 0;
2015     //
2016     // Add it to our linked list
2017     //
2018     if (mGlobals.Language == NULL) {
2019       mGlobals.Language = WNewList;
2020     } else {
2021       mGlobals.LastLanguage->Next = WNewList;
2022     }
2023
2024     mGlobals.LastLanguage = WNewList;
2025     //
2026     // Skip to next entry (comma-separated list)
2027     //
2028     while (*Language) {
2029       if (*Language == L',') {
2030         Language++;
2031         break;
2032       }
2033
2034       Language++;
2035     }
2036   }
2037
2038   return STATUS_SUCCESS;
2039 }
2040 //
2041 // The contents of the text file are expected to be (one per line)
2042 //   STRING_IDENTIFIER_NAME   ScopeName
2043 // For example:
2044 //   STR_ID_MY_FAVORITE_STRING   IBM
2045 //
2046 static
2047 STATUS
2048 ParseIndirectionFiles (
2049   TEXT_STRING_LIST    *Files
2050   )
2051 {
2052   FILE                        *Fptr;
2053   CHAR8                       Line[200];
2054   CHAR8                       *StringName;
2055   CHAR8                       *ScopeName;
2056   CHAR8                       *End;
2057   UINT32                      LineCount;
2058   WCHAR_MATCHING_STRING_LIST  *NewList;
2059
2060   Line[sizeof (Line) - 1] = 0;
2061   Fptr                    = NULL;
2062   while (Files != NULL) {
2063     Fptr      = fopen (Files->Str, "r");
2064     LineCount = 0;
2065     if (Fptr == NULL) {
2066       Error (NULL, 0, 0, Files->Str, "failed to open input indirection file for reading");
2067       return STATUS_ERROR;
2068     }
2069
2070     while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2071       //
2072       // remove terminating newline for error printing purposes.
2073       //
2074       if (Line[strlen (Line) - 1] == '\n') {
2075         Line[strlen (Line) - 1] = 0;
2076       }
2077
2078       LineCount++;
2079       if (Line[sizeof (Line) - 1] != 0) {
2080         Error (Files->Str, LineCount, 0, "line length exceeds maximum supported", NULL);
2081         goto Done;
2082       }
2083
2084       StringName = Line;
2085       while (*StringName && (isspace (*StringName))) {
2086         StringName++;
2087       }
2088
2089       if (*StringName) {
2090         if ((*StringName == '_') || isalpha (*StringName)) {
2091           End = StringName;
2092           while ((*End) && (*End == '_') || (isalnum (*End))) {
2093             End++;
2094           }
2095
2096           if (isspace (*End)) {
2097             *End = 0;
2098             End++;
2099             while (isspace (*End)) {
2100               End++;
2101             }
2102
2103             if (*End) {
2104               ScopeName = End;
2105               while (*End && !isspace (*End)) {
2106                 End++;
2107               }
2108
2109               *End = 0;
2110               //
2111               // Add the string name/scope pair
2112               //
2113               NewList = malloc (sizeof (WCHAR_MATCHING_STRING_LIST));
2114               if (NewList == NULL) {
2115                 Error (NULL, 0, 0, "memory allocation error", NULL);
2116                 goto Done;
2117               }
2118
2119               memset (NewList, 0, sizeof (WCHAR_MATCHING_STRING_LIST));
2120               NewList->Str1 = (WCHAR *) malloc ((strlen (StringName) + 1) * sizeof (WCHAR));
2121               NewList->Str2 = (WCHAR *) malloc ((strlen (ScopeName) + 1) * sizeof (WCHAR));
2122               if ((NewList->Str1 == NULL) || (NewList->Str2 == NULL)) {
2123                 Error (NULL, 0, 0, "memory allocation error", NULL);
2124                 goto Done;
2125               }
2126
2127               UnicodeSPrint (NewList->Str1, strlen (StringName) + 1, L"%a", StringName);
2128               UnicodeSPrint (NewList->Str2, strlen (ScopeName) + 1, L"%a", ScopeName);
2129               if (mGlobals.IndirectionList == NULL) {
2130                 mGlobals.IndirectionList = NewList;
2131               } else {
2132                 mGlobals.LastIndirectionList->Next = NewList;
2133               }
2134
2135               mGlobals.LastIndirectionList = NewList;
2136             } else {
2137               Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2138               goto Done;
2139             }
2140           } else {
2141             Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2142             goto Done;
2143           }
2144         } else {
2145           Error (Files->Str, LineCount, 0, StringName, "invalid string identifier");
2146           goto Done;
2147         }
2148       }
2149     }
2150
2151     fclose (Fptr);
2152     Fptr  = NULL;
2153     Files = Files->Next;
2154   }
2155
2156 Done:
2157   if (Fptr != NULL) {
2158     fclose (Fptr);
2159     return STATUS_ERROR;
2160   }
2161
2162   return STATUS_SUCCESS;
2163 }
2164
2165 static
2166 STATUS
2167 ScanFiles (
2168   TEXT_STRING_LIST *ScanFiles
2169   )
2170 {
2171   char              Line[MAX_LINE_LEN];
2172   FILE              *Fptr;
2173   UINT32            LineNum;
2174   char              *Cptr;
2175   char              *SavePtr;
2176   char              *TermPtr;
2177   char              *StringTokenPos;
2178   TEXT_STRING_LIST  *SList;
2179   BOOLEAN           SkipIt;
2180
2181   //
2182   // Put a null-terminator at the end of the line. If we read in
2183   // a line longer than we support, then we can catch it.
2184   //
2185   Line[MAX_LINE_LEN - 1] = 0;
2186   //
2187   // Process each file. If they gave us a skip extension list, then
2188   // skip it if the extension matches.
2189   //
2190   while (ScanFiles != NULL) {
2191     SkipIt = FALSE;
2192     for (SList = mGlobals.SkipExt; SList != NULL; SList = SList->Next) {
2193       if ((strlen (ScanFiles->Str) > strlen (SList->Str)) &&
2194           (strcmp (ScanFiles->Str + strlen (ScanFiles->Str) - strlen (SList->Str), SList->Str) == 0)
2195           ) {
2196         SkipIt = TRUE;
2197         //
2198         // printf ("Match: %s : %s\n", ScanFiles->Str, SList->Str);
2199         //
2200         break;
2201       }
2202     }
2203
2204     if (!SkipIt) {
2205       if (mGlobals.VerboseScan) {
2206         printf ("Scanning %s\n", ScanFiles->Str);
2207       }
2208
2209       Fptr = fopen (ScanFiles->Str, "r");
2210       if (Fptr == NULL) {
2211         Error (NULL, 0, 0, ScanFiles->Str, "failed to open input file for scanning");
2212         return STATUS_ERROR;
2213       }
2214
2215       LineNum = 0;
2216       while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2217         LineNum++;
2218         if (Line[MAX_LINE_LEN - 1] != 0) {
2219           Error (ScanFiles->Str, LineNum, 0, "line length exceeds maximum supported by tool", NULL);
2220           fclose (Fptr);
2221           return STATUS_ERROR;
2222         }
2223         //
2224         // Remove the newline from the input line so we can print a warning message
2225         //
2226         if (Line[strlen (Line) - 1] == '\n') {
2227           Line[strlen (Line) - 1] = 0;
2228         }
2229         //
2230         // Terminate the line at // comments
2231         //
2232         Cptr = strstr (Line, "//");
2233         if (Cptr != NULL) {
2234           *Cptr = 0;
2235         }
2236
2237         Cptr = Line;
2238         while ((Cptr = strstr (Cptr, STRING_TOKEN)) != NULL) {
2239           //
2240           // Found "STRING_TOKEN". Make sure we don't have NUM_STRING_TOKENS or
2241           // something like that. Then make sure it's followed by
2242           // an open parenthesis, a string identifier, and then a closing
2243           // parenthesis.
2244           //
2245           if (mGlobals.VerboseScan) {
2246             printf (" %d: %s", LineNum, Cptr);
2247           }
2248
2249           if (((Cptr == Line) || (!IsValidIdentifierChar (*(Cptr - 1), FALSE))) &&
2250               (!IsValidIdentifierChar (*(Cptr + sizeof (STRING_TOKEN) - 1), FALSE))
2251               ) {
2252             StringTokenPos  = Cptr;
2253             SavePtr         = Cptr;
2254             Cptr += strlen (STRING_TOKEN);
2255             while (*Cptr && isspace (*Cptr) && (*Cptr != '(')) {
2256               Cptr++;
2257             }
2258
2259             if (*Cptr != '(') {
2260               Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2261             } else {
2262               //
2263               // Skip over the open-parenthesis and find the next non-blank character
2264               //
2265               Cptr++;
2266               while (isspace (*Cptr)) {
2267                 Cptr++;
2268               }
2269
2270               SavePtr = Cptr;
2271               if ((*Cptr == '_') || isalpha (*Cptr)) {
2272                 while ((*Cptr == '_') || (isalnum (*Cptr))) {
2273                   Cptr++;
2274                 }
2275
2276                 TermPtr = Cptr;
2277                 while (*Cptr && isspace (*Cptr)) {
2278                   Cptr++;
2279                 }
2280
2281                 if (*Cptr != ')') {
2282                   Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2283                 }
2284
2285                 if (*TermPtr) {
2286                   *TermPtr  = 0;
2287                   Cptr      = TermPtr + 1;
2288                 } else {
2289                   Cptr = TermPtr;
2290                 }
2291                 //
2292                 // Add the string identifier to the list of used strings
2293                 //
2294                 ParserSetPosition (ScanFiles->Str, LineNum);
2295                 StringDBSetStringReferenced (SavePtr, mGlobals.IgnoreNotFound);
2296                 if (mGlobals.VerboseScan) {
2297                   printf ("...referenced %s", SavePtr);
2298                 }
2299               } else {
2300                 Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected valid string identifier name");
2301               }
2302             }
2303           } else {
2304             //
2305             // Found it, but it's a substring of something else. Advance our pointer.
2306             //
2307             Cptr++;
2308           }
2309
2310           if (mGlobals.VerboseScan) {
2311             printf ("\n");
2312           }
2313         }
2314       }
2315
2316       fclose (Fptr);
2317     } else {
2318       //
2319       // Skipping this file type
2320       //
2321       if (mGlobals.VerboseScan) {
2322         printf ("Skip scanning of %s\n", ScanFiles->Str);
2323       }
2324     }
2325
2326     ScanFiles = ScanFiles->Next;
2327   }
2328
2329   return STATUS_SUCCESS;
2330 }
2331 //
2332 // Free the global string lists we allocated memory for
2333 //
2334 static
2335 void
2336 FreeLists (
2337   VOID
2338   )
2339 {
2340   TEXT_STRING_LIST  *Temp;
2341   WCHAR_STRING_LIST *WTemp;
2342
2343   //
2344   // Traverse the include paths, freeing each
2345   //
2346   while (mGlobals.IncludePaths != NULL) {
2347     Temp = mGlobals.IncludePaths->Next;
2348     free (mGlobals.IncludePaths->Str);
2349     free (mGlobals.IncludePaths);
2350     mGlobals.IncludePaths = Temp;
2351   }
2352   //
2353   // If we did a scan, then free up our
2354   // list of files to scan.
2355   //
2356   while (mGlobals.ScanFileName != NULL) {
2357     Temp = mGlobals.ScanFileName->Next;
2358     free (mGlobals.ScanFileName->Str);
2359     free (mGlobals.ScanFileName);
2360     mGlobals.ScanFileName = Temp;
2361   }
2362   //
2363   // If they gave us a list of filename extensions to
2364   // skip on scan, then free them up.
2365   //
2366   while (mGlobals.SkipExt != NULL) {
2367     Temp = mGlobals.SkipExt->Next;
2368     free (mGlobals.SkipExt->Str);
2369     free (mGlobals.SkipExt);
2370     mGlobals.SkipExt = Temp;
2371   }
2372   //
2373   // Free up any languages specified
2374   //
2375   while (mGlobals.Language != NULL) {
2376     WTemp = mGlobals.Language->Next;
2377     free (mGlobals.Language->Str);
2378     free (mGlobals.Language);
2379     mGlobals.Language = WTemp;
2380   }
2381   //
2382   // Free up our indirection list
2383   //
2384   while (mGlobals.IndirectionList != NULL) {
2385     mGlobals.LastIndirectionList = mGlobals.IndirectionList->Next;
2386     free (mGlobals.IndirectionList->Str1);
2387     free (mGlobals.IndirectionList->Str2);
2388     free (mGlobals.IndirectionList);
2389     mGlobals.IndirectionList = mGlobals.LastIndirectionList;
2390   }
2391
2392   while (mGlobals.IndirectionFileName != NULL) {
2393     mGlobals.LastIndirectionFileName = mGlobals.IndirectionFileName->Next;
2394     free (mGlobals.IndirectionFileName->Str);
2395     free (mGlobals.IndirectionFileName);
2396     mGlobals.IndirectionFileName = mGlobals.LastIndirectionFileName;
2397   }
2398 }
2399
2400 static
2401 BOOLEAN
2402 IsValidIdentifierChar (
2403   CHAR8     Char,
2404   BOOLEAN   FirstChar
2405   )
2406 {
2407   //
2408   // If it's the first character of an identifier, then
2409   // it must be one of [A-Za-z_].
2410   //
2411   if (FirstChar) {
2412     if (isalpha (Char) || (Char == '_')) {
2413       return TRUE;
2414     }
2415   } else {
2416     //
2417     // If it's not the first character, then it can
2418     // be one of [A-Za-z_0-9]
2419     //
2420     if (isalnum (Char) || (Char == '_')) {
2421       return TRUE;
2422     }
2423   }
2424
2425   return FALSE;
2426 }
2427
2428 static
2429 void
2430 RewindFile (
2431   SOURCE_FILE *SourceFile
2432   )
2433 {
2434   SourceFile->LineNum       = 1;
2435   SourceFile->FileBufferPtr = SourceFile->FileBuffer;
2436   SourceFile->EndOfFile     = 0;
2437 }
2438
2439 static
2440 BOOLEAN
2441 SkipTo (
2442   SOURCE_FILE *SourceFile,
2443   WCHAR       WChar,
2444   BOOLEAN     StopAfterNewline
2445   )
2446 {
2447   while (!EndOfFile (SourceFile)) {
2448     //
2449     // Check for the character of interest
2450     //
2451     if (SourceFile->FileBufferPtr[0] == WChar) {
2452       return TRUE;
2453     } else {
2454       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
2455         SourceFile->LineNum++;
2456         if (StopAfterNewline) {
2457           SourceFile->FileBufferPtr++;
2458           if (SourceFile->FileBufferPtr[0] == 0) {
2459             SourceFile->FileBufferPtr++;
2460           }
2461
2462           return FALSE;
2463         }
2464       }
2465
2466       SourceFile->FileBufferPtr++;
2467     }
2468   }
2469
2470   return FALSE;
2471 }
2472
2473 static
2474 void
2475 Usage (
2476   VOID
2477   )
2478 /*++
2479
2480 Routine Description:
2481
2482   Print usage information for this utility.
2483
2484 Arguments:
2485
2486   None.
2487
2488 Returns:
2489
2490   Nothing.
2491
2492 --*/
2493 {
2494   int               Index;
2495   static const char *Str[] = {
2496     "",
2497     PROGRAM_NAME " version "TOOL_VERSION " -- process unicode strings file",
2498     "  Usage: "PROGRAM_NAME " -parse {parse options} [FileNames]",
2499     "         "PROGRAM_NAME " -scan {scan options} [FileName]",
2500     "         "PROGRAM_NAME " -dump {dump options}",
2501     "    Common options include:",
2502     "      -h or -?         for this help information",
2503     "      -db Database     required name of output/input database file",
2504     "      -bn BaseName     for use in the .h and .c output files",
2505     "                       Default = "DEFAULT_BASE_NAME,
2506     "      -v               for verbose output",
2507     "      -vdbw            for verbose output when writing database",
2508     "      -vdbr            for verbose output when reading database",
2509     "      -od FileName     to specify an output database file name",
2510     "    Parse options include:",
2511     "      -i IncludePath   add IncludePath to list of search paths",
2512     "      -newdb           to not read in existing database file",
2513     "      -uqs             to indicate that unquoted strings are used",
2514     "      FileNames        name of one or more unicode files to parse",
2515     "    Scan options include:",
2516     "      -scan            scan text file(s) for STRING_TOKEN() usage",
2517     "      -skipext .ext    to skip scan of files with .ext filename extension",
2518     "      -ignorenotfound  ignore if a given STRING_TOKEN(STR) is not ",
2519     "                       found in the database",
2520     "      FileNames        one or more files to scan",
2521     "    Dump options include:",
2522     "      -oc FileName     write string data to FileName",
2523     "      -oh FileName     write string defines to FileName",
2524     "      -ou FileName     dump database to unicode file FileName",
2525     "      -lang Lang       only dump for the language 'Lang'",
2526     "      -if FileName     to specify an indirection file",
2527     "      -hpk FileName    to create an HII export pack of the strings",
2528     "",
2529     "  The expected process is to parse a unicode string file to create an initial",
2530     "  database of string identifier names and string definitions. Then text files",
2531     "  should be scanned for STRING_TOKEN() usages, and the referenced",
2532     "  strings will be tagged as used in the database. After all files have been",
2533     "  scanned, then the database should be dumped to create the necessary output",
2534     "  files.",
2535     "",
2536     NULL
2537   };
2538   for (Index = 0; Str[Index] != NULL; Index++) {
2539     fprintf (stdout, "%s\n", Str[Index]);
2540   }
2541 }