Tools/Source/TianoTools/StrGather/StrGather.c

   1 /*++
   2
   3 Copyright (c) 2004, Intel Corporation
   4 All rights reserved. This program and the accompanying materials
   5 are licensed and made available under the terms and conditions of the BSD License
   6 which accompanies this distribution.  The full text of the license may be found at
   7 http://opensource.org/licenses/bsd-license.php
   8
   9 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  10 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  11
  12 Module Name:
  13
  14   StrGather.c
  15
  16 Abstract:
  17
  18   Parse a strings file and create or add to a string database file.
  19
  20 --*/
  21
  22 #include <stdio.h>
  23 #include <string.h>
  24 #include <stdlib.h>
  25 #include <ctype.h>
  26
  27 #include <Common/UefiBaseTypes.h>
  28
  29 #include "CommonLib.h"
  30 #include "EfiUtilityMsgs.h"
  31 #include "StrGather.h"
  32 #include "StringDB.h"
  33
#define TOOL_VERSION  "0.31"

//
// Size of the fixed CHAR8 file-name buffers declared in this file. Only
// defined here when the build environment has not already provided MAX_PATH.
//
#ifndef MAX_PATH
#define MAX_PATH                    255
#endif
#define MAX_NEST_DEPTH              20  // ProcessIncludeFile() reports an error past this depth (guards against an endless include loop).
#define MAX_STRING_IDENTIFIER_NAME  100 // number of wchars
#define MAX_LINE_LEN                200
#define STRING_TOKEN                "STRING_TOKEN"
#define DEFAULT_BASE_NAME           "BaseName"
//
// Operational modes for this utility (stored in mGlobals.Mode)
//
#define MODE_UNKNOWN  0
#define MODE_PARSE    1
#define MODE_SCAN     2
#define MODE_DUMP     3
  51
  52 //
  53 // We keep a linked list of these for the source files we process
  54 //
typedef struct _SOURCE_FILE {
  FILE                *Fptr;            // stream opened (and closed) by ProcessIncludeFile()
  WCHAR               *FileBuffer;      // whole file contents plus one terminating WCHAR; allocated and freed by ProcessFile()
  WCHAR               *FileBufferPtr;   // current parse position inside FileBuffer
  UINT32              FileSize;         // file size in bytes, as reported by ftell()
  CHAR8               FileName[MAX_PATH];
  UINT32              LineNum;          // advanced each time a line feed is consumed
  BOOLEAN             EndOfFile;        // latched by EndOfFile() once FileBufferPtr reaches the end of the buffer
  BOOLEAN             SkipToHash;       // error recovery: ParseFile() skips input until the next control character
  struct _SOURCE_FILE *Previous;
  struct _SOURCE_FILE *Next;
  WCHAR               ControlCharacter; // character that introduces a directive; a file can change it with <ctrl>=<new char>
} SOURCE_FILE;

//
// Control character assigned to the top-level source file before parsing.
//
#define DEFAULT_CONTROL_CHARACTER UNICODE_SLASH
  70
  71 //
  72 // Here's all our globals. We need a linked list of include paths, a linked
  73 // list of source files, a linked list of subdirectories (appended to each
  74 // include path when searching), and a couple other fields.
  75 //
static struct {
  SOURCE_FILE                 SourceFiles;                      // top-level strings file handed to ProcessIncludeFile() in parse mode
  TEXT_STRING_LIST            *IncludePaths;                    // all include paths to search
  TEXT_STRING_LIST            *LastIncludePath;
  TEXT_STRING_LIST            *ScanFileName;                    // files handed to ScanFiles() in scan mode
  TEXT_STRING_LIST            *LastScanFileName;
  TEXT_STRING_LIST            *SkipExt;                         // if -skipext .uni
  TEXT_STRING_LIST            *LastSkipExt;
  TEXT_STRING_LIST            *IndirectionFileName;             // files handed to ParseIndirectionFiles()
  TEXT_STRING_LIST            *LastIndirectionFileName;
  TEXT_STRING_LIST            *DatabaseFileName;                // databases read at startup; the first is the default output database
  TEXT_STRING_LIST            *LastDatabaseFileName;            // also reused by main() as the cursor while reading databases
  WCHAR_STRING_LIST           *Language;                        // passed to StringDBDumpCStrings()
  WCHAR_STRING_LIST           *LastLanguage;
  WCHAR_MATCHING_STRING_LIST  *IndirectionList;                 // from indirection file(s)
  WCHAR_MATCHING_STRING_LIST  *LastIndirectionList;
  BOOLEAN                     Verbose;                          // for more detailed output
  BOOLEAN                     VerboseDatabaseWrite;             // for more detailed output when writing database
  BOOLEAN                     VerboseDatabaseRead;              // for more detailed output when reading database
  BOOLEAN                     NewDatabase;                      // to start from scratch
  BOOLEAN                     IgnoreNotFound;                   // when scanning
  BOOLEAN                     VerboseScan;
  BOOLEAN                     UnquotedStrings;                  // -uqs option
  CHAR8                       OutputDatabaseFileName[MAX_PATH]; // if non-empty, the database is written here instead of to the first input database
  CHAR8                       StringHFileName[MAX_PATH];        // output string defines header file name
  CHAR8                       StringCFileName[MAX_PATH];        // output .C filename
  CHAR8                       DumpUFileName[MAX_PATH];          // output unicode dump file name
  CHAR8                       HiiExportPackFileName[MAX_PATH];  // HII export pack file name
  CHAR8                       BaseName[MAX_PATH];               // base filename of the strings file
  UINT32                      Mode;                             // one of the MODE_xxx values
} mGlobals;
 107
//
// Forward declarations of the file-local (static) routines used in this file.
//
static
BOOLEAN
IsValidIdentifierChar (
  CHAR8     Char,
  BOOLEAN   FirstChar
  );

static
void
RewindFile (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
SkipTo (
  SOURCE_FILE *SourceFile,
  WCHAR       WChar,
  BOOLEAN     StopAfterNewline
  );

static
UINT32
SkipWhiteSpace (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
IsWhiteSpace (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
EndOfFile (
  SOURCE_FILE *SourceFile
  );

static
void
PreprocessFile (
  SOURCE_FILE *SourceFile
  );

static
UINT32
GetStringIdentifierName (
  IN SOURCE_FILE  *SourceFile,
  IN OUT WCHAR    *StringIdentifierName,
  IN UINT32       StringIdentifierNameLen
  );

static
UINT32
GetLanguageIdentifierName (
  IN SOURCE_FILE  *SourceFile,
  IN OUT WCHAR    *LanguageIdentifierName,
  IN UINT32       LanguageIdentifierNameLen,
  IN BOOLEAN      Optional
  );

static
WCHAR *
GetPrintableLanguageName (
  IN SOURCE_FILE  *SourceFile
  );

static
STATUS
AddCommandLineLanguage (
  IN CHAR8         *Language
  );

static
WCHAR *
GetQuotedString (
  SOURCE_FILE *SourceFile,
  BOOLEAN     Optional
  );

static
STATUS
ProcessIncludeFile (
  SOURCE_FILE *SourceFile,
  SOURCE_FILE *ParentSourceFile
  );

static
STATUS
ParseFile (
  SOURCE_FILE *SourceFile
  );

static
FILE  *
FindFile (
  IN CHAR8    *FileName,
  OUT CHAR8   *FoundFileName,
  IN UINT32   FoundFileNameLen
  );

static
STATUS
ProcessArgs (
  int   Argc,
  char  *Argv[]
  );

static
STATUS
ProcessFile (
  SOURCE_FILE *SourceFile
  );

static
UINT32
wstrcmp (
  WCHAR *Buffer,
  WCHAR *Str
  );

static
void
Usage (
  VOID
  );

static
void
FreeLists (
  VOID
  );

static
void
ProcessTokenString (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenInclude (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenScope (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenLanguage (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenLangDef (
  SOURCE_FILE *SourceFile
  );

static
STATUS
ScanFiles (
  TEXT_STRING_LIST *ScanFiles
  );

static
STATUS
ParseIndirectionFiles (
  TEXT_STRING_LIST    *Files
  );

//
// Not static, unlike the declarations above; called from main() to write the
// HII export pack. NOTE(review): presumably implemented by the string database
// module -- confirm whether this prototype belongs in StringDB.h instead.
//
STATUS
StringDBCreateHiiExportPack (
  CHAR8               *OutputFileName
  );
 288
 289 int
 290 main (
 291   int   Argc,
 292   char  *Argv[]
 293   )
 294 /*++
 295
 296 Routine Description:
 297
 298   Call the routine to parse the command-line options, then process the file.
 299
 300 Arguments:
 301
 302   Argc - Standard C main() argc and argv.
 303   Argv - Standard C main() argc and argv.
 304
 305 Returns:
 306
 307   0       if successful
 308   nonzero otherwise
 309
 310 --*/
 311 {
 312   STATUS  Status;
 313
 314   SetUtilityName (PROGRAM_NAME);
 315   //
 316   // Process the command-line arguments
 317   //
 318   Status = ProcessArgs (Argc, Argv);
 319   if (Status != STATUS_SUCCESS) {
 320     return Status;
 321   }
 322   //
 323   // Initialize the database manager
 324   //
 325   StringDBConstructor ();
 326   //
 327   // We always try to read in an existing database file. It may not
 328   // exist, which is ok usually.
 329   //
 330   if (mGlobals.NewDatabase == 0) {
 331     //
 332     // Read all databases specified.
 333     //
 334     for (mGlobals.LastDatabaseFileName = mGlobals.DatabaseFileName;
 335          mGlobals.LastDatabaseFileName != NULL;
 336          mGlobals.LastDatabaseFileName = mGlobals.LastDatabaseFileName->Next
 337         ) {
 338       Status = StringDBReadDatabase (mGlobals.LastDatabaseFileName->Str, TRUE, mGlobals.VerboseDatabaseRead);
 339       if (Status != STATUS_SUCCESS) {
 340         return Status;
 341       }
 342     }
 343   }
 344   //
 345   // Read indirection file(s) if specified
 346   //
 347   if (ParseIndirectionFiles (mGlobals.IndirectionFileName) != STATUS_SUCCESS) {
 348     goto Finish;
 349   }
 350   //
 351   // If scanning source files, do that now
 352   //
 353   if (mGlobals.Mode == MODE_SCAN) {
 354     ScanFiles (mGlobals.ScanFileName);
 355   } else if (mGlobals.Mode == MODE_PARSE) {
 356     //
 357     // Parsing a unicode strings file
 358     //
 359     mGlobals.SourceFiles.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
 360     Status = ProcessIncludeFile (&mGlobals.SourceFiles, NULL);
 361     if (Status != STATUS_SUCCESS) {
 362       goto Finish;
 363     }
 364   }
 365   //
 366   // Create the string defines header file if there have been no errors.
 367   //
 368   ParserSetPosition (NULL, 0);
 369   if ((mGlobals.StringHFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 370     Status = StringDBDumpStringDefines (mGlobals.StringHFileName, mGlobals.BaseName);
 371     if (Status != EFI_SUCCESS) {
 372       goto Finish;
 373     }
 374   }
 375   //
 376   // Dump the strings to a .c file if there have still been no errors.
 377   //
 378   if ((mGlobals.StringCFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 379     Status = StringDBDumpCStrings (
 380               mGlobals.StringCFileName,
 381               mGlobals.BaseName,
 382               mGlobals.Language,
 383               mGlobals.IndirectionList
 384               );
 385     if (Status != EFI_SUCCESS) {
 386       goto Finish;
 387     }
 388   }
 389   //
 390   // Dump the database if requested
 391   //
 392   if ((mGlobals.DumpUFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 393     StringDBDumpDatabase (NULL, mGlobals.DumpUFileName, FALSE);
 394   }
 395   //
 396   // Dump the string data as HII binary string pack if requested
 397   //
 398   if ((mGlobals.HiiExportPackFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 399     StringDBCreateHiiExportPack (mGlobals.HiiExportPackFileName);
 400   }
 401   //
 402   // Always update the database if no errors and not in dump mode. If they specified -od
 403   // for an output database file name, then use that name. Otherwise use the name of
 404   // the first database file specified with -db
 405   //
 406   if ((mGlobals.Mode != MODE_DUMP) && (GetUtilityStatus () < STATUS_ERROR)) {
 407     if (mGlobals.OutputDatabaseFileName[0]) {
 408       Status = StringDBWriteDatabase (mGlobals.OutputDatabaseFileName, mGlobals.VerboseDatabaseWrite);
 409     } else {
 410       Status = StringDBWriteDatabase (mGlobals.DatabaseFileName->Str, mGlobals.VerboseDatabaseWrite);
 411     }
 412
 413     if (Status != EFI_SUCCESS) {
 414       goto Finish;
 415     }
 416   }
 417
 418 Finish:
 419   //
 420   // Free up memory
 421   //
 422   FreeLists ();
 423   StringDBDestructor ();
 424   return GetUtilityStatus ();
 425 }
 426
 427 static
 428 STATUS
 429 ProcessIncludeFile (
 430   SOURCE_FILE *SourceFile,
 431   SOURCE_FILE *ParentSourceFile
 432   )
 433 /*++
 434
 435 Routine Description:
 436
 437   Given a source file, open the file and parse it
 438
 439 Arguments:
 440
 441   SourceFile        - name of file to parse
 442   ParentSourceFile  - for error reporting purposes, the file that #included SourceFile.
 443
 444 Returns:
 445
 446   Standard status.
 447
 448 --*/
 449 {
 450   static UINT32 NestDepth = 0;
 451   CHAR8         FoundFileName[MAX_PATH];
 452   STATUS        Status;
 453
 454   Status = STATUS_SUCCESS;
 455   NestDepth++;
 456   //
 457   // Print the file being processed. Indent so you can tell the include nesting
 458   // depth.
 459   //
 460   if (mGlobals.Verbose) {
 461     fprintf (stdout, "%*cProcessing file '%s'\n", NestDepth * 2, ' ', SourceFile->FileName);
 462   }
 463
 464   //
 465   // Make sure we didn't exceed our maximum nesting depth
 466   //
 467   if (NestDepth > MAX_NEST_DEPTH) {
 468     Error (NULL, 0, 0, SourceFile->FileName, "max nesting depth (%d) exceeded", NestDepth);
 469     Status = STATUS_ERROR;
 470     goto Finish;
 471   }
 472   //
 473   // Try to open the file locally, and if that fails try along our include paths.
 474   //
 475   strcpy (FoundFileName, SourceFile->FileName);
 476   if ((SourceFile->Fptr = fopen (FoundFileName, "rb")) == NULL) {
 477     //
 478     // Try to find it among the paths if it has a parent (that is, it is included
 479     // by someone else).
 480     //
 481     if (ParentSourceFile == NULL) {
 482       Error (NULL, 0, 0, SourceFile->FileName, "file not found");
 483       return STATUS_ERROR;
 484     }
 485
 486     SourceFile->Fptr = FindFile (SourceFile->FileName, FoundFileName, sizeof (FoundFileName));
 487     if (SourceFile->Fptr == NULL) {
 488       Error (ParentSourceFile->FileName, ParentSourceFile->LineNum, 0, SourceFile->FileName, "include file not found");
 489       return STATUS_ERROR;
 490     }
 491   }
 492   //
 493   // Process the file found
 494   //
 495   ProcessFile (SourceFile);
 496 Finish:
 497   //
 498   // Close open files and return status
 499   //
 500   if (SourceFile->Fptr != NULL) {
 501     fclose (SourceFile->Fptr);
 502   }
 503
 504   return Status;
 505 }
 506
 507 static
 508 STATUS
 509 ProcessFile (
 510   SOURCE_FILE *SourceFile
 511   )
 512 {
 513   //
 514   // Get the file size, and then read the entire thing into memory.
 515   // Allocate space for a terminator character.
 516   //
 517   fseek (SourceFile->Fptr, 0, SEEK_END);
 518   SourceFile->FileSize = ftell (SourceFile->Fptr);
 519   fseek (SourceFile->Fptr, 0, SEEK_SET);
 520   SourceFile->FileBuffer = (WCHAR *) malloc (SourceFile->FileSize + sizeof (WCHAR));
 521   if (SourceFile->FileBuffer == NULL) {
 522     Error (NULL, 0, 0, "memory allocation failure", NULL);
 523     return STATUS_ERROR;
 524   }
 525
 526   fread ((VOID *) SourceFile->FileBuffer, SourceFile->FileSize, 1, SourceFile->Fptr);
 527   SourceFile->FileBuffer[(SourceFile->FileSize / sizeof (WCHAR))] = UNICODE_NULL;
 528   //
 529   // Pre-process the file to replace comments with spaces
 530   //
 531   PreprocessFile (SourceFile);
 532   //
 533   // Parse the file
 534   //
 535   ParseFile (SourceFile);
 536   free (SourceFile->FileBuffer);
 537   return STATUS_SUCCESS;
 538 }
 539
 540 static
 541 STATUS
 542 ParseFile (
 543   SOURCE_FILE *SourceFile
 544   )
 545 {
 546   BOOLEAN InComment;
 547   UINT32  Len;
 548
 549   //
 550   // First character of a unicode file is special. Make sure
 551   //
 552   if (SourceFile->FileBufferPtr[0] != UNICODE_FILE_START) {
 553     Error (SourceFile->FileName, 1, 0, SourceFile->FileName, "file does not appear to be a unicode file");
 554     return STATUS_ERROR;
 555   }
 556
 557   SourceFile->FileBufferPtr++;
 558   InComment = FALSE;
 559   //
 560   // Print the first line if in verbose mode
 561   //
 562   if (mGlobals.Verbose) {
 563     printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 564   }
 565   //
 566   // Since the syntax is relatively straightforward, just switch on the next char
 567   //
 568   while (!EndOfFile (SourceFile)) {
 569     //
 570     // Check for whitespace
 571     //
 572     if (SourceFile->FileBufferPtr[0] == UNICODE_SPACE) {
 573       SourceFile->FileBufferPtr++;
 574     } else if (SourceFile->FileBufferPtr[0] == UNICODE_TAB) {
 575       SourceFile->FileBufferPtr++;
 576     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 577       SourceFile->FileBufferPtr++;
 578     } else if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 579       SourceFile->FileBufferPtr++;
 580       SourceFile->LineNum++;
 581       if (mGlobals.Verbose) {
 582         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 583       }
 584
 585       InComment = FALSE;
 586     } else if (SourceFile->FileBufferPtr[0] == 0) {
 587       SourceFile->FileBufferPtr++;
 588     } else if (InComment) {
 589       SourceFile->FileBufferPtr++;
 590     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 591       SourceFile->FileBufferPtr += 2;
 592       InComment = TRUE;
 593     } else if (SourceFile->SkipToHash && (SourceFile->FileBufferPtr[0] != SourceFile->ControlCharacter)) {
 594       SourceFile->FileBufferPtr++;
 595     } else {
 596       SourceFile->SkipToHash = FALSE;
 597       if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 598           ((Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"include")) > 0)
 599           ) {
 600         SourceFile->FileBufferPtr += Len + 1;
 601         ProcessTokenInclude (SourceFile);
 602       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 603                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"scope")) > 0
 604               ) {
 605         SourceFile->FileBufferPtr += Len + 1;
 606         ProcessTokenScope (SourceFile);
 607       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 608                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"language")) > 0
 609               ) {
 610         SourceFile->FileBufferPtr += Len + 1;
 611         ProcessTokenLanguage (SourceFile);
 612       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 613                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"langdef")) > 0
 614               ) {
 615         SourceFile->FileBufferPtr += Len + 1;
 616         ProcessTokenLangDef (SourceFile);
 617       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 618                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"string")) > 0
 619               ) {
 620         SourceFile->FileBufferPtr += Len + 1;
 621         ProcessTokenString (SourceFile);
 622       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 623                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"EFI_BREAKPOINT()")) > 0
 624               ) {
 625         SourceFile->FileBufferPtr += Len;
 626         //
 627         // BUGBUG: Caling EFI_BREAKOINT() is breaking the link.  What is the proper action for this tool
 628         // in this condition?
 629         //
 630 //        EFI_BREAKPOINT ();
 631       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 632                (SourceFile->FileBufferPtr[1] == UNICODE_EQUAL_SIGN)
 633               ) {
 634         SourceFile->ControlCharacter = SourceFile->FileBufferPtr[2];
 635         SourceFile->FileBufferPtr += 3;
 636       } else {
 637         Error (SourceFile->FileName, SourceFile->LineNum, 0, "unrecognized token", "%S", SourceFile->FileBufferPtr);
 638         //
 639         // Treat rest of line as a comment.
 640         //
 641         InComment = TRUE;
 642       }
 643     }
 644   }
 645
 646   return STATUS_SUCCESS;
 647 }
 648
 649 static
 650 void
 651 PreprocessFile (
 652   SOURCE_FILE *SourceFile
 653   )
 654 /*++
 655
 656 Routine Description:
 657   Preprocess a file to replace all carriage returns with NULLs so
 658   we can print lines from the file to the screen.
 659
 660 Arguments:
 661   SourceFile - structure that we use to keep track of an input file.
 662
 663 Returns:
 664   Nothing.
 665
 666 --*/
 667 {
 668   BOOLEAN InComment;
 669
 670   RewindFile (SourceFile);
 671   InComment = FALSE;
 672   while (!EndOfFile (SourceFile)) {
 673     //
 674     // If a line-feed, then no longer in a comment
 675     //
 676     if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 677       SourceFile->FileBufferPtr++;
 678       SourceFile->LineNum++;
 679       InComment = 0;
 680     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 681       //
 682       // Replace all carriage returns with a NULL so we can print stuff
 683       //
 684       SourceFile->FileBufferPtr[0] = 0;
 685       SourceFile->FileBufferPtr++;
 686     } else if (InComment) {
 687       SourceFile->FileBufferPtr[0] = UNICODE_SPACE;
 688       SourceFile->FileBufferPtr++;
 689     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 690       SourceFile->FileBufferPtr += 2;
 691       InComment = TRUE;
 692     } else {
 693       SourceFile->FileBufferPtr++;
 694     }
 695   }
 696   //
 697   // Could check for end-of-file and still in a comment, but
 698   // should not be necessary. So just restore the file pointers.
 699   //
 700   RewindFile (SourceFile);
 701 }
 702
 703 static
 704 WCHAR *
 705 GetPrintableLanguageName (
 706   IN SOURCE_FILE  *SourceFile
 707   )
 708 {
 709   WCHAR   *String;
 710   WCHAR   *Start;
 711   WCHAR   *Ptr;
 712   UINT32  Len;
 713
 714   SkipWhiteSpace (SourceFile);
 715   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 716     Error (
 717       SourceFile->FileName,
 718       SourceFile->LineNum,
 719       0,
 720       "expected quoted printable language name",
 721       "%S",
 722       SourceFile->FileBufferPtr
 723       );
 724     SourceFile->SkipToHash = TRUE;
 725     return NULL;
 726   }
 727
 728   Len = 0;
 729   SourceFile->FileBufferPtr++;
 730   Start = Ptr = SourceFile->FileBufferPtr;
 731   while (!EndOfFile (SourceFile)) {
 732     if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 733       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 734       break;
 735     } else if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
 736       break;
 737     }
 738
 739     SourceFile->FileBufferPtr++;
 740     Len++;
 741   }
 742
 743   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 744     Warning (
 745       SourceFile->FileName,
 746       SourceFile->LineNum,
 747       0,
 748       "missing closing quote on printable language name string",
 749       "%S",
 750       Start
 751       );
 752   } else {
 753     SourceFile->FileBufferPtr++;
 754   }
 755   //
 756   // Now allocate memory for the string and save it off
 757   //
 758   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 759   if (String == NULL) {
 760     Error (NULL, 0, 0, "memory allocation failed", NULL);
 761     return NULL;
 762   }
 763   //
 764   // Copy the string from the file buffer to the local copy.
 765   // We do no reformatting of it whatsoever at this point.
 766   //
 767   Ptr = String;
 768   while (Len > 0) {
 769     *Ptr = *Start;
 770     Start++;
 771     Ptr++;
 772     Len--;
 773   }
 774
 775   *Ptr = 0;
 776   //
 777   // Now format the string to convert \wide and \narrow controls
 778   //
 779   StringDBFormatString (String);
 780   return String;
 781 }
 782
 783 static
 784 WCHAR *
 785 GetQuotedString (
 786   SOURCE_FILE *SourceFile,
 787   BOOLEAN     Optional
 788   )
 789 {
 790   WCHAR   *String;
 791   WCHAR   *Start;
 792   WCHAR   *Ptr;
 793   UINT32  Len;
 794   BOOLEAN PreviousBackslash;
 795
 796   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 797     if (!Optional) {
 798       Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted string", "%S", SourceFile->FileBufferPtr);
 799     }
 800
 801     return NULL;
 802   }
 803
 804   Len = 0;
 805   SourceFile->FileBufferPtr++;
 806   Start             = Ptr = SourceFile->FileBufferPtr;
 807   PreviousBackslash = FALSE;
 808   while (!EndOfFile (SourceFile)) {
 809     if ((SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) && (!PreviousBackslash)) {
 810       break;
 811     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 812       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 813       PreviousBackslash = FALSE;
 814     } else if (SourceFile->FileBufferPtr[0] == UNICODE_BACKSLASH) {
 815       PreviousBackslash = TRUE;
 816     } else {
 817       PreviousBackslash = FALSE;
 818     }
 819
 820     SourceFile->FileBufferPtr++;
 821     Len++;
 822   }
 823
 824   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 825     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "missing closing quote on string", "%S", Start);
 826   } else {
 827     SourceFile->FileBufferPtr++;
 828   }
 829   //
 830   // Now allocate memory for the string and save it off
 831   //
 832   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 833   if (String == NULL) {
 834     Error (NULL, 0, 0, "memory allocation failed", NULL);
 835     return NULL;
 836   }
 837   //
 838   // Copy the string from the file buffer to the local copy.
 839   // We do no reformatting of it whatsoever at this point.
 840   //
 841   Ptr = String;
 842   while (Len > 0) {
 843     *Ptr = *Start;
 844     Start++;
 845     Ptr++;
 846     Len--;
 847   }
 848
 849   *Ptr = 0;
 850   return String;
 851 }
 852 //
 853 // Parse:
 854 //    #string STR_ID_NAME
 855 //
 856 // All we can do is call the string database to add the string identifier. Unfortunately
 857 // he'll have to keep track of the last identifier we added.
 858 //
 859 static
 860 void
 861 ProcessTokenString (
 862   SOURCE_FILE *SourceFile
 863   )
 864 {
 865   WCHAR   StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
 866   UINT16  StringId;
 867   //
 868   // Extract the string identifier name and add it to the database.
 869   //
 870   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
 871     StringId = STRING_ID_INVALID;
 872     StringDBAddStringIdentifier (StringIdentifier, &StringId, 0);
 873   } else {
 874     //
 875     // Error recovery -- skip to the next #
 876     //
 877     SourceFile->SkipToHash = TRUE;
 878   }
 879 }
 880
 881 static
 882 BOOLEAN
 883 EndOfFile (
 884   SOURCE_FILE *SourceFile
 885   )
 886 {
 887   //
 888   // The file buffer pointer will typically get updated before the End-of-file flag in the
 889   // source file structure, so check it first.
 890   //
 891   if (SourceFile->FileBufferPtr >= SourceFile->FileBuffer + SourceFile->FileSize / sizeof (WCHAR)) {
 892     SourceFile->EndOfFile = TRUE;
 893     return TRUE;
 894   }
 895
 896   if (SourceFile->EndOfFile) {
 897     return TRUE;
 898   }
 899
 900   return FALSE;
 901 }
 902
 903 static
 904 UINT32
 905 GetStringIdentifierName (
 906   IN SOURCE_FILE  *SourceFile,
 907   IN OUT WCHAR    *StringIdentifierName,
 908   IN UINT32       StringIdentifierNameLen
 909   )
 910 {
 911   UINT32  Len;
 912   WCHAR   *From;
 913   WCHAR   *Start;
 914
 915   //
 916   // Skip whitespace
 917   //
 918   SkipWhiteSpace (SourceFile);
 919   if (SourceFile->EndOfFile) {
 920     Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-file encountered", "expected string identifier");
 921     return 0;
 922   }
 923   //
 924   // Verify first character of name is [A-Za-z]
 925   //
 926   Len = 0;
 927   StringIdentifierNameLen /= 2;
 928   From  = SourceFile->FileBufferPtr;
 929   Start = SourceFile->FileBufferPtr;
 930   if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 931       ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))
 932       ) {
 933     //
 934     // Do nothing
 935     //
 936   } else {
 937     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid character in string identifier name", "%S", Start);
 938     return 0;
 939   }
 940
 941   while (!EndOfFile (SourceFile)) {
 942     if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 943         ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z)) ||
 944         ((SourceFile->FileBufferPtr[0] >= UNICODE_0) && (SourceFile->FileBufferPtr[0] <= UNICODE_9)) ||
 945         (SourceFile->FileBufferPtr[0] == UNICODE_UNDERSCORE)
 946         ) {
 947       Len++;
 948       if (Len >= StringIdentifierNameLen) {
 949         Error (SourceFile->FileName, SourceFile->LineNum, 0, "string identifier name too long", "%S", Start);
 950         return 0;
 951       }
 952
 953       *StringIdentifierName = SourceFile->FileBufferPtr[0];
 954       StringIdentifierName++;
 955       SourceFile->FileBufferPtr++;
 956     } else if (SkipWhiteSpace (SourceFile) == 0) {
 957       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid string identifier name", "%S", Start);
 958       return 0;
 959     } else {
 960       break;
 961     }
 962   }
 963   //
 964   // Terminate the copy of the string.
 965   //
 966   *StringIdentifierName = 0;
 967   return Len;
 968 }
 969
 970 static
 971 UINT32
 972 GetLanguageIdentifierName (
 973   IN SOURCE_FILE  *SourceFile,
 974   IN OUT WCHAR    *LanguageIdentifierName,
 975   IN UINT32       LanguageIdentifierNameLen,
 976   IN BOOLEAN      Optional
 977   )
 978 {
 979   UINT32  Len;
 980   WCHAR   *From;
 981   WCHAR   *Start;
 982   //
 983   // Skip whitespace
 984   //
 985   SkipWhiteSpace (SourceFile);
 986   if (SourceFile->EndOfFile) {
 987     if (!Optional) {
 988       Error (
 989         SourceFile->FileName,
 990         SourceFile->LineNum,
 991         0,
 992         "end-of-file encountered",
 993         "expected language identifier"
 994         );
 995     }
 996
 997     return 0;
 998   }
 999   //
1000   // This function is called to optionally get a language identifier name in:
1001   //   #string STR_ID eng "the string"
1002   // If it's optional, and we find a double-quote, then return now.
1003   //
1004   if (Optional) {
1005     if (*SourceFile->FileBufferPtr == UNICODE_DOUBLE_QUOTE) {
1006       return 0;
1007     }
1008   }
1009
1010   Len = 0;
1011   LanguageIdentifierNameLen /= 2;
1012   //
1013   // Internal error if we weren't given at least 4 WCHAR's to work with.
1014   //
1015   if (LanguageIdentifierNameLen < LANGUAGE_IDENTIFIER_NAME_LEN + 1) {
1016     Error (
1017       SourceFile->FileName,
1018       SourceFile->LineNum,
1019       0,
1020       "app error -- language identifier name length is invalid",
1021       NULL
1022       );
1023   }
1024
1025   From  = SourceFile->FileBufferPtr;
1026   Start = SourceFile->FileBufferPtr;
1027   while (!EndOfFile (SourceFile)) {
1028     if (((SourceFile->FileBufferPtr[0] >= UNICODE_a) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))) {
1029       Len++;
1030       if (Len > LANGUAGE_IDENTIFIER_NAME_LEN) {
1031         Error (SourceFile->FileName, SourceFile->LineNum, 0, "language identifier name too long", "%S", Start);
1032         return 0;
1033       }
1034
1035       *LanguageIdentifierName = SourceFile->FileBufferPtr[0];
1036       SourceFile->FileBufferPtr++;
1037       LanguageIdentifierName++;
1038     } else if (!IsWhiteSpace (SourceFile)) {
1039       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid language identifier name", "%S", Start);
1040       return 0;
1041     } else {
1042       break;
1043     }
1044   }
1045   //
1046   // Terminate the copy of the string.
1047   //
1048   *LanguageIdentifierName = 0;
1049   return Len;
1050 }
1051
1052 static
1053 void
1054 ProcessTokenInclude (
1055   SOURCE_FILE *SourceFile
1056   )
1057 {
1058   CHAR8       IncludeFileName[MAX_PATH];
1059   CHAR8       *To;
1060   UINT32      Len;
1061   BOOLEAN     ReportedError;
1062   SOURCE_FILE IncludedSourceFile;
1063
1064   ReportedError = FALSE;
1065   if (SkipWhiteSpace (SourceFile) == 0) {
1066     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "expected whitespace following #include keyword", NULL);
1067   }
1068   //
1069   // Should be quoted file name
1070   //
1071   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
1072     Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted include file name", NULL);
1073     goto FailDone;
1074   }
1075
1076   SourceFile->FileBufferPtr++;
1077   //
1078   // Copy the filename as ascii to our local string
1079   //
1080   To  = IncludeFileName;
1081   Len = 0;
1082   while (!EndOfFile (SourceFile)) {
1083     if ((SourceFile->FileBufferPtr[0] == UNICODE_CR) || (SourceFile->FileBufferPtr[0] == UNICODE_LF)) {
1084       Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-line found in quoted include file name", NULL);
1085       goto FailDone;
1086     }
1087
1088     if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
1089       SourceFile->FileBufferPtr++;
1090       break;
1091     }
1092     //
1093     // If too long, then report the error once and process until the closing quote
1094     //
1095     Len++;
1096     if (!ReportedError && (Len >= sizeof (IncludeFileName))) {
1097       Error (SourceFile->FileName, SourceFile->LineNum, 0, "length of include file name exceeds limit", NULL);
1098       ReportedError = TRUE;
1099     }
1100
1101     if (!ReportedError) {
1102       *To = UNICODE_TO_ASCII (SourceFile->FileBufferPtr[0]);
1103       To++;
1104     }
1105
1106     SourceFile->FileBufferPtr++;
1107   }
1108
1109   if (!ReportedError) {
1110     *To = 0;
1111     memset ((char *) &IncludedSourceFile, 0, sizeof (SOURCE_FILE));
1112     strcpy (IncludedSourceFile.FileName, IncludeFileName);
1113     IncludedSourceFile.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
1114     ProcessIncludeFile (&IncludedSourceFile, SourceFile);
1115     //
1116     // printf ("including file '%s'\n", IncludeFileName);
1117     //
1118   }
1119
1120   return ;
1121 FailDone:
1122   //
1123   // Error recovery -- skip to next #
1124   //
1125   SourceFile->SkipToHash = TRUE;
1126 }
1127
1128 static
1129 void
1130 ProcessTokenScope (
1131   SOURCE_FILE *SourceFile
1132   )
1133 {
1134   WCHAR StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
1135   //
1136   // Extract the scope name
1137   //
1138   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
1139     StringDBSetScope (StringIdentifier);
1140   }
1141 }
1142 //
1143 // Parse:  #langdef eng "English"
1144 //         #langdef chn "\wideChinese"
1145 //
1146 static
1147 void
1148 ProcessTokenLangDef (
1149   SOURCE_FILE *SourceFile
1150   )
1151 {
1152   WCHAR   LanguageIdentifier[MAX_STRING_IDENTIFIER_NAME];
1153   UINT32  Len;
1154   WCHAR   *PrintableName;
1155   //
1156   // Extract the 3-character language identifier
1157   //
1158   Len = GetLanguageIdentifierName (SourceFile, LanguageIdentifier, sizeof (LanguageIdentifier), FALSE);
1159   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1160     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", NULL);
1161   } else {
1162     //
1163     // Extract the printable name
1164     //
1165     PrintableName = GetPrintableLanguageName (SourceFile);
1166     if (PrintableName != NULL) {
1167       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1168       StringDBAddLanguage (LanguageIdentifier, PrintableName);
1169       free (PrintableName);
1170       return ;
1171     }
1172   }
1173   //
1174   // Error recovery -- skip to next #
1175   //
1176   SourceFile->SkipToHash = TRUE;
1177 }
1178
1179 static
1180 BOOLEAN
1181 ApparentQuotedString (
1182   SOURCE_FILE *SourceFile
1183   )
1184 {
1185   WCHAR *Ptr;
1186   //
1187   // See if the first and last nonblank characters on the line are double quotes
1188   //
1189   for (Ptr = SourceFile->FileBufferPtr; *Ptr && (*Ptr == UNICODE_SPACE); Ptr++)
1190     ;
1191   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1192     return FALSE;
1193   }
1194
1195   while (*Ptr) {
1196     Ptr++;
1197   }
1198
1199   Ptr--;
1200   for (; *Ptr && (*Ptr == UNICODE_SPACE); Ptr--)
1201     ;
1202   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1203     return FALSE;
1204   }
1205
1206   return TRUE;
1207 }
1208 //
1209 // Parse:
1210 //   #language eng "some string " "more string"
1211 //
1212 static
1213 void
1214 ProcessTokenLanguage (
1215   SOURCE_FILE *SourceFile
1216   )
1217 {
1218   WCHAR   *String;
1219   WCHAR   *SecondString;
1220   WCHAR   *TempString;
1221   WCHAR   *From;
1222   WCHAR   *To;
1223   WCHAR   Language[LANGUAGE_IDENTIFIER_NAME_LEN + 1];
1224   UINT32  Len;
1225   BOOLEAN PreviousNewline;
1226   //
1227   // Get the language identifier
1228   //
1229   Language[0] = 0;
1230   Len         = GetLanguageIdentifierName (SourceFile, Language, sizeof (Language), TRUE);
1231   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1232     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", "%S", Language);
1233     SourceFile->SkipToHash = TRUE;
1234     return ;
1235   }
1236   //
1237   // Extract the string value. It's either a quoted string that starts on the current line, or
1238   // an unquoted string that starts on the following line and continues until the next control
1239   // character in column 1.
1240   // Look ahead to find a quote or a newline
1241   //
1242   if (SkipTo (SourceFile, UNICODE_DOUBLE_QUOTE, TRUE)) {
1243     String = GetQuotedString (SourceFile, FALSE);
1244     if (String != NULL) {
1245       //
1246       // Set the position in the file of where we are parsing for error
1247       // reporting purposes. Then start looking ahead for additional
1248       // quoted strings, and concatenate them until we get a failure
1249       // back from the string parser.
1250       //
1251       Len = StrLen (String) + 1;
1252       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1253       do {
1254         SkipWhiteSpace (SourceFile);
1255         SecondString = GetQuotedString (SourceFile, TRUE);
1256         if (SecondString != NULL) {
1257           Len += StrLen (SecondString);
1258           TempString = (WCHAR *) malloc (Len * sizeof (WCHAR));
1259           if (TempString == NULL) {
1260             Error (NULL, 0, 0, "application error", "failed to allocate memory");
1261             return ;
1262           }
1263
1264           StrCpy (TempString, String);
1265           StrCat (TempString, SecondString);
1266           free (String);
1267           free (SecondString);
1268           String = TempString;
1269         }
1270       } while (SecondString != NULL);
1271       StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1272       free (String);
1273     } else {
1274       //
1275       // Error was reported at lower level. Error recovery mode.
1276       //
1277       SourceFile->SkipToHash = TRUE;
1278     }
1279   } else {
1280     if (!mGlobals.UnquotedStrings) {
1281       //
1282       // They're using unquoted strings. If the next non-blank character is a double quote, and the
1283       // last non-blank character on the line is a double quote, then more than likely they're using
1284       // quotes, so they need to put the quoted string on the end of the previous line
1285       //
1286       if (ApparentQuotedString (SourceFile)) {
1287         Warning (
1288           SourceFile->FileName,
1289           SourceFile->LineNum,
1290           0,
1291           "unexpected quoted string on line",
1292           "specify -uqs option if necessary"
1293           );
1294       }
1295     }
1296     //
1297     // Found end-of-line (hopefully). Skip over it and start taking in characters
1298     // until we find a control character at the start of a line.
1299     //
1300     Len             = 0;
1301     From            = SourceFile->FileBufferPtr;
1302     PreviousNewline = FALSE;
1303     while (!EndOfFile (SourceFile)) {
1304       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
1305         PreviousNewline = TRUE;
1306         SourceFile->LineNum++;
1307       } else {
1308         Len++;
1309         if (PreviousNewline && (SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter)) {
1310           break;
1311         }
1312
1313         PreviousNewline = FALSE;
1314       }
1315
1316       SourceFile->FileBufferPtr++;
1317     }
1318
1319     if ((Len == 0) && EndOfFile (SourceFile)) {
1320       Error (SourceFile->FileName, SourceFile->LineNum, 0, "unexpected end of file", NULL);
1321       SourceFile->SkipToHash = TRUE;
1322       return ;
1323     }
1324     //
1325     // Now allocate a buffer, copy the characters, and add the string.
1326     //
1327     String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
1328     if (String == NULL) {
1329       Error (NULL, 0, 0, "application error", "failed to allocate memory");
1330       return ;
1331     }
1332
1333     To = String;
1334     while (From < SourceFile->FileBufferPtr) {
1335       switch (*From) {
1336       case UNICODE_LF:
1337       case 0:
1338         break;
1339
1340       default:
1341         *To = *From;
1342         To++;
1343         break;
1344       }
1345
1346       From++;
1347     }
1348
1349     //
1350     // String[Len] = 0;
1351     //
1352     *To = 0;
1353     StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1354   }
1355 }
1356
1357 static
1358 BOOLEAN
1359 IsWhiteSpace (
1360   SOURCE_FILE *SourceFile
1361   )
1362 {
1363   switch (SourceFile->FileBufferPtr[0]) {
1364   case UNICODE_NULL:
1365   case UNICODE_CR:
1366   case UNICODE_SPACE:
1367   case UNICODE_TAB:
1368   case UNICODE_LF:
1369     return TRUE;
1370
1371   default:
1372     return FALSE;
1373   }
1374 }
1375
1376 static
1377 UINT32
1378 SkipWhiteSpace (
1379   SOURCE_FILE *SourceFile
1380   )
1381 {
1382   UINT32  Count;
1383
1384   Count = 0;
1385   while (!EndOfFile (SourceFile)) {
1386     Count++;
1387     switch (*SourceFile->FileBufferPtr) {
1388     case UNICODE_NULL:
1389     case UNICODE_CR:
1390     case UNICODE_SPACE:
1391     case UNICODE_TAB:
1392       SourceFile->FileBufferPtr++;
1393       break;
1394
1395     case UNICODE_LF:
1396       SourceFile->FileBufferPtr++;
1397       SourceFile->LineNum++;
1398       if (mGlobals.Verbose) {
1399         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
1400       }
1401       break;
1402
1403     default:
1404       return Count - 1;
1405     }
1406   }
1407   //
1408   // Some tokens require trailing whitespace. If we're at the end of the
1409   // file, then we count that as well.
1410   //
1411   if ((Count == 0) && (EndOfFile (SourceFile))) {
1412     Count++;
1413   }
1414
1415   return Count;
1416 }
1417
//
// Test whether Buffer begins with the zero-terminated string Str.
//
// Returns the number of characters in Str when every character of Str
// matches the corresponding character of Buffer, and 0 otherwise. An empty
// Str therefore also yields 0.
//
// The comparison stops at the terminator of Str, so neither string is read
// beyond its own terminator even when the two strings are identical.
//
static
UINT32
wstrcmp (
  WCHAR *Buffer,
  WCHAR *Str
  )
{
  UINT32  Len;

  Len = 0;
  while ((*Str != 0) && (*Str == *Buffer)) {
    Buffer++;
    Str++;
    Len++;
  }
  //
  // Characters left over in Str mean a mismatch was hit first.
  //
  if (*Str) {
    return 0;
  }

  return Len;
}
1440 //
1441 // Given a filename, try to find it along the include paths.
1442 //
1443 static
1444 FILE *
1445 FindFile (
1446   IN CHAR8   *FileName,
1447   OUT CHAR8  *FoundFileName,
1448   IN UINT32  FoundFileNameLen
1449   )
1450 {
1451   FILE              *Fptr;
1452   TEXT_STRING_LIST  *List;
1453
1454   //
1455   // Traverse the list of paths and try to find the file
1456   //
1457   List = mGlobals.IncludePaths;
1458   while (List != NULL) {
1459     //
1460     // Put the path and filename together
1461     //
1462     if (strlen (List->Str) + strlen (FileName) + 1 > FoundFileNameLen) {
1463       Error (PROGRAM_NAME, 0, 0, NULL, "internal error - cannot concatenate path+filename");
1464       return NULL;
1465     }
1466     //
1467     // Append the filename to this include path and try to open the file.
1468     //
1469     strcpy (FoundFileName, List->Str);
1470     strcat (FoundFileName, FileName);
1471     if ((Fptr = fopen (FoundFileName, "rb")) != NULL) {
1472       //
1473       // Return the file pointer
1474       //
1475       return Fptr;
1476     }
1477
1478     List = List->Next;
1479   }
1480   //
1481   // Not found
1482   //
1483   FoundFileName[0] = 0;
1484   return NULL;
1485 }
1486 //
1487 // Process the command-line arguments
1488 //
1489 static
1490 STATUS
1491 ProcessArgs (
1492   int   Argc,
1493   char  *Argv[]
1494   )
1495 {
1496   TEXT_STRING_LIST  *NewList;
1497   //
1498   // Clear our globals
1499   //
1500   memset ((char *) &mGlobals, 0, sizeof (mGlobals));
1501   strcpy (mGlobals.BaseName, DEFAULT_BASE_NAME);
1502   //
1503   // Skip program name
1504   //
1505   Argc--;
1506   Argv++;
1507
1508   if (Argc == 0) {
1509     Usage ();
1510     return STATUS_ERROR;
1511   }
1512
1513   mGlobals.Mode = MODE_UNKNOWN;
1514   //
1515   // Process until no more -args.
1516   //
1517   while ((Argc > 0) && (Argv[0][0] == '-')) {
1518     //
1519     // -parse option
1520     //
1521     if (stricmp (Argv[0], "-parse") == 0) {
1522       if (mGlobals.Mode != MODE_UNKNOWN) {
1523         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1524         return STATUS_ERROR;
1525       }
1526
1527       mGlobals.Mode = MODE_PARSE;
1528       //
1529       // -scan option
1530       //
1531     } else if (stricmp (Argv[0], "-scan") == 0) {
1532       if (mGlobals.Mode != MODE_UNKNOWN) {
1533         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1534         return STATUS_ERROR;
1535       }
1536
1537       mGlobals.Mode = MODE_SCAN;
1538       //
1539       // -vscan verbose scanning option
1540       //
1541     } else if (stricmp (Argv[0], "-vscan") == 0) {
1542       mGlobals.VerboseScan = TRUE;
1543       //
1544       // -dump option
1545       //
1546     } else if (stricmp (Argv[0], "-dump") == 0) {
1547       if (mGlobals.Mode != MODE_UNKNOWN) {
1548         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1549         return STATUS_ERROR;
1550       }
1551
1552       mGlobals.Mode = MODE_DUMP;
1553     } else if (stricmp (Argv[0], "-uqs") == 0) {
1554       mGlobals.UnquotedStrings = TRUE;
1555       //
1556       // -i path    add include search path when parsing
1557       //
1558     } else if (stricmp (Argv[0], "-i") == 0) {
1559       //
1560       // check for one more arg
1561       //
1562       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1563         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing include path");
1564         return STATUS_ERROR;
1565       }
1566       //
1567       // Allocate memory for a new list element, fill it in, and
1568       // add it to our list of include paths. Always make sure it
1569       // has a "\" on the end of it.
1570       //
1571       NewList = malloc (sizeof (TEXT_STRING_LIST));
1572       if (NewList == NULL) {
1573         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1574         return STATUS_ERROR;
1575       }
1576
1577       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1578       NewList->Str = malloc (strlen (Argv[1]) + 2);
1579       if (NewList->Str == NULL) {
1580         free (NewList);
1581         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1582         return STATUS_ERROR;
1583       }
1584
1585       strcpy (NewList->Str, Argv[1]);
1586       if (NewList->Str[strlen (NewList->Str) - 1] != '\\') {
1587         strcat (NewList->Str, "\\");
1588       }
1589       //
1590       // Add it to our linked list
1591       //
1592       if (mGlobals.IncludePaths == NULL) {
1593         mGlobals.IncludePaths = NewList;
1594       } else {
1595         mGlobals.LastIncludePath->Next = NewList;
1596       }
1597
1598       mGlobals.LastIncludePath = NewList;
1599       Argc--;
1600       Argv++;
1601     } else if (stricmp (Argv[0], "-if") == 0) {
1602       //
1603       // Indirection file -- check for one more arg
1604       //
1605       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1606         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing indirection file name");
1607         return STATUS_ERROR;
1608       }
1609       //
1610       // Allocate memory for a new list element, fill it in, and
1611       // add it to our list of include paths. Always make sure it
1612       // has a "\" on the end of it.
1613       //
1614       NewList = malloc (sizeof (TEXT_STRING_LIST));
1615       if (NewList == NULL) {
1616         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1617         return STATUS_ERROR;
1618       }
1619
1620       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1621       NewList->Str = malloc (strlen (Argv[1]) + 1);
1622       if (NewList->Str == NULL) {
1623         free (NewList);
1624         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1625         return STATUS_ERROR;
1626       }
1627
1628       strcpy (NewList->Str, Argv[1]);
1629       //
1630       // Add it to our linked list
1631       //
1632       if (mGlobals.IndirectionFileName == NULL) {
1633         mGlobals.IndirectionFileName = NewList;
1634       } else {
1635         mGlobals.LastIndirectionFileName->Next = NewList;
1636       }
1637
1638       mGlobals.LastIndirectionFileName = NewList;
1639       Argc--;
1640       Argv++;
1641     } else if (stricmp (Argv[0], "-db") == 0) {
1642       //
1643       // -db option to specify a database file.
1644       // Check for one more arg (the database file name)
1645       //
1646       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1647         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing database file name");
1648         return STATUS_ERROR;
1649       }
1650
1651       NewList = malloc (sizeof (TEXT_STRING_LIST));
1652       if (NewList == NULL) {
1653         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1654         return STATUS_ERROR;
1655       }
1656
1657       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1658       NewList->Str = malloc (strlen (Argv[1]) + 1);
1659       if (NewList->Str == NULL) {
1660         free (NewList);
1661         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1662         return STATUS_ERROR;
1663       }
1664
1665       strcpy (NewList->Str, Argv[1]);
1666       //
1667       // Add it to our linked list
1668       //
1669       if (mGlobals.DatabaseFileName == NULL) {
1670         mGlobals.DatabaseFileName = NewList;
1671       } else {
1672         mGlobals.LastDatabaseFileName->Next = NewList;
1673       }
1674
1675       mGlobals.LastDatabaseFileName = NewList;
1676       Argc--;
1677       Argv++;
1678     } else if (stricmp (Argv[0], "-ou") == 0) {
1679       //
1680       // -ou option to specify an output unicode file to
1681       // which we can dump our database.
1682       //
1683       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1684         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing database dump output file name");
1685         return STATUS_ERROR;
1686       }
1687
1688       if (mGlobals.DumpUFileName[0] == 0) {
1689         strcpy (mGlobals.DumpUFileName, Argv[1]);
1690       } else {
1691         Error (PROGRAM_NAME, 0, 0, Argv[1], "-ou option already specified with '%s'", mGlobals.DumpUFileName);
1692         return STATUS_ERROR;
1693       }
1694
1695       Argc--;
1696       Argv++;
1697     } else if (stricmp (Argv[0], "-hpk") == 0) {
1698       //
1699       // -hpk option to create an HII export pack of the input database file
1700       //
1701       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1702         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing raw string data dump output file name");
1703         return STATUS_ERROR;
1704       }
1705
1706       if (mGlobals.HiiExportPackFileName[0] == 0) {
1707         strcpy (mGlobals.HiiExportPackFileName, Argv[1]);
1708       } else {
1709         Error (PROGRAM_NAME, 0, 0, Argv[1], "-or option already specified with '%s'", mGlobals.HiiExportPackFileName);
1710         return STATUS_ERROR;
1711       }
1712
1713       Argc--;
1714       Argv++;
1715     } else if ((stricmp (Argv[0], "-?") == 0) || (stricmp (Argv[0], "-h") == 0)) {
1716       Usage ();
1717       return STATUS_ERROR;
1718     } else if (stricmp (Argv[0], "-v") == 0) {
1719       mGlobals.Verbose = 1;
1720     } else if (stricmp (Argv[0], "-vdbw") == 0) {
1721       mGlobals.VerboseDatabaseWrite = 1;
1722     } else if (stricmp (Argv[0], "-vdbr") == 0) {
1723       mGlobals.VerboseDatabaseRead = 1;
1724     } else if (stricmp (Argv[0], "-newdb") == 0) {
1725       mGlobals.NewDatabase = 1;
1726     } else if (stricmp (Argv[0], "-ignorenotfound") == 0) {
1727       mGlobals.IgnoreNotFound = 1;
1728     } else if (stricmp (Argv[0], "-oc") == 0) {
1729       //
1730       // check for one more arg
1731       //
1732       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1733         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing output C filename");
1734         return STATUS_ERROR;
1735       }
1736
1737       strcpy (mGlobals.StringCFileName, Argv[1]);
1738       Argc--;
1739       Argv++;
1740     } else if (stricmp (Argv[0], "-bn") == 0) {
1741       //
1742       // check for one more arg
1743       //
1744       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1745         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing base name");
1746         Usage ();
1747         return STATUS_ERROR;
1748       }
1749
1750       strcpy (mGlobals.BaseName, Argv[1]);
1751       Argc--;
1752       Argv++;
1753     } else if (stricmp (Argv[0], "-oh") == 0) {
1754       //
1755       // -oh to specify output .h defines file name
1756       //
1757       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1758         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing output .h filename");
1759         return STATUS_ERROR;
1760       }
1761
1762       strcpy (mGlobals.StringHFileName, Argv[1]);
1763       Argc--;
1764       Argv++;
1765     } else if (stricmp (Argv[0], "-skipext") == 0) {
1766       //
1767       // -skipext to skip scanning of files with certain filename extensions
1768       //
1769       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1770         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing filename extension");
1771         return STATUS_ERROR;
1772       }
1773       //
1774       // Allocate memory for a new list element, fill it in, and
1775       // add it to our list of excluded extensions. Always make sure it
1776       // has a "." as the first character.
1777       //
1778       NewList = malloc (sizeof (TEXT_STRING_LIST));
1779       if (NewList == NULL) {
1780         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1781         return STATUS_ERROR;
1782       }
1783
1784       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1785       NewList->Str = malloc (strlen (Argv[1]) + 2);
1786       if (NewList->Str == NULL) {
1787         free (NewList);
1788         Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1789         return STATUS_ERROR;
1790       }
1791
1792       if (Argv[1][0] == '.') {
1793         strcpy (NewList->Str, Argv[1]);
1794       } else {
1795         NewList->Str[0] = '.';
1796         strcpy (NewList->Str + 1, Argv[1]);
1797       }
1798       //
1799       // Add it to our linked list
1800       //
1801       if (mGlobals.SkipExt == NULL) {
1802         mGlobals.SkipExt = NewList;
1803       } else {
1804         mGlobals.LastSkipExt->Next = NewList;
1805       }
1806
1807       mGlobals.LastSkipExt = NewList;
1808       Argc--;
1809       Argv++;
1810     } else if (stricmp (Argv[0], "-lang") == 0) {
1811       //
1812       // "-lang eng" or "-lang spa+cat" to only output certain languages
1813       //
1814       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1815         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing language name");
1816         Usage ();
1817         return STATUS_ERROR;
1818       }
1819
1820       if (AddCommandLineLanguage (Argv[1]) != STATUS_SUCCESS) {
1821         return STATUS_ERROR;
1822       }
1823
1824       Argc--;
1825       Argv++;
1826     } else if (stricmp (Argv[0], "-od") == 0) {
1827       //
1828       // Output database file name -- check for another arg
1829       //
1830       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1831         Error (PROGRAM_NAME, 0, 0, Argv[0], "missing output database file name");
1832         return STATUS_ERROR;
1833       }
1834
1835       strcpy (mGlobals.OutputDatabaseFileName, Argv[1]);
1836       Argv++;
1837       Argc--;
1838     } else {
1839       //
1840       // Unrecognized arg
1841       //
1842       Error (PROGRAM_NAME, 0, 0, Argv[0], "unrecognized option");
1843       Usage ();
1844       return STATUS_ERROR;
1845     }
1846
1847     Argv++;
1848     Argc--;
1849   }
1850   //
1851   // Make sure they specified the mode parse/scan/dump
1852   //
1853   if (mGlobals.Mode == MODE_UNKNOWN) {
1854     Error (NULL, 0, 0, "must specify one of -parse/-scan/-dump", NULL);
1855     return STATUS_ERROR;
1856   }
1857   //
1858   // All modes require a database filename
1859   //
1860   if (mGlobals.DatabaseFileName == 0) {
1861     Error (NULL, 0, 0, "must specify a database filename using -db DbFileName", NULL);
1862     Usage ();
1863     return STATUS_ERROR;
1864   }
1865   //
1866   // If dumping the database file, then return immediately if all
1867   // parameters check out.
1868   //
1869   if (mGlobals.Mode == MODE_DUMP) {
1870     //
1871     // Not much use if they didn't specify -oh or -oc or -ou or -hpk
1872     //
1873     if ((mGlobals.DumpUFileName[0] == 0) &&
1874         (mGlobals.StringHFileName[0] == 0) &&
1875         (mGlobals.StringCFileName[0] == 0) &&
1876         (mGlobals.HiiExportPackFileName[0] == 0)
1877         ) {
1878       Error (NULL, 0, 0, "-dump without -oc/-oh/-ou/-hpk is a NOP", NULL);
1879       return STATUS_ERROR;
1880     }
1881
1882     return STATUS_SUCCESS;
1883   }
1884   //
1885   // Had to specify source string file and output string defines header filename.
1886   //
1887   if (mGlobals.Mode == MODE_SCAN) {
1888     if (Argc < 1) {
1889       Error (PROGRAM_NAME, 0, 0, NULL, "must specify at least one source file to scan with -scan");
1890       Usage ();
1891       return STATUS_ERROR;
1892     }
1893     //
1894     // Get the list of filenames
1895     //
1896     while (Argc > 0) {
1897       NewList = malloc (sizeof (TEXT_STRING_LIST));
1898       if (NewList == NULL) {
1899         Error (PROGRAM_NAME, 0, 0, "memory allocation failure", NULL);
1900         return STATUS_ERROR;
1901       }
1902
1903       memset (NewList, 0, sizeof (TEXT_STRING_LIST));
1904       NewList->Str = (CHAR8 *) malloc (strlen (Argv[0]) + 1);
1905       if (NewList->Str == NULL) {
1906         Error (PROGRAM_NAME, 0, 0, "memory allocation failure", NULL);
1907         return STATUS_ERROR;
1908       }
1909
1910       strcpy (NewList->Str, Argv[0]);
1911       if (mGlobals.ScanFileName == NULL) {
1912         mGlobals.ScanFileName = NewList;
1913       } else {
1914         mGlobals.LastScanFileName->Next = NewList;
1915       }
1916
1917       mGlobals.LastScanFileName = NewList;
1918       Argc--;
1919       Argv++;
1920     }
1921   } else {
1922     //
1923     // Parse mode -- must specify an input unicode file name
1924     //
1925     if (Argc < 1) {
1926       Error (PROGRAM_NAME, 0, 0, NULL, "must specify input unicode string file name with -parse");
1927       Usage ();
1928       return STATUS_ERROR;
1929     }
1930
1931     strcpy (mGlobals.SourceFiles.FileName, Argv[0]);
1932   }
1933
1934   return STATUS_SUCCESS;
1935 }
1936 //
1937 // Found "-lang eng,spa+cat" on the command line. Parse the
1938 // language list and save the setting for later processing.
1939 //
1940 static
1941 STATUS
1942 AddCommandLineLanguage (
1943   IN CHAR8         *Language
1944   )
1945 {
1946   WCHAR_STRING_LIST *WNewList;
1947   WCHAR             *From;
1948   WCHAR             *To;
1949   //
1950   // Keep processing the input string until we find the end.
1951   //
1952   while (*Language) {
1953     //
1954     // Allocate memory for a new list element, fill it in, and
1955     // add it to our list.
1956     //
1957     WNewList = MALLOC (sizeof (WCHAR_STRING_LIST));
1958     if (WNewList == NULL) {
1959       Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1960       return STATUS_ERROR;
1961     }
1962
1963     memset ((char *) WNewList, 0, sizeof (WCHAR_STRING_LIST));
1964     WNewList->Str = malloc ((strlen (Language) + 1) * sizeof (WCHAR));
1965     if (WNewList->Str == NULL) {
1966       free (WNewList);
1967       Error (PROGRAM_NAME, 0, 0, NULL, "memory allocation failure");
1968       return STATUS_ERROR;
1969     }
1970     //
1971     // Copy it as unicode to our new structure. Then remove the
1972     // plus signs in it, and verify each language name is 3 characters
1973     // long. If we find a comma, then we're done with this group, so
1974     // break out.
1975     //
1976     UnicodeSPrint (WNewList->Str, (strlen (Language) + 1) * sizeof (WCHAR), L"%a", Language);
1977     From = To = WNewList->Str;
1978     while (*From) {
1979       if (*From == L',') {
1980         break;
1981       }
1982
1983       if ((StrLen (From) < LANGUAGE_IDENTIFIER_NAME_LEN) ||
1984             (
1985               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != 0) &&
1986               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != UNICODE_PLUS_SIGN) &&
1987               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != L',')
1988             )
1989           ) {
1990         Error (PROGRAM_NAME, 0, 0, Language, "invalid format for language name on command line");
1991         FREE (WNewList->Str);
1992         FREE (WNewList);
1993         return STATUS_ERROR;
1994       }
1995
1996       StrnCpy (To, From, LANGUAGE_IDENTIFIER_NAME_LEN);
1997       To += LANGUAGE_IDENTIFIER_NAME_LEN;
1998       From += LANGUAGE_IDENTIFIER_NAME_LEN;
1999       if (*From == L'+') {
2000         From++;
2001       }
2002     }
2003
2004     *To = 0;
2005     //
2006     // Add it to our linked list
2007     //
2008     if (mGlobals.Language == NULL) {
2009       mGlobals.Language = WNewList;
2010     } else {
2011       mGlobals.LastLanguage->Next = WNewList;
2012     }
2013
2014     mGlobals.LastLanguage = WNewList;
2015     //
2016     // Skip to next entry (comma-separated list)
2017     //
2018     while (*Language) {
2019       if (*Language == L',') {
2020         Language++;
2021         break;
2022       }
2023
2024       Language++;
2025     }
2026   }
2027
2028   return STATUS_SUCCESS;
2029 }
2030 //
2031 // The contents of the text file are expected to be (one per line)
2032 //   STRING_IDENTIFIER_NAME   ScopeName
2033 // For example:
2034 //   STR_ID_MY_FAVORITE_STRING   IBM
2035 //
2036 static
2037 STATUS
2038 ParseIndirectionFiles (
2039   TEXT_STRING_LIST    *Files
2040   )
2041 {
2042   FILE                        *Fptr;
2043   CHAR8                       Line[200];
2044   CHAR8                       *StringName;
2045   CHAR8                       *ScopeName;
2046   CHAR8                       *End;
2047   UINT32                      LineCount;
2048   WCHAR_MATCHING_STRING_LIST  *NewList;
2049
2050   Line[sizeof (Line) - 1] = 0;
2051   Fptr                    = NULL;
2052   while (Files != NULL) {
2053     Fptr      = fopen (Files->Str, "r");
2054     LineCount = 0;
2055     if (Fptr == NULL) {
2056       Error (NULL, 0, 0, Files->Str, "failed to open input indirection file for reading");
2057       return STATUS_ERROR;
2058     }
2059
2060     while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2061       //
2062       // remove terminating newline for error printing purposes.
2063       //
2064       if (Line[strlen (Line) - 1] == '\n') {
2065         Line[strlen (Line) - 1] = 0;
2066       }
2067
2068       LineCount++;
2069       if (Line[sizeof (Line) - 1] != 0) {
2070         Error (Files->Str, LineCount, 0, "line length exceeds maximum supported", NULL);
2071         goto Done;
2072       }
2073
2074       StringName = Line;
2075       while (*StringName && (isspace (*StringName))) {
2076         StringName++;
2077       }
2078
2079       if (*StringName) {
2080         if ((*StringName == '_') || isalpha (*StringName)) {
2081           End = StringName;
2082           while ((*End) && (*End == '_') || (isalnum (*End))) {
2083             End++;
2084           }
2085
2086           if (isspace (*End)) {
2087             *End = 0;
2088             End++;
2089             while (isspace (*End)) {
2090               End++;
2091             }
2092
2093             if (*End) {
2094               ScopeName = End;
2095               while (*End && !isspace (*End)) {
2096                 End++;
2097               }
2098
2099               *End = 0;
2100               //
2101               // Add the string name/scope pair
2102               //
2103               NewList = malloc (sizeof (WCHAR_MATCHING_STRING_LIST));
2104               if (NewList == NULL) {
2105                 Error (NULL, 0, 0, "memory allocation error", NULL);
2106                 goto Done;
2107               }
2108
2109               memset (NewList, 0, sizeof (WCHAR_MATCHING_STRING_LIST));
2110               NewList->Str1 = (WCHAR *) malloc ((strlen (StringName) + 1) * sizeof (WCHAR));
2111               NewList->Str2 = (WCHAR *) malloc ((strlen (ScopeName) + 1) * sizeof (WCHAR));
2112               if ((NewList->Str1 == NULL) || (NewList->Str2 == NULL)) {
2113                 Error (NULL, 0, 0, "memory allocation error", NULL);
2114                 goto Done;
2115               }
2116
2117               UnicodeSPrint (NewList->Str1, strlen (StringName) + 1, L"%a", StringName);
2118               UnicodeSPrint (NewList->Str2, strlen (ScopeName) + 1, L"%a", ScopeName);
2119               if (mGlobals.IndirectionList == NULL) {
2120                 mGlobals.IndirectionList = NewList;
2121               } else {
2122                 mGlobals.LastIndirectionList->Next = NewList;
2123               }
2124
2125               mGlobals.LastIndirectionList = NewList;
2126             } else {
2127               Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2128               goto Done;
2129             }
2130           } else {
2131             Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2132             goto Done;
2133           }
2134         } else {
2135           Error (Files->Str, LineCount, 0, StringName, "invalid string identifier");
2136           goto Done;
2137         }
2138       }
2139     }
2140
2141     fclose (Fptr);
2142     Fptr  = NULL;
2143     Files = Files->Next;
2144   }
2145
2146 Done:
2147   if (Fptr != NULL) {
2148     fclose (Fptr);
2149     return STATUS_ERROR;
2150   }
2151
2152   return STATUS_SUCCESS;
2153 }
2154
2155 static
2156 STATUS
2157 ScanFiles (
2158   TEXT_STRING_LIST *ScanFiles
2159   )
2160 {
2161   char              Line[MAX_LINE_LEN];
2162   FILE              *Fptr;
2163   UINT32            LineNum;
2164   char              *Cptr;
2165   char              *SavePtr;
2166   char              *TermPtr;
2167   char              *StringTokenPos;
2168   TEXT_STRING_LIST  *SList;
2169   BOOLEAN           SkipIt;
2170
2171   //
2172   // Put a null-terminator at the end of the line. If we read in
2173   // a line longer than we support, then we can catch it.
2174   //
2175   Line[MAX_LINE_LEN - 1] = 0;
2176   //
2177   // Process each file. If they gave us a skip extension list, then
2178   // skip it if the extension matches.
2179   //
2180   while (ScanFiles != NULL) {
2181     SkipIt = FALSE;
2182     for (SList = mGlobals.SkipExt; SList != NULL; SList = SList->Next) {
2183       if ((strlen (ScanFiles->Str) > strlen (SList->Str)) &&
2184           (strcmp (ScanFiles->Str + strlen (ScanFiles->Str) - strlen (SList->Str), SList->Str) == 0)
2185           ) {
2186         SkipIt = TRUE;
2187         //
2188         // printf ("Match: %s : %s\n", ScanFiles->Str, SList->Str);
2189         //
2190         break;
2191       }
2192     }
2193
2194     if (!SkipIt) {
2195       if (mGlobals.VerboseScan) {
2196         printf ("Scanning %s\n", ScanFiles->Str);
2197       }
2198
2199       Fptr = fopen (ScanFiles->Str, "r");
2200       if (Fptr == NULL) {
2201         Error (NULL, 0, 0, ScanFiles->Str, "failed to open input file for scanning");
2202         return STATUS_ERROR;
2203       }
2204
2205       LineNum = 0;
2206       while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2207         LineNum++;
2208         if (Line[MAX_LINE_LEN - 1] != 0) {
2209           Error (ScanFiles->Str, LineNum, 0, "line length exceeds maximum supported by tool", NULL);
2210           fclose (Fptr);
2211           return STATUS_ERROR;
2212         }
2213         //
2214         // Remove the newline from the input line so we can print a warning message
2215         //
2216         if (Line[strlen (Line) - 1] == '\n') {
2217           Line[strlen (Line) - 1] = 0;
2218         }
2219         //
2220         // Terminate the line at // comments
2221         //
2222         Cptr = strstr (Line, "//");
2223         if (Cptr != NULL) {
2224           *Cptr = 0;
2225         }
2226
2227         Cptr = Line;
2228         while ((Cptr = strstr (Cptr, STRING_TOKEN)) != NULL) {
2229           //
2230           // Found "STRING_TOKEN". Make sure we don't have NUM_STRING_TOKENS or
2231           // something like that. Then make sure it's followed by
2232           // an open parenthesis, a string identifier, and then a closing
2233           // parenthesis.
2234           //
2235           if (mGlobals.VerboseScan) {
2236             printf (" %d: %s", LineNum, Cptr);
2237           }
2238
2239           if (((Cptr == Line) || (!IsValidIdentifierChar (*(Cptr - 1), FALSE))) &&
2240               (!IsValidIdentifierChar (*(Cptr + sizeof (STRING_TOKEN) - 1), FALSE))
2241               ) {
2242             StringTokenPos  = Cptr;
2243             SavePtr         = Cptr;
2244             Cptr += strlen (STRING_TOKEN);
2245             while (*Cptr && isspace (*Cptr) && (*Cptr != '(')) {
2246               Cptr++;
2247             }
2248
2249             if (*Cptr != '(') {
2250               Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2251             } else {
2252               //
2253               // Skip over the open-parenthesis and find the next non-blank character
2254               //
2255               Cptr++;
2256               while (isspace (*Cptr)) {
2257                 Cptr++;
2258               }
2259
2260               SavePtr = Cptr;
2261               if ((*Cptr == '_') || isalpha (*Cptr)) {
2262                 while ((*Cptr == '_') || (isalnum (*Cptr))) {
2263                   Cptr++;
2264                 }
2265
2266                 TermPtr = Cptr;
2267                 while (*Cptr && isspace (*Cptr)) {
2268                   Cptr++;
2269                 }
2270
2271                 if (*Cptr != ')') {
2272                   Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2273                 }
2274
2275                 if (*TermPtr) {
2276                   *TermPtr  = 0;
2277                   Cptr      = TermPtr + 1;
2278                 } else {
2279                   Cptr = TermPtr;
2280                 }
2281                 //
2282                 // Add the string identifier to the list of used strings
2283                 //
2284                 ParserSetPosition (ScanFiles->Str, LineNum);
2285                 StringDBSetStringReferenced (SavePtr, mGlobals.IgnoreNotFound);
2286                 if (mGlobals.VerboseScan) {
2287                   printf ("...referenced %s", SavePtr);
2288                 }
2289               } else {
2290                 Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected valid string identifier name");
2291               }
2292             }
2293           } else {
2294             //
2295             // Found it, but it's a substring of something else. Advance our pointer.
2296             //
2297             Cptr++;
2298           }
2299
2300           if (mGlobals.VerboseScan) {
2301             printf ("\n");
2302           }
2303         }
2304       }
2305
2306       fclose (Fptr);
2307     } else {
2308       //
2309       // Skipping this file type
2310       //
2311       if (mGlobals.VerboseScan) {
2312         printf ("Skip scanning of %s\n", ScanFiles->Str);
2313       }
2314     }
2315
2316     ScanFiles = ScanFiles->Next;
2317   }
2318
2319   return STATUS_SUCCESS;
2320 }
2321 //
2322 // Free the global string lists we allocated memory for
2323 //
2324 static
2325 void
2326 FreeLists (
2327   VOID
2328   )
2329 {
2330   TEXT_STRING_LIST  *Temp;
2331   WCHAR_STRING_LIST *WTemp;
2332
2333   //
2334   // Traverse the include paths, freeing each
2335   //
2336   while (mGlobals.IncludePaths != NULL) {
2337     Temp = mGlobals.IncludePaths->Next;
2338     free (mGlobals.IncludePaths->Str);
2339     free (mGlobals.IncludePaths);
2340     mGlobals.IncludePaths = Temp;
2341   }
2342   //
2343   // If we did a scan, then free up our
2344   // list of files to scan.
2345   //
2346   while (mGlobals.ScanFileName != NULL) {
2347     Temp = mGlobals.ScanFileName->Next;
2348     free (mGlobals.ScanFileName->Str);
2349     free (mGlobals.ScanFileName);
2350     mGlobals.ScanFileName = Temp;
2351   }
2352   //
2353   // If they gave us a list of filename extensions to
2354   // skip on scan, then free them up.
2355   //
2356   while (mGlobals.SkipExt != NULL) {
2357     Temp = mGlobals.SkipExt->Next;
2358     free (mGlobals.SkipExt->Str);
2359     free (mGlobals.SkipExt);
2360     mGlobals.SkipExt = Temp;
2361   }
2362   //
2363   // Free up any languages specified
2364   //
2365   while (mGlobals.Language != NULL) {
2366     WTemp = mGlobals.Language->Next;
2367     free (mGlobals.Language->Str);
2368     free (mGlobals.Language);
2369     mGlobals.Language = WTemp;
2370   }
2371   //
2372   // Free up our indirection list
2373   //
2374   while (mGlobals.IndirectionList != NULL) {
2375     mGlobals.LastIndirectionList = mGlobals.IndirectionList->Next;
2376     free (mGlobals.IndirectionList->Str1);
2377     free (mGlobals.IndirectionList->Str2);
2378     free (mGlobals.IndirectionList);
2379     mGlobals.IndirectionList = mGlobals.LastIndirectionList;
2380   }
2381
2382   while (mGlobals.IndirectionFileName != NULL) {
2383     mGlobals.LastIndirectionFileName = mGlobals.IndirectionFileName->Next;
2384     free (mGlobals.IndirectionFileName->Str);
2385     free (mGlobals.IndirectionFileName);
2386     mGlobals.IndirectionFileName = mGlobals.LastIndirectionFileName;
2387   }
2388 }
2389
2390 static
2391 BOOLEAN
2392 IsValidIdentifierChar (
2393   CHAR8     Char,
2394   BOOLEAN   FirstChar
2395   )
2396 {
2397   //
2398   // If it's the first character of an identifier, then
2399   // it must be one of [A-Za-z_].
2400   //
2401   if (FirstChar) {
2402     if (isalpha (Char) || (Char == '_')) {
2403       return TRUE;
2404     }
2405   } else {
2406     //
2407     // If it's not the first character, then it can
2408     // be one of [A-Za-z_0-9]
2409     //
2410     if (isalnum (Char) || (Char == '_')) {
2411       return TRUE;
2412     }
2413   }
2414
2415   return FALSE;
2416 }
2417
2418 static
2419 void
2420 RewindFile (
2421   SOURCE_FILE *SourceFile
2422   )
2423 {
2424   SourceFile->LineNum       = 1;
2425   SourceFile->FileBufferPtr = SourceFile->FileBuffer;
2426   SourceFile->EndOfFile     = 0;
2427 }
2428
2429 static
2430 BOOLEAN
2431 SkipTo (
2432   SOURCE_FILE *SourceFile,
2433   WCHAR       WChar,
2434   BOOLEAN     StopAfterNewline
2435   )
2436 {
2437   while (!EndOfFile (SourceFile)) {
2438     //
2439     // Check for the character of interest
2440     //
2441     if (SourceFile->FileBufferPtr[0] == WChar) {
2442       return TRUE;
2443     } else {
2444       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
2445         SourceFile->LineNum++;
2446         if (StopAfterNewline) {
2447           SourceFile->FileBufferPtr++;
2448           if (SourceFile->FileBufferPtr[0] == 0) {
2449             SourceFile->FileBufferPtr++;
2450           }
2451
2452           return FALSE;
2453         }
2454       }
2455
2456       SourceFile->FileBufferPtr++;
2457     }
2458   }
2459
2460   return FALSE;
2461 }
2462
2463 static
2464 void
2465 Usage (
2466   VOID
2467   )
2468 /*++
2469
2470 Routine Description:
2471
2472   Print usage information for this utility.
2473
2474 Arguments:
2475
2476   None.
2477
2478 Returns:
2479
2480   Nothing.
2481
2482 --*/
2483 {
2484   int               Index;
2485   static const char *Str[] = {
2486     "",
2487     PROGRAM_NAME " version "TOOL_VERSION " -- process unicode strings file",
2488     "  Usage: "PROGRAM_NAME " -parse {parse options} [FileNames]",
2489     "         "PROGRAM_NAME " -scan {scan options} [FileName]",
2490     "         "PROGRAM_NAME " -dump {dump options}",
2491     "    Common options include:",
2492     "      -h or -?         for this help information",
2493     "      -db Database     required name of output/input database file",
2494     "      -bn BaseName     for use in the .h and .c output files",
2495     "                       Default = "DEFAULT_BASE_NAME,
2496     "      -v               for verbose output",
2497     "      -vdbw            for verbose output when writing database",
2498     "      -vdbr            for verbose output when reading database",
2499     "      -od FileName     to specify an output database file name",
2500     "    Parse options include:",
2501     "      -i IncludePath   add IncludePath to list of search paths",
2502     "      -newdb           to not read in existing database file",
2503     "      -uqs             to indicate that unquoted strings are used",
2504     "      FileNames        name of one or more unicode files to parse",
2505     "    Scan options include:",
2506     "      -scan            scan text file(s) for STRING_TOKEN() usage",
2507     "      -skipext .ext    to skip scan of files with .ext filename extension",
2508     "      -ignorenotfound  ignore if a given STRING_TOKEN(STR) is not ",
2509     "                       found in the database",
2510     "      FileNames        one or more files to scan",
2511     "    Dump options include:",
2512     "      -oc FileName     write string data to FileName",
2513     "      -oh FileName     write string defines to FileName",
2514     "      -ou FileName     dump database to unicode file FileName",
2515     "      -lang Lang       only dump for the language 'Lang'",
2516     "      -if FileName     to specify an indirection file",
2517     "      -hpk FileName    to create an HII export pack of the strings",
2518     "",
2519     "  The expected process is to parse a unicode string file to create an initial",
2520     "  database of string identifier names and string definitions. Then text files",
2521     "  should be scanned for STRING_TOKEN() usages, and the referenced",
2522     "  strings will be tagged as used in the database. After all files have been",
2523     "  scanned, then the database should be dumped to create the necessary output",
2524     "  files.",
2525     "",
2526     NULL
2527   };
2528   for (Index = 0; Str[Index] != NULL; Index++) {
2529     fprintf (stdout, "%s\n", Str[Index]);
2530   }
2531 }