Tools/CCode/Source/StrGather/StrGather.c

   1 /*++
   2
   3 Copyright (c) 2004-2007, Intel Corporation
   4 All rights reserved. This program and the accompanying materials
   5 are licensed and made available under the terms and conditions of the BSD License
   6 which accompanies this distribution.  The full text of the license may be found at
   7 http://opensource.org/licenses/bsd-license.php
   8
   9 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  10 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  11
  12 Module Name:
  13
  14   StrGather.c
  15
  16 Abstract:
  17
  18   Parse a strings file and create or add to a string database file.
  19
  20 --*/
  21
  22 #include <stdio.h>
  23 #include <string.h>
  24 #include <stdlib.h>
  25 #include <ctype.h>
  26
  27 #include <Common/UefiBaseTypes.h>
  28
  29 #include "CommonLib.h"
  30 #include "EfiUtilityMsgs.h"
  31 #include "StrGather.h"
  32 #include "StringDB.h"
  33
//
// Fallback path-buffer size for platforms whose headers do not define MAX_PATH.
//
#ifndef MAX_PATH
#define MAX_PATH                    255
#endif
#define MAX_NEST_DEPTH              20  // just in case we get in an endless loop.
#define MAX_STRING_IDENTIFIER_NAME  100 // number of wchars
#define MAX_LINE_LEN                200
#define STRING_TOKEN                "STRING_TOKEN"
#define DEFAULT_BASE_NAME           "BaseName"
//
// Operational modes for this utility (stored in mGlobals.Mode)
//
#define MODE_UNKNOWN  0
#define MODE_PARSE    1
#define MODE_SCAN     2
#define MODE_DUMP     3
//
// Different file separator for Linux and Windows.
// NOTE(review): the selection is keyed on the compiler (__GNUC__), not on the
// target operating system -- confirm this is intended for GCC builds on Windows.
//
#ifdef __GNUC__
#define FILE_SEP_CHAR '/'
#define FILE_SEP_STRING "/"
#else
#define FILE_SEP_CHAR '\\'
#define FILE_SEP_STRING "\\"
#endif
  59
//
// We keep a linked list of these for the source files we process.
// One instance describes a single strings file that has been (or is being)
// read into memory and parsed.
//
typedef struct _SOURCE_FILE {
  FILE                *Fptr;            // stream opened by ProcessIncludeFile()
  WCHAR               *FileBuffer;      // whole file contents, allocated by ProcessFile()
  WCHAR               *FileBufferPtr;   // current read position inside FileBuffer
  UINT32              FileSize;         // size of the file in bytes (not WCHARs)
  CHAR8               FileName[MAX_PATH];
  UINT32              LineNum;          // current line, used in error/verbose messages
  BOOLEAN             EndOfFile;        // latched TRUE by EndOfFile() once the buffer is exhausted
  BOOLEAN             SkipToHash;       // error recovery: ignore input until the next control character
  struct _SOURCE_FILE *Previous;
  struct _SOURCE_FILE *Next;
  WCHAR               ControlCharacter; // character that introduces a directive in this file
} SOURCE_FILE;

//
// Control character in effect until a file overrides it with the
// "<control char>=<new char>" sequence handled in ParseFile().
//
#define DEFAULT_CONTROL_CHARACTER UNICODE_SLASH
  78
//
// Here's all our globals. Each list is kept as a head pointer plus a "Last"
// pointer; main() also reuses LastDatabaseFileName as its iteration cursor
// when reading the databases.
//
static struct {
  SOURCE_FILE                 SourceFiles;                      // root strings file handed to ProcessIncludeFile()
  TEXT_STRING_LIST            *IncludePaths;                    // all include paths to search
  TEXT_STRING_LIST            *LastIncludePath;
  TEXT_STRING_LIST            *ScanFileName;                    // files passed to ScanFiles() in MODE_SCAN
  TEXT_STRING_LIST            *LastScanFileName;
  TEXT_STRING_LIST            *SkipExt;                         // if -skipext .uni
  TEXT_STRING_LIST            *LastSkipExt;
  TEXT_STRING_LIST            *IndirectionFileName;             // files passed to ParseIndirectionFiles()
  TEXT_STRING_LIST            *LastIndirectionFileName;
  TEXT_STRING_LIST            *DatabaseFileName;                // input databases; the first is the default output
  TEXT_STRING_LIST            *LastDatabaseFileName;
  WCHAR_STRING_LIST           *Language;                        // passed to StringDBDumpCStrings()
  WCHAR_STRING_LIST           *LastLanguage;
  WCHAR_MATCHING_STRING_LIST  *IndirectionList;                 // from indirection file(s)
  WCHAR_MATCHING_STRING_LIST  *LastIndirectionList;
  BOOLEAN                     Verbose;                          // for more detailed output
  BOOLEAN                     VerboseDatabaseWrite;             // for more detailed output when writing database
  BOOLEAN                     VerboseDatabaseRead;              // for more detailed output when reading database
  BOOLEAN                     NewDatabase;                      // to start from scratch
  BOOLEAN                     IgnoreNotFound;                   // when scanning
  BOOLEAN                     VerboseScan;
  BOOLEAN                     UnquotedStrings;                  // -uqs option
  CHAR8                       OutputDatabaseFileName[MAX_PATH]; // output database name; empty means "use the first input database"
  CHAR8                       StringHFileName[MAX_PATH];        // output string defines header file name
  CHAR8                       StringCFileName[MAX_PATH];        // output .C filename
  CHAR8                       DumpUFileName[MAX_PATH];          // output unicode dump file name
  CHAR8                       HiiExportPackFileName[MAX_PATH];  // HII export pack file name
  CHAR8                       BaseName[MAX_PATH];               // base filename of the strings file
  UINT32                      Mode;                             // one of the MODE_xxx values
} mGlobals;
 115
//
// Forward declarations for the file-local helpers.
//

//
// Low-level buffer navigation and character classification
//
static
BOOLEAN
IsValidIdentifierChar (
  CHAR8     Char,
  BOOLEAN   FirstChar
  );

static
void
RewindFile (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
SkipTo (
  SOURCE_FILE *SourceFile,
  WCHAR       WChar,
  BOOLEAN     StopAfterNewline
  );

static
UINT32
SkipWhiteSpace (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
IsWhiteSpace (
  SOURCE_FILE *SourceFile
  );

static
BOOLEAN
EndOfFile (
  SOURCE_FILE *SourceFile
  );

static
void
PreprocessFile (
  SOURCE_FILE *SourceFile
  );

//
// Token extraction. The ...NameLen arguments are buffer sizes in bytes;
// the implementations convert them to a WCHAR count.
//
static
UINT32
GetStringIdentifierName (
  IN SOURCE_FILE  *SourceFile,
  IN OUT WCHAR    *StringIdentifierName,
  IN UINT32       StringIdentifierNameLen
  );

static
UINT32
GetLanguageIdentifierName (
  IN SOURCE_FILE  *SourceFile,
  IN OUT WCHAR    *LanguageIdentifierName,
  IN UINT32       LanguageIdentifierNameLen,
  IN BOOLEAN      Optional
  );

//
// Returns a malloc()'d string (or NULL); the caller owns the result.
//
static
WCHAR *
GetPrintableLanguageName (
  IN SOURCE_FILE  *SourceFile
  );

static
STATUS
AddCommandLineLanguage (
  IN CHAR8         *Language
  );

//
// Returns a malloc()'d string (or NULL); the caller owns the result.
//
static
WCHAR *
GetQuotedString (
  SOURCE_FILE *SourceFile,
  BOOLEAN     Optional
  );

//
// File-level processing
//
static
STATUS
ProcessIncludeFile (
  SOURCE_FILE *SourceFile,
  SOURCE_FILE *ParentSourceFile
  );

static
STATUS
ParseFile (
  SOURCE_FILE *SourceFile
  );

static
FILE  *
FindFile (
  IN CHAR8    *FileName,
  OUT CHAR8   *FoundFileName,
  IN UINT32   FoundFileNameLen
  );

static
STATUS
ProcessArgs (
  int   Argc,
  char  *Argv[]
  );

static
STATUS
ProcessFile (
  SOURCE_FILE *SourceFile
  );

//
// ParseFile() treats a nonzero return as "Buffer starts with Str" and advances
// by the returned value.
//
static
UINT32
wstrcmp (
  WCHAR *Buffer,
  WCHAR *Str
  );

static
void
Version (
  VOID
  );

static
void
Usage (
  VOID
  );

static
void
FreeLists (
  VOID
  );

//
// Handlers for the individual directives recognized by ParseFile()
//
static
void
ProcessTokenString (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenInclude (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenScope (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenLanguage (
  SOURCE_FILE *SourceFile
  );

static
void
ProcessTokenLangDef (
  SOURCE_FILE *SourceFile
  );

static
STATUS
ScanFiles (
  TEXT_STRING_LIST *ScanFiles
  );

static
STATUS
ParseIndirectionFiles (
  TEXT_STRING_LIST    *Files
  );

//
// NOTE(review): non-static prototype declared locally; presumably implemented
// by the string database module -- consider moving it to StringDB.h.
//
STATUS
StringDBCreateHiiExportPack (
  CHAR8               *OutputFileName
  );
 302
 303 int
 304 main (
 305   int   Argc,
 306   char  *Argv[]
 307   )
 308 /*++
 309
 310 Routine Description:
 311
 312   Call the routine to parse the command-line options, then process the file.
 313
 314 Arguments:
 315
 316   Argc - Standard C main() argc and argv.
 317   Argv - Standard C main() argc and argv.
 318
 319 Returns:
 320
 321   0       if successful
 322   nonzero otherwise
 323
 324 --*/
 325 {
 326   STATUS  Status;
 327
 328   SetUtilityName (UTILITY_NAME);
 329   //
 330   // Process the command-line arguments
 331   //
 332   Status = ProcessArgs (Argc, Argv);
 333   if (Status != STATUS_SUCCESS) {
 334     return Status;
 335   }
 336   //
 337   // Initialize the database manager
 338   //
 339   StringDBConstructor ();
 340   //
 341   // We always try to read in an existing database file. It may not
 342   // exist, which is ok usually.
 343   //
 344   if (mGlobals.NewDatabase == 0) {
 345     //
 346     // Read all databases specified.
 347     //
 348     for (mGlobals.LastDatabaseFileName = mGlobals.DatabaseFileName;
 349          mGlobals.LastDatabaseFileName != NULL;
 350          mGlobals.LastDatabaseFileName = mGlobals.LastDatabaseFileName->Next
 351         ) {
 352       Status = StringDBReadDatabase (mGlobals.LastDatabaseFileName->Str, TRUE, mGlobals.VerboseDatabaseRead);
 353       if (Status != STATUS_SUCCESS) {
 354         return Status;
 355       }
 356     }
 357   }
 358   //
 359   // Read indirection file(s) if specified
 360   //
 361   if (ParseIndirectionFiles (mGlobals.IndirectionFileName) != STATUS_SUCCESS) {
 362     goto Finish;
 363   }
 364   //
 365   // If scanning source files, do that now
 366   //
 367   if (mGlobals.Mode == MODE_SCAN) {
 368     ScanFiles (mGlobals.ScanFileName);
 369   } else if (mGlobals.Mode == MODE_PARSE) {
 370     //
 371     // Parsing a unicode strings file
 372     //
 373     mGlobals.SourceFiles.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
 374     Status = ProcessIncludeFile (&mGlobals.SourceFiles, NULL);
 375     if (Status != STATUS_SUCCESS) {
 376       goto Finish;
 377     }
 378   }
 379   //
 380   // Create the string defines header file if there have been no errors.
 381   //
 382   ParserSetPosition (NULL, 0);
 383   if ((mGlobals.StringHFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 384     Status = StringDBDumpStringDefines (mGlobals.StringHFileName, mGlobals.BaseName);
 385     if (Status != EFI_SUCCESS) {
 386       goto Finish;
 387     }
 388   }
 389   //
 390   // Dump the strings to a .c file if there have still been no errors.
 391   //
 392   if ((mGlobals.StringCFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 393     Status = StringDBDumpCStrings (
 394               mGlobals.StringCFileName,
 395               mGlobals.BaseName,
 396               mGlobals.Language,
 397               mGlobals.IndirectionList
 398               );
 399     if (Status != EFI_SUCCESS) {
 400       goto Finish;
 401     }
 402   }
 403   //
 404   // Dump the database if requested
 405   //
 406   if ((mGlobals.DumpUFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 407     StringDBDumpDatabase (NULL, mGlobals.DumpUFileName, FALSE);
 408   }
 409   //
 410   // Dump the string data as HII binary string pack if requested
 411   //
 412   if ((mGlobals.HiiExportPackFileName[0] != 0) && (GetUtilityStatus () < STATUS_ERROR)) {
 413     StringDBCreateHiiExportPack (mGlobals.HiiExportPackFileName);
 414   }
 415   //
 416   // Always update the database if no errors and not in dump mode. If they specified -od
 417   // for an output database file name, then use that name. Otherwise use the name of
 418   // the first database file specified with -db
 419   //
 420   if ((mGlobals.Mode != MODE_DUMP) && (GetUtilityStatus () < STATUS_ERROR)) {
 421     if (mGlobals.OutputDatabaseFileName[0]) {
 422       Status = StringDBWriteDatabase (mGlobals.OutputDatabaseFileName, mGlobals.VerboseDatabaseWrite);
 423     } else {
 424       Status = StringDBWriteDatabase (mGlobals.DatabaseFileName->Str, mGlobals.VerboseDatabaseWrite);
 425     }
 426
 427     if (Status != EFI_SUCCESS) {
 428       goto Finish;
 429     }
 430   }
 431
 432 Finish:
 433   //
 434   // Free up memory
 435   //
 436   FreeLists ();
 437   StringDBDestructor ();
 438   return GetUtilityStatus ();
 439 }
 440
 441 static
 442 STATUS
 443 ProcessIncludeFile (
 444   SOURCE_FILE *SourceFile,
 445   SOURCE_FILE *ParentSourceFile
 446   )
 447 /*++
 448
 449 Routine Description:
 450
 451   Given a source file, open the file and parse it
 452
 453 Arguments:
 454
 455   SourceFile        - name of file to parse
 456   ParentSourceFile  - for error reporting purposes, the file that #included SourceFile.
 457
 458 Returns:
 459
 460   Standard status.
 461
 462 --*/
 463 {
 464   static UINT32 NestDepth = 0;
 465   CHAR8         FoundFileName[MAX_PATH];
 466   STATUS        Status;
 467
 468   Status = STATUS_SUCCESS;
 469   NestDepth++;
 470   //
 471   // Print the file being processed. Indent so you can tell the include nesting
 472   // depth.
 473   //
 474   if (mGlobals.Verbose) {
 475     fprintf (stdout, "%*cProcessing file '%s'\n", NestDepth * 2, ' ', SourceFile->FileName);
 476   }
 477
 478   //
 479   // Make sure we didn't exceed our maximum nesting depth
 480   //
 481   if (NestDepth > MAX_NEST_DEPTH) {
 482     Error (NULL, 0, 0, SourceFile->FileName, "max nesting depth (%d) exceeded", NestDepth);
 483     Status = STATUS_ERROR;
 484     goto Finish;
 485   }
 486   //
 487   // Try to open the file locally, and if that fails try along our include paths.
 488   //
 489   strcpy (FoundFileName, SourceFile->FileName);
 490   if ((SourceFile->Fptr = fopen (FoundFileName, "rb")) == NULL) {
 491     //
 492     // Try to find it among the paths if it has a parent (that is, it is included
 493     // by someone else).
 494     //
 495     if (ParentSourceFile == NULL) {
 496       Error (NULL, 0, 0, SourceFile->FileName, "file not found");
 497       return STATUS_ERROR;
 498     }
 499
 500     SourceFile->Fptr = FindFile (SourceFile->FileName, FoundFileName, sizeof (FoundFileName));
 501     if (SourceFile->Fptr == NULL) {
 502       Error (ParentSourceFile->FileName, ParentSourceFile->LineNum, 0, SourceFile->FileName, "include file not found");
 503       return STATUS_ERROR;
 504     }
 505   }
 506   //
 507   // Process the file found
 508   //
 509   ProcessFile (SourceFile);
 510 Finish:
 511   //
 512   // Close open files and return status
 513   //
 514   if (SourceFile->Fptr != NULL) {
 515     fclose (SourceFile->Fptr);
 516   }
 517
 518   return Status;
 519 }
 520
 521 static
 522 STATUS
 523 ProcessFile (
 524   SOURCE_FILE *SourceFile
 525   )
 526 {
 527   //
 528   // Get the file size, and then read the entire thing into memory.
 529   // Allocate space for a terminator character.
 530   //
 531   fseek (SourceFile->Fptr, 0, SEEK_END);
 532   SourceFile->FileSize = ftell (SourceFile->Fptr);
 533   fseek (SourceFile->Fptr, 0, SEEK_SET);
 534   SourceFile->FileBuffer = (WCHAR *) malloc (SourceFile->FileSize + sizeof (WCHAR));
 535   if (SourceFile->FileBuffer == NULL) {
 536     Error (NULL, 0, 0, "memory allocation failure", NULL);
 537     return STATUS_ERROR;
 538   }
 539
 540   fread ((VOID *) SourceFile->FileBuffer, SourceFile->FileSize, 1, SourceFile->Fptr);
 541   SourceFile->FileBuffer[(SourceFile->FileSize / sizeof (WCHAR))] = UNICODE_NULL;
 542   //
 543   // Pre-process the file to replace comments with spaces
 544   //
 545   PreprocessFile (SourceFile);
 546   //
 547   // Parse the file
 548   //
 549   ParseFile (SourceFile);
 550   free (SourceFile->FileBuffer);
 551   return STATUS_SUCCESS;
 552 }
 553
 554 static
 555 STATUS
 556 ParseFile (
 557   SOURCE_FILE *SourceFile
 558   )
 559 {
 560   BOOLEAN InComment;
 561   UINT32  Len;
 562
 563   //
 564   // First character of a unicode file is special. Make sure
 565   //
 566   if (SourceFile->FileBufferPtr[0] != UNICODE_FILE_START) {
 567     Error (SourceFile->FileName, 1, 0, SourceFile->FileName, "file does not appear to be a unicode file");
 568     return STATUS_ERROR;
 569   }
 570
 571   SourceFile->FileBufferPtr++;
 572   InComment = FALSE;
 573   //
 574   // Print the first line if in verbose mode
 575   //
 576   if (mGlobals.Verbose) {
 577     printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 578   }
 579   //
 580   // Since the syntax is relatively straightforward, just switch on the next char
 581   //
 582   while (!EndOfFile (SourceFile)) {
 583     //
 584     // Check for whitespace
 585     //
 586     if (SourceFile->FileBufferPtr[0] == UNICODE_SPACE) {
 587       SourceFile->FileBufferPtr++;
 588     } else if (SourceFile->FileBufferPtr[0] == UNICODE_TAB) {
 589       SourceFile->FileBufferPtr++;
 590     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 591       SourceFile->FileBufferPtr++;
 592     } else if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 593       SourceFile->FileBufferPtr++;
 594       SourceFile->LineNum++;
 595       if (mGlobals.Verbose) {
 596         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
 597       }
 598
 599       InComment = FALSE;
 600     } else if (SourceFile->FileBufferPtr[0] == 0) {
 601       SourceFile->FileBufferPtr++;
 602     } else if (InComment) {
 603       SourceFile->FileBufferPtr++;
 604     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 605       SourceFile->FileBufferPtr += 2;
 606       InComment = TRUE;
 607     } else if (SourceFile->SkipToHash && (SourceFile->FileBufferPtr[0] != SourceFile->ControlCharacter)) {
 608       SourceFile->FileBufferPtr++;
 609     } else {
 610       SourceFile->SkipToHash = FALSE;
 611       if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 612           ((Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"include")) > 0)
 613           ) {
 614         SourceFile->FileBufferPtr += Len + 1;
 615         ProcessTokenInclude (SourceFile);
 616       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 617                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"scope")) > 0
 618               ) {
 619         SourceFile->FileBufferPtr += Len + 1;
 620         ProcessTokenScope (SourceFile);
 621       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 622                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"language")) > 0
 623               ) {
 624         SourceFile->FileBufferPtr += Len + 1;
 625         ProcessTokenLanguage (SourceFile);
 626       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 627                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"langdef")) > 0
 628               ) {
 629         SourceFile->FileBufferPtr += Len + 1;
 630         ProcessTokenLangDef (SourceFile);
 631       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 632                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"string")) > 0
 633               ) {
 634         SourceFile->FileBufferPtr += Len + 1;
 635         ProcessTokenString (SourceFile);
 636       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 637                (Len = wstrcmp (SourceFile->FileBufferPtr + 1, L"EFI_BREAKPOINT()")) > 0
 638               ) {
 639         SourceFile->FileBufferPtr += Len;
 640         //
 641         // BUGBUG: Caling EFI_BREAKOINT() is breaking the link.  What is the proper action for this tool
 642         // in this condition?
 643         //
 644 //        EFI_BREAKPOINT ();
 645       } else if ((SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter) &&
 646                (SourceFile->FileBufferPtr[1] == UNICODE_EQUAL_SIGN)
 647               ) {
 648         SourceFile->ControlCharacter = SourceFile->FileBufferPtr[2];
 649         SourceFile->FileBufferPtr += 3;
 650       } else {
 651         Error (SourceFile->FileName, SourceFile->LineNum, 0, "unrecognized token", "%S", SourceFile->FileBufferPtr);
 652         //
 653         // Treat rest of line as a comment.
 654         //
 655         InComment = TRUE;
 656       }
 657     }
 658   }
 659
 660   return STATUS_SUCCESS;
 661 }
 662
 663 static
 664 void
 665 PreprocessFile (
 666   SOURCE_FILE *SourceFile
 667   )
 668 /*++
 669
 670 Routine Description:
 671   Preprocess a file to replace all carriage returns with NULLs so
 672   we can print lines from the file to the screen.
 673
 674 Arguments:
 675   SourceFile - structure that we use to keep track of an input file.
 676
 677 Returns:
 678   Nothing.
 679
 680 --*/
 681 {
 682   BOOLEAN InComment;
 683
 684   RewindFile (SourceFile);
 685   InComment = FALSE;
 686   while (!EndOfFile (SourceFile)) {
 687     //
 688     // If a line-feed, then no longer in a comment
 689     //
 690     if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
 691       SourceFile->FileBufferPtr++;
 692       SourceFile->LineNum++;
 693       InComment = 0;
 694     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 695       //
 696       // Replace all carriage returns with a NULL so we can print stuff
 697       //
 698       SourceFile->FileBufferPtr[0] = 0;
 699       SourceFile->FileBufferPtr++;
 700     } else if (InComment) {
 701       SourceFile->FileBufferPtr[0] = UNICODE_SPACE;
 702       SourceFile->FileBufferPtr++;
 703     } else if ((SourceFile->FileBufferPtr[0] == UNICODE_SLASH) && (SourceFile->FileBufferPtr[1] == UNICODE_SLASH)) {
 704       SourceFile->FileBufferPtr += 2;
 705       InComment = TRUE;
 706     } else {
 707       SourceFile->FileBufferPtr++;
 708     }
 709   }
 710   //
 711   // Could check for end-of-file and still in a comment, but
 712   // should not be necessary. So just restore the file pointers.
 713   //
 714   RewindFile (SourceFile);
 715 }
 716
 717 static
 718 WCHAR *
 719 GetPrintableLanguageName (
 720   IN SOURCE_FILE  *SourceFile
 721   )
 722 {
 723   WCHAR   *String;
 724   WCHAR   *Start;
 725   WCHAR   *Ptr;
 726   UINT32  Len;
 727
 728   SkipWhiteSpace (SourceFile);
 729   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 730     Error (
 731       SourceFile->FileName,
 732       SourceFile->LineNum,
 733       0,
 734       "expected quoted printable language name",
 735       "%S",
 736       SourceFile->FileBufferPtr
 737       );
 738     SourceFile->SkipToHash = TRUE;
 739     return NULL;
 740   }
 741
 742   Len = 0;
 743   SourceFile->FileBufferPtr++;
 744   Start = Ptr = SourceFile->FileBufferPtr;
 745   while (!EndOfFile (SourceFile)) {
 746     if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 747       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 748       break;
 749     } else if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
 750       break;
 751     }
 752
 753     SourceFile->FileBufferPtr++;
 754     Len++;
 755   }
 756
 757   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 758     Warning (
 759       SourceFile->FileName,
 760       SourceFile->LineNum,
 761       0,
 762       "missing closing quote on printable language name string",
 763       "%S",
 764       Start
 765       );
 766   } else {
 767     SourceFile->FileBufferPtr++;
 768   }
 769   //
 770   // Now allocate memory for the string and save it off
 771   //
 772   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 773   if (String == NULL) {
 774     Error (NULL, 0, 0, "memory allocation failed", NULL);
 775     return NULL;
 776   }
 777   //
 778   // Copy the string from the file buffer to the local copy.
 779   // We do no reformatting of it whatsoever at this point.
 780   //
 781   Ptr = String;
 782   while (Len > 0) {
 783     *Ptr = *Start;
 784     Start++;
 785     Ptr++;
 786     Len--;
 787   }
 788
 789   *Ptr = 0;
 790   //
 791   // Now format the string to convert \wide and \narrow controls
 792   //
 793   StringDBFormatString (String);
 794   return String;
 795 }
 796
 797 static
 798 WCHAR *
 799 GetQuotedString (
 800   SOURCE_FILE *SourceFile,
 801   BOOLEAN     Optional
 802   )
 803 {
 804   WCHAR   *String;
 805   WCHAR   *Start;
 806   WCHAR   *Ptr;
 807   UINT32  Len;
 808   BOOLEAN PreviousBackslash;
 809
 810   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 811     if (!Optional) {
 812       Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted string", "%S", SourceFile->FileBufferPtr);
 813     }
 814
 815     return NULL;
 816   }
 817
 818   Len = 0;
 819   SourceFile->FileBufferPtr++;
 820   Start             = Ptr = SourceFile->FileBufferPtr;
 821   PreviousBackslash = FALSE;
 822   while (!EndOfFile (SourceFile)) {
 823     if ((SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) && (!PreviousBackslash)) {
 824       break;
 825     } else if (SourceFile->FileBufferPtr[0] == UNICODE_CR) {
 826       Warning (SourceFile->FileName, SourceFile->LineNum, 0, "carriage return found in quoted string", "%S", Start);
 827       PreviousBackslash = FALSE;
 828     } else if (SourceFile->FileBufferPtr[0] == UNICODE_BACKSLASH) {
 829       PreviousBackslash = TRUE;
 830     } else {
 831       PreviousBackslash = FALSE;
 832     }
 833
 834     SourceFile->FileBufferPtr++;
 835     Len++;
 836   }
 837
 838   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
 839     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "missing closing quote on string", "%S", Start);
 840   } else {
 841     SourceFile->FileBufferPtr++;
 842   }
 843   //
 844   // Now allocate memory for the string and save it off
 845   //
 846   String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
 847   if (String == NULL) {
 848     Error (NULL, 0, 0, "memory allocation failed", NULL);
 849     return NULL;
 850   }
 851   //
 852   // Copy the string from the file buffer to the local copy.
 853   // We do no reformatting of it whatsoever at this point.
 854   //
 855   Ptr = String;
 856   while (Len > 0) {
 857     *Ptr = *Start;
 858     Start++;
 859     Ptr++;
 860     Len--;
 861   }
 862
 863   *Ptr = 0;
 864   return String;
 865 }
 866 //
 867 // Parse:
 868 //    #string STR_ID_NAME
 869 //
 870 // All we can do is call the string database to add the string identifier. Unfortunately
 871 // he'll have to keep track of the last identifier we added.
 872 //
 873 static
 874 void
 875 ProcessTokenString (
 876   SOURCE_FILE *SourceFile
 877   )
 878 {
 879   WCHAR   StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
 880   UINT16  StringId;
 881   //
 882   // Extract the string identifier name and add it to the database.
 883   //
 884   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
 885     StringId = STRING_ID_INVALID;
 886     StringDBAddStringIdentifier (StringIdentifier, &StringId, 0);
 887   } else {
 888     //
 889     // Error recovery -- skip to the next #
 890     //
 891     SourceFile->SkipToHash = TRUE;
 892   }
 893 }
 894
 895 static
 896 BOOLEAN
 897 EndOfFile (
 898   SOURCE_FILE *SourceFile
 899   )
 900 {
 901   //
 902   // The file buffer pointer will typically get updated before the End-of-file flag in the
 903   // source file structure, so check it first.
 904   //
 905   if (SourceFile->FileBufferPtr >= SourceFile->FileBuffer + SourceFile->FileSize / sizeof (WCHAR)) {
 906     SourceFile->EndOfFile = TRUE;
 907     return TRUE;
 908   }
 909
 910   if (SourceFile->EndOfFile) {
 911     return TRUE;
 912   }
 913
 914   return FALSE;
 915 }
 916
 917 static
 918 UINT32
 919 GetStringIdentifierName (
 920   IN SOURCE_FILE  *SourceFile,
 921   IN OUT WCHAR    *StringIdentifierName,
 922   IN UINT32       StringIdentifierNameLen
 923   )
 924 {
 925   UINT32  Len;
 926   WCHAR   *From;
 927   WCHAR   *Start;
 928
 929   //
 930   // Skip whitespace
 931   //
 932   SkipWhiteSpace (SourceFile);
 933   if (SourceFile->EndOfFile) {
 934     Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-file encountered", "expected string identifier");
 935     return 0;
 936   }
 937   //
 938   // Verify first character of name is [A-Za-z]
 939   //
 940   Len = 0;
 941   StringIdentifierNameLen /= 2;
 942   From  = SourceFile->FileBufferPtr;
 943   Start = SourceFile->FileBufferPtr;
 944   if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 945       ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))
 946       ) {
 947     //
 948     // Do nothing
 949     //
 950   } else {
 951     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid character in string identifier name", "%S", Start);
 952     return 0;
 953   }
 954
 955   while (!EndOfFile (SourceFile)) {
 956     if (((SourceFile->FileBufferPtr[0] >= UNICODE_A) && (SourceFile->FileBufferPtr[0] <= UNICODE_Z)) ||
 957         ((SourceFile->FileBufferPtr[0] >= UNICODE_z) && (SourceFile->FileBufferPtr[0] <= UNICODE_z)) ||
 958         ((SourceFile->FileBufferPtr[0] >= UNICODE_0) && (SourceFile->FileBufferPtr[0] <= UNICODE_9)) ||
 959         (SourceFile->FileBufferPtr[0] == UNICODE_UNDERSCORE)
 960         ) {
 961       Len++;
 962       if (Len >= StringIdentifierNameLen) {
 963         Error (SourceFile->FileName, SourceFile->LineNum, 0, "string identifier name too long", "%S", Start);
 964         return 0;
 965       }
 966
 967       *StringIdentifierName = SourceFile->FileBufferPtr[0];
 968       StringIdentifierName++;
 969       SourceFile->FileBufferPtr++;
 970     } else if (SkipWhiteSpace (SourceFile) == 0) {
 971       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid string identifier name", "%S", Start);
 972       return 0;
 973     } else {
 974       break;
 975     }
 976   }
 977   //
 978   // Terminate the copy of the string.
 979   //
 980   *StringIdentifierName = 0;
 981   return Len;
 982 }
 983
 984 static
 985 UINT32
 986 GetLanguageIdentifierName (
 987   IN SOURCE_FILE  *SourceFile,
 988   IN OUT WCHAR    *LanguageIdentifierName,
 989   IN UINT32       LanguageIdentifierNameLen,
 990   IN BOOLEAN      Optional
 991   )
 992 {
 993   UINT32  Len;
 994   WCHAR   *From;
 995   WCHAR   *Start;
 996   //
 997   // Skip whitespace
 998   //
 999   SkipWhiteSpace (SourceFile);
1000   if (SourceFile->EndOfFile) {
1001     if (!Optional) {
1002       Error (
1003         SourceFile->FileName,
1004         SourceFile->LineNum,
1005         0,
1006         "end-of-file encountered",
1007         "expected language identifier"
1008         );
1009     }
1010
1011     return 0;
1012   }
1013   //
1014   // This function is called to optionally get a language identifier name in:
1015   //   #string STR_ID eng "the string"
1016   // If it's optional, and we find a double-quote, then return now.
1017   //
1018   if (Optional) {
1019     if (*SourceFile->FileBufferPtr == UNICODE_DOUBLE_QUOTE) {
1020       return 0;
1021     }
1022   }
1023
1024   Len = 0;
1025   LanguageIdentifierNameLen /= 2;
1026   //
1027   // Internal error if we weren't given at least 4 WCHAR's to work with.
1028   //
1029   if (LanguageIdentifierNameLen < LANGUAGE_IDENTIFIER_NAME_LEN + 1) {
1030     Error (
1031       SourceFile->FileName,
1032       SourceFile->LineNum,
1033       0,
1034       "app error -- language identifier name length is invalid",
1035       NULL
1036       );
1037   }
1038
1039   From  = SourceFile->FileBufferPtr;
1040   Start = SourceFile->FileBufferPtr;
1041   while (!EndOfFile (SourceFile)) {
1042     if (((SourceFile->FileBufferPtr[0] >= UNICODE_a) && (SourceFile->FileBufferPtr[0] <= UNICODE_z))) {
1043       Len++;
1044       if (Len > LANGUAGE_IDENTIFIER_NAME_LEN) {
1045         Error (SourceFile->FileName, SourceFile->LineNum, 0, "language identifier name too long", "%S", Start);
1046         return 0;
1047       }
1048
1049       *LanguageIdentifierName = SourceFile->FileBufferPtr[0];
1050       SourceFile->FileBufferPtr++;
1051       LanguageIdentifierName++;
1052     } else if (!IsWhiteSpace (SourceFile)) {
1053       Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid language identifier name", "%S", Start);
1054       return 0;
1055     } else {
1056       break;
1057     }
1058   }
1059   //
1060   // Terminate the copy of the string.
1061   //
1062   *LanguageIdentifierName = 0;
1063   return Len;
1064 }
1065
1066 static
1067 void
1068 ProcessTokenInclude (
1069   SOURCE_FILE *SourceFile
1070   )
1071 {
1072   CHAR8       IncludeFileName[MAX_PATH];
1073   CHAR8       *To;
1074   UINT32      Len;
1075   BOOLEAN     ReportedError;
1076   SOURCE_FILE IncludedSourceFile;
1077
1078   ReportedError = FALSE;
1079   if (SkipWhiteSpace (SourceFile) == 0) {
1080     Warning (SourceFile->FileName, SourceFile->LineNum, 0, "expected whitespace following #include keyword", NULL);
1081   }
1082   //
1083   // Should be quoted file name
1084   //
1085   if (SourceFile->FileBufferPtr[0] != UNICODE_DOUBLE_QUOTE) {
1086     Error (SourceFile->FileName, SourceFile->LineNum, 0, "expected quoted include file name", NULL);
1087     goto FailDone;
1088   }
1089
1090   SourceFile->FileBufferPtr++;
1091   //
1092   // Copy the filename as ascii to our local string
1093   //
1094   To  = IncludeFileName;
1095   Len = 0;
1096   while (!EndOfFile (SourceFile)) {
1097     if ((SourceFile->FileBufferPtr[0] == UNICODE_CR) || (SourceFile->FileBufferPtr[0] == UNICODE_LF)) {
1098       Error (SourceFile->FileName, SourceFile->LineNum, 0, "end-of-line found in quoted include file name", NULL);
1099       goto FailDone;
1100     }
1101
1102     if (SourceFile->FileBufferPtr[0] == UNICODE_DOUBLE_QUOTE) {
1103       SourceFile->FileBufferPtr++;
1104       break;
1105     }
1106     //
1107     // If too long, then report the error once and process until the closing quote
1108     //
1109     Len++;
1110     if (!ReportedError && (Len >= sizeof (IncludeFileName))) {
1111       Error (SourceFile->FileName, SourceFile->LineNum, 0, "length of include file name exceeds limit", NULL);
1112       ReportedError = TRUE;
1113     }
1114
1115     if (!ReportedError) {
1116       *To = UNICODE_TO_ASCII (SourceFile->FileBufferPtr[0]);
1117       To++;
1118     }
1119
1120     SourceFile->FileBufferPtr++;
1121   }
1122
1123   if (!ReportedError) {
1124     *To = 0;
1125     memset ((char *) &IncludedSourceFile, 0, sizeof (SOURCE_FILE));
1126     strcpy (IncludedSourceFile.FileName, IncludeFileName);
1127     IncludedSourceFile.ControlCharacter = DEFAULT_CONTROL_CHARACTER;
1128     ProcessIncludeFile (&IncludedSourceFile, SourceFile);
1129     //
1130     // printf ("including file '%s'\n", IncludeFileName);
1131     //
1132   }
1133
1134   return ;
1135 FailDone:
1136   //
1137   // Error recovery -- skip to next #
1138   //
1139   SourceFile->SkipToHash = TRUE;
1140 }
1141
1142 static
1143 void
1144 ProcessTokenScope (
1145   SOURCE_FILE *SourceFile
1146   )
1147 {
1148   WCHAR StringIdentifier[MAX_STRING_IDENTIFIER_NAME];
1149   //
1150   // Extract the scope name
1151   //
1152   if (GetStringIdentifierName (SourceFile, StringIdentifier, sizeof (StringIdentifier)) > 0) {
1153     StringDBSetScope (StringIdentifier);
1154   }
1155 }
1156 //
1157 // Parse:  #langdef eng "English"
1158 //         #langdef chn "\wideChinese"
1159 //
1160 static
1161 void
1162 ProcessTokenLangDef (
1163   SOURCE_FILE *SourceFile
1164   )
1165 {
1166   WCHAR   LanguageIdentifier[MAX_STRING_IDENTIFIER_NAME];
1167   UINT32  Len;
1168   WCHAR   *PrintableName;
1169   //
1170   // Extract the 3-character language identifier
1171   //
1172   Len = GetLanguageIdentifierName (SourceFile, LanguageIdentifier, sizeof (LanguageIdentifier), FALSE);
1173   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1174     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", NULL);
1175   } else {
1176     //
1177     // Extract the printable name
1178     //
1179     PrintableName = GetPrintableLanguageName (SourceFile);
1180     if (PrintableName != NULL) {
1181       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1182       StringDBAddLanguage (LanguageIdentifier, PrintableName);
1183       free (PrintableName);
1184       return ;
1185     }
1186   }
1187   //
1188   // Error recovery -- skip to next #
1189   //
1190   SourceFile->SkipToHash = TRUE;
1191 }
1192
1193 static
1194 BOOLEAN
1195 ApparentQuotedString (
1196   SOURCE_FILE *SourceFile
1197   )
1198 {
1199   WCHAR *Ptr;
1200   //
1201   // See if the first and last nonblank characters on the line are double quotes
1202   //
1203   for (Ptr = SourceFile->FileBufferPtr; *Ptr && (*Ptr == UNICODE_SPACE); Ptr++)
1204     ;
1205   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1206     return FALSE;
1207   }
1208
1209   while (*Ptr) {
1210     Ptr++;
1211   }
1212
1213   Ptr--;
1214   for (; *Ptr && (*Ptr == UNICODE_SPACE); Ptr--)
1215     ;
1216   if (*Ptr != UNICODE_DOUBLE_QUOTE) {
1217     return FALSE;
1218   }
1219
1220   return TRUE;
1221 }
1222 //
1223 // Parse:
1224 //   #language eng "some string " "more string"
1225 //
1226 static
1227 void
1228 ProcessTokenLanguage (
1229   SOURCE_FILE *SourceFile
1230   )
1231 {
1232   WCHAR   *String;
1233   WCHAR   *SecondString;
1234   WCHAR   *TempString;
1235   WCHAR   *From;
1236   WCHAR   *To;
1237   WCHAR   Language[LANGUAGE_IDENTIFIER_NAME_LEN + 1];
1238   UINT32  Len;
1239   BOOLEAN PreviousNewline;
1240   //
1241   // Get the language identifier
1242   //
1243   Language[0] = 0;
1244   Len         = GetLanguageIdentifierName (SourceFile, Language, sizeof (Language), TRUE);
1245   if (Len != LANGUAGE_IDENTIFIER_NAME_LEN) {
1246     Error (SourceFile->FileName, SourceFile->LineNum, 0, "invalid or missing language identifier", "%S", Language);
1247     SourceFile->SkipToHash = TRUE;
1248     return ;
1249   }
1250   //
1251   // Extract the string value. It's either a quoted string that starts on the current line, or
1252   // an unquoted string that starts on the following line and continues until the next control
1253   // character in column 1.
1254   // Look ahead to find a quote or a newline
1255   //
1256   if (SkipTo (SourceFile, UNICODE_DOUBLE_QUOTE, TRUE)) {
1257     String = GetQuotedString (SourceFile, FALSE);
1258     if (String != NULL) {
1259       //
1260       // Set the position in the file of where we are parsing for error
1261       // reporting purposes. Then start looking ahead for additional
1262       // quoted strings, and concatenate them until we get a failure
1263       // back from the string parser.
1264       //
1265       Len = StrLen (String) + 1;
1266       ParserSetPosition (SourceFile->FileName, SourceFile->LineNum);
1267       do {
1268         SkipWhiteSpace (SourceFile);
1269         SecondString = GetQuotedString (SourceFile, TRUE);
1270         if (SecondString != NULL) {
1271           Len += StrLen (SecondString);
1272           TempString = (WCHAR *) malloc (Len * sizeof (WCHAR));
1273           if (TempString == NULL) {
1274             Error (NULL, 0, 0, "application error", "failed to allocate memory");
1275             return ;
1276           }
1277
1278           StrCpy (TempString, String);
1279           StrCat (TempString, SecondString);
1280           free (String);
1281           free (SecondString);
1282           String = TempString;
1283         }
1284       } while (SecondString != NULL);
1285       StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1286       free (String);
1287     } else {
1288       //
1289       // Error was reported at lower level. Error recovery mode.
1290       //
1291       SourceFile->SkipToHash = TRUE;
1292     }
1293   } else {
1294     if (!mGlobals.UnquotedStrings) {
1295       //
1296       // They're using unquoted strings. If the next non-blank character is a double quote, and the
1297       // last non-blank character on the line is a double quote, then more than likely they're using
1298       // quotes, so they need to put the quoted string on the end of the previous line
1299       //
1300       if (ApparentQuotedString (SourceFile)) {
1301         Warning (
1302           SourceFile->FileName,
1303           SourceFile->LineNum,
1304           0,
1305           "unexpected quoted string on line",
1306           "specify -uqs option if necessary"
1307           );
1308       }
1309     }
1310     //
1311     // Found end-of-line (hopefully). Skip over it and start taking in characters
1312     // until we find a control character at the start of a line.
1313     //
1314     Len             = 0;
1315     From            = SourceFile->FileBufferPtr;
1316     PreviousNewline = FALSE;
1317     while (!EndOfFile (SourceFile)) {
1318       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
1319         PreviousNewline = TRUE;
1320         SourceFile->LineNum++;
1321       } else {
1322         Len++;
1323         if (PreviousNewline && (SourceFile->FileBufferPtr[0] == SourceFile->ControlCharacter)) {
1324           break;
1325         }
1326
1327         PreviousNewline = FALSE;
1328       }
1329
1330       SourceFile->FileBufferPtr++;
1331     }
1332
1333     if ((Len == 0) && EndOfFile (SourceFile)) {
1334       Error (SourceFile->FileName, SourceFile->LineNum, 0, "unexpected end of file", NULL);
1335       SourceFile->SkipToHash = TRUE;
1336       return ;
1337     }
1338     //
1339     // Now allocate a buffer, copy the characters, and add the string.
1340     //
1341     String = (WCHAR *) malloc ((Len + 1) * sizeof (WCHAR));
1342     if (String == NULL) {
1343       Error (NULL, 0, 0, "application error", "failed to allocate memory");
1344       return ;
1345     }
1346
1347     To = String;
1348     while (From < SourceFile->FileBufferPtr) {
1349       switch (*From) {
1350       case UNICODE_LF:
1351       case 0:
1352         break;
1353
1354       default:
1355         *To = *From;
1356         To++;
1357         break;
1358       }
1359
1360       From++;
1361     }
1362
1363     //
1364     // String[Len] = 0;
1365     //
1366     *To = 0;
1367     StringDBAddString (Language, NULL, NULL, String, TRUE, 0);
1368   }
1369 }
1370
1371 static
1372 BOOLEAN
1373 IsWhiteSpace (
1374   SOURCE_FILE *SourceFile
1375   )
1376 {
1377   switch (SourceFile->FileBufferPtr[0]) {
1378   case UNICODE_NULL:
1379   case UNICODE_CR:
1380   case UNICODE_SPACE:
1381   case UNICODE_TAB:
1382   case UNICODE_LF:
1383     return TRUE;
1384
1385   default:
1386     return FALSE;
1387   }
1388 }
1389
1390 static
1391 UINT32
1392 SkipWhiteSpace (
1393   SOURCE_FILE *SourceFile
1394   )
1395 {
1396   UINT32  Count;
1397
1398   Count = 0;
1399   while (!EndOfFile (SourceFile)) {
1400     Count++;
1401     switch (*SourceFile->FileBufferPtr) {
1402     case UNICODE_NULL:
1403     case UNICODE_CR:
1404     case UNICODE_SPACE:
1405     case UNICODE_TAB:
1406       SourceFile->FileBufferPtr++;
1407       break;
1408
1409     case UNICODE_LF:
1410       SourceFile->FileBufferPtr++;
1411       SourceFile->LineNum++;
1412       if (mGlobals.Verbose) {
1413         printf ("%d: %S\n", SourceFile->LineNum, SourceFile->FileBufferPtr);
1414       }
1415       break;
1416
1417     default:
1418       return Count - 1;
1419     }
1420   }
1421   //
1422   // Some tokens require trailing whitespace. If we're at the end of the
1423   // file, then we count that as well.
1424   //
1425   if ((Count == 0) && (EndOfFile (SourceFile))) {
1426     Count++;
1427   }
1428
1429   return Count;
1430 }
1431
1432 static
1433 UINT32
1434 wstrcmp (
1435   WCHAR *Buffer,
1436   WCHAR *Str
1437   )
1438 {
1439   UINT32  Len;
1440
1441   Len = 0;
1442   while (*Str == *Buffer) {
1443     Buffer++;
1444     Str++;
1445     Len++;
1446   }
1447
1448   if (*Str) {
1449     return 0;
1450   }
1451
1452   return Len;
1453 }
1454 //
1455 // Given a filename, try to find it along the include paths.
1456 //
1457 static
1458 FILE *
1459 FindFile (
1460   IN CHAR8   *FileName,
1461   OUT CHAR8  *FoundFileName,
1462   IN UINT32  FoundFileNameLen
1463   )
1464 {
1465   FILE              *Fptr;
1466   TEXT_STRING_LIST  *List;
1467
1468   //
1469   // Traverse the list of paths and try to find the file
1470   //
1471   List = mGlobals.IncludePaths;
1472   while (List != NULL) {
1473     //
1474     // Put the path and filename together
1475     //
1476     if (strlen (List->Str) + strlen (FileName) + 1 > FoundFileNameLen) {
1477       Error (UTILITY_NAME, 0, 0, NULL, "internal error - cannot concatenate path+filename");
1478       return NULL;
1479     }
1480     //
1481     // Append the filename to this include path and try to open the file.
1482     //
1483     strcpy (FoundFileName, List->Str);
1484     strcat (FoundFileName, FileName);
1485     if ((Fptr = fopen (FoundFileName, "rb")) != NULL) {
1486       //
1487       // Return the file pointer
1488       //
1489       return Fptr;
1490     }
1491
1492     List = List->Next;
1493   }
1494   //
1495   // Not found
1496   //
1497   FoundFileName[0] = 0;
1498   return NULL;
1499 }
1500 //
1501 // Process the command-line arguments
1502 //
1503 static
1504 STATUS
1505 ProcessArgs (
1506   int   Argc,
1507   char  *Argv[]
1508   )
1509 {
1510   TEXT_STRING_LIST  *NewList;
1511   //
1512   // Clear our globals
1513   //
1514   memset ((char *) &mGlobals, 0, sizeof (mGlobals));
1515   strcpy (mGlobals.BaseName, DEFAULT_BASE_NAME);
1516   //
1517   // Skip program name
1518   //
1519   Argc--;
1520   Argv++;
1521
1522   if (Argc == 0) {
1523     Usage ();
1524     return STATUS_ERROR;
1525   }
1526
1527   if ((strcmp(Argv[0], "-h") == 0) || (strcmp(Argv[0], "--help") == 0) ||
1528       (strcmp(Argv[0], "-?") == 0) || (strcmp(Argv[0], "/?") == 0)) {
1529     Usage();
1530     return STATUS_ERROR;
1531   }
1532
1533   if ((strcmp(Argv[0], "-V") == 0) || (strcmp(Argv[0], "--version") == 0)) {
1534     Version();
1535     return STATUS_ERROR;
1536   }
1537
1538   mGlobals.Mode = MODE_UNKNOWN;
1539   //
1540   // Process until no more -args.
1541   //
1542   while ((Argc > 0) && (Argv[0][0] == '-')) {
1543     //
1544     // -parse option
1545     //
1546     if (stricmp (Argv[0], "-parse") == 0) {
1547       if (mGlobals.Mode != MODE_UNKNOWN) {
1548         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1549         return STATUS_ERROR;
1550       }
1551
1552       mGlobals.Mode = MODE_PARSE;
1553       //
1554       // -scan option
1555       //
1556     } else if (stricmp (Argv[0], "-scan") == 0) {
1557       if (mGlobals.Mode != MODE_UNKNOWN) {
1558         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1559         return STATUS_ERROR;
1560       }
1561
1562       mGlobals.Mode = MODE_SCAN;
1563       //
1564       // -vscan verbose scanning option
1565       //
1566     } else if (stricmp (Argv[0], "-vscan") == 0) {
1567       mGlobals.VerboseScan = TRUE;
1568       //
1569       // -dump option
1570       //
1571     } else if (stricmp (Argv[0], "-dump") == 0) {
1572       if (mGlobals.Mode != MODE_UNKNOWN) {
1573         Error (NULL, 0, 0, "only one of -parse/-scan/-dump allowed", NULL);
1574         return STATUS_ERROR;
1575       }
1576
1577       mGlobals.Mode = MODE_DUMP;
1578     } else if (stricmp (Argv[0], "-uqs") == 0) {
1579       mGlobals.UnquotedStrings = TRUE;
1580       //
1581       // -i path    add include search path when parsing
1582       //
1583     } else if (stricmp (Argv[0], "-i") == 0) {
1584       //
1585       // check for one more arg
1586       //
1587       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1588         Error (UTILITY_NAME, 0, 0, Argv[0], "missing include path");
1589         return STATUS_ERROR;
1590       }
1591       //
1592       // Allocate memory for a new list element, fill it in, and
1593       // add it to our list of include paths. Always make sure it
1594       // has a "\" on the end of it.
1595       //
1596       NewList = malloc (sizeof (TEXT_STRING_LIST));
1597       if (NewList == NULL) {
1598         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1599         return STATUS_ERROR;
1600       }
1601
1602       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1603       NewList->Str = malloc (strlen (Argv[1]) + 2);
1604       if (NewList->Str == NULL) {
1605         free (NewList);
1606         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1607         return STATUS_ERROR;
1608       }
1609
1610       strcpy (NewList->Str, Argv[1]);
1611       if (NewList->Str[strlen (NewList->Str) - 1] != FILE_SEP_CHAR) {
1612         strcat (NewList->Str, FILE_SEP_STRING);
1613       }
1614       //
1615       // Add it to our linked list
1616       //
1617       if (mGlobals.IncludePaths == NULL) {
1618         mGlobals.IncludePaths = NewList;
1619       } else {
1620         mGlobals.LastIncludePath->Next = NewList;
1621       }
1622
1623       mGlobals.LastIncludePath = NewList;
1624       Argc--;
1625       Argv++;
1626     } else if (stricmp (Argv[0], "-if") == 0) {
1627       //
1628       // Indirection file -- check for one more arg
1629       //
1630       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1631         Error (UTILITY_NAME, 0, 0, Argv[0], "missing indirection file name");
1632         return STATUS_ERROR;
1633       }
1634       //
1635       // Allocate memory for a new list element, fill it in, and
1636       // add it to our list of include paths. Always make sure it
1637       // has a "\" on the end of it.
1638       //
1639       NewList = malloc (sizeof (TEXT_STRING_LIST));
1640       if (NewList == NULL) {
1641         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1642         return STATUS_ERROR;
1643       }
1644
1645       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1646       NewList->Str = malloc (strlen (Argv[1]) + 1);
1647       if (NewList->Str == NULL) {
1648         free (NewList);
1649         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1650         return STATUS_ERROR;
1651       }
1652
1653       strcpy (NewList->Str, Argv[1]);
1654       //
1655       // Add it to our linked list
1656       //
1657       if (mGlobals.IndirectionFileName == NULL) {
1658         mGlobals.IndirectionFileName = NewList;
1659       } else {
1660         mGlobals.LastIndirectionFileName->Next = NewList;
1661       }
1662
1663       mGlobals.LastIndirectionFileName = NewList;
1664       Argc--;
1665       Argv++;
1666     } else if (stricmp (Argv[0], "-db") == 0) {
1667       //
1668       // -db option to specify a database file.
1669       // Check for one more arg (the database file name)
1670       //
1671       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1672         Error (UTILITY_NAME, 0, 0, Argv[0], "missing database file name");
1673         return STATUS_ERROR;
1674       }
1675
1676       NewList = malloc (sizeof (TEXT_STRING_LIST));
1677       if (NewList == NULL) {
1678         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1679         return STATUS_ERROR;
1680       }
1681
1682       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1683       NewList->Str = malloc (strlen (Argv[1]) + 1);
1684       if (NewList->Str == NULL) {
1685         free (NewList);
1686         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1687         return STATUS_ERROR;
1688       }
1689
1690       strcpy (NewList->Str, Argv[1]);
1691       //
1692       // Add it to our linked list
1693       //
1694       if (mGlobals.DatabaseFileName == NULL) {
1695         mGlobals.DatabaseFileName = NewList;
1696       } else {
1697         mGlobals.LastDatabaseFileName->Next = NewList;
1698       }
1699
1700       mGlobals.LastDatabaseFileName = NewList;
1701       Argc--;
1702       Argv++;
1703     } else if (stricmp (Argv[0], "-ou") == 0) {
1704       //
1705       // -ou option to specify an output unicode file to
1706       // which we can dump our database.
1707       //
1708       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1709         Error (UTILITY_NAME, 0, 0, Argv[0], "missing database dump output file name");
1710         return STATUS_ERROR;
1711       }
1712
1713       if (mGlobals.DumpUFileName[0] == 0) {
1714         strcpy (mGlobals.DumpUFileName, Argv[1]);
1715       } else {
1716         Error (UTILITY_NAME, 0, 0, Argv[1], "-ou option already specified with '%s'", mGlobals.DumpUFileName);
1717         return STATUS_ERROR;
1718       }
1719
1720       Argc--;
1721       Argv++;
1722     } else if (stricmp (Argv[0], "-hpk") == 0) {
1723       //
1724       // -hpk option to create an HII export pack of the input database file
1725       //
1726       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1727         Error (UTILITY_NAME, 0, 0, Argv[0], "missing raw string data dump output file name");
1728         return STATUS_ERROR;
1729       }
1730
1731       if (mGlobals.HiiExportPackFileName[0] == 0) {
1732         strcpy (mGlobals.HiiExportPackFileName, Argv[1]);
1733       } else {
1734         Error (UTILITY_NAME, 0, 0, Argv[1], "-or option already specified with '%s'", mGlobals.HiiExportPackFileName);
1735         return STATUS_ERROR;
1736       }
1737
1738       Argc--;
1739       Argv++;
1740     } else if ((stricmp (Argv[0], "-?") == 0) || (stricmp (Argv[0], "-h") == 0)) {
1741       Usage ();
1742       return STATUS_ERROR;
1743     } else if (stricmp (Argv[0], "-v") == 0) {
1744       mGlobals.Verbose = 1;
1745     } else if (stricmp (Argv[0], "-vdbw") == 0) {
1746       mGlobals.VerboseDatabaseWrite = 1;
1747     } else if (stricmp (Argv[0], "-vdbr") == 0) {
1748       mGlobals.VerboseDatabaseRead = 1;
1749     } else if (stricmp (Argv[0], "-newdb") == 0) {
1750       mGlobals.NewDatabase = 1;
1751     } else if (stricmp (Argv[0], "-ignorenotfound") == 0) {
1752       mGlobals.IgnoreNotFound = 1;
1753     } else if (stricmp (Argv[0], "-oc") == 0) {
1754       //
1755       // check for one more arg
1756       //
1757       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1758         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output C filename");
1759         return STATUS_ERROR;
1760       }
1761
1762       strcpy (mGlobals.StringCFileName, Argv[1]);
1763       Argc--;
1764       Argv++;
1765     } else if (stricmp (Argv[0], "-bn") == 0) {
1766       //
1767       // check for one more arg
1768       //
1769       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1770         Error (UTILITY_NAME, 0, 0, Argv[0], "missing base name");
1771         Usage ();
1772         return STATUS_ERROR;
1773       }
1774
1775       strcpy (mGlobals.BaseName, Argv[1]);
1776       Argc--;
1777       Argv++;
1778     } else if (stricmp (Argv[0], "-oh") == 0) {
1779       //
1780       // -oh to specify output .h defines file name
1781       //
1782       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1783         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output .h filename");
1784         return STATUS_ERROR;
1785       }
1786
1787       strcpy (mGlobals.StringHFileName, Argv[1]);
1788       Argc--;
1789       Argv++;
1790     } else if (stricmp (Argv[0], "-skipext") == 0) {
1791       //
1792       // -skipext to skip scanning of files with certain filename extensions
1793       //
1794       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1795         Error (UTILITY_NAME, 0, 0, Argv[0], "missing filename extension");
1796         return STATUS_ERROR;
1797       }
1798       //
1799       // Allocate memory for a new list element, fill it in, and
1800       // add it to our list of excluded extensions. Always make sure it
1801       // has a "." as the first character.
1802       //
1803       NewList = malloc (sizeof (TEXT_STRING_LIST));
1804       if (NewList == NULL) {
1805         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1806         return STATUS_ERROR;
1807       }
1808
1809       memset ((char *) NewList, 0, sizeof (TEXT_STRING_LIST));
1810       NewList->Str = malloc (strlen (Argv[1]) + 2);
1811       if (NewList->Str == NULL) {
1812         free (NewList);
1813         Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1814         return STATUS_ERROR;
1815       }
1816
1817       if (Argv[1][0] == '.') {
1818         strcpy (NewList->Str, Argv[1]);
1819       } else {
1820         NewList->Str[0] = '.';
1821         strcpy (NewList->Str + 1, Argv[1]);
1822       }
1823       //
1824       // Add it to our linked list
1825       //
1826       if (mGlobals.SkipExt == NULL) {
1827         mGlobals.SkipExt = NewList;
1828       } else {
1829         mGlobals.LastSkipExt->Next = NewList;
1830       }
1831
1832       mGlobals.LastSkipExt = NewList;
1833       Argc--;
1834       Argv++;
1835     } else if (stricmp (Argv[0], "-lang") == 0) {
1836       //
1837       // "-lang eng" or "-lang spa+cat" to only output certain languages
1838       //
1839       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1840         Error (UTILITY_NAME, 0, 0, Argv[0], "missing language name");
1841         Usage ();
1842         return STATUS_ERROR;
1843       }
1844
1845       if (AddCommandLineLanguage (Argv[1]) != STATUS_SUCCESS) {
1846         return STATUS_ERROR;
1847       }
1848
1849       Argc--;
1850       Argv++;
1851     } else if (stricmp (Argv[0], "-od") == 0) {
1852       //
1853       // Output database file name -- check for another arg
1854       //
1855       if ((Argc <= 1) || (Argv[1][0] == '-')) {
1856         Error (UTILITY_NAME, 0, 0, Argv[0], "missing output database file name");
1857         return STATUS_ERROR;
1858       }
1859
1860       strcpy (mGlobals.OutputDatabaseFileName, Argv[1]);
1861       Argv++;
1862       Argc--;
1863     } else {
1864       //
1865       // Unrecognized arg
1866       //
1867       Error (UTILITY_NAME, 0, 0, Argv[0], "unrecognized option");
1868       Usage ();
1869       return STATUS_ERROR;
1870     }
1871
1872     Argv++;
1873     Argc--;
1874   }
1875   //
1876   // Make sure they specified the mode parse/scan/dump
1877   //
1878   if (mGlobals.Mode == MODE_UNKNOWN) {
1879     Error (NULL, 0, 0, "must specify one of -parse/-scan/-dump", NULL);
1880     return STATUS_ERROR;
1881   }
1882   //
1883   // All modes require a database filename
1884   //
1885   if (mGlobals.DatabaseFileName == 0) {
1886     Error (NULL, 0, 0, "must specify a database filename using -db DbFileName", NULL);
1887     Usage ();
1888     return STATUS_ERROR;
1889   }
1890   //
1891   // If dumping the database file, then return immediately if all
1892   // parameters check out.
1893   //
1894   if (mGlobals.Mode == MODE_DUMP) {
1895     //
1896     // Not much use if they didn't specify -oh or -oc or -ou or -hpk
1897     //
1898     if ((mGlobals.DumpUFileName[0] == 0) &&
1899         (mGlobals.StringHFileName[0] == 0) &&
1900         (mGlobals.StringCFileName[0] == 0) &&
1901         (mGlobals.HiiExportPackFileName[0] == 0)
1902         ) {
1903       Error (NULL, 0, 0, "-dump without -oc/-oh/-ou/-hpk is a NOP", NULL);
1904       return STATUS_ERROR;
1905     }
1906
1907     return STATUS_SUCCESS;
1908   }
1909   //
1910   // Had to specify source string file and output string defines header filename.
1911   //
1912   if (mGlobals.Mode == MODE_SCAN) {
1913     if (Argc < 1) {
1914       Error (UTILITY_NAME, 0, 0, NULL, "must specify at least one source file to scan with -scan");
1915       Usage ();
1916       return STATUS_ERROR;
1917     }
1918     //
1919     // Get the list of filenames
1920     //
1921     while (Argc > 0) {
1922       NewList = malloc (sizeof (TEXT_STRING_LIST));
1923       if (NewList == NULL) {
1924         Error (UTILITY_NAME, 0, 0, "memory allocation failure", NULL);
1925         return STATUS_ERROR;
1926       }
1927
1928       memset (NewList, 0, sizeof (TEXT_STRING_LIST));
1929       NewList->Str = (CHAR8 *) malloc (strlen (Argv[0]) + 1);
1930       if (NewList->Str == NULL) {
1931         Error (UTILITY_NAME, 0, 0, "memory allocation failure", NULL);
1932         return STATUS_ERROR;
1933       }
1934
1935       strcpy (NewList->Str, Argv[0]);
1936       if (mGlobals.ScanFileName == NULL) {
1937         mGlobals.ScanFileName = NewList;
1938       } else {
1939         mGlobals.LastScanFileName->Next = NewList;
1940       }
1941
1942       mGlobals.LastScanFileName = NewList;
1943       Argc--;
1944       Argv++;
1945     }
1946   } else {
1947     //
1948     // Parse mode -- must specify an input unicode file name
1949     //
1950     if (Argc < 1) {
1951       Error (UTILITY_NAME, 0, 0, NULL, "must specify input unicode string file name with -parse");
1952       Usage ();
1953       return STATUS_ERROR;
1954     }
1955
1956     strcpy (mGlobals.SourceFiles.FileName, Argv[0]);
1957   }
1958
1959   return STATUS_SUCCESS;
1960 }
1961 //
1962 // Found "-lang eng,spa+cat" on the command line. Parse the
1963 // language list and save the setting for later processing.
1964 //
1965 static
1966 STATUS
1967 AddCommandLineLanguage (
1968   IN CHAR8         *Language
1969   )
1970 {
1971   WCHAR_STRING_LIST *WNewList;
1972   WCHAR             *From;
1973   WCHAR             *To;
1974   //
1975   // Keep processing the input string until we find the end.
1976   //
1977   while (*Language) {
1978     //
1979     // Allocate memory for a new list element, fill it in, and
1980     // add it to our list.
1981     //
1982     WNewList = MALLOC (sizeof (WCHAR_STRING_LIST));
1983     if (WNewList == NULL) {
1984       Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1985       return STATUS_ERROR;
1986     }
1987
1988     memset ((char *) WNewList, 0, sizeof (WCHAR_STRING_LIST));
1989     WNewList->Str = malloc ((strlen (Language) + 1) * sizeof (WCHAR));
1990     if (WNewList->Str == NULL) {
1991       free (WNewList);
1992       Error (UTILITY_NAME, 0, 0, NULL, "memory allocation failure");
1993       return STATUS_ERROR;
1994     }
1995     //
1996     // Copy it as unicode to our new structure. Then remove the
1997     // plus signs in it, and verify each language name is 3 characters
1998     // long. If we find a comma, then we're done with this group, so
1999     // break out.
2000     //
2001     UnicodeSPrint (WNewList->Str, (strlen (Language) + 1) * sizeof (WCHAR), L"%a", Language);
2002     From = To = WNewList->Str;
2003     while (*From) {
2004       if (*From == L',') {
2005         break;
2006       }
2007
2008       if ((StrLen (From) < LANGUAGE_IDENTIFIER_NAME_LEN) ||
2009             (
2010               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != 0) &&
2011               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != UNICODE_PLUS_SIGN) &&
2012               (From[LANGUAGE_IDENTIFIER_NAME_LEN] != L',')
2013             )
2014           ) {
2015         Error (UTILITY_NAME, 0, 0, Language, "invalid format for language name on command line");
2016         FREE (WNewList->Str);
2017         FREE (WNewList);
2018         return STATUS_ERROR;
2019       }
2020
2021       StrnCpy (To, From, LANGUAGE_IDENTIFIER_NAME_LEN);
2022       To += LANGUAGE_IDENTIFIER_NAME_LEN;
2023       From += LANGUAGE_IDENTIFIER_NAME_LEN;
2024       if (*From == L'+') {
2025         From++;
2026       }
2027     }
2028
2029     *To = 0;
2030     //
2031     // Add it to our linked list
2032     //
2033     if (mGlobals.Language == NULL) {
2034       mGlobals.Language = WNewList;
2035     } else {
2036       mGlobals.LastLanguage->Next = WNewList;
2037     }
2038
2039     mGlobals.LastLanguage = WNewList;
2040     //
2041     // Skip to next entry (comma-separated list)
2042     //
2043     while (*Language) {
2044       if (*Language == L',') {
2045         Language++;
2046         break;
2047       }
2048
2049       Language++;
2050     }
2051   }
2052
2053   return STATUS_SUCCESS;
2054 }
2055 //
2056 // The contents of the text file are expected to be (one per line)
2057 //   STRING_IDENTIFIER_NAME   ScopeName
2058 // For example:
2059 //   STR_ID_MY_FAVORITE_STRING   IBM
2060 //
2061 static
2062 STATUS
2063 ParseIndirectionFiles (
2064   TEXT_STRING_LIST    *Files
2065   )
2066 {
2067   FILE                        *Fptr;
2068   CHAR8                       Line[200];
2069   CHAR8                       *StringName;
2070   CHAR8                       *ScopeName;
2071   CHAR8                       *End;
2072   UINT32                      LineCount;
2073   WCHAR_MATCHING_STRING_LIST  *NewList;
2074
2075   Line[sizeof (Line) - 1] = 0;
2076   Fptr                    = NULL;
2077   while (Files != NULL) {
2078     Fptr      = fopen (Files->Str, "r");
2079     LineCount = 0;
2080     if (Fptr == NULL) {
2081       Error (NULL, 0, 0, Files->Str, "failed to open input indirection file for reading");
2082       return STATUS_ERROR;
2083     }
2084
2085     while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2086       //
2087       // remove terminating newline for error printing purposes.
2088       //
2089       if (Line[strlen (Line) - 1] == '\n') {
2090         Line[strlen (Line) - 1] = 0;
2091       }
2092
2093       LineCount++;
2094       if (Line[sizeof (Line) - 1] != 0) {
2095         Error (Files->Str, LineCount, 0, "line length exceeds maximum supported", NULL);
2096         goto Done;
2097       }
2098
2099       StringName = Line;
2100       while (*StringName && (isspace (*StringName))) {
2101         StringName++;
2102       }
2103
2104       if (*StringName) {
2105         if ((*StringName == '_') || isalpha (*StringName)) {
2106           End = StringName;
2107           while ((*End) && (*End == '_') || (isalnum (*End))) {
2108             End++;
2109           }
2110
2111           if (isspace (*End)) {
2112             *End = 0;
2113             End++;
2114             while (isspace (*End)) {
2115               End++;
2116             }
2117
2118             if (*End) {
2119               ScopeName = End;
2120               while (*End && !isspace (*End)) {
2121                 End++;
2122               }
2123
2124               *End = 0;
2125               //
2126               // Add the string name/scope pair
2127               //
2128               NewList = malloc (sizeof (WCHAR_MATCHING_STRING_LIST));
2129               if (NewList == NULL) {
2130                 Error (NULL, 0, 0, "memory allocation error", NULL);
2131                 goto Done;
2132               }
2133
2134               memset (NewList, 0, sizeof (WCHAR_MATCHING_STRING_LIST));
2135               NewList->Str1 = (WCHAR *) malloc ((strlen (StringName) + 1) * sizeof (WCHAR));
2136               NewList->Str2 = (WCHAR *) malloc ((strlen (ScopeName) + 1) * sizeof (WCHAR));
2137               if ((NewList->Str1 == NULL) || (NewList->Str2 == NULL)) {
2138                 Error (NULL, 0, 0, "memory allocation error", NULL);
2139                 goto Done;
2140               }
2141
2142               UnicodeSPrint (NewList->Str1, strlen (StringName) + 1, L"%a", StringName);
2143               UnicodeSPrint (NewList->Str2, strlen (ScopeName) + 1, L"%a", ScopeName);
2144               if (mGlobals.IndirectionList == NULL) {
2145                 mGlobals.IndirectionList = NewList;
2146               } else {
2147                 mGlobals.LastIndirectionList->Next = NewList;
2148               }
2149
2150               mGlobals.LastIndirectionList = NewList;
2151             } else {
2152               Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2153               goto Done;
2154             }
2155           } else {
2156             Error (Files->Str, LineCount, 0, StringName, "invalid line : expected 'StringIdentifier Scope'");
2157             goto Done;
2158           }
2159         } else {
2160           Error (Files->Str, LineCount, 0, StringName, "invalid string identifier");
2161           goto Done;
2162         }
2163       }
2164     }
2165
2166     fclose (Fptr);
2167     Fptr  = NULL;
2168     Files = Files->Next;
2169   }
2170
2171 Done:
2172   if (Fptr != NULL) {
2173     fclose (Fptr);
2174     return STATUS_ERROR;
2175   }
2176
2177   return STATUS_SUCCESS;
2178 }
2179
2180 static
2181 STATUS
2182 ScanFiles (
2183   TEXT_STRING_LIST *ScanFiles
2184   )
2185 {
2186   char              Line[MAX_LINE_LEN];
2187   FILE              *Fptr;
2188   UINT32            LineNum;
2189   char              *Cptr;
2190   char              *SavePtr;
2191   char              *TermPtr;
2192   char              *StringTokenPos;
2193   TEXT_STRING_LIST  *SList;
2194   BOOLEAN           SkipIt;
2195
2196   //
2197   // Put a null-terminator at the end of the line. If we read in
2198   // a line longer than we support, then we can catch it.
2199   //
2200   Line[MAX_LINE_LEN - 1] = 0;
2201   //
2202   // Process each file. If they gave us a skip extension list, then
2203   // skip it if the extension matches.
2204   //
2205   while (ScanFiles != NULL) {
2206     SkipIt = FALSE;
2207     for (SList = mGlobals.SkipExt; SList != NULL; SList = SList->Next) {
2208       if ((strlen (ScanFiles->Str) > strlen (SList->Str)) &&
2209           (strcmp (ScanFiles->Str + strlen (ScanFiles->Str) - strlen (SList->Str), SList->Str) == 0)
2210           ) {
2211         SkipIt = TRUE;
2212         //
2213         // printf ("Match: %s : %s\n", ScanFiles->Str, SList->Str);
2214         //
2215         break;
2216       }
2217     }
2218
2219     if (!SkipIt) {
2220       if (mGlobals.VerboseScan) {
2221         printf ("Scanning %s\n", ScanFiles->Str);
2222       }
2223
2224       Fptr = fopen (ScanFiles->Str, "r");
2225       if (Fptr == NULL) {
2226         Error (NULL, 0, 0, ScanFiles->Str, "failed to open input file for scanning");
2227         return STATUS_ERROR;
2228       }
2229
2230       LineNum = 0;
2231       while (fgets (Line, sizeof (Line), Fptr) != NULL) {
2232         LineNum++;
2233         if (Line[MAX_LINE_LEN - 1] != 0) {
2234           Error (ScanFiles->Str, LineNum, 0, "line length exceeds maximum supported by tool", NULL);
2235           fclose (Fptr);
2236           return STATUS_ERROR;
2237         }
2238         //
2239         // Remove the newline from the input line so we can print a warning message
2240         //
2241         if (Line[strlen (Line) - 1] == '\n') {
2242           Line[strlen (Line) - 1] = 0;
2243         }
2244         //
2245         // Terminate the line at // comments
2246         //
2247         Cptr = strstr (Line, "//");
2248         if (Cptr != NULL) {
2249           *Cptr = 0;
2250         }
2251
2252         Cptr = Line;
2253         while ((Cptr = strstr (Cptr, STRING_TOKEN)) != NULL) {
2254           //
2255           // Found "STRING_TOKEN". Make sure we don't have NUM_STRING_TOKENS or
2256           // something like that. Then make sure it's followed by
2257           // an open parenthesis, a string identifier, and then a closing
2258           // parenthesis.
2259           //
2260           if (mGlobals.VerboseScan) {
2261             printf (" %d: %s", LineNum, Cptr);
2262           }
2263
2264           if (((Cptr == Line) || (!IsValidIdentifierChar (*(Cptr - 1), FALSE))) &&
2265               (!IsValidIdentifierChar (*(Cptr + sizeof (STRING_TOKEN) - 1), FALSE))
2266               ) {
2267             StringTokenPos  = Cptr;
2268             SavePtr         = Cptr;
2269             Cptr += strlen (STRING_TOKEN);
2270             while (*Cptr && isspace (*Cptr) && (*Cptr != '(')) {
2271               Cptr++;
2272             }
2273
2274             if (*Cptr != '(') {
2275               Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2276             } else {
2277               //
2278               // Skip over the open-parenthesis and find the next non-blank character
2279               //
2280               Cptr++;
2281               while (isspace (*Cptr)) {
2282                 Cptr++;
2283               }
2284
2285               SavePtr = Cptr;
2286               if ((*Cptr == '_') || isalpha (*Cptr)) {
2287                 while ((*Cptr == '_') || (isalnum (*Cptr))) {
2288                   Cptr++;
2289                 }
2290
2291                 TermPtr = Cptr;
2292                 while (*Cptr && isspace (*Cptr)) {
2293                   Cptr++;
2294                 }
2295
2296                 if (*Cptr != ')') {
2297                   Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected "STRING_TOKEN "(identifier)");
2298                 }
2299
2300                 if (*TermPtr) {
2301                   *TermPtr  = 0;
2302                   Cptr      = TermPtr + 1;
2303                 } else {
2304                   Cptr = TermPtr;
2305                 }
2306                 //
2307                 // Add the string identifier to the list of used strings
2308                 //
2309                 ParserSetPosition (ScanFiles->Str, LineNum);
2310                 StringDBSetStringReferenced (SavePtr, mGlobals.IgnoreNotFound);
2311                 if (mGlobals.VerboseScan) {
2312                   printf ("...referenced %s", SavePtr);
2313                 }
2314               } else {
2315                 Warning (ScanFiles->Str, LineNum, 0, StringTokenPos, "expected valid string identifier name");
2316               }
2317             }
2318           } else {
2319             //
2320             // Found it, but it's a substring of something else. Advance our pointer.
2321             //
2322             Cptr++;
2323           }
2324
2325           if (mGlobals.VerboseScan) {
2326             printf ("\n");
2327           }
2328         }
2329       }
2330
2331       fclose (Fptr);
2332     } else {
2333       //
2334       // Skipping this file type
2335       //
2336       if (mGlobals.VerboseScan) {
2337         printf ("Skip scanning of %s\n", ScanFiles->Str);
2338       }
2339     }
2340
2341     ScanFiles = ScanFiles->Next;
2342   }
2343
2344   return STATUS_SUCCESS;
2345 }
2346 //
2347 // Free the global string lists we allocated memory for
2348 //
2349 static
2350 void
2351 FreeLists (
2352   VOID
2353   )
2354 {
2355   TEXT_STRING_LIST  *Temp;
2356   WCHAR_STRING_LIST *WTemp;
2357
2358   //
2359   // Traverse the include paths, freeing each
2360   //
2361   while (mGlobals.IncludePaths != NULL) {
2362     Temp = mGlobals.IncludePaths->Next;
2363     free (mGlobals.IncludePaths->Str);
2364     free (mGlobals.IncludePaths);
2365     mGlobals.IncludePaths = Temp;
2366   }
2367   //
2368   // If we did a scan, then free up our
2369   // list of files to scan.
2370   //
2371   while (mGlobals.ScanFileName != NULL) {
2372     Temp = mGlobals.ScanFileName->Next;
2373     free (mGlobals.ScanFileName->Str);
2374     free (mGlobals.ScanFileName);
2375     mGlobals.ScanFileName = Temp;
2376   }
2377   //
2378   // If they gave us a list of filename extensions to
2379   // skip on scan, then free them up.
2380   //
2381   while (mGlobals.SkipExt != NULL) {
2382     Temp = mGlobals.SkipExt->Next;
2383     free (mGlobals.SkipExt->Str);
2384     free (mGlobals.SkipExt);
2385     mGlobals.SkipExt = Temp;
2386   }
2387   //
2388   // Free up any languages specified
2389   //
2390   while (mGlobals.Language != NULL) {
2391     WTemp = mGlobals.Language->Next;
2392     free (mGlobals.Language->Str);
2393     free (mGlobals.Language);
2394     mGlobals.Language = WTemp;
2395   }
2396   //
2397   // Free up our indirection list
2398   //
2399   while (mGlobals.IndirectionList != NULL) {
2400     mGlobals.LastIndirectionList = mGlobals.IndirectionList->Next;
2401     free (mGlobals.IndirectionList->Str1);
2402     free (mGlobals.IndirectionList->Str2);
2403     free (mGlobals.IndirectionList);
2404     mGlobals.IndirectionList = mGlobals.LastIndirectionList;
2405   }
2406
2407   while (mGlobals.IndirectionFileName != NULL) {
2408     mGlobals.LastIndirectionFileName = mGlobals.IndirectionFileName->Next;
2409     free (mGlobals.IndirectionFileName->Str);
2410     free (mGlobals.IndirectionFileName);
2411     mGlobals.IndirectionFileName = mGlobals.LastIndirectionFileName;
2412   }
2413 }
2414
2415 static
2416 BOOLEAN
2417 IsValidIdentifierChar (
2418   CHAR8     Char,
2419   BOOLEAN   FirstChar
2420   )
2421 {
2422   //
2423   // If it's the first character of an identifier, then
2424   // it must be one of [A-Za-z_].
2425   //
2426   if (FirstChar) {
2427     if (isalpha (Char) || (Char == '_')) {
2428       return TRUE;
2429     }
2430   } else {
2431     //
2432     // If it's not the first character, then it can
2433     // be one of [A-Za-z_0-9]
2434     //
2435     if (isalnum (Char) || (Char == '_')) {
2436       return TRUE;
2437     }
2438   }
2439
2440   return FALSE;
2441 }
2442
2443 static
2444 void
2445 RewindFile (
2446   SOURCE_FILE *SourceFile
2447   )
2448 {
2449   SourceFile->LineNum       = 1;
2450   SourceFile->FileBufferPtr = SourceFile->FileBuffer;
2451   SourceFile->EndOfFile     = 0;
2452 }
2453
2454 static
2455 BOOLEAN
2456 SkipTo (
2457   SOURCE_FILE *SourceFile,
2458   WCHAR       WChar,
2459   BOOLEAN     StopAfterNewline
2460   )
2461 {
2462   while (!EndOfFile (SourceFile)) {
2463     //
2464     // Check for the character of interest
2465     //
2466     if (SourceFile->FileBufferPtr[0] == WChar) {
2467       return TRUE;
2468     } else {
2469       if (SourceFile->FileBufferPtr[0] == UNICODE_LF) {
2470         SourceFile->LineNum++;
2471         if (StopAfterNewline) {
2472           SourceFile->FileBufferPtr++;
2473           if (SourceFile->FileBufferPtr[0] == 0) {
2474             SourceFile->FileBufferPtr++;
2475           }
2476
2477           return FALSE;
2478         }
2479       }
2480
2481       SourceFile->FileBufferPtr++;
2482     }
2483   }
2484
2485   return FALSE;
2486 }
2487
2488 static
2489 void
2490 Version (
2491   VOID
2492   )
2493 /*++
2494
2495 Routine Description:
2496
2497   Displays the standard utility information to SDTOUT
2498
2499 Arguments:
2500
2501   None
2502
2503 Returns:
2504
2505   None
2506
2507 --*/
2508 {
2509   printf ("%s v%d.%d -Utility to process unicode strings file..\n", UTILITY_NAME, UTILITY_MAJOR_VERSION, UTILITY_MINOR_VERSION);
2510   printf ("Copyright (c) 1999-2007 Intel Corporation. All rights reserved.\n");
2511 }
2512
2513 static
2514 void
2515 Usage (
2516   VOID
2517   )
2518 /*++
2519
2520 Routine Description:
2521
2522   Print usage information for this utility.
2523
2524 Arguments:
2525
2526   None.
2527
2528 Returns:
2529
2530   Nothing.
2531
2532 --*/
2533 {
2534   int               Index;
2535   static const char *Str[] = {
2536     "",
2537     "  Usage: "UTILITY_NAME " -parse {parse options} [FileNames]",
2538     "         "UTILITY_NAME " -scan {scan options} [FileName]",
2539     "         "UTILITY_NAME " -dump {dump options}",
2540     "    Common options include:",
2541     "      -h,--help,-?,/?  display help messages",
2542     "      -V,--version     display version information",
2543     "      -db Database     required name of output/input database file",
2544     "      -bn BaseName     for use in the .h and .c output files",
2545     "                       Default = "DEFAULT_BASE_NAME,
2546     "      -v               for verbose output",
2547     "      -vdbw            for verbose output when writing database",
2548     "      -vdbr            for verbose output when reading database",
2549     "      -od FileName     to specify an output database file name",
2550     "    Parse options include:",
2551     "      -i IncludePath   add IncludePath to list of search paths",
2552     "      -newdb           to not read in existing database file",
2553     "      -uqs             to indicate that unquoted strings are used",
2554     "      FileNames        name of one or more unicode files to parse",
2555     "    Scan options include:",
2556     "      -scan            scan text file(s) for STRING_TOKEN() usage",
2557     "      -skipext .ext    to skip scan of files with .ext filename extension",
2558     "      -ignorenotfound  ignore if a given STRING_TOKEN(STR) is not ",
2559     "                       found in the database",
2560     "      FileNames        one or more files to scan",
2561     "    Dump options include:",
2562     "      -oc FileName     write string data to FileName",
2563     "      -oh FileName     write string defines to FileName",
2564     "      -ou FileName     dump database to unicode file FileName",
2565     "      -lang Lang       only dump for the language 'Lang'",
2566     "      -if FileName     to specify an indirection file",
2567     "      -hpk FileName    to create an HII export pack of the strings",
2568     "",
2569     "  The expected process is to parse a unicode string file to create an initial",
2570     "  database of string identifier names and string definitions. Then text files",
2571     "  should be scanned for STRING_TOKEN() usages, and the referenced",
2572     "  strings will be tagged as used in the database. After all files have been",
2573     "  scanned, then the database should be dumped to create the necessary output",
2574     "  files.",
2575     "",
2576     NULL
2577   };
2578
2579   Version();
2580
2581   for (Index = 0; Str[Index] != NULL; Index++) {
2582     fprintf (stdout, "%s\n", Str[Index]);
2583   }
2584 }