# This file is used to parse a strings file and create or add to a string database.
#
# Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution. The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
18 import Common
.EdkLogger
as EdkLogger
19 from Common
.BuildToolError
import *
20 from UniClassObject
import *
21 from io
import BytesIO
22 from struct
import pack
, unpack
23 from Common
.LongFilePathSupport
import OpenLongFilePath
as open
##
# HII string block type codes (EFI_HII_SIBT_*).
#
# Every code is kept as a hex *string*: the generators in this file paste the
# literal verbatim into the emitted C array and convert it with int(x, 16)
# when filling the binary buffer.
#
EFI_HII_SIBT_END = '0x00'
EFI_HII_SIBT_STRING_SCSU = '0x10'
EFI_HII_SIBT_STRING_SCSU_FONT = '0x11'
EFI_HII_SIBT_STRINGS_SCSU = '0x12'
EFI_HII_SIBT_STRINGS_SCSU_FONT = '0x13'
EFI_HII_SIBT_STRING_UCS2 = '0x14'
EFI_HII_SIBT_STRING_UCS2_FONT = '0x15'
EFI_HII_SIBT_STRINGS_UCS2 = '0x16'
EFI_HII_SIBT_STRINGS_UCS2_FONT = '0x17'
EFI_HII_SIBT_DUPLICATE = '0x20'
EFI_HII_SIBT_SKIP2 = '0x21'
EFI_HII_SIBT_SKIP1 = '0x22'
EFI_HII_SIBT_EXT1 = '0x30'
EFI_HII_SIBT_EXT2 = '0x31'
EFI_HII_SIBT_EXT4 = '0x32'
EFI_HII_SIBT_FONT = '0x40'

# HII package type codes (same hex-string convention).
EFI_HII_PACKAGE_STRINGS = '0x04'
EFI_HII_PACKAGE_FORM = '0x02'

# Aliases naming the codes this generator actually emits.
StringPackageType = EFI_HII_PACKAGE_STRINGS
StringPackageForm = EFI_HII_PACKAGE_FORM
StringBlockType = EFI_HII_SIBT_STRING_UCS2
StringSkipType = EFI_HII_SIBT_SKIP2

##
# Text fragments used when rendering the generated .h / .c files.
#
# NOTE(review): COMMENT is referenced below but its definition is not visible
# in the reviewed text; '// ' is assumed -- confirm against the full file.
COMMENT = '// '
DEFINE_STR = '#define'
COMMENT_DEFINE_STR = COMMENT + DEFINE_STR
NOT_REFERENCED = 'not referenced'
COMMENT_NOT_REFERENCED = ' ' + COMMENT + NOT_REFERENCED
CHAR_ARRAY_DEFIN = 'unsigned char'
COMMON_FILE_NAME = 'Strings'
# Raw string: '\(' inside a normal literal is an invalid escape sequence.
STRING_TOKEN = re.compile(r'STRING_TOKEN *\(([A-Z0-9_]+) *\)', re.MULTILINE | re.UNICODE)

##
# Field sizes, in bytes, of the emitted package layout.
#
EFI_HII_ARRAY_SIZE_LENGTH = 4
EFI_HII_PACKAGE_HEADER_LENGTH = 4
EFI_HII_HDR_SIZE_LENGTH = 4
EFI_HII_STRING_OFFSET_LENGTH = 4
# NOTE(review): EFI_STRING_ID is used by CreateCFileContent but its definition
# is not visible in the reviewed text; 1 is assumed -- confirm.
EFI_STRING_ID = 1
EFI_STRING_ID_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW = 0
EFI_HII_LANGUAGE_WINDOW_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW_NUMBER = 16
EFI_HII_STRING_PACKAGE_HDR_LENGTH = (
    EFI_HII_PACKAGE_HEADER_LENGTH
    + EFI_HII_HDR_SIZE_LENGTH
    + EFI_HII_STRING_OFFSET_LENGTH
    + EFI_HII_LANGUAGE_WINDOW_LENGTH * EFI_HII_LANGUAGE_WINDOW_NUMBER
    + EFI_STRING_ID_LENGTH
)

# Banner lines for generated files.
# NOTE(review): the reviewed text shows only the three non-empty entries and
# never closes the list; the bare '//' separators after them are assumed.
H_C_FILE_HEADER = [
    '//',
    '// DO NOT EDIT -- auto-generated file',
    '//',
    '// This file is generated by the StrGather utility',
    '//',
]
LANGUAGE_NAME_STRING_NAME = '$LANGUAGE_NAME'
PRINTABLE_LANGUAGE_NAME_STRING_NAME = '$PRINTABLE_LANGUAGE_NAME'
## Render a number as a zero-padded, upper-case hex string
#
# The result always starts with "0x" and carries at least Digit hex digits.
#   DecToHexStr(1000)    is '0x000003E8'
#   DecToHexStr(1000, 6) is '0x0003E8'
#
# @param      Dec:     The number to convert
# @param      Digit:   Minimum count of hex digits (default 8)
#
# @retval:    The formatted hex string
#
def DecToHexStr(Dec, Digit = 8):
    FormatSpec = '0{0}X'.format(Digit)
    return '0x' + format(Dec, FormatSpec)
## Convert a number to a list of hex byte strings, least significant first
#
#   DecToHexList(1000)    is ['0xE8', '0x03', '0x00', '0x00']
#   DecToHexList(1000, 6) is ['0xE8', '0x03', '0x00']
#
# A value that needs more than Digit hex digits is truncated to its Digit
# low-order digits, so the returned bytes stay aligned with the value.
#
# @param      Dec:     The number to convert
# @param      Digit:   Count of hex digits to emit, two per list entry (default 8)
#
# @retval:    A list of '0xNN' strings
#
def DecToHexList(Dec, Digit = 8):
    # Without the [-Digit:] trim an over-wide value makes Hex longer than
    # Digit and the fixed slice offsets below would pick the wrong digits.
    Hex = '{0:0{1}X}'.format(Dec, Digit)[-Digit:]
    return ["0x" + Hex[Bit:Bit + 2] for Bit in range(Digit - 2, -1, -2)]
## Convert an ASCII string to a list of hex byte strings
#
#   AscToHexList('en-US') is ['0x65', '0x6E', '0x2D', '0x55', '0x53']
#
# @param      Ascii:     The ASCII text; a str, or a bytes/bytearray object
#
# @retval:    A list of '0xNN' strings, one per character
#
def AscToHexList(Ascii):
    # Iterating bytes yields ints under Python 3, where ord() would raise;
    # accept both so callers may pass either text or encoded data.
    return ['0x{0:02X}'.format(Item if isinstance(Item, int) else ord(Item)) for Item in Ascii]
## Format one "#define NAME 0xNNNN" line for the .h file
#
# @param      Prefix          DEFINE_STR, or COMMENT_DEFINE_STR for a commented-out line
# @param      Name            The string token name
# @param      Token           The numeric token value
# @param      ValueStartPtr   Column the value is aligned to
# @param      Suffix          Text appended after the value
#
# @retval     The formatted line, without a line break
#
def _FormatDefineLine(Prefix, Name, Token, ValueStartPtr, Suffix = ''):
    # Long names would make the padding empty or negative; always keep one blank.
    Padding = ' ' * max(ValueStartPtr - len(DEFINE_STR + Name), 1)
    return Prefix + ' ' + Name + Padding + DecToHexStr(Token, 4) + Suffix

## Create content of .h file
#
# Emits one #define per string token of the first language in
# UniObjectClass.LanguageDef. Referenced tokens come first; unreferenced
# tokens follow as commented-out defines tagged "not referenced".
#
# @param      BaseName:           The basename of strings
# @param      UniObjectClass      A UniObjectClass instance
# @param      IsCompatibleMode    Compatible mode
# @param      UniGenCFlag         UniString is generated into AutoGen C file when it is set to True
#
# @retval     Str:                A string of .h file content
#
def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    # NOTE(review): the alignment column is not visible in the reviewed text;
    # 60 is assumed -- confirm against the full file.
    ValueStartPtr = 60

    # Tokens 0 and 1 are the two language-name strings; always listed as unreferenced.
    UsedLines = [
        _FormatDefineLine(COMMENT_DEFINE_STR, LANGUAGE_NAME_STRING_NAME, 0, ValueStartPtr, COMMENT_NOT_REFERENCED),
        _FormatDefineLine(COMMENT_DEFINE_STR, PRINTABLE_LANGUAGE_NAME_STRING_NAME, 1, ValueStartPtr, COMMENT_NOT_REFERENCED),
    ]
    UnusedLines = []

    # Group the referred/Unused STRING token together.
    StringList = UniObjectClass.OrderedStringList[UniObjectClass.LanguageDef[0][0]]
    for Index in range(2, len(StringList)):
        StringItem = StringList[Index]
        Name = StringItem.StringName
        if Name is None:
            # Nothing to define for an unnamed entry.
            continue
        if StringItem.Referenced:
            UsedLines.append(_FormatDefineLine(DEFINE_STR, Name, StringItem.Token, ValueStartPtr))
        else:
            UnusedLines.append(_FormatDefineLine(COMMENT_DEFINE_STR, Name, StringItem.Token, ValueStartPtr, COMMENT_NOT_REFERENCED))

    Lines = UsedLines + UnusedLines + ['']
    if IsCompatibleMode or UniGenCFlag:
        Lines.append('extern unsigned char ' + BaseName + 'Strings[];')

    # Every line, including the last, is terminated with a line break.
    return ''.join(Line + '\n' for Line in Lines)
## Create a complete .h file
#
# Wraps the output of CreateHFileContent and terminates it with a line break.
#
# @param      BaseName:           The basename of strings
# @param      UniObjectClass      A UniObjectClass instance
# @param      IsCompatibleMode    Compatible mode
# @param      UniGenCFlag         UniString is generated into AutoGen C file when it is set to True
#
# @retval     Str:                A string of complete .h file
#
def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    HFile = WriteLine('', CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag))
    return HFile
## Append every item of an array to a binary buffer
#
# Each item is a hex string (for example '0x14'); it is converted with
# int(Item, 16) and written as one unsigned byte.
#
# @param      BinBuffer   Buffer to contain Binary data.
# @param      Array:      The array of hex strings to be written
#
def CreateBinBuffer(BinBuffer, Array):
    for Item in Array:
        BinBuffer.write(pack("B", int(Item, 16)))
## Create a formatted string of all items in an array
#
# Each item is followed by ', '; a new line is started once Width items have
# been placed on the current one. Every line starts with a blank. The final
# line has its trailing whitespace stripped and carries no line break.
#
# @param      Array:      The array need to be formatted
# @param      Width:      The number of items per line, the default value is set to 16
#
# @retval     ArrayItem:  A string for all formatted array items
#
def CreateArrayItem(Array, Width = 16):
    # NOTE(review): the initial value of the line is not visible in the
    # reviewed text; it is assumed to be the same indent used on wrap.
    Indent = ' '
    Rows = []
    Line = Indent
    Count = 0

    for Item in Array:
        if Count < Width:
            Line = Line + Item + ', '
            Count = Count + 1
        else:
            # Current line is full: flush it and start the next with this item.
            Rows.append(Line + '\n')
            Line = Indent + Item + ', '
            Count = 1
    Rows.append(Line.rstrip())

    return ''.join(Rows)
## CreateCFileStringValue
#
# Create the array lines for one string: the string block type code followed
# by the bytes of the value, terminated with a line break.
#
# @param      Value:  Value of the string, as a list of hex strings
#
# @retval     Str:    A formatted string with string value
#
def CreateCFileStringValue(Value):
    Value = [StringBlockType] + Value
    Str = WriteLine('', CreateArrayItem(Value))
    return Str
## Extract the primary tag used to compare two language codes
#
# The part before the first '-' is taken (lower-cased); a code without '-'
# is used as is. A 3-character tag is translated through LangConvTable; a
# tag missing from the table is kept unchanged so that two different unknown
# codes never compare equal.
#
# @param      Language:   A language code
#
# @retval     The primary tag
#
def _GetPrimaryTag(Language):
    DashPos = Language.find('-')
    if DashPos != -1:
        PrimaryTag = Language[0:DashPos].lower()
    else:
        PrimaryTag = Language
    if len(PrimaryTag) == 3:
        PrimaryTag = LangConvTable.get(PrimaryTag, PrimaryTag)
    return PrimaryTag

## GetFilteredLanguage
#
# apply get best language rules to the UNI language code list
#
# For every filter entry, in order:
#   1. an exact match in the UNI list is taken;
#   2. otherwise the first UNI language with the same primary tag is taken;
#   3. otherwise English is used: the first UNI language starting with
#      'en-' or 'eng-', or the bare 'en' tag if there is none.
#
# @param      UniLanguageList:     language code definition list in *.UNI file
# @param      LanguageFilterList:  language code filter list of RFC4646 format in DSC file
#
# @retval     UniLanguageListFiltered:   the filtered language code
#
def GetFilteredLanguage(UniLanguageList, LanguageFilterList):
    UniLanguageListFiltered = []
    # if filter list is empty, then consider there is no filter
    if LanguageFilterList == []:
        UniLanguageListFiltered = UniLanguageList
        return UniLanguageListFiltered
    for Language in LanguageFilterList:
        # first check for exact match
        if Language in UniLanguageList:
            if Language not in UniLanguageListFiltered:
                UniLanguageListFiltered.append(Language)
            continue

        # find the first one with the same/equivalent primary tag
        PrimaryTag = _GetPrimaryTag(Language)
        for UniLanguage in UniLanguageList:
            if PrimaryTag == _GetPrimaryTag(UniLanguage):
                if UniLanguage not in UniLanguageListFiltered:
                    UniLanguageListFiltered.append(UniLanguage)
                break
        else:
            # Here is rule 3 for "get best language"
            # If tag is not listed in the Unicode file, the default ("en") tag should be used for that language
            # for better processing, find the one that best suit for it.
            DefaultTag = 'en'
            if DefaultTag not in UniLanguageListFiltered:
                # check whether language code with primary code equivalent with DefaultTag already in the list, if so, use that
                for UniLanguage in UniLanguageList:
                    if UniLanguage.startswith(('en-', 'eng-')):
                        if UniLanguage not in UniLanguageListFiltered:
                            UniLanguageListFiltered.append(UniLanguage)
                        break
                else:
                    UniLanguageListFiltered.append(DefaultTag)
    return UniLanguageListFiltered
## Create content of .c file
#
# Builds, for every language that survives the platform filter, one string
# package (header, string blocks, end marker) as C array text, and optionally
# mirrors the same bytes into UniBinBuffer.
#
# NOTE(review): several statements of this function (initialisers, else
# branches, the Index bookkeeping, the final return) are not visible in the
# reviewed text and were restored from how the surrounding code uses them --
# confirm against the full file.
#
# @param      BaseName:           The basename of strings
# @param      UniObjectClass      A UniObjectClass instance
# @param      IsCompatibleMode    Compatible mode
# @param      UniBinBuffer        UniBinBuffer to contain UniBinary data, or None to skip binary output
# @param      FilterInfo          Platform language filter information: [EDK2Module, LanguageFilterList]
#
# @retval     Str:                A string of .c file content
#
def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer, FilterInfo):
    #
    # Init array length
    #
    TotalLength = EFI_HII_ARRAY_SIZE_LENGTH
    Str = ''

    EDK2Module = FilterInfo[0]
    if EDK2Module:
        LanguageFilterList = FilterInfo[1]
    else:
        # EDK module is using ISO639-2 format filter, convert to the RFC4646 format
        LanguageFilterList = [LangConvTable.get(F.lower()) for F in FilterInfo[1]]

    UniLanguageList = []
    for IndexI in range(len(UniObjectClass.LanguageDef)):
        UniLanguageList += [UniObjectClass.LanguageDef[IndexI][0]]

    UniLanguageListFiltered = GetFilteredLanguage(UniLanguageList, LanguageFilterList)

    #
    # Create lines for each language's strings
    #
    for IndexI in range(len(UniObjectClass.LanguageDef)):
        Language = UniObjectClass.LanguageDef[IndexI][0]
        if Language not in UniLanguageListFiltered:
            continue

        StringBuffer = BytesIO()
        StrStringValue = ''
        ArrayLength = 0
        NumberOfUseOtherLangDef = 0
        Index = 0
        for IndexJ in range(1, len(UniObjectClass.OrderedStringList[UniObjectClass.LanguageDef[IndexI][0]])):
            Item = UniObjectClass.OrderedStringListByToken[Language][IndexJ]

            Name = Item.StringName
            Value = Item.StringValueByteList
            Referenced = Item.Referenced
            Token = Item.Token
            UseOtherLangDef = Item.UseOtherLangDef

            if UseOtherLangDef != '' and Referenced:
                # Value comes from another language: only count it, so a run
                # of such strings collapses into a single skip block.
                NumberOfUseOtherLangDef = NumberOfUseOtherLangDef + 1
                Index = Index + 1
            else:
                if NumberOfUseOtherLangDef > 0:
                    # Flush the pending run: 1 type byte + 2 count bytes.
                    StrStringValue = WriteLine(StrStringValue, CreateArrayItem([StringSkipType] + DecToHexList(NumberOfUseOtherLangDef, 4)))
                    CreateBinBuffer (StringBuffer, ([StringSkipType] + DecToHexList(NumberOfUseOtherLangDef, 4)))
                    NumberOfUseOtherLangDef = 0
                    ArrayLength = ArrayLength + 3
                if Referenced and Item.Token > 0:
                    Index = Index + 1
                    StrStringValue = WriteLine(StrStringValue, "// %s: %s:%s" % (DecToHexStr(Index, 4), Name, DecToHexStr(Token, 4)))
                    StrStringValue = Write(StrStringValue, CreateCFileStringValue(Value))
                    CreateBinBuffer (StringBuffer, [StringBlockType] + Value)
                    ArrayLength = ArrayLength + Item.Length + 1 # 1 is for the length of string type

        #
        # EFI_HII_PACKAGE_HEADER
        #
        # "+ 1" below is the terminator byte that follows the language name;
        # the second "+ 1" is the EFI_HII_SIBT_END byte closing the package.
        Offset = EFI_HII_STRING_PACKAGE_HDR_LENGTH + len(Language) + 1
        ArrayLength = Offset + ArrayLength + 1

        #
        # Create PACKAGE HEADER
        #
        Str = WriteLine(Str, '// PACKAGE HEADER\n')
        TotalLength = TotalLength + ArrayLength

        # The list in the reviewed text ends on a dangling "+ \"; the
        # language terminator counted in Offset is restored as its last item.
        List = DecToHexList(ArrayLength, 6) + \
               [StringPackageType] + \
               DecToHexList(Offset) + \
               DecToHexList(Offset) + \
               DecToHexList(EFI_HII_LANGUAGE_WINDOW, EFI_HII_LANGUAGE_WINDOW_LENGTH * 2) * EFI_HII_LANGUAGE_WINDOW_NUMBER + \
               DecToHexList(EFI_STRING_ID, 4) + \
               AscToHexList(Language) + \
               DecToHexList(0, 2)
        Str = WriteLine(Str, CreateArrayItem(List, 16) + '\n')

        #
        # Create PACKAGE DATA
        #
        Str = WriteLine(Str, '// PACKAGE DATA\n')
        Str = Write(Str, StrStringValue)

        #
        # Add an EFI_HII_SIBT_END at last
        #
        Str = WriteLine(Str, ' ' + EFI_HII_SIBT_END + ",")

        #
        # Create binary UNI string
        #
        # CreateCFile passes None here, so binary output must be optional.
        if UniBinBuffer is not None:
            CreateBinBuffer (UniBinBuffer, List)
            UniBinBuffer.write (StringBuffer.getvalue())
            UniBinBuffer.write (pack("B", int(EFI_HII_SIBT_END, 16)))
        StringBuffer.close()

    #
    # Create line for string variable name
    # "unsigned char $(BaseName)Strings[] = {"
    #
    AllStr = WriteLine('', CHAR_ARRAY_DEFIN + ' ' + BaseName + COMMON_FILE_NAME + '[] = {\n')

    if IsCompatibleMode:
        #
        # Create FRAMEWORK_EFI_HII_PACK_HEADER in compatible mode
        #
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Length')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength + 2)) + '\n')
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Type')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(2, 4)) + '\n')
    else:
        #
        # Create whole array length in UEFI mode
        #
        AllStr = WriteLine(AllStr, '// STRGATHER_OUTPUT_HEADER')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength)) + '\n')

    #
    # Join package data
    #
    AllStr = Write(AllStr, Str)

    return AllStr
## Create end of .c file
#
# Produces the text that closes the array opened by CreateCFileContent.
#
# @retval     Str:    A string of .c file end
#
def CreateCFileEnd():
    Str = Write('', '};')
    return Str
## Create a complete .c file
#
# Joins the array content and the closing line. No binary buffer is filled
# here: None is passed as the UniBinBuffer argument of CreateCFileContent.
#
# @param      BaseName:           The basename of strings
# @param      UniObjectClass      A UniObjectClass instance
# @param      IsCompatibleMode    Compatible Mode
# @param      FilterInfo          Platform language filter information
#
# @retval     CFile:              A string of complete .c file
#
def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
    CFile = ''
    CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
    CFile = WriteLine(CFile, CreateCFileEnd())
    return CFile
## Get a list for all files
#
# Each source entry is looked up, by its .Path attribute, in the include
# directories in order. The search stops at the first directory where it
# exists as a regular file; that path is returned unless its extension is in
# the skip list.
#
# @param      SourceFileList:  Source file entries; each must expose a .Path attribute
# @param      IncludeList:     A list of all path to be searched
# @param      SkipList:        A list of all types of file could be skipped (may be None)
#
# @retval     FileList:        A list of all files found
#
def GetFileList(SourceFileList, IncludeList, SkipList):
    if IncludeList is None:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, "Include path for unicode file is not defined")

    FileList = []
    SkipExtList = [Skip.upper() for Skip in (SkipList or [])]

    for File in SourceFileList:
        for Dir in IncludeList:
            if not os.path.exists(Dir):
                continue
            # Use a separate name: rebinding File to the joined string would
            # make File.Path fail on the next include directory.
            FullPath = os.path.join(Dir, File.Path)
            #
            # Ignore Dir
            #
            if not os.path.isfile(FullPath):
                continue
            #
            # Ignore file listed in skip list
            #
            if os.path.splitext(FullPath)[1].upper() in SkipExtList:
                EdkLogger.verbose("Skipped %s for string token uses search" % FullPath)
            else:
                FileList.append(FullPath)
            # NOTE(review): stopping at the first hit is assumed (the end of
            # the loop body is not visible in the reviewed text) -- confirm.
            break

    return FileList
## Search whether all string defined in UniObjectClass are referenced
#
# Every existing file in FileList is scanned line by line for STRING_TOKEN(...)
# uses; each name found is marked through SetStringReferenced. ReToken is then
# called on the object. With an empty FileList the object is returned
# untouched.
#
# @param      UniObjectClass:     Input UniObjectClass
# @param      FileList:           Search path list
# @param      IsCompatibleMode    Compatible Mode (not used by this function)
#
# @retval     UniObjectClass:     UniObjectClass after searched
#
def SearchString(UniObjectClass, FileList, IsCompatibleMode):
    if not FileList:
        return UniObjectClass

    for File in FileList:
        if os.path.isfile(File):
            Lines = open(File, 'r')
            # Close explicitly so handles are not leaked across a large file list.
            try:
                for Line in Lines:
                    for StrName in STRING_TOKEN.findall(Line):
                        EdkLogger.debug(EdkLogger.DEBUG_5, "Found string identifier: " + StrName)
                        UniObjectClass.SetStringReferenced(StrName)
            finally:
                Lines.close()

    UniObjectClass.ReToken()

    return UniObjectClass
## Generate the string .h/.c content for a module
#
# This function is used for UEFI2.1 spec
#
# Parses the .uni files, marks the tokens referenced by the source files,
# then renders the header and, when requested, the C array and the binary
# buffer.
#
# @param      UniFilList          List of .uni files; must not be empty
# @param      SourceFileList      Source file entries scanned for STRING_TOKEN uses
# @param      IncludeList         Directories searched for the source files
# @param      IncludePathList     Passed through to UniFileClassObject
# @param      SkipList            File extensions excluded from the scan
# @param      BaseName            The basename of strings
# @param      IsCompatibleMode    Compatible mode
# @param      ShellMode           Parse the .uni files with the compatible-mode flag forced to True
# @param      UniGenCFlag         UniString is generated into AutoGen C file when it is set to True
# @param      UniGenBinBuffer     Buffer receiving the binary string package, or None
# @param      FilterInfo          Platform language filter information; defaults to [True, []]
#
# @retval     (HFile, CFile)      The .h content and the .c content (CFile is None when not generated)
#
def GetStringFiles(UniFilList, SourceFileList, IncludeList, IncludePathList, SkipList, BaseName, IsCompatibleMode = False, ShellMode = False, UniGenCFlag = True, UniGenBinBuffer = None, FilterInfo = None):
    # A list literal as default would be shared between calls.
    if FilterInfo is None:
        FilterInfo = [True, []]

    if len(UniFilList) > 0:
        if ShellMode:
            #
            # support ISO 639-2 codes in .UNI files of EDK Shell
            #
            Uni = UniFileClassObject(sorted (UniFilList), True, IncludePathList)
        else:
            Uni = UniFileClassObject(sorted (UniFilList), IsCompatibleMode, IncludePathList)
    else:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, 'No unicode files given')

    FileList = GetFileList(SourceFileList, IncludeList, SkipList)

    Uni = SearchString(Uni, sorted (FileList), IsCompatibleMode)

    HFile = CreateHFile(BaseName, Uni, IsCompatibleMode, UniGenCFlag)
    CFile = None
    if IsCompatibleMode or UniGenCFlag:
        CFile = CreateCFile(BaseName, Uni, IsCompatibleMode, FilterInfo)
    # NOTE(review): the guard on this call is not visible in the reviewed
    # text; a None buffer is assumed to mean "no binary output" -- confirm.
    if UniGenBinBuffer is not None:
        CreateCFileContent(BaseName, Uni, IsCompatibleMode, UniGenBinBuffer, FilterInfo)

    return HFile, CFile
#
# Append an item to the target string
#
def Write(Target, Item):
    Pieces = (Target, Item)
    return ''.join(Pieces)
#
# Append an item to the target string, followed by a line break
#
def WriteLine(Target, Item):
    Pieces = (Target, Item, '\n')
    return ''.join(Pieces)
# This acts like the main() function for the script, unless it is 'import'ed into another
# script.
#
# NOTE(review): this is a developer harness with hard-coded Windows paths.
# Parts of it (the list brackets, the inner file loop, the IncludeList value,
# the write/close calls) are not visible in the reviewed text and were
# restored from how the names are used -- confirm against the full file.
if __name__ == '__main__':
    EdkLogger.info('start')

    UniFileList = [
        r'C:\\Edk\\Strings2.uni',
        r'C:\\Edk\\Strings.uni'
    ]

    # NOTE(review): GetFileList reads a .Path attribute from each entry, but
    # os.walk yields plain file-name strings -- this harness looks stale.
    SrcFileList = []
    for Root, Dirs, Files in os.walk('C:\\Edk'):
        for File in Files:
            SrcFileList.append(File)

    IncludeList = [
        r'C:\\Edk'
    ]

    SkipList = ['.inf', '.uni']
    BaseName = 'DriverSample'
    # The call used to pass six positional arguments, which bound SkipList to
    # IncludePathList, BaseName to SkipList and True to BaseName.
    # NOTE(review): reusing IncludeList as IncludePathList is an assumption.
    (h, c) = GetStringFiles(UniFileList, SrcFileList, IncludeList, IncludeList, SkipList, BaseName, IsCompatibleMode = True)

    hfile = open('unistring.h', 'w')
    try:
        hfile.write(h)
    finally:
        hfile.close()

    cfile = open('unistring.c', 'w')
    try:
        cfile.write(c)
    finally:
        cfile.close()

    EdkLogger.info('end')