# This file is used to parse a strings file and create or add to a string database.
#
# Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
# SPDX-License-Identifier: BSD-2-Clause-Patent
from __future__ import absolute_import

import os
import re

import Common.EdkLogger as EdkLogger
from Common.BuildToolError import *
from .UniClassObject import *
from io import BytesIO
from struct import pack, unpack
from Common.LongFilePathSupport import OpenLongFilePath as open
##
# Static definitions
#
# String information block (SIBT) type codes. They are kept as hex-string
# literals because they are written verbatim into the generated C array and
# converted with int(..., 16) when the binary buffer is produced.
#
EFI_HII_SIBT_END = '0x00'
EFI_HII_SIBT_STRING_SCSU = '0x10'
EFI_HII_SIBT_STRING_SCSU_FONT = '0x11'
EFI_HII_SIBT_STRINGS_SCSU = '0x12'
EFI_HII_SIBT_STRINGS_SCSU_FONT = '0x13'
EFI_HII_SIBT_STRING_UCS2 = '0x14'
EFI_HII_SIBT_STRING_UCS2_FONT = '0x15'
EFI_HII_SIBT_STRINGS_UCS2 = '0x16'
EFI_HII_SIBT_STRINGS_UCS2_FONT = '0x17'
EFI_HII_SIBT_DUPLICATE = '0x20'
EFI_HII_SIBT_SKIP2 = '0x21'
EFI_HII_SIBT_SKIP1 = '0x22'
EFI_HII_SIBT_EXT1 = '0x30'
EFI_HII_SIBT_EXT2 = '0x31'
EFI_HII_SIBT_EXT4 = '0x32'
EFI_HII_SIBT_FONT = '0x40'

EFI_HII_PACKAGE_STRINGS = '0x04'
EFI_HII_PACKAGE_FORM = '0x02'

# Aliases for the package/block types this module actually emits.
StringPackageType = EFI_HII_PACKAGE_STRINGS
StringPackageForm = EFI_HII_PACKAGE_FORM
StringBlockType = EFI_HII_SIBT_STRING_UCS2
StringSkipType = EFI_HII_SIBT_SKIP2

# NOTE(review): COMMENT is referenced below but its definition is not visible
# in the reviewed text; a C++-style comment leader is assumed -- confirm.
COMMENT = '// '
DEFINE_STR = '#define'
COMMENT_DEFINE_STR = COMMENT + DEFINE_STR
NOT_REFERENCED = 'not referenced'
COMMENT_NOT_REFERENCED = ' ' + COMMENT + NOT_REFERENCED
CHAR_ARRAY_DEFIN = 'unsigned char'
COMMON_FILE_NAME = 'Strings'
# Raw string: the pattern contains "\(" and "\)", which are invalid escape
# sequences in a normal string literal.
STRING_TOKEN = re.compile(r'STRING_TOKEN *\(([A-Z0-9_]+) *\)', re.MULTILINE | re.UNICODE)

# Field sizes, in bytes, used to compute the string package header length.
EFI_HII_ARRAY_SIZE_LENGTH = 4
EFI_HII_PACKAGE_HEADER_LENGTH = 4
EFI_HII_HDR_SIZE_LENGTH = 4
EFI_HII_STRING_OFFSET_LENGTH = 4
# NOTE(review): EFI_STRING_ID is used when the package header is built but its
# definition is not visible in the reviewed text; 1 is assumed -- confirm.
EFI_STRING_ID = 1
EFI_STRING_ID_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW = 0
EFI_HII_LANGUAGE_WINDOW_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW_NUMBER = 16
EFI_HII_STRING_PACKAGE_HDR_LENGTH = (
    EFI_HII_PACKAGE_HEADER_LENGTH
    + EFI_HII_HDR_SIZE_LENGTH
    + EFI_HII_STRING_OFFSET_LENGTH
    + EFI_HII_LANGUAGE_WINDOW_LENGTH * EFI_HII_LANGUAGE_WINDOW_NUMBER
    + EFI_STRING_ID_LENGTH
)

# NOTE(review): only the first, second and fourth entries are visible in the
# reviewed text; the bare '//' separator entries are assumed -- confirm.
H_C_FILE_HEADER = [
    '//',
    '// DO NOT EDIT -- auto-generated file',
    '//',
    '// This file is generated by the StrGather utility',
    '//',
]
LANGUAGE_NAME_STRING_NAME = '$LANGUAGE_NAME'
PRINTABLE_LANGUAGE_NAME_STRING_NAME = '$PRINTABLE_LANGUAGE_NAME'
## Convert a dec number to a hex string
#
# Convert a dec number to a formatted hex string in length digit
# The digit is set to default 8
# The hex string starts with "0x"
# DecToHexStr(1000) is '0x000003E8'
# DecToHexStr(1000, 6) is '0x0003E8'
#
# @param      Dec:    The number in dec format
# @param      Digit:  The needed digit of hex string
#
# @retval:            The formatted hex string
#
def DecToHexStr(Dec, Digit = 8):
    # Digit is a minimum width: the value is zero-padded, never truncated.
    return '0x{0:0{1}X}'.format(Dec, Digit)
## Convert a dec number to a hex list
#
# Convert a dec number to a formatted hex list in size digit
# The digit is set to default 8
# DecToHexList(1000) is ['0xE8', '0x03', '0x00', '0x00']
# DecToHexList(1000, 6) is ['0xE8', '0x03', '0x00']
#
# @param      Dec:    The number in dec format
# @param      Digit:  The needed digit of hex list
#
# @retval:            A list for formatted hex string
#
def DecToHexList(Dec, Digit = 8):
    Hex = '{0:0{1}X}'.format(Dec, Digit)
    # Walk the zero-padded hex text two digits at a time from the right, so
    # the least significant byte comes first in the returned list.
    return ["0x" + Hex[Bit:Bit + 2] for Bit in range(Digit - 2, -1, -2)]
## Convert an ASCII string to a hex list
#
# Convert an ASCII string to a formatted hex list
# AscToHexList('en-US') is ['0x65', '0x6E', '0x2D', '0x55', '0x53']
#
# @param      Ascii:  The ASCII string (str, or a bytes-like object)
#
# @retval:            A list for formatted hex string
#
def AscToHexList(Ascii):
    # Iterating bytes yields ints while iterating str yields one-character
    # strings; normalise each element to its code point before formatting.
    return ['0x{0:02X}'.format(Item if isinstance(Item, int) else ord(Item)) for Item in Ascii]
## Format one "#define NAME VALUE" line
#
# The value column is aligned at ValueStartPtr characters when the name is
# short enough; otherwise a single space separates the name from the value.
#
# @param      Prefix:         Text that starts the line (define or commented-out define)
# @param      Name:           The string token name
# @param      Token:          The numeric token value
# @param      ValueStartPtr:  Column used to align the value
# @param      Suffix:         Text appended after the value
#
# @retval:                    The formatted line
#
def _FormatDefineLine(Prefix, Name, Token, ValueStartPtr, Suffix = ''):
    # The padding is always measured against DEFINE_STR, even for
    # commented-out lines, matching the layout of the referenced entries.
    Padding = max(ValueStartPtr - len(DEFINE_STR + Name), 1)
    return Prefix + ' ' + Name + ' ' * Padding + DecToHexStr(Token, 4) + Suffix

## Create content of .h file
#
# Create content of .h file
#
# @param      BaseName:          The basename of strings
# @param      UniObjectClass     A UniObjectClass instance
# @param      IsCompatibleMode   Compatible mode
# @param      UniGenCFlag        UniString is generated into AutoGen C file when it is set to True
#
# @retval     Str:               A string of .h file content
#
def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    # NOTE(review): the alignment column is not visible in the reviewed text;
    # 60 is assumed -- confirm.
    ValueStartPtr = 60

    # The two language-name tokens are always emitted first, commented out.
    Str = []
    Str = WriteLine(Str, _FormatDefineLine(COMMENT_DEFINE_STR, LANGUAGE_NAME_STRING_NAME, 0, ValueStartPtr, COMMENT_NOT_REFERENCED))
    Str = WriteLine(Str, _FormatDefineLine(COMMENT_DEFINE_STR, PRINTABLE_LANGUAGE_NAME_STRING_NAME, 1, ValueStartPtr, COMMENT_NOT_REFERENCED))

    # Group the referred/Unused STRING token together: referenced tokens are
    # written straight away, unused ones are collected and appended after.
    UnusedStr = []
    StringList = UniObjectClass.OrderedStringList[UniObjectClass.LanguageDef[0][0]]
    for Index in range(2, len(StringList)):
        StringItem = StringList[Index]
        Name = StringItem.StringName
        Token = StringItem.Token
        # NOTE(review): skipping unnamed entries is assumed; a None name could
        # not be concatenated below in any case -- confirm.
        if Name is None:
            continue
        if StringItem.Referenced:
            Str = WriteLine(Str, _FormatDefineLine(DEFINE_STR, Name, Token, ValueStartPtr))
        else:
            UnusedStr = WriteLine(UnusedStr, _FormatDefineLine(COMMENT_DEFINE_STR, Name, Token, ValueStartPtr, COMMENT_NOT_REFERENCED))

    Str.extend(UnusedStr)

    Str = WriteLine(Str, '')
    if IsCompatibleMode or UniGenCFlag:
        Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
    return "".join(Str)
## Create a complete .h file
#
# Create a complete .h file with file header and file content
#
# @param      BaseName:          The basename of strings
# @param      UniObjectClass     A UniObjectClass instance
# @param      IsCompatibleMode   Compatible mode
# @param      UniGenCFlag        UniString is generated into AutoGen C file when it is set to True
#
# @retval     Str:               A string of complete .h file
#
def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    Content = CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag)
    # WriteLine appends the line break that terminates the content.
    HFile = WriteLine('', Content)

    return "".join(HFile)
## Create a buffer to store all items in an array
#
# Each item is a hex string such as '0x14'; it is converted to its integer
# value and written to the buffer as one unsigned byte.
#
# @param      BinBuffer   Buffer to contain Binary data.
# @param      Array:      The array need to be formatted
#
def CreateBinBuffer(BinBuffer, Array):
    for Item in Array:
        BinBuffer.write(pack("B", int(Item, 16)))
## Create a formatted string all items in an array
#
# Use ',' to join each item in an array, and break a new line when reaching the width (default is 16)
#
# @param      Array:      The array need to be formatted
# @param      Width:      The line length, the default value is set to 16
#
# @retval     ArrayItem:  A string for all formatted array items
#
def CreateArrayItem(Array, Width = 16):
    Indent = ' '
    Separator = ', '

    Lines = []
    Line = Indent
    Count = 0
    for Item in Array:
        if Count >= Width:
            # The current line is full: flush it and start a new one.
            Lines.append(Line)
            Line = Indent
            Count = 0
        Line = Line + Item + Separator
        Count = Count + 1

    # Only the final line has its trailing whitespace stripped.
    Lines.append(Line.rstrip())

    return "\n".join(Lines)
## CreateCFileStringValue
#
# Create a line with string value
#
# @param      Value:  Value of the string
#
# @retval     Str:    A formatted string with string value
#
def CreateCFileStringValue(Value):
    # Every string block starts with its block-type byte.
    Value = [StringBlockType] + Value
    Str = WriteLine('', CreateArrayItem(Value))

    return "".join(Str)
## Get the primary tag of a language code
#
# The part before the first '-' is lower-cased; a code without '-' is used
# unchanged. A three-letter tag is mapped through LangConvTable.
#
# @param      LanguageCode:  A language code
#
# @retval:                   The primary tag used for comparison
#
def _GetPrimaryTag(LanguageCode):
    DashPos = LanguageCode.find('-')
    PrimaryTag = LanguageCode[0:DashPos].lower() if DashPos != -1 else LanguageCode
    if len(PrimaryTag) == 3:
        PrimaryTag = LangConvTable.get(PrimaryTag)
    return PrimaryTag

## GetFilteredLanguage
#
# apply get best language rules to the UNI language code list
#
# @param      UniLanguageList:          language code definition list in *.UNI file
# @param      LanguageFilterList:       language code filter list of RFC4646 format in DSC file
#
# @retval     UniLanguageListFiltered:  the filtered language code
#
def GetFilteredLanguage(UniLanguageList, LanguageFilterList):
    # if filter list is empty, then consider there is no filter
    if not LanguageFilterList:
        return UniLanguageList

    DefaultTag = 'en'
    UniLanguageListFiltered = []
    for Language in LanguageFilterList:
        if Language in UniLanguageList:
            # first check for exact match
            Selected = Language
        else:
            # find the first one with the same/equivalent primary tag
            PrimaryTag = _GetPrimaryTag(Language)
            Selected = None
            for UniLanguage in UniLanguageList:
                if _GetPrimaryTag(UniLanguage) == PrimaryTag:
                    Selected = UniLanguage
                    break
            if Selected is None:
                # Here is rule 3 for "get best language"
                # If tag is not listed in the Unicode file, the default ("en") tag should be used for that language
                # for better processing, find the one that best suit for it.
                if DefaultTag in UniLanguageListFiltered:
                    continue
                # Prefer an existing language whose primary code is equivalent
                # to the default tag; fall back to the bare default tag.
                Selected = DefaultTag
                for UniLanguage in UniLanguageList:
                    if UniLanguage.startswith('en-') or UniLanguage.startswith('eng-'):
                        Selected = UniLanguage
                        break
        if Selected not in UniLanguageListFiltered:
            UniLanguageListFiltered.append(Selected)
    return UniLanguageListFiltered
## Create content of .c file
#
# Create content of .c file
#
# @param      BaseName:          The basename of strings
# @param      UniObjectClass     A UniObjectClass instance
# @param      IsCompatibleMode   Compatible mode
# @param      UniBinBuffer       UniBinBuffer to contain UniBinary data.
# @param      FilterInfo         Platform language filter information
#
# @retval     Str:               A string of .c file content
#
def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer, FilterInfo):
    #
    # Init array length
    #
    TotalLength = EFI_HII_ARRAY_SIZE_LENGTH
    Str = ''

    EDK2Module = FilterInfo[0]
    # NOTE(review): the branch condition is not visible in the reviewed text;
    # it is inferred from the comment on the conversion below -- confirm.
    if EDK2Module:
        LanguageFilterList = FilterInfo[1]
    else:
        # EDK module is using ISO639-2 format filter, convert to the RFC4646 format
        LanguageFilterList = [LangConvTable.get(F.lower()) for F in FilterInfo[1]]

    UniLanguageList = [LanguageDef[0] for LanguageDef in UniObjectClass.LanguageDef]
    UniLanguageListFiltered = GetFilteredLanguage(UniLanguageList, LanguageFilterList)

    #
    # Create lines for each language's strings
    #
    for Language in UniLanguageList:
        if Language not in UniLanguageListFiltered:
            continue

        StringBuffer = BytesIO()
        StrStringValue = ''
        ArrayLength = 0
        NumberOfUseOtherLangDef = 0
        # NOTE(review): Index only feeds the "// index: name:token" comment
        # line; its increments are not visible in the reviewed text and are
        # assumed to count every emitted or skipped string -- confirm.
        Index = 0
        for IndexJ in range(1, len(UniObjectClass.OrderedStringList[Language])):
            Item = UniObjectClass.OrderedStringListByToken[Language][IndexJ]

            Name = Item.StringName
            Value = Item.StringValueByteList
            Referenced = Item.Referenced
            Token = Item.Token
            UseOtherLangDef = Item.UseOtherLangDef

            if UseOtherLangDef != '' and Referenced:
                # Consecutive strings taken from another language are folded
                # into a single skip block, emitted further below.
                NumberOfUseOtherLangDef = NumberOfUseOtherLangDef + 1
                Index = Index + 1
                continue

            if NumberOfUseOtherLangDef > 0:
                SkipBlock = [StringSkipType] + DecToHexList(NumberOfUseOtherLangDef, 4)
                StrStringValue = WriteLine(StrStringValue, CreateArrayItem(SkipBlock))
                CreateBinBuffer(StringBuffer, SkipBlock)
                NumberOfUseOtherLangDef = 0
                ArrayLength = ArrayLength + 3
            if Referenced and Item.Token > 0:
                Index = Index + 1
                StrStringValue = WriteLine(StrStringValue, "// %s: %s:%s" % (DecToHexStr(Index, 4), Name, DecToHexStr(Token, 4)))
                StrStringValue = Write(StrStringValue, CreateCFileStringValue(Value))
                CreateBinBuffer(StringBuffer, [StringBlockType] + Value)
                ArrayLength = ArrayLength + Item.Length + 1 # 1 is for the length of string type

        #
        # EFI_HII_PACKAGE_HEADER
        #
        # The extra byte in Offset is the terminator after the language name;
        # the extra byte in ArrayLength is the EFI_HII_SIBT_END block.
        Offset = EFI_HII_STRING_PACKAGE_HDR_LENGTH + len(Language) + 1
        ArrayLength = Offset + ArrayLength + 1

        #
        # Create PACKAGE HEADER
        #
        Str = WriteLine(Str, '// PACKAGE HEADER\n')
        TotalLength = TotalLength + ArrayLength

        # NOTE(review): the final operand of this concatenation is not visible
        # in the reviewed text; a one-byte zero terminator is assumed because
        # Offset reserves one byte after the language name -- confirm.
        List = DecToHexList(ArrayLength, 6) + \
               [StringPackageType] + \
               DecToHexList(Offset) + \
               DecToHexList(Offset) + \
               DecToHexList(EFI_HII_LANGUAGE_WINDOW, EFI_HII_LANGUAGE_WINDOW_LENGTH * 2) * EFI_HII_LANGUAGE_WINDOW_NUMBER + \
               DecToHexList(EFI_STRING_ID, 4) + \
               AscToHexList(Language) + \
               DecToHexList(0, 2)
        Str = WriteLine(Str, CreateArrayItem(List, 16) + '\n')

        #
        # Create PACKAGE DATA
        #
        Str = WriteLine(Str, '// PACKAGE DATA\n')
        Str = Write(Str, StrStringValue)

        #
        # Add an EFI_HII_SIBT_END at last
        #
        Str = WriteLine(Str, ' ' + EFI_HII_SIBT_END + ",")

        #
        # Create binary UNI string
        #
        # CreateCFile passes None here, so the binary output must be optional.
        if UniBinBuffer:
            CreateBinBuffer(UniBinBuffer, List)
            UniBinBuffer.write(StringBuffer.getvalue())
            UniBinBuffer.write(pack("B", int(EFI_HII_SIBT_END, 16)))
        StringBuffer.close()

    #
    # Create line for string variable name
    # "unsigned char $(BaseName)Strings[] = {"
    #
    AllStr = WriteLine('', CHAR_ARRAY_DEFIN + ' ' + BaseName + COMMON_FILE_NAME + '[] = {\n')

    # NOTE(review): the condition selecting between the two headers is not
    # visible in the reviewed text; it is inferred from the section comments.
    if IsCompatibleMode:
        #
        # Create FRAMEWORK_EFI_HII_PACK_HEADER in compatible mode
        #
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Length')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength + 2)) + '\n')
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Type')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(2, 4)) + '\n')
    else:
        #
        # Create whole array length in UEFI mode
        #
        AllStr = WriteLine(AllStr, '// STRGATHER_OUTPUT_HEADER')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength)) + '\n')

    #
    # Join package data
    #
    AllStr = Write(AllStr, Str)

    return "".join(AllStr)
## Create end of .c file
#
# Create end of .c file
#
# @retval     Str:  The pieces that close the C array definition
#
def CreateCFileEnd():
    Str = Write('', '};')
    return Str
## Create a complete .c file
#
# Create a complete .c file
#
# @param      BaseName:          The basename of strings
# @param      UniObjectClass     A UniObjectClass instance
# @param      IsCompatibleMode   Compatible Mode
# @param      FilterInfo         Platform language filter information
#
# @retval     CFile:             A string of complete .c file
#
def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
    CFile = ''
    # No binary buffer is wanted here, hence None for UniBinBuffer.
    CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
    CFile = WriteLine(CFile, CreateCFileEnd())
    return "".join(CFile)
## GetFileList
#
# Get a list for all files
#
# @param      SourceFileList:  Source file objects; each exposes its relative path as .Path
# @param      IncludeList:     A list of all path to be searched
# @param      SkipList:        A list of all types of file could be skipped
#
# @retval     FileList:        A list of all files found
#
def GetFileList(SourceFileList, IncludeList, SkipList):
    if IncludeList is None:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, "Include path for unicode file is not defined")

    if SkipList is None:
        SkipList = []
    SkipExtensions = set(Skip.upper() for Skip in SkipList)

    FileList = []
    for File in SourceFileList:
        for Dir in IncludeList:
            if not os.path.exists(Dir):
                continue
            # Keep the joined path in its own name: rebinding File would make
            # File.Path fail when the next include directory is tried.
            FullPath = os.path.join(Dir, File.Path)
            #
            # Ignore Dir
            #
            if not os.path.isfile(FullPath):
                continue
            #
            # Ignore file listed in skip list
            #
            if os.path.splitext(FullPath)[1].upper() in SkipExtensions:
                EdkLogger.verbose("Skipped %s for string token uses search" % FullPath)
            else:
                FileList.append(FullPath)
            # The first include directory that contains the file wins.
            break

    return FileList
## SearchString
#
# Search whether all string defined in UniObjectClass are referenced
# All string used should be set to Referenced
#
# @param      UniObjectClass:    Input UniObjectClass
# @param      FileList:          Search path list
# @param      IsCompatibleMode   Compatible Mode
#
# @retval     UniObjectClass:    UniObjectClass after searched
#
def SearchString(UniObjectClass, FileList, IsCompatibleMode):
    # Nothing to scan: the object is returned untouched (no ReToken call).
    if not FileList:
        return UniObjectClass

    for File in FileList:
        try:
            if os.path.isfile(File):
                Lines = open(File, 'r')
                try:
                    for Line in Lines:
                        for StrName in STRING_TOKEN.findall(Line):
                            EdkLogger.debug(EdkLogger.DEBUG_5, "Found string identifier: " + StrName)
                            UniObjectClass.SetStringReferenced(StrName)
                finally:
                    # Release the handle even when a line fails to process.
                    Lines.close()
        except Exception:
            EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, "SearchString: Error while processing file", File=File, RaiseError=False)
            # NOTE(review): re-raising after the log call is assumed, since the
            # call is made with RaiseError=False -- confirm.
            raise

    UniObjectClass.ReToken()

    return UniObjectClass
## GetStringFiles
#
# This function is used for UEFI2.1 spec
#
# @param      UniFilList:        Unicode file objects; sorted by their .File attribute
# @param      SourceFileList:    Source files searched for string token uses
# @param      IncludeList:       Directories searched for the source files
# @param      IncludePathList:   Include paths handed to UniFileClassObject
# @param      SkipList:          File extensions excluded from the search
# @param      BaseName:          The basename of strings
# @param      IsCompatibleMode   Compatible Mode
# @param      ShellMode          Parse the unicode files as in compatible mode regardless of IsCompatibleMode
# @param      UniGenCFlag        UniString is generated into AutoGen C file when it is set to True
# @param      UniGenBinBuffer    Optional buffer that receives the binary string data
# @param      FilterInfo         Platform language filter information (only read, never modified here)
#
# @retval     (HFile, CFile):    The .h content and the .c content; CFile is None when no C file is generated
#
def GetStringFiles(UniFilList, SourceFileList, IncludeList, IncludePathList, SkipList, BaseName, IsCompatibleMode = False, ShellMode = False, UniGenCFlag = True, UniGenBinBuffer = None, FilterInfo = [True, []]):
    if len(UniFilList) > 0:
        SortedUniFiles = sorted(UniFilList, key=lambda x: x.File)
        if ShellMode:
            #
            # support ISO 639-2 codes in .UNI files of EDK Shell
            #
            Uni = UniFileClassObject(SortedUniFiles, True, IncludePathList)
        else:
            Uni = UniFileClassObject(SortedUniFiles, IsCompatibleMode, IncludePathList)
    else:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, 'No unicode files given')

    FileList = GetFileList(SourceFileList, IncludeList, SkipList)

    Uni = SearchString(Uni, sorted(FileList), IsCompatibleMode)

    HFile = CreateHFile(BaseName, Uni, IsCompatibleMode, UniGenCFlag)
    CFile = None
    if IsCompatibleMode or UniGenCFlag:
        CFile = CreateCFile(BaseName, Uni, IsCompatibleMode, FilterInfo)
    if UniGenBinBuffer:
        # Called only for its side effect of filling the binary buffer.
        CreateCFileContent(BaseName, Uni, IsCompatibleMode, UniGenBinBuffer, FilterInfo)

    return HFile, CFile
#
# Write an item
#
# Target is normalised to a list (a str becomes a one-element list, any other
# empty value becomes a new empty list); a list Item is appended element by
# element, anything else is appended as a single element.
#
# @param      Target:  The str or list written so far
# @param      Item:    The item, or list of items, to add
#
# @retval     Target:  The resulting list
#
def Write(Target, Item):
    if isinstance(Target, str):
        Target = [Target]
    if not Target:
        Target = []
    if isinstance(Item, list):
        Target.extend(Item)
    else:
        Target.append(Item)
    return Target
#
# Write an item with a break line
#
# Target is normalised to a list (a str becomes a one-element list, any other
# empty value becomes a new empty list); a list Item is appended element by
# element, anything else as a single element, and '\n' is appended last.
#
# @param      Target:  The str or list written so far
# @param      Item:    The item, or list of items, to add
#
# @retval     Target:  The resulting list
#
def WriteLine(Target, Item):
    if isinstance(Target, str):
        Target = [Target]
    if not Target:
        Target = []
    if isinstance(Item, list):
        Target.extend(Item)
    else:
        Target.append(Item)
    Target.append('\n')
    return Target
# This acts like the main() function for the script, unless it is 'import'ed into another
# script.
#
# NOTE(review): this demo passes plain path strings, whereas GetStringFiles
# sorts its unicode files by a .File attribute and GetFileList reads .Path
# from each source file -- confirm the demo is still meant to be runnable.
if __name__ == '__main__':
    EdkLogger.info('start')

    UniFileList = [
        r'C:\\Edk\\Strings2.uni',
        r'C:\\Edk\\Strings.uni'
    ]

    SrcFileList = []
    for Root, Dirs, Files in os.walk('C:\\Edk'):
        for File in Files:
            SrcFileList.append(File)

    # NOTE(review): the include list contents are not visible in the reviewed
    # text; the directory walked above is assumed -- confirm.
    IncludeList = [
        r'C:\\Edk'
    ]

    SkipList = ['.inf', '.uni']
    BaseName = 'DriverSample'
    # GetStringFiles takes IncludePathList between IncludeList and SkipList;
    # leaving it out shifts every later positional argument by one.
    (h, c) = GetStringFiles(UniFileList, SrcFileList, IncludeList, IncludeList, SkipList, BaseName, True)

    hfile = open('unistring.h', 'w')
    try:
        hfile.write(h)
    finally:
        hfile.close()
    cfile = open('unistring.c', 'w')
    try:
        cfile.write(c)
    finally:
        cfile.close()

    EdkLogger.info('end')