2 # This file is used to parse a strings file and create or add to a string database
5 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
18 import Common
.EdkLogger
as EdkLogger
19 from Common
.BuildToolError
import *
20 from .UniClassObject
import *
21 from io
import BytesIO
22 from struct
import pack
, unpack
23 from Common
.LongFilePathSupport
import OpenLongFilePath
as open
#
# HII string block type codes.
# Every code is kept as a hex *string*; CreateBinBuffer() converts each entry
# with int(Item, 16) when the binary image is produced.
#
EFI_HII_SIBT_END               = '0x00'
EFI_HII_SIBT_STRING_SCSU       = '0x10'
EFI_HII_SIBT_STRING_SCSU_FONT  = '0x11'
EFI_HII_SIBT_STRINGS_SCSU      = '0x12'
EFI_HII_SIBT_STRINGS_SCSU_FONT = '0x13'
EFI_HII_SIBT_STRING_UCS2       = '0x14'
EFI_HII_SIBT_STRING_UCS2_FONT  = '0x15'
EFI_HII_SIBT_STRINGS_UCS2      = '0x16'
EFI_HII_SIBT_STRINGS_UCS2_FONT = '0x17'
EFI_HII_SIBT_DUPLICATE         = '0x20'
EFI_HII_SIBT_SKIP2             = '0x21'
EFI_HII_SIBT_SKIP1             = '0x22'
EFI_HII_SIBT_EXT1              = '0x30'
EFI_HII_SIBT_EXT2              = '0x31'
EFI_HII_SIBT_EXT4              = '0x32'
EFI_HII_SIBT_FONT              = '0x40'

#
# HII package type codes (hex strings as well).
#
EFI_HII_PACKAGE_STRINGS        = '0x04'
EFI_HII_PACKAGE_FORM           = '0x02'

#
# Aliases used by the generators in this file.
#
StringPackageType              = EFI_HII_PACKAGE_STRINGS
StringPackageForm              = EFI_HII_PACKAGE_FORM
StringBlockType                = EFI_HII_SIBT_STRING_UCS2
StringSkipType                 = EFI_HII_SIBT_SKIP2
#
# Text fragments used when emitting the generated .h/.c files.
# COMMENT is not defined in this span; it has to be in scope already.
#
DEFINE_STR = '#define'
COMMENT_DEFINE_STR = COMMENT + DEFINE_STR
NOT_REFERENCED = 'not referenced'
COMMENT_NOT_REFERENCED = ' ' + COMMENT + NOT_REFERENCED
CHAR_ARRAY_DEFIN = 'unsigned char'
COMMON_FILE_NAME = 'Strings'

# Matches STRING_TOKEN(NAME) and captures NAME.
# The pattern is a raw string: "\(" inside a normal literal is an invalid
# escape sequence and triggers a warning on current Python versions. The
# compiled pattern is unchanged.
STRING_TOKEN = re.compile(r'STRING_TOKEN *\(([A-Z0-9_]+) *\)', re.MULTILINE | re.UNICODE)
#
# Field sizes (in bytes) used to compute offsets and lengths of the
# generated string package.
#
EFI_HII_ARRAY_SIZE_LENGTH = 4
EFI_HII_PACKAGE_HEADER_LENGTH = 4
EFI_HII_HDR_SIZE_LENGTH = 4
EFI_HII_STRING_OFFSET_LENGTH = 4

EFI_STRING_ID_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW = 0
EFI_HII_LANGUAGE_WINDOW_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW_NUMBER = 16

# Package header + header size + string offset + all language windows
# + one string id.
EFI_HII_STRING_PACKAGE_HDR_LENGTH = (
    EFI_HII_PACKAGE_HEADER_LENGTH
    + EFI_HII_HDR_SIZE_LENGTH
    + EFI_HII_STRING_OFFSET_LENGTH
    + EFI_HII_LANGUAGE_WINDOW_LENGTH * EFI_HII_LANGUAGE_WINDOW_NUMBER
    + EFI_STRING_ID_LENGTH
)
# Banner lines for the generated .h/.c files.
# NOTE(review): the reviewed listing shows only the first three entries and no
# closing bracket; the bare '//' separator entries are assumed from the
# visible first entry -- confirm against the repository copy. Whitespace
# inside the literals is copied from the listing as shown.
H_C_FILE_HEADER = [
    '//',
    '// DO NOT EDIT -- auto-generated file',
    '//',
    '// This file is generated by the StrGather utility',
    '//',
]

# Names of the two string entries emitted first in the generated header
# (see CreateHFileContent).
LANGUAGE_NAME_STRING_NAME = '$LANGUAGE_NAME'
PRINTABLE_LANGUAGE_NAME_STRING_NAME = '$PRINTABLE_LANGUAGE_NAME'
## Convert a dec number to a hex string
#
# Format a number as upper-case hex, zero padded to Digit characters and
# prefixed with "0x".
# DecToHexStr(1000) is '0x000003E8'
# DecToHexStr(1000, 6) is '0x0003E8'
#
# @param Dec:   The number in dec format
# @param Digit: The needed digit of hex string (default 8)
#
# @retval:      The formatted hex string
#
def DecToHexStr(Dec, Digit = 8):
    # Build the format spec first ("0<Digit>X"), then apply it to the value.
    Spec = '0{0}X'.format(Digit)
    return '0x' + format(Dec, Spec)
## Convert a dec number to a hex list
#
# Split a number into Digit / 2 byte-sized hex strings, least significant
# byte first.
# DecToHexList(1000) is ['0xE8', '0x03', '0x00', '0x00']
# DecToHexList(1000, 6) is ['0xE8', '0x03', '0x00']
#
# A value that needs more than Digit hex digits is truncated to its low-order
# Digit digits, e.g. DecToHexList(0x12345, 4) is ['0x45', '0x23'].
#
# @param Dec:   The number in dec format
# @param Digit: The needed digit of hex list (default 8)
#
# @retval:      A list for formatted hex string
#
def DecToHexList(Dec, Digit = 8):
    Hex = '{0:0{1}X}'.format(Dec, Digit)
    if Digit > 0:
        # The slices below are measured from the left, so an over-long string
        # would yield the high-order digits; keep only the low-order Digit
        # characters so the offsets line up.
        Hex = Hex[-Digit:]
    return ["0x" + Hex[Bit:Bit + 2] for Bit in range(Digit - 2, -1, -2)]
## Convert an ascii string to a hex list
#
# Turn every character (or every byte, when a bytes object is given) into a
# two-digit upper-case hex string.
# AscToHexList('en-US') is ['0x65', '0x6E', '0x2D', '0x55', '0x53']
#
# @param Ascii: The ascii string (str or bytes)
#
# @retval:      A list for formatted hex string
#
def AscToHexList(Ascii):
    if isinstance(Ascii, bytes):
        # Used as-is: the elements are passed straight to the hex formatter.
        CodeList = Ascii
    else:
        CodeList = map(ord, Ascii)
    return ['0x{0:02X}'.format(Code) for Code in CodeList]
## Build one "#define NAME    0xTTTT" line
#
# Private helper of CreateHFileContent: pads the name so the token value starts
# at ValueStartPtr, falling back to a single space for over-long names.
#
def _FormatDefineLine(Prefix, Name, Token, ValueStartPtr, Suffix = ''):
    Padding = ValueStartPtr - len(DEFINE_STR + Name)
    if Padding <= 0:
        Padding = 1
    return Prefix + ' ' + Name + ' ' * Padding + DecToHexStr(Token, 4) + Suffix

## Create content of .h file
#
# Emit one #define per string token of the first language in
# UniObjectClass.LanguageDef. Referenced tokens come first as live #defines;
# unreferenced ones follow, commented out and tagged "not referenced".
#
# @param BaseName:        The basename of strings
# @param UniObjectClass   A UniObjectClass instance
# @param IsCompatibleMode Compatible mode
# @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
#
# @retval Str:            A string of .h file content
#
def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    # NOTE(review): the initial value of ValueStartPtr is not visible in the
    # reviewed listing -- confirm against the repository copy.
    ValueStartPtr = 60

    # The two language-name entries are always written, commented out.
    ReferencedLines = [
        _FormatDefineLine(COMMENT_DEFINE_STR, LANGUAGE_NAME_STRING_NAME, 0, ValueStartPtr, COMMENT_NOT_REFERENCED),
        _FormatDefineLine(COMMENT_DEFINE_STR, PRINTABLE_LANGUAGE_NAME_STRING_NAME, 1, ValueStartPtr, COMMENT_NOT_REFERENCED),
    ]
    UnusedLines = []

    # Group the referred/Unused STRING token together.
    StringList = UniObjectClass.OrderedStringList[UniObjectClass.LanguageDef[0][0]]
    for Index in range(2, len(StringList)):
        StringItem = StringList[Index]
        Name = StringItem.StringName
        Token = StringItem.Token
        if Name is None:
            # An entry without a name cannot be turned into a #define.
            continue
        # Kept as an equality test (not truthiness) to preserve behaviour.
        if StringItem.Referenced == True:
            ReferencedLines.append(_FormatDefineLine(DEFINE_STR, Name, Token, ValueStartPtr))
        else:
            UnusedLines.append(_FormatDefineLine(COMMENT_DEFINE_STR, Name, Token, ValueStartPtr, COMMENT_NOT_REFERENCED))

    # Referenced block, unused block, then one empty separator line.
    Lines = ReferencedLines + UnusedLines + ['']
    if IsCompatibleMode or UniGenCFlag:
        Lines.append('extern unsigned char ' + BaseName + 'Strings[];')

    # Every line, including the last, is terminated by a newline.
    return ''.join([Line + '\n' for Line in Lines])
## Create a complete .h file
#
# Wrap the content produced by CreateHFileContent with a trailing newline.
#
# @param BaseName:        The basename of strings
# @param UniObjectClass   A UniObjectClass instance
# @param IsCompatibleMode Compatible mode
# @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
#
# @retval Str:            A string of complete .h file
#
def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    HFile = WriteLine('', CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag))

    # The reviewed listing shows no return statement; the caller
    # (GetStringFiles) uses the result.
    return HFile
## Create a buffer to store all items in an array
#
# Each array entry is a hex string (e.g. '0x1F'); it is parsed with base 16
# and written as one unsigned byte.
#
# @param BinBuffer Buffer to contain Binary data.
# @param Array:    The array need to be formatted
#
def CreateBinBuffer(BinBuffer, Array):
    # The loop header is absent from the reviewed listing (Item would be
    # unbound); one write per entry keeps partial output on a bad entry.
    for Item in Array:
        BinBuffer.write(pack("B", int(Item, 16)))
## Create a formatted string all items in an array
#
# Join the items with ', ', starting a new line after every Width items.
# Full lines keep their trailing separator; the last line is right-stripped
# and has no newline.
#
# @param Array: The array need to be formatted
# @param Width: The number of items per line, the default value is set to 16
#
# @retval ArrayItem: A string for all formatted array items
#
def CreateArrayItem(Array, Width = 16):
    # NOTE(review): whitespace inside these literals is copied from the
    # reviewed listing, which may have collapsed runs of spaces -- confirm.
    Indent = ' '
    Separator = ', '

    FinishedLines = []
    Line = Indent
    Count = 0
    for Item in Array:
        if Count >= Width:
            # Current line is full: flush it and start a new one.
            FinishedLines.append(Line + '\n')
            Line = Indent
            Count = 0
        Line = Line + Item + Separator
        Count = Count + 1

    FinishedLines.append(Line.rstrip())
    return ''.join(FinishedLines)
## CreateCFileStringValue
#
# Create a line with string value: the value bytes are prefixed with the
# string block type and formatted by CreateArrayItem.
#
# @param Value: Value of the string (list of hex strings)
#
# @retval Str:  A formatted string with string value
#
def CreateCFileStringValue(Value):
    Value = [StringBlockType] + Value
    Str = WriteLine('', CreateArrayItem(Value))

    # The reviewed listing shows no return statement; the caller
    # (CreateCFileContent) concatenates the result.
    return Str
## GetFilteredLanguage
#
# apply get best language rules to the UNI language code list
#
# For every filter entry, in order:
#   1. an exact match in the UNI list is taken;
#   2. otherwise the first UNI language with the same/equivalent primary tag;
#   3. otherwise the default ("en"): the first 'en-'/'eng-' UNI language if
#      there is one, else the bare default tag.
#
# @param UniLanguageList:    language code definition list in *.UNI file
# @param LanguageFilterList: language code filter list of RFC4646 format in DSC file
#
# @retval UniLanguageListFiltered: the filtered language code
#
def GetFilteredLanguage(UniLanguageList, LanguageFilterList):
    # if filter list is empty, then consider there is no filter
    # (the caller's own list object is returned, not a copy)
    if LanguageFilterList == []:
        return UniLanguageList

    UniLanguageListFiltered = []

    def AddOnce(Tag):
        if Tag not in UniLanguageListFiltered:
            UniLanguageListFiltered.append(Tag)

    def PrimaryTagOf(Tag):
        # Text before the first '-' (lower-cased), or the tag itself; a
        # three-letter result is looked up in LangConvTable.
        DashPos = Tag.find('-')
        Primary = Tag[0:DashPos].lower() if DashPos != -1 else Tag
        if len(Primary) == 3:
            Primary = LangConvTable.get(Primary)
        return Primary

    for Language in LanguageFilterList:
        # first check for exact match
        if Language in UniLanguageList:
            AddOnce(Language)
            continue

        # find the first one with the same/equivalent primary tag
        Wanted = PrimaryTagOf(Language)
        Equivalent = next((Uni for Uni in UniLanguageList if PrimaryTagOf(Uni) == Wanted), None)
        if Equivalent is not None:
            AddOnce(Equivalent)
            continue

        # Here is rule 3 for "get best language"
        # If tag is not listed in the Unicode file, the default ("en") tag should be used for that language
        # for better processing, find the one that best suit for it.
        DefaultTag = 'en'
        if DefaultTag in UniLanguageListFiltered:
            continue
        # check whether language code with primary code equivalent with DefaultTag already in the list, if so, use that
        English = next((Uni for Uni in UniLanguageList
                        if Uni.startswith('en-') or Uni.startswith('eng-')), None)
        AddOnce(DefaultTag if English is None else English)

    return UniLanguageListFiltered
## Create the string package of one language
#
# Private helper of CreateCFileContent. Builds the C text of one package
# (header, string blocks, end block) and, when UniBinBuffer is given, appends
# the same package in binary form to it.
#
# @param UniObjectClass A UniObjectClass instance
# @param Language       Language code whose strings are emitted
# @param UniBinBuffer   Buffer receiving the binary package, or None
#
# @retval (Str, ArrayLength): package text and package length in bytes
#
def _CreateLanguagePackage(UniObjectClass, Language, UniBinBuffer):
    StringBuffer = BytesIO()
    try:
        StrStringValue = ''
        ArrayLength = 0
        NumberOfUseOtherLangDef = 0
        # NOTE(review): Index is only read in the visible listing; its
        # initialisation and the two increments below are restored from the
        # numbering gaps -- confirm against the repository copy.
        Index = 0
        for IndexJ in range(1, len(UniObjectClass.OrderedStringList[Language])):
            Item = UniObjectClass.OrderedStringListByToken[Language][IndexJ]

            Name = Item.StringName
            Value = Item.StringValueByteList
            Referenced = Item.Referenced
            Token = Item.Token
            UseOtherLangDef = Item.UseOtherLangDef

            if UseOtherLangDef != '' and Referenced:
                # String comes from another language: only count it, a single
                # skip block is emitted for the whole run.
                NumberOfUseOtherLangDef = NumberOfUseOtherLangDef + 1
                Index = Index + 1
                continue

            if NumberOfUseOtherLangDef > 0:
                SkipBlock = [StringSkipType] + DecToHexList(NumberOfUseOtherLangDef, 4)
                StrStringValue = WriteLine(StrStringValue, CreateArrayItem(SkipBlock))
                CreateBinBuffer(StringBuffer, SkipBlock)
                NumberOfUseOtherLangDef = 0
                ArrayLength = ArrayLength + 3
            if Referenced and Item.Token > 0:
                Index = Index + 1
                StrStringValue = WriteLine(StrStringValue, "// %s: %s:%s" % (DecToHexStr(Index, 4), Name, DecToHexStr(Token, 4)))
                StrStringValue = Write(StrStringValue, CreateCFileStringValue(Value))
                CreateBinBuffer(StringBuffer, [StringBlockType] + Value)
                ArrayLength = ArrayLength + Item.Length + 1 # 1 is for the length of string type

        #
        # EFI_HII_PACKAGE_HEADER
        #
        Offset = EFI_HII_STRING_PACKAGE_HDR_LENGTH + len(Language) + 1
        ArrayLength = Offset + ArrayLength + 1

        #
        # Create PACKAGE HEADER
        #
        # NOTE(review): the last operand (one zero byte after the language
        # name) is missing from the listing, which ends on a dangling '+';
        # it is restored to match the "+ 1" in Offset -- confirm.
        List = DecToHexList(ArrayLength, 6) + \
               [StringPackageType] + \
               DecToHexList(Offset) + \
               DecToHexList(Offset) + \
               DecToHexList(EFI_HII_LANGUAGE_WINDOW, EFI_HII_LANGUAGE_WINDOW_LENGTH * 2) * EFI_HII_LANGUAGE_WINDOW_NUMBER + \
               DecToHexList(EFI_STRING_ID, 4) + \
               AscToHexList(Language) + \
               DecToHexList(0, 2)
        Str = WriteLine('', '// PACKAGE HEADER\n')
        Str = WriteLine(Str, CreateArrayItem(List, 16) + '\n')

        #
        # Create PACKAGE DATA
        #
        Str = WriteLine(Str, '// PACKAGE DATA\n')
        Str = Write(Str, StrStringValue)

        #
        # Add an EFI_HII_SIBT_END at last
        #
        Str = WriteLine(Str, ' ' + EFI_HII_SIBT_END + ",")

        #
        # Create binary UNI string
        # (CreateCFile passes None, so the buffer must be optional)
        #
        if UniBinBuffer:
            CreateBinBuffer(UniBinBuffer, List)
            UniBinBuffer.write(StringBuffer.getvalue())
            UniBinBuffer.write(pack("B", int(EFI_HII_SIBT_END, 16)))
    finally:
        # Release the scratch buffer even if formatting fails.
        StringBuffer.close()

    return Str, ArrayLength

## Create content of .c file
#
# Emit "unsigned char <BaseName>Strings[] = {", the overall length header,
# and one string package per language that passes the platform language
# filter. The closing "};" is added by CreateCFileEnd.
#
# @param BaseName:        The basename of strings
# @param UniObjectClass   A UniObjectClass instance
# @param IsCompatibleMode Compatible mode
# @param UniBinBuffer     UniBinBuffer to contain UniBinary data.
# @param FilterInfo       Platform language filter information:
#                         [IsEdk2Module, LanguageFilterList]
#
# @retval Str:            A string of .c file content
#
def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer, FilterInfo):
    #
    # Init array length
    #
    TotalLength = EFI_HII_ARRAY_SIZE_LENGTH
    Str = ''

    EDK2Module = FilterInfo[0]
    if EDK2Module:
        LanguageFilterList = FilterInfo[1]
    else:
        # EDK module is using ISO639-2 format filter, convert to the RFC4646 format
        LanguageFilterList = [LangConvTable.get(F.lower()) for F in FilterInfo[1]]

    UniLanguageList = [LanguageDef[0] for LanguageDef in UniObjectClass.LanguageDef]
    UniLanguageListFiltered = GetFilteredLanguage(UniLanguageList, LanguageFilterList)

    #
    # Create lines for each language's strings
    #
    for Language in UniLanguageList:
        if Language not in UniLanguageListFiltered:
            continue
        PackageStr, ArrayLength = _CreateLanguagePackage(UniObjectClass, Language, UniBinBuffer)
        Str = Write(Str, PackageStr)
        TotalLength = TotalLength + ArrayLength

    #
    # Create line for string variable name
    # "unsigned char $(BaseName)Strings[] = {"
    #
    AllStr = WriteLine('', CHAR_ARRAY_DEFIN + ' ' + BaseName + COMMON_FILE_NAME + '[] = {\n')

    if IsCompatibleMode:
        #
        # Create FRAMEWORK_EFI_HII_PACK_HEADER in compatible mode
        #
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Length')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength + 2)) + '\n')
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Type')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(2, 4)) + '\n')
    else:
        #
        # Create whole array length in UEFI mode
        #
        AllStr = WriteLine(AllStr, '// STRGATHER_OUTPUT_HEADER')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength)) + '\n')

    #
    # Join package data
    #
    AllStr = Write(AllStr, Str)

    return AllStr
## Create end of .c file
#
# Produce the text that closes the array opened by CreateCFileContent.
#
# @retval Str: A string of .c file end
#
def CreateCFileEnd():
    # Same result as Write('', '};'); the reviewed listing shows no return
    # statement although CreateCFile uses the value.
    return ''.join(['', '};'])
## Create a complete .c file
#
# Content from CreateCFileContent followed by the closing line from
# CreateCFileEnd, each terminated by a newline. No binary buffer is produced
# on this path (None is passed for it).
#
# @param BaseName:        The basename of strings
# @param UniObjectClass   A UniObjectClass instance
# @param IsCompatibleMode Compatible Mode
# @param FilterInfo       Platform language filter information
#
# @retval CFile:          A string of complete .c file
#
def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
    # CFile must start empty; the reviewed listing reads it before any
    # assignment.
    CFile = ''
    CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
    CFile = WriteLine(CFile, CreateCFileEnd())
    return CFile
## Get a list for all files
#
# Resolve every source file against the include directories and return the
# full path of the first existing match, unless its extension is listed in
# SkipList.
#
# @param SourceFileList: Source file objects; each must expose a .Path attribute
# @param IncludeList:    A list of all path to be searched
# @param SkipList:       A list of all types of file could be skipped
#                        (extensions such as '.inf'; compared case-insensitively)
#
# @retval FileList:      A list of all files found
#
def GetFileList(SourceFileList, IncludeList, SkipList):
    if IncludeList is None:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, "Include path for unicode file is not defined")

    FileList = []
    if SkipList is None:
        SkipList = []
    SkipExtensions = [Skip.upper() for Skip in SkipList]

    for File in SourceFileList:
        for Dir in IncludeList:
            if not os.path.exists(Dir):
                continue
            # Use a separate name for the joined path: rebinding the loop
            # variable made the next directory evaluate File.Path on a str.
            FullPath = os.path.join(Dir, File.Path)
            #
            # Ignore Dir
            #
            if not os.path.isfile(FullPath):
                continue
            #
            # Ignore file listed in skip list
            #
            if os.path.splitext(FullPath)[1].upper() in SkipExtensions:
                EdkLogger.verbose("Skipped %s for string token uses search" % FullPath)
            else:
                FileList.append(FullPath)

            # Only the first include directory containing the file is used.
            break

    return FileList
## Search whether all string defined in UniObjectClass are referenced
#
# Scan every file for STRING_TOKEN(NAME) uses, mark each NAME as referenced
# and finally let the object re-number its tokens.
#
# @param UniObjectClass:  Input UniObjectClass
# @param FileList:        Search path list
# @param IsCompatibleMode Compatible Mode (not used by this function)
#
# @retval UniObjectClass: UniObjectClass after searched
#
def SearchString(UniObjectClass, FileList, IsCompatibleMode):
    # NOTE(review): the condition guarding this early return is not visible in
    # the reviewed listing; an empty file list is assumed -- confirm. On this
    # path ReToken() is not called.
    if FileList == []:
        return UniObjectClass

    for File in FileList:
        if not os.path.isfile(File):
            continue
        # Close the handle as soon as the file is scanned; it used to stay
        # open for every source file.
        with open(File, 'r') as Lines:
            for Line in Lines:
                for StrName in STRING_TOKEN.findall(Line):
                    EdkLogger.debug(EdkLogger.DEBUG_5, "Found string identifier: " + StrName)
                    UniObjectClass.SetStringReferenced(StrName)

    UniObjectClass.ReToken()

    return UniObjectClass
## Gather strings and generate the .h/.c contents
#
# This function is used for UEFI2.1 spec
#
# @param UniFilList:       .uni file objects (sorted by their .File attribute)
# @param SourceFileList:   Source file objects searched for STRING_TOKEN uses
# @param IncludeList:      Directories used to resolve the source files
# @param IncludePathList:  Include paths handed to UniFileClassObject
# @param SkipList:         File extensions excluded from the search
# @param BaseName:         The basename of strings
# @param IsCompatibleMode  Compatible mode
# @param ShellMode         Forces compatible parsing of the .uni files
# @param UniGenCFlag       UniString is generated into AutoGen C file when it is set to True
# @param UniGenBinBuffer   Optional buffer receiving the binary string packages
# @param FilterInfo        Platform language filter information;
#                          None means [True, []] (EDK2 module, no filter)
#
# @retval (HFile, CFile):  .h content and .c content (CFile is None when no
#                          C file is requested)
#
def GetStringFiles(UniFilList, SourceFileList, IncludeList, IncludePathList, SkipList, BaseName, IsCompatibleMode = False, ShellMode = False, UniGenCFlag = True, UniGenBinBuffer = None, FilterInfo = None):
    # A list default would be shared between calls; build a fresh one instead.
    if FilterInfo is None:
        FilterInfo = [True, []]

    if len(UniFilList) > 0:
        SortedUniFiles = sorted(UniFilList, key = lambda x: x.File)
        if ShellMode:
            #
            # support ISO 639-2 codes in .UNI files of EDK Shell
            #
            Uni = UniFileClassObject(SortedUniFiles, True, IncludePathList)
        else:
            Uni = UniFileClassObject(SortedUniFiles, IsCompatibleMode, IncludePathList)
    else:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, 'No unicode files given')

    FileList = GetFileList(SourceFileList, IncludeList, SkipList)

    Uni = SearchString(Uni, sorted(FileList), IsCompatibleMode)

    HFile = CreateHFile(BaseName, Uni, IsCompatibleMode, UniGenCFlag)
    CFile = None
    if IsCompatibleMode or UniGenCFlag:
        CFile = CreateCFile(BaseName, Uni, IsCompatibleMode, FilterInfo)
    # NOTE(review): the guard on the binary pass is not visible in the
    # reviewed listing; it is assumed to depend on the buffer being given.
    if UniGenBinBuffer:
        CreateCFileContent(BaseName, Uni, IsCompatibleMode, UniGenBinBuffer, FilterInfo)

    return HFile, CFile
## Write an item
#
# @param Target: Text written so far
# @param Item:   Text to append
#
# @retval:       Target followed by Item
#
def Write(Target, Item):
    Pieces = (Target, Item)
    return ''.join(Pieces)
## Write an item with a break line
#
# @param Target: Text written so far
# @param Item:   Text to append
#
# @retval:       Target followed by Item and a newline
#
def WriteLine(Target, Item):
    Pieces = (Target, Item, '\n')
    return ''.join(Pieces)
# This acts like the main() function for the script, unless it is 'import'ed into another
# script.
#
# NOTE(review): this demo passes plain path strings, while GetStringFiles
# sorts the .uni entries by a .File attribute and GetFileList reads .Path from
# the source entries -- the inputs look stale and need file objects before the
# demo can run. Confirm the intended types.
if __name__ == '__main__':
    EdkLogger.info('start')

    UniFileList = [
        r'C:\\Edk\\Strings2.uni',
        r'C:\\Edk\\Strings.uni'
    ]

    SrcFileList = []
    for Root, Dirs, Files in os.walk('C:\\Edk'):
        for File in Files:
            SrcFileList.append(File)

    # NOTE(review): the definition of IncludeList is not visible in the
    # reviewed listing; the walked directory is assumed -- confirm.
    IncludeList = [
        r'C:\\Edk'
    ]

    SkipList = ['.inf', '.uni']
    BaseName = 'DriverSample'
    # GetStringFiles takes IncludePathList between IncludeList and SkipList;
    # the old call omitted it, shifting every later argument by one slot.
    (h, c) = GetStringFiles(UniFileList, SrcFileList, IncludeList, IncludeList, SkipList, BaseName, True)

    # Context managers make sure both outputs are flushed and closed.
    with open('unistring.h', 'w') as hfile:
        hfile.write(h)
    with open('unistring.c', 'w') as cfile:
        cfile.write(c)

    EdkLogger.info('end')