2 # This file is used to parse a strings file and create or add to a string database
5 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
import os
import re
import Common.EdkLogger as EdkLogger
from Common.BuildToolError import *
from UniClassObject import *
from StringIO import StringIO
from struct import pack, unpack
from Common.LongFilePathSupport import OpenLongFilePath as open
##
# Static definitions
#
# Block and package type codes are kept as hex *strings*: they are emitted
# verbatim into the generated C array text and converted with int(x, 16)
# when the binary buffer is written.
#
EFI_HII_SIBT_END = '0x00'
EFI_HII_SIBT_STRING_SCSU = '0x10'
EFI_HII_SIBT_STRING_SCSU_FONT = '0x11'
EFI_HII_SIBT_STRINGS_SCSU = '0x12'
EFI_HII_SIBT_STRINGS_SCSU_FONT = '0x13'
EFI_HII_SIBT_STRING_UCS2 = '0x14'
EFI_HII_SIBT_STRING_UCS2_FONT = '0x15'
EFI_HII_SIBT_STRINGS_UCS2 = '0x16'
EFI_HII_SIBT_STRINGS_UCS2_FONT = '0x17'
EFI_HII_SIBT_DUPLICATE = '0x20'
EFI_HII_SIBT_SKIP2 = '0x21'
EFI_HII_SIBT_SKIP1 = '0x22'
EFI_HII_SIBT_EXT1 = '0x30'
EFI_HII_SIBT_EXT2 = '0x31'
EFI_HII_SIBT_EXT4 = '0x32'
EFI_HII_SIBT_FONT = '0x40'

EFI_HII_PACKAGE_STRINGS = '0x04'
EFI_HII_PACKAGE_FORM = '0x02'

# Aliases for the codes this module actually emits
StringPackageType = EFI_HII_PACKAGE_STRINGS
StringPackageForm = EFI_HII_PACKAGE_FORM
StringBlockType = EFI_HII_SIBT_STRING_UCS2
StringSkipType = EFI_HII_SIBT_SKIP2

# NOTE(review): COMMENT is used below but its definition is not present in the
# visible source; '// ' is assumed from the '//'-prefixed header lines -- confirm.
COMMENT = '// '
DEFINE_STR = '#define'
COMMENT_DEFINE_STR = COMMENT + DEFINE_STR
NOT_REFERENCED = 'not referenced'
COMMENT_NOT_REFERENCED = ' ' + COMMENT + NOT_REFERENCED
CHAR_ARRAY_DEFIN = 'unsigned char'
COMMON_FILE_NAME = 'Strings'
# Raw string so that '\(' reaches the regex engine without relying on Python
# passing unknown string escapes through; the compiled pattern is unchanged.
STRING_TOKEN = re.compile(r'STRING_TOKEN *\(([A-Z0-9_]+) *\)', re.MULTILINE | re.UNICODE)

# Field sizes, in bytes, of the generated array and string package header
EFI_HII_ARRAY_SIZE_LENGTH = 4
EFI_HII_PACKAGE_HEADER_LENGTH = 4
EFI_HII_HDR_SIZE_LENGTH = 4
EFI_HII_STRING_OFFSET_LENGTH = 4
# NOTE(review): EFI_STRING_ID is used by CreateCFileContent but its definition
# is not present in the visible source; 1 is assumed (the token value written
# for $PRINTABLE_LANGUAGE_NAME in the .h file) -- confirm.
EFI_STRING_ID = 1
EFI_STRING_ID_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW = 0
EFI_HII_LANGUAGE_WINDOW_LENGTH = 2
EFI_HII_LANGUAGE_WINDOW_NUMBER = 16
EFI_HII_STRING_PACKAGE_HDR_LENGTH = (
    EFI_HII_PACKAGE_HEADER_LENGTH
    + EFI_HII_HDR_SIZE_LENGTH
    + EFI_HII_STRING_OFFSET_LENGTH
    + EFI_HII_LANGUAGE_WINDOW_LENGTH * EFI_HII_LANGUAGE_WINDOW_NUMBER
    + EFI_STRING_ID_LENGTH
)

# Comment lines written at the top of generated .h/.c files
H_C_FILE_HEADER = [
    '//',
    '// DO NOT EDIT -- auto-generated file',
    '// This file is generated by the StrGather utility',
]
LANGUAGE_NAME_STRING_NAME = '$LANGUAGE_NAME'
PRINTABLE_LANGUAGE_NAME_STRING_NAME = '$PRINTABLE_LANGUAGE_NAME'
## Convert a dec number to a hex string
#
# Convert a dec number to a formatted, zero-padded, upper-case hex string
# that is Digit hex digits wide and starts with "0x"
# DecToHexStr(1000) is '0x000003E8'
# DecToHexStr(1000, 6) is '0x0003E8'
#
# @param Dec:    The number in dec format
# @param Digit:  The number of hex digits wanted, 8 by default
#
# @retval:       The formatted hex string
#
def DecToHexStr(Dec, Digit = 8):
    # Build the format spec first ("08X" for the default width), then apply it
    Spec = '0{0}X'.format(Digit)
    return '0x' + format(Dec, Spec)
## Convert a dec number to a hex list
#
# Convert a dec number to a list of formatted hex bytes, least significant
# byte first. Digit is the width in hex digits, so Digit // 2 bytes are produced
# DecToHexList(1000) is ['0xE8', '0x03', '0x00', '0x00']
# DecToHexList(1000, 6) is ['0xE8', '0x03', '0x00']
#
# A value that does not fit in Digit hex digits is reduced to its low-order
# bytes, like a fixed-width integer store. Slicing the formatted string, as
# was done before, returned the high-order digits in that case.
#
# @param Dec:    The number in dec format
# @param Digit:  The number of hex digits wanted, 8 by default
#
# @retval:       A list of formatted hex strings, one per byte
#
def DecToHexList(Dec, Digit = 8):
    return ['0x{0:02X}'.format((Dec >> (8 * Index)) & 0xFF) for Index in range(Digit // 2)]
## Convert an ASCII string to a hex list
#
# Convert an ASCII string to a list with one formatted hex string per character
# AscToHexList('en-US') is ['0x65', '0x6E', '0x2D', '0x55', '0x53']
#
# @param Ascii:  The ASCII string
#
# @retval:       A list of formatted hex strings
#
def AscToHexList(Ascii):
    HexList = []
    for Char in Ascii:
        HexList.append('0x{0:02X}'.format(ord(Char)))
    return HexList
## Create header of .h file
#
# Create the header of the .h file: the generated-file banner followed by the
# opening lines of the include guard built from the upper-cased BaseName
#
# @param BaseName: The basename of strings
#
# @retval Str:     A string for .h file header
#
def CreateHFileHeader(BaseName):
    # Start from an empty string; the accumulator was previously read before
    # it was ever assigned.
    Str = ''
    for Item in H_C_FILE_HEADER:
        Str = WriteLine(Str, Item)
    Guard = '_' + BaseName.upper() + '_STRINGS_DEFINE_H_'
    Str = WriteLine(Str, '#ifndef ' + Guard)
    Str = WriteLine(Str, '#define ' + Guard)
    return Str
# Format one "<Prefix> <Name><padding><token><Suffix>" line of the .h file.
# The token is aligned at column ValueStartPtr; a name too long to be aligned
# is followed by a single space instead.
def _FormatDefineLine(Prefix, Name, Token, ValueStartPtr, Suffix = ''):
    Padding = ValueStartPtr - len(DEFINE_STR + Name)
    if Padding <= 0:
        Padding = 1
    return Prefix + ' ' + Name + ' ' * Padding + DecToHexStr(Token, 4) + Suffix

## Create content of .h file
#
# Create content of .h file: one "#define <name> <token>" line per string.
# Referenced strings come first; unreferenced ones follow, commented out and
# marked "not referenced". The two language-name entries are always written
# commented out with tokens 0 and 1.
#
# @param BaseName:          The basename of strings
# @param UniObjectClass     A UniObjectClass instance
# @param IsCompatibleMode   Compatible mode
# @param UniGenCFlag        UniString is generated into AutoGen C file when it is set to True
#
# @retval Str:              A string of .h file content
#
def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    # NOTE(review): ValueStartPtr is used by the visible source but never
    # defined there; column 60 is assumed -- confirm.
    ValueStartPtr = 60
    Str = ''
    UnusedStr = ''

    Str = WriteLine(Str, _FormatDefineLine(COMMENT_DEFINE_STR, LANGUAGE_NAME_STRING_NAME, 0, ValueStartPtr, COMMENT_NOT_REFERENCED))
    Str = WriteLine(Str, _FormatDefineLine(COMMENT_DEFINE_STR, PRINTABLE_LANGUAGE_NAME_STRING_NAME, 1, ValueStartPtr, COMMENT_NOT_REFERENCED))

    # Group the referred/unused STRING tokens together. Only the first
    # language's list is walked, starting at index 2 (after the two
    # language-name entries written above).
    StringList = UniObjectClass.OrderedStringList[UniObjectClass.LanguageDef[0][0]]
    for Index in range(2, len(StringList)):
        StringItem = StringList[Index]
        Name = StringItem.StringName
        if Name is None:
            # Nothing can be #define'd without a name
            continue
        if StringItem.Referenced:
            Str = WriteLine(Str, _FormatDefineLine(DEFINE_STR, Name, StringItem.Token, ValueStartPtr))
        else:
            UnusedStr = WriteLine(UnusedStr, _FormatDefineLine(COMMENT_DEFINE_STR, Name, StringItem.Token, ValueStartPtr, COMMENT_NOT_REFERENCED))

    Str = ''.join([Str, UnusedStr])

    Str = WriteLine(Str, '')
    if IsCompatibleMode or UniGenCFlag:
        Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
    return Str
## Create a complete .h file
#
# Create a complete .h file from the generated file content
#
# @param BaseName:          The basename of strings
# @param UniObjectClass     A UniObjectClass instance
# @param IsCompatibleMode   Compatible mode
# @param UniGenCFlag        UniString is generated into AutoGen C file when it is set to True
#
# @retval Str:              A string of complete .h file
#
def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
    HFile = WriteLine('', CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag))
    # The documented result was previously built but never handed back
    return HFile
## Create header of .c file
#
# Create the header of the .c file: the generated-file banner, one line per
# H_C_FILE_HEADER entry
#
# @retval Str:     A string for .c file header
#
def CreateCFileHeader():
    # Start from an empty string; the accumulator was previously read before
    # it was ever assigned.
    Str = ''
    for Item in H_C_FILE_HEADER:
        Str = WriteLine(Str, Item)
    return Str
## Create a buffer to store all items in an array
#
# Each item is a hex string such as '0x14'; it is converted to its integer
# value and written to BinBuffer as a single unsigned byte
#
# @param BinBuffer   Buffer to contain Binary data.
# @param Array:      The array need to be formatted
#
def CreateBinBuffer(BinBuffer, Array):
    # The loop over Array was missing, leaving Item unbound
    for Item in Array:
        BinBuffer.write(pack("B", int(Item, 16)))
## Create a formatted string all items in an array
#
# Use ',' to join each item in an array, and break a new line when reaching the width (default is 16)
#
# @param Array:      The array need to be formatted
# @param Width:      The number of items per line, the default value is set to 16
#
# @retval ArrayItem: A string for all formatted array items
#
def CreateArrayItem(Array, Width = 16):
    MaxLength = Width
    Index = 0          # number of items already placed on the current line
    # NOTE(review): the initial indent is inferred from the indent used when a
    # new line is started below -- confirm.
    Line = ' '
    ArrayItem = ''

    for Item in Array:
        if Index < MaxLength:
            Line = Line + Item + ', '
            Index = Index + 1
        else:
            # Current line is full: flush it and start the next one with Item
            ArrayItem = WriteLine(ArrayItem, Line)
            Line = ' ' + Item + ', '
            Index = 1
    # The last line keeps its trailing comma but loses the trailing blank
    ArrayItem = Write(ArrayItem, Line.rstrip())

    return ArrayItem
## CreateCFileStringValue
#
# Create the array text for one string: the string block type followed by the
# bytes of the string value
#
# @param Value:  Value of the string, as a list of formatted hex bytes
#
# @retval Str:   A formatted string with string value
#
def CreateCFileStringValue(Value):
    # Build a new list so the caller's Value is left untouched
    Value = [StringBlockType] + Value
    Str = WriteLine('', CreateArrayItem(Value))
    # The documented result was previously built but never handed back
    return Str
# Return the lower-cased primary tag of a language code (the part before the
# first '-'). A 3-letter tag is translated through LangConvTable; when the
# table has no entry the tag itself is kept, so that two unknown tags are not
# both turned into None and treated as equal.
def _GetPrimaryTag(Language):
    PrimaryTag = Language.split('-', 1)[0].lower()
    if len(PrimaryTag) == 3:
        PrimaryTag = LangConvTable.get(PrimaryTag, PrimaryTag)
    return PrimaryTag

## GetFilteredLanguage
#
# apply get best language rules to the UNI language code list
#
# For every language in the filter list, in order:
#   1. an exact match in the UNI list is taken;
#   2. otherwise the first UNI language with the same/equivalent primary tag is taken;
#   3. otherwise the default ("en") tag is used: the first UNI language starting
#      with 'en-' or 'eng-' if there is one, else the literal 'en'.
# A language is never added twice.
#
# @param UniLanguageList:    language code definition list in *.UNI file
# @param LanguageFilterList: language code filter list of RFC4646 format in DSC file
#
# @retval UniLanguageListFiltered:   the filtered language code
#
def GetFilteredLanguage(UniLanguageList, LanguageFilterList):
    # if filter list is empty, then consider there is no filter
    if not LanguageFilterList:
        return UniLanguageList

    UniLanguageListFiltered = []
    for Language in LanguageFilterList:
        # first check for exact match
        if Language in UniLanguageList:
            if Language not in UniLanguageListFiltered:
                UniLanguageListFiltered.append(Language)
            continue

        # find the first one with the same/equivalent primary tag
        PrimaryTag = _GetPrimaryTag(Language)
        for UniLanguage in UniLanguageList:
            if _GetPrimaryTag(UniLanguage) == PrimaryTag:
                if UniLanguage not in UniLanguageListFiltered:
                    UniLanguageListFiltered.append(UniLanguage)
                break
        else:
            # Here is rule 3 for "get best language"
            # If tag is not listed in the Unicode file, the default ("en") tag should be used for that language
            # for better processing, find the one that best suit for it.
            DefaultTag = 'en'
            if DefaultTag not in UniLanguageListFiltered:
                # check whether language code with primary code equivalent with DefaultTag already in the list, if so, use that
                for UniLanguage in UniLanguageList:
                    if UniLanguage.startswith('en-') or UniLanguage.startswith('eng-'):
                        if UniLanguage not in UniLanguageListFiltered:
                            UniLanguageListFiltered.append(UniLanguage)
                        break
                else:
                    UniLanguageListFiltered.append(DefaultTag)
    return UniLanguageListFiltered
## Create content of .c file
#
# Create content of .c file: for every language kept by the filter, a string
# package (header, string blocks, end block) rendered as C array text, all
# preceded by the array declaration and the total-length header. When
# UniBinBuffer is given, the same package bytes are also written to it.
#
# @param BaseName:          The basename of strings
# @param UniObjectClass     A UniObjectClass instance
# @param IsCompatibleMode   Compatible mode
# @param UniBinBuffer       UniBinBuffer to contain UniBinary data, or None
# @param FilterInfo         Platform language filter information:
#                           [EDK2Module flag, language filter list]
#
# @retval Str:              A string of .c file content
#
def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer, FilterInfo):
    #
    # Init array length
    #
    TotalLength = EFI_HII_ARRAY_SIZE_LENGTH
    Str = ''

    EDK2Module = FilterInfo[0]
    if EDK2Module:
        LanguageFilterList = FilterInfo[1]
    else:
        # EDK module is using ISO639-2 format filter, convert to the RFC4646 format
        LanguageFilterList = [LangConvTable.get(F.lower()) for F in FilterInfo[1]]

    UniLanguageList = [LanguageDef[0] for LanguageDef in UniObjectClass.LanguageDef]
    UniLanguageListFiltered = GetFilteredLanguage(UniLanguageList, LanguageFilterList)

    #
    # Create lines for each language's strings
    #
    for Language in UniLanguageList:
        if Language not in UniLanguageListFiltered:
            continue

        StringBuffer = StringIO()
        StrStringValue = ''
        ArrayLength = 0
        NumberOfUseOtherLangDef = 0
        # NOTE(review): Index is printed in the per-string comment but its
        # bookkeeping is not present in the visible source; it is assumed to
        # count the referenced strings seen so far -- confirm.
        Index = 0
        for IndexJ in range(1, len(UniObjectClass.OrderedStringList[Language])):
            Item = UniObjectClass.OrderedStringListByToken[Language][IndexJ]

            Name = Item.StringName
            Value = Item.StringValueByteList
            Referenced = Item.Referenced
            Token = Item.Token
            UseOtherLangDef = Item.UseOtherLangDef

            if UseOtherLangDef != '' and Referenced:
                # String is defined by another language: only count it, the
                # whole run is emitted as one skip block below
                NumberOfUseOtherLangDef = NumberOfUseOtherLangDef + 1
                Index = Index + 1
            else:
                if NumberOfUseOtherLangDef > 0:
                    SkipBlock = [StringSkipType] + DecToHexList(NumberOfUseOtherLangDef, 4)
                    StrStringValue = WriteLine(StrStringValue, CreateArrayItem(SkipBlock))
                    CreateBinBuffer (StringBuffer, SkipBlock)
                    NumberOfUseOtherLangDef = 0
                    ArrayLength = ArrayLength + 3   # block type byte + 2-byte skip count
                if Referenced and Item.Token > 0:
                    Index = Index + 1
                    StrStringValue = WriteLine(StrStringValue, "// %s: %s:%s" % (DecToHexStr(Index, 4), Name, DecToHexStr(Token, 4)))
                    StrStringValue = Write(StrStringValue, CreateCFileStringValue(Value))
                    CreateBinBuffer (StringBuffer, [StringBlockType] + Value)
                    ArrayLength = ArrayLength + Item.Length + 1 # 1 is for the length of string type

        #
        # EFI_HII_PACKAGE_HEADER
        #
        Offset = EFI_HII_STRING_PACKAGE_HDR_LENGTH + len(Language) + 1   # 1 is for the language terminator
        ArrayLength = Offset + ArrayLength + 1                           # 1 is for the end block

        #
        # Create PACKAGE HEADER
        #
        Str = WriteLine(Str, '// PACKAGE HEADER\n')
        TotalLength = TotalLength + ArrayLength

        # NOTE(review): the last list element is missing from the visible source
        # (the expression ends on a continuation). A single zero byte is assumed,
        # matching the "+ 1" added to Offset after the language name -- confirm.
        List = DecToHexList(ArrayLength, 6) + \
               [StringPackageType] + \
               DecToHexList(Offset) + \
               DecToHexList(Offset) + \
               DecToHexList(EFI_HII_LANGUAGE_WINDOW, EFI_HII_LANGUAGE_WINDOW_LENGTH * 2) * EFI_HII_LANGUAGE_WINDOW_NUMBER + \
               DecToHexList(EFI_STRING_ID, 4) + \
               AscToHexList(Language) + \
               DecToHexList(0, 2)
        Str = WriteLine(Str, CreateArrayItem(List, 16) + '\n')

        #
        # Create PACKAGE DATA
        #
        Str = WriteLine(Str, '// PACKAGE DATA\n')
        Str = Write(Str, StrStringValue)

        #
        # Add an EFI_HII_SIBT_END at last
        #
        Str = WriteLine(Str, ' ' + EFI_HII_SIBT_END + ",")

        #
        # Create binary UNI string
        #
        # CreateCFile passes None here, so the binary copy must be optional
        if UniBinBuffer is not None:
            CreateBinBuffer (UniBinBuffer, List)
            UniBinBuffer.write (StringBuffer.getvalue())
            UniBinBuffer.write (pack("B", int(EFI_HII_SIBT_END, 16)))

    #
    # Create line for string variable name
    # "unsigned char $(BaseName)Strings[] = {"
    #
    AllStr = WriteLine('', CHAR_ARRAY_DEFIN + ' ' + BaseName + COMMON_FILE_NAME + '[] = {\n')

    if IsCompatibleMode:
        #
        # Create FRAMEWORK_EFI_HII_PACK_HEADER in compatible mode
        #
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Length')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength + 2)) + '\n')
        AllStr = WriteLine(AllStr, '// FRAMEWORK PACKAGE HEADER Type')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(2, 4)) + '\n')
    else:
        #
        # Create whole array length in UEFI mode
        #
        AllStr = WriteLine(AllStr, '// STRGATHER_OUTPUT_HEADER')
        AllStr = WriteLine(AllStr, CreateArrayItem(DecToHexList(TotalLength)) + '\n')

    #
    # Join package data
    #
    AllStr = Write(AllStr, Str)

    return AllStr
## Create end of .c file
#
# Create end of .c file: the closing of the string array definition
#
# @retval Str:     A string of .c file end
#
def CreateCFileEnd():
    Str = Write('', '};')
    # The documented result was previously built but never handed back
    return Str
## Create a complete .c file
#
# Create a complete .c file: the array content followed by the array end.
# No binary buffer is filled here (None is passed to CreateCFileContent).
# The banner from CreateCFileHeader() is not included; that call is disabled
# in the source.
#
# @param BaseName:          The basename of strings
# @param UniObjectClass     A UniObjectClass instance
# @param IsCompatibleMode   Compatible Mode
# @param FilterInfo         Platform language filter information
#
# @retval CFile:            A string of complete .c file
#
def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
    # Start from an empty string; the accumulator was previously read before
    # it was ever assigned.
    CFile = ''
    CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
    CFile = WriteLine(CFile, CreateCFileEnd())
    return CFile
## Get a list for all files
#
# Resolve every source file against the include directories and collect the
# full paths of those that exist and whose extension is not in the skip list
#
# @param SourceFileList: Source files to look for; each entry provides a .Path attribute
# @param IncludeList:    A list of all path to be searched
# @param SkipList:       A list of all types of file could be skipped
#
# @retval FileList:      A list of all files found
#
def GetFileList(SourceFileList, IncludeList, SkipList):
    if IncludeList is None:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, "Include path for unicode file is not defined")

    FileList = []
    if SkipList is None:
        SkipList = []

    for File in SourceFileList:
        for Dir in IncludeList:
            if not os.path.exists(Dir):
                continue
            # Keep the joined path in its own name: rebinding File here made
            # File.Path fail on the next directory when the file was not found
            # in this one.
            FullPath = os.path.join(Dir, File.Path)
            #
            # Ignore Dir
            #
            if not os.path.isfile(FullPath):
                continue
            #
            # Ignore file listed in skip list
            #
            Extension = os.path.splitext(FullPath)[1].upper()
            IsSkip = False
            for Skip in SkipList:
                if Extension == Skip.upper():
                    EdkLogger.verbose("Skipped %s for string token uses search" % FullPath)
                    IsSkip = True
                    break

            if not IsSkip:
                FileList.append(FullPath)

            # NOTE(review): stopping at the first directory that contains the
            # file is assumed, so a file is listed at most once -- confirm.
            break

    return FileList
## Search whether all string defined in UniObjectClass are referenced
#
# Scan every file for STRING_TOKEN(<name>) uses and mark each string found as
# referenced, then let UniObjectClass renumber its tokens. With an empty file
# list nothing is scanned and no renumbering is done.
#
# @param UniObjectClass:    Input UniObjectClass
# @param FileList:          Search path list
# @param IsCompatibleMode   Compatible Mode
#
# @retval UniObjectClass:   UniObjectClass after searched
#
def SearchString(UniObjectClass, FileList, IsCompatibleMode):
    if not FileList:
        return UniObjectClass

    for File in FileList:
        if not os.path.isfile(File):
            continue
        Lines = open(File, 'r')
        try:
            for Line in Lines:
                for StrName in STRING_TOKEN.findall(Line):
                    EdkLogger.debug(EdkLogger.DEBUG_5, "Found string identifier: " + StrName)
                    UniObjectClass.SetStringReferenced(StrName)
        finally:
            # The handle was previously left open for every scanned file
            Lines.close()

    UniObjectClass.ReToken()

    return UniObjectClass
## Generate the string files for a set of .uni files
#
# This function is used for UEFI2.1 spec
#
# Parse the .uni files, mark the strings referenced from the source files and
# generate the .h content and, when requested, the .c content and the binary
# string data.
#
# @param UniFilList         The .uni files to parse
# @param SourceFileList     Source files searched for STRING_TOKEN uses
# @param IncludeList        Directories the source files are resolved against
# @param IncludePathList    Passed to UniFileClassObject
# @param SkipList           File extensions that are not searched
# @param BaseName           The basename of strings
# @param IsCompatibleMode   Compatible mode
# @param ShellMode          When True, the .uni files are parsed in compatible
#                           mode regardless of IsCompatibleMode
# @param UniGenCFlag        UniString is generated into AutoGen C file when it is set to True
# @param UniGenBinBuffer    Buffer that receives the binary string data, or None
# @param FilterInfo         Platform language filter information. The default
#                           list is shared between calls and must not be modified.
#
# @retval HFile, CFile      The .h content and the .c content (None when no
#                           .c content is generated)
#
def GetStringFiles(UniFilList, SourceFileList, IncludeList, IncludePathList, SkipList, BaseName, IsCompatibleMode = False, ShellMode = False, UniGenCFlag = True, UniGenBinBuffer = None, FilterInfo = [True, []]):
    if len(UniFilList) > 0:
        if ShellMode:
            #
            # support ISO 639-2 codes in .UNI files of EDK Shell
            #
            Uni = UniFileClassObject(sorted (UniFilList), True, IncludePathList)
        else:
            Uni = UniFileClassObject(sorted (UniFilList), IsCompatibleMode, IncludePathList)
    else:
        EdkLogger.error("UnicodeStringGather", AUTOGEN_ERROR, 'No unicode files given')

    FileList = GetFileList(SourceFileList, IncludeList, SkipList)

    Uni = SearchString(Uni, sorted (FileList), IsCompatibleMode)

    HFile = CreateHFile(BaseName, Uni, IsCompatibleMode, UniGenCFlag)
    CFile = None
    if IsCompatibleMode or UniGenCFlag:
        CFile = CreateCFile(BaseName, Uni, IsCompatibleMode, FilterInfo)
    if UniGenBinBuffer is not None:
        # Only the side effect on UniGenBinBuffer is wanted here
        CreateCFileContent(BaseName, Uni, IsCompatibleMode, UniGenBinBuffer, FilterInfo)

    # The caller unpacks two values: the .h content and the .c content
    return HFile, CFile
#
# Write an item: return Target with Item appended
#
def Write(Target, Item):
    Pieces = (Target, Item)
    return ''.join(Pieces)
#
# Write an item with a break line: return Target with Item and a newline appended
#
def WriteLine(Target, Item):
    Pieces = (Target, Item, '\n')
    return ''.join(Pieces)
# This acts like the main() function for the script, unless it is 'import'ed into another
# script. It is a sample run against a hard-coded C:\Edk tree.
if __name__ == '__main__':
    EdkLogger.info('start')

    UniFileList = [
        r'C:\\Edk\\Strings2.uni',
        r'C:\\Edk\\Strings.uni'
    ]

    # NOTE(review): GetFileList reads a .Path attribute from every source file
    # entry, while os.walk yields plain file names -- confirm what this sample
    # is expected to pass.
    SrcFileList = []
    for Root, Dirs, Files in os.walk('C:\\Edk'):
        for File in Files:
            SrcFileList.append(File)

    # NOTE(review): the include list is missing from the visible source; the
    # sample root used above is assumed -- confirm.
    IncludeList = [
        r'C:\\Edk'
    ]

    SkipList = ['.inf', '.uni']
    BaseName = 'DriverSample'
    # GetStringFiles takes IncludePathList between IncludeList and SkipList.
    # The call used to omit it, which shifted every following argument by one.
    # NOTE(review): IncludeList is reused for IncludePathList -- confirm.
    (h, c) = GetStringFiles(UniFileList, SrcFileList, IncludeList, IncludeList, SkipList, BaseName, True)

    hfile = open('unistring.h', 'w')
    try:
        hfile.write(h)
    finally:
        hfile.close()

    cfile = open('unistring.c', 'w')
    try:
        cfile.write(c)
    finally:
        cfile.close()

    EdkLogger.info('end')