2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2019, Intel Corporation. All rights reserved.<BR>
6 # SPDX-License-Identifier: BSD-2-Clause-Patent
9 Collect all defined strings in multiple uni files
11 from __future__
import print_function
18 from Logger
import ToolError
19 from Logger
import Log
as EdkLogger
20 from Logger
import StringTable
as ST
21 from Library
.StringUtils
import GetLineNo
22 from Library
.Misc
import PathClass
23 from Library
.Misc
import GetCharIndexOutStr
24 from Library
import DataType
as DT
25 from Library
.ParserValidate
import CheckUTF16FileHeader
# Escape markers as they are spelled in .uni text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Single code points that stand in for the escape markers above.
NARROW_CHAR = u'\uFFF0'
NON_BREAKING_CHAR = u'\uFFF2'
# Three-letter language code -> two-letter language code.
# NOTE(review): the pairs look like ISO 639-2 -> ISO 639-1 mappings; confirm
# against the UNI specification before extending the table.
gLANG_CONV_TABLE = {
    'eng':'en', 'fra':'fr',
    'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am',
    'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az',
    'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm',
    'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce',
    'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv',
    'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee',
    'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa',
    'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga',
    'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha',
    'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu',
    'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig',
    'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu',
    'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj',
    'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr',
    'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la',
    'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt',
    'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk',
    'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my',
    'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl',
    'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc',
    'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi',
    'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn',
    'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd',
    'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm',
    'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st',
    'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg',
    'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to',
    'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug',
    'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo',
    'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za',
    'zho':'zh', 'zul':'zu',
}
78 ## Convert a python unicode string to a normal string
80 # Convert a python unicode string to a normal string
81 # UniToStr(u'I am a string') is 'I am a string'
83 # @param Uni: The python unicode string
85 # @retval: The formatted normal string
def UniToStr(Uni):
    """Return the text found between the quotes of ``repr(Uni)``.

    UniToStr(u'I am a string') is 'I am a string'.

    @param Uni: The python unicode string
    @retval: The formatted normal string
    """
    Quoted = repr(Uni)
    # Python 2 renders unicode as u'...' while Python 3 renders '...'.
    # A fixed [2:-1] slice only suits the former and silently drops the
    # first character on Python 3, so strip the prefix only when present.
    if Quoted[:1] in ('u', 'U'):
        Quoted = Quoted[1:]
    return Quoted[1:-1]
90 ## Convert a unicode string to a Hex list
92 # Convert a unicode string to a Hex list
93 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
95 # @param Uni: The python unicode string
97 # @retval List: The formatted hex list
def UniToHexList(Uni):
    """Convert a unicode string to a flat list of hex byte strings.

    Each character contributes two entries, low byte first:
    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].

    @param Uni: The python unicode string
    @retval List: The formatted hex list
    """
    HexList = []
    for Char in Uni:
        # Four hex digits: [0:2] is the high byte, [2:4] the low byte.
        # NOTE(review): assumes every code point fits in 16 bits - confirm.
        Code = '%04X' % ord(Char)
        HexList.append('0x' + Code[2:4])
        HexList.append('0x' + Code[0:2])
    return HexList
107 ## Convert special unicode characters
109 # Convert special characters to (c), (r) and (tm).
111 # @param Uni: The python unicode string
113 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII.

    U+00A9 becomes '(c)', U+00AE becomes '(r)' and U+2122 becomes '(tm)'.

    @param Uni: The python unicode string
    @retval NewUni: The converted unicode string
    """
    NewUni = Uni
    for Symbol, Ascii in ((u'\u00A9', '(c)'),
                          (u'\u00AE', '(r)'),
                          (u'\u2122', '(tm)')):
        NewUni = NewUni.replace(Symbol, Ascii)
    return NewUni
124 ## GetLanguageCode1766
126 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
127 # RFC 1766 language codes supported in compatibility mode
128 # RFC 4646 language codes supported in native mode
130 # @param LangName: Language codes read from .UNI file
132 # @retval LangName: Valid language code in RFC 1766 format or None
134 def GetLanguageCode1766(LangName
, File
=None):
137 length
= len(LangName
)
139 if LangName
.isalpha():
140 for Key
in gLANG_CONV_TABLE
.keys():
141 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
144 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
147 EdkLogger
.Error("Unicode File Parser",
148 ToolError
.FORMAT_INVALID
,
149 "Invalid RFC 1766 language code : %s" % LangName
,
152 if LangName
[0:2].isalpha() and LangName
[2] == '-':
153 for Key
in gLANG_CONV_TABLE
.keys():
154 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
157 if LangName
[0:2].isalpha() and LangName
[2] == '-':
158 for Key
in gLANG_CONV_TABLE
.keys():
159 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
161 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
162 for Key
in gLANG_CONV_TABLE
.keys():
163 if Key
== LangName
[0:3].lower():
166 EdkLogger
.Error("Unicode File Parser",
167 ToolError
.FORMAT_INVALID
,
168 "Invalid RFC 4646 language code : %s" % LangName
,
173 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
174 # RFC 1766 language codes supported in compatibility mode
175 # RFC 4646 language codes supported in native mode
177 # @param LangName: Language codes read from .UNI file
179 # @retval LangName: Valid language code in RFC 4646 format or None
181 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
182 length
= len(LangName
)
184 if length
== 3 and LangName
.isalpha():
185 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
186 if TempLangName
is not None:
190 EdkLogger
.Error("Unicode File Parser",
191 ToolError
.FORMAT_INVALID
,
192 "Invalid RFC 1766 language code : %s" % LangName
,
194 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
197 if LangName
.isalpha():
200 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None:
203 if LangName
[0:2].isalpha() and LangName
[2] == '-':
206 if LangName
[0:2].isalpha() and LangName
[2] == '-':
208 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
211 EdkLogger
.Error("Unicode File Parser",
212 ToolError
.FORMAT_INVALID
,
213 "Invalid RFC 4646 language code : %s" % LangName
,
218 # Format an entry for a Uni file.
220 # @param StrTokenName StrTokenName.
221 # @param TokenValueList A list need to be processed.
222 # @param ContainerFile ContainerFile.
224 # @return formatted entry
225 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
228 if len(StrTokenName
) > PreFormatLength
:
229 PreFormatLength
= len(StrTokenName
) + 1
230 for (Lang
, Value
) in TokenValueList
:
231 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
234 Lang
= DT
.TAB_LANGUAGE_EN_US
236 Lang
= DT
.TAB_LANGUAGE_EN_US
237 elif len(Lang
.split('-')[0]) == 3:
238 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
240 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
241 ValueList
= Value
.split('\n')
243 for SubValue
in ValueList
:
246 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
247 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
249 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
251 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
255 ## StringDefClassObject
257 # A structure for language definition
259 class StringDefClassObject(object):
260 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
262 self
.StringNameByteList
= []
263 self
.StringValue
= ''
264 self
.StringValueByteList
= ''
266 self
.Referenced
= Referenced
267 self
.UseOtherLangDef
= UseOtherLangDef
271 self
.StringName
= Name
272 self
.StringNameByteList
= UniToHexList(Name
)
273 if Value
is not None:
274 self
.StringValue
= Value
275 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
276 self
.Length
= len(self
.StringValueByteList
)
277 if Token
is not None:
281 return repr(self
.StringName
) + ' ' + \
282 repr(self
.Token
) + ' ' + \
283 repr(self
.Referenced
) + ' ' + \
284 repr(self
.StringValue
) + ' ' + \
285 repr(self
.UseOtherLangDef
)
287 def UpdateValue(self
, Value
= None):
288 if Value
is not None:
290 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
292 self
.StringValue
= Value
293 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
294 self
.Length
= len(self
.StringValueByteList
)
296 ## UniFileClassObject
298 # A structure for .uni file definition
300 class UniFileClassObject(object):
301 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
302 self
.FileList
= FileList
304 self
.IncFileList
= FileList
305 self
.UniFileHeader
= ''
307 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
308 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
309 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
310 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
311 self
.IsCompatibleMode
= IsCompatibleMode
312 if not IncludePathList
:
313 self
.IncludePathList
= []
315 self
.IncludePathList
= IncludePathList
316 if len(self
.FileList
) > 0:
317 self
.LoadUniFiles(FileList
)
320 # Get Language definition
322 def GetLangDef(self
, File
, Line
):
323 Lang
= shlex
.split(Line
.split(u
"//")[0])
326 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
327 except UnicodeError as Xstr
:
328 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
329 except UnicodeError as Xstr
:
330 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
332 EdkLogger
.Error("Unicode File Parser",
333 ToolError
.FILE_OPEN_FAILURE
,
334 "File read failure: %s" % str(Xstr
),
336 LineNo
= GetLineNo(FileIn
, Line
, False)
337 EdkLogger
.Error("Unicode File Parser",
338 ToolError
.PARSER_ERROR
,
339 "Wrong language definition",
340 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
341 File
= File
, Line
= LineNo
)
343 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
344 LangPrintName
= Lang
[2]
347 for Item
in self
.LanguageDef
:
348 if Item
[0] == LangName
:
353 self
.LanguageDef
.append([LangName
, LangPrintName
])
356 # Add language string
358 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
359 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
363 # The found STRING tokens will be added into new language string list
364 # so that the unique STRING identifier is reserved for all languages in the package list.
366 FirstLangName
= self
.LanguageDef
[0][0]
367 if LangName
!= FirstLangName
:
368 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
369 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
370 if Item
.UseOtherLangDef
!= '':
371 OtherLang
= Item
.UseOtherLangDef
373 OtherLang
= FirstLangName
374 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
379 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
383 # Get String name and value
385 def GetStringObject(self
, Item
):
389 Name
= Item
.split()[1]
390 # Check the string name is the upper character
392 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
393 if MatchString
is None or MatchString
.end(0) != len(Name
):
394 EdkLogger
.Error("Unicode File Parser",
395 ToolError
.FORMAT_INVALID
,
396 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
397 LanguageList
= Item
.split(u
'#language ')
398 for IndexI
in range(len(LanguageList
)):
402 Language
= LanguageList
[IndexI
].split()[0]
403 #.replace(u'\r\n', u'')
405 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
406 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
407 self
.AddStringToList(Name
, Language
, Value
)
410 # Get include file list and load them
412 def GetIncludeFile(self
, Item
, Dir
= None):
415 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
416 self
.LoadUniFile(FileName
)
419 # Pre-process before parse .uni file
421 def PreProcess(self
, File
, IsIncludeFile
=False):
422 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
423 EdkLogger
.Error("Unicode File Parser",
424 ToolError
.FILE_NOT_FOUND
,
428 # Check file header of the Uni file
430 # if not CheckUTF16FileHeader(File.Path):
431 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
432 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
435 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
436 except UnicodeError as Xstr
:
437 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
439 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
441 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
445 # get the file header
450 if not self
.UniFileHeader
:
451 FirstGenHeader
= True
453 FirstGenHeader
= False
458 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
459 and not HeaderEnd
and not HeaderStart
:
461 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
463 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
464 self
.UniFileHeader
+= Line
+ '\r\n'
468 # Use unique identifier
472 MultiLineFeedExits
= False
475 # 1: single String entry exist
476 # 2: line feed exist under the some single String entry
478 StringEntryExistsFlag
= 0
480 Line
= FileIn
[LineCount
]
484 # Ignore comment line and empty line
486 if Line
== u
'' or Line
.startswith(u
'//'):
488 # Change the single line String entry flag status
490 if StringEntryExistsFlag
== 1:
491 StringEntryExistsFlag
= 2
493 # If the '#string' line and the '#language' line are not in the same line,
494 # there should be only one line feed character between them
496 if MultiLineFeedExits
:
497 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
500 MultiLineFeedExits
= False
502 # Process comment embedded in string define lines
504 FindFlag
= Line
.find(u
'//')
505 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
506 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
507 if FileIn
[LineCount
].strip().startswith('#language'):
508 Line
= Line
+ FileIn
[LineCount
]
509 FileIn
[LineCount
-1] = Line
510 FileIn
[LineCount
] = '\r\n'
512 for Index
in range (LineCount
+ 1, len (FileIn
) - 1):
513 if (Index
== len(FileIn
) -1):
514 FileIn
[Index
] = '\r\n'
516 FileIn
[Index
] = FileIn
[Index
+ 1]
518 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
520 if (len(Line
) - 1) > CommIndex
:
521 if Line
[CommIndex
+1] == u
'/':
522 Line
= Line
[:CommIndex
].strip()
524 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
526 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
528 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
529 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
530 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
532 Line
= Line
.replace(u
'\\\\', u
'\u0006')
533 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
534 Line
= Line
.replace(u
'\\n', CR
+ LF
)
535 Line
= Line
.replace(u
'\\r', CR
)
536 Line
= Line
.replace(u
'\\t', u
'\t')
537 Line
= Line
.replace(u
'''\"''', u
'''"''')
538 Line
= Line
.replace(u
'\t', u
' ')
539 Line
= Line
.replace(u
'\u0006', u
'\\')
542 # Check if single line has correct '"'
544 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
545 if not Line
.endswith('"'):
546 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
547 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
548 % (LineCount
, File
.Path
))
551 # Between Name entry and Language entry can not contain line feed
553 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
554 MultiLineFeedExits
= True
556 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
557 MultiLineFeedExits
= True
560 # Between Language entry and String entry can not contain line feed
562 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
563 MultiLineFeedExits
= True
566 # Check the situation that there only has one '"' for the language entry
568 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.count(u
'"') == 1:
569 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
570 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
571 % (LineCount
, File
.Path
))
574 # Check the situation that there has more than 2 '"' for the language entry
576 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.replace(u
'\\"', '').count(u
'"') > 2:
577 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
578 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
579 % (LineCount
, File
.Path
))
582 # Between two String entry, can not contain line feed
584 if Line
.startswith(u
'"'):
585 if StringEntryExistsFlag
== 2:
586 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
587 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
589 StringEntryExistsFlag
= 1
590 if not Line
.endswith('"'):
591 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
592 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
593 % (LineCount
, File
.Path
))
596 # Check the situation that there has more than 2 '"' for the language entry
598 if Line
.strip() and Line
.replace(u
'\\"', '').count(u
'"') > 2:
599 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
600 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
601 % (LineCount
, File
.Path
))
603 elif Line
.startswith(u
'#language'):
604 if StringEntryExistsFlag
== 2:
605 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
606 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
607 StringEntryExistsFlag
= 0
609 StringEntryExistsFlag
= 0
614 # Convert string def format as below
616 # #string MY_STRING_1
618 # "My first English string line 1"
619 # "My first English string line 2"
620 # #string MY_STRING_1
622 # "Mi segunda secuencia 1"
623 # "Mi segunda secuencia 2"
626 if not IsIncludeFile
and not Lines
:
627 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
628 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
633 ExistStrNameList
= []
635 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
636 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
637 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
640 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
641 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
642 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
643 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
644 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
645 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
646 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
649 if Line
.count(u
'#language') > 1:
650 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
651 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
654 if Line
.startswith(u
'//'):
656 elif Line
.startswith(u
'#langdef'):
657 if len(Line
.split()) == 2:
658 NewLines
.append(Line
)
660 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
661 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
662 NewLines
.append(Line
[Line
.find(u
'"'):])
664 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
665 elif Line
.startswith(u
'#string'):
666 if len(Line
.split()) == 2:
669 if StrName
.split()[1] not in ExistStrNameList
:
670 ExistStrNameList
.append(StrName
.split()[1].strip())
671 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
672 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
673 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
674 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
675 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
676 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
679 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
680 if Line
[Line
.find(u
'#language')-1] != ' ' or \
681 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
682 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
684 if Line
.find(u
'"') > 0:
685 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
687 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
689 if StrName
.split()[1] not in ExistStrNameList
:
690 ExistStrNameList
.append(StrName
.split()[1].strip())
691 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
692 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
693 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
694 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
695 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
696 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
699 if StrName
not in NewLines
:
700 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
702 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
703 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
704 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
705 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
706 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
707 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
709 if Line
[Line
.find(u
'"')-1] != u
' ':
710 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
712 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
714 if StrName
.split()[1] not in ExistStrNameList
:
715 ExistStrNameList
.append(StrName
.split()[1].strip())
716 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
717 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
718 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
719 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
720 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
721 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
724 if StrName
not in NewLines
:
725 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
727 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
728 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
729 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
731 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
732 elif Line
.startswith(u
'#language'):
733 if len(Line
.split()) == 2:
735 if StrName
not in NewLines
:
736 NewLines
.append(StrName
)
738 NewLines
.append(StrName
)
739 NewLines
.append(Line
)
740 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
742 if StrName
not in NewLines
:
743 NewLines
.append(StrName
)
745 NewLines
.append(StrName
)
746 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
747 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
749 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
750 elif Line
.startswith(u
'"'):
751 if u
'#string' in Line
or u
'#language' in Line
:
752 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
753 NewLines
.append(Line
)
756 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
758 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
759 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
760 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
763 if StrName
and not NewLines
:
764 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
765 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
769 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
770 # should be Abstract, Description, BinaryAbstract, BinaryDescription
771 AbstractPosition
= -1
772 DescriptionPosition
= -1
773 BinaryAbstractPosition
= -1
774 BinaryDescriptionPosition
= -1
775 for StrName
in ExistStrNameList
:
776 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
777 if 'BINARY' in StrName
:
778 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
780 AbstractPosition
= ExistStrNameList
.index(StrName
)
781 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
782 if 'BINARY' in StrName
:
783 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
785 DescriptionPosition
= ExistStrNameList
.index(StrName
)
787 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
788 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
791 BinaryMin
= BinaryOrderList
[0]
792 BinaryMax
= BinaryOrderList
[1]
793 if BinaryDescriptionPosition
> -1:
794 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
796 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
797 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
799 elif BinaryAbstractPosition
> -1:
800 if not(BinaryAbstractPosition
> Max
):
801 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
802 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
805 if DescriptionPosition
> -1:
806 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
807 DescriptionPosition
> AbstractPosition
):
808 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
809 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
812 if not self
.UniFileHeader
:
813 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
814 Message
= ST
.ERR_NO_SOURCE_HEADER
,
822 def LoadUniFile(self
, File
= None):
824 EdkLogger
.Error("Unicode File Parser",
825 ToolError
.PARSER_ERROR
,
826 Message
='No unicode file is given',
832 # Process special char in file
834 Lines
= self
.PreProcess(File
)
837 # Get Unicode Information
839 for IndexI
in range(len(Lines
)):
841 if (IndexI
+ 1) < len(Lines
):
842 SecondLine
= Lines
[IndexI
+ 1]
843 if (IndexI
+ 2) < len(Lines
):
844 ThirdLine
= Lines
[IndexI
+ 2]
847 # Get Language def information
849 if Line
.find(u
'#langdef ') >= 0:
850 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
858 # Get string def information format as below
860 # #string MY_STRING_1
862 # "My first English string line 1"
863 # "My first English string line 2"
864 # #string MY_STRING_1
866 # "Mi segunda secuencia 1"
867 # "Mi segunda secuencia 2"
869 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
870 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
871 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
872 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
873 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
874 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
877 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
878 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
879 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
880 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
881 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
882 if Lines
[IndexJ
][-2] == ' ':
885 if Lines
[IndexJ
].strip()[1:-1].strip():
886 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
888 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
891 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
895 if Value
.endswith('\r\n'):
896 Value
= Value
[: Value
.rfind('\r\n')]
897 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
898 self
.AddStringToList(Name
, Language
, Value
)
902 # Load multiple .uni files
def LoadUniFiles(self, FileList):
    """Load every entry of FileList whose path names a .uni file.

    @param FileList: iterable of objects exposing a ``Path`` string

    Only the spellings '.uni', '.UNI' and '.Uni' are accepted (checked
    after stripping surrounding whitespace from the path); every other
    entry is skipped without error. Each accepted entry is passed
    unchanged to self.LoadUniFile.
    """
    for File in FileList:
        if File.Path.strip().endswith(('.uni', '.UNI', '.Uni')):
            self.LoadUniFile(File)
912 # Add a string to list
914 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
915 for LangNameItem
in self
.LanguageDef
:
916 if Language
== LangNameItem
[0]:
919 if Language
not in self
.OrderedStringList
:
920 self
.OrderedStringList
[Language
] = []
921 self
.OrderedStringDict
[Language
] = {}
924 if Name
in self
.OrderedStringDict
[Language
]:
926 if Value
is not None:
927 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
928 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
929 Item
.UpdateValue(Value
)
930 Item
.UseOtherLangDef
= ''
933 Token
= len(self
.OrderedStringList
[Language
])
935 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
940 self
.OrderedStringDict
[Language
][Name
] = Token
941 for LangName
in self
.LanguageDef
:
943 # New STRING token will be added into all language string lists.
944 # so that the unique STRING identifier is reserved for all languages in the package list.
946 if LangName
[0] != Language
:
947 if UseOtherLangDef
!= '':
948 OtherLangDef
= UseOtherLangDef
950 OtherLangDef
= Language
951 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
956 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
958 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
963 self
.OrderedStringDict
[Language
][Name
] = Index
966 # Set the string as referenced
def SetStringReferenced(self, Name):
    """Mark the string called Name as referenced.

    @param Name: string token name to look up

    Unknown names are ignored.
    """
    # String tokens are added in the same order in all language string
    # lists, so only the entry in the first language's list is updated.
    Lang = self.LanguageDef[0][0]
    if Name in self.OrderedStringDict[Lang]:
        ItemIndexInList = self.OrderedStringDict[Lang][Name]
        self.OrderedStringList[Lang][ItemIndexInList].Referenced = True
980 # Search the string in language definition by Name
def FindStringValue(self, Name, Lang):
    """Look up the string definition called Name in language Lang.

    @param Name: string token name
    @param Lang: language identifier; must be a key of OrderedStringDict

    @retval: the matching entry of OrderedStringList[Lang], or None when
             Name is not defined for that language
    """
    if Name in self.OrderedStringDict[Lang]:
        ItemIndexInList = self.OrderedStringDict[Lang][Name]
        return self.OrderedStringList[Lang][ItemIndexInList]
    return None
990 # Search the string in language definition by Token
992 def FindByToken(self
, Token
, Lang
):
993 for Item
in self
.OrderedStringList
[Lang
]:
994 if Item
.Token
== Token
:
1000 # Re-order strings and re-generate tokens
1003 if len(self
.LanguageDef
) == 0:
1006 # Retoken all language strings according to the status of string stoken in the first language string.
1008 FirstLangName
= self
.LanguageDef
[0][0]
1010 # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
1011 for LangNameItem
in self
.LanguageDef
:
1012 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
1015 # Use small token for all referred string stoken.
1018 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1019 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1020 if FirstLangItem
.Referenced
== True:
1021 for LangNameItem
in self
.LanguageDef
:
1022 LangName
= LangNameItem
[0]
1023 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1024 OtherLangItem
.Referenced
= True
1025 OtherLangItem
.Token
= RefToken
1026 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1027 RefToken
= RefToken
+ 1
1030 # Use big token for all unreferred string stoken.
1033 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1034 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1035 if FirstLangItem
.Referenced
== False:
1036 for LangNameItem
in self
.LanguageDef
:
1037 LangName
= LangNameItem
[0]
1038 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1039 OtherLangItem
.Token
= RefToken
+ UnRefToken
1040 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1041 UnRefToken
= UnRefToken
+ 1
1044 # Show the instance itself
1047 print(self
.LanguageDef
)
1048 #print self.OrderedStringList
1049 for Item
in self
.OrderedStringList
:
1051 for Member
in self
.OrderedStringList
[Item
]:
1055 # Read content from '!include' UNI file
1057 def ReadIncludeUNIfile(self
, FilaPath
):
1061 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1062 EdkLogger
.Error("Unicode File Parser",
1063 ToolError
.FILE_NOT_FOUND
,
1066 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1067 except UnicodeError as Xstr
:
1068 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1069 except UnicodeError:
1070 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1072 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)