2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
23 from Logger
import ToolError
24 from Logger
import Log
as EdkLogger
25 from Logger
import StringTable
as ST
26 from Library
.StringUtils
import GetLineNo
27 from Library
.Misc
import PathClass
28 from Library
.Misc
import GetCharIndexOutStr
29 from Library
import DataType
as DT
30 from Library
.ParserValidate
import CheckUTF16FileHeader
# Escape sequences as they are spelled inside .uni source text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Marker code points that PreProcess substitutes for the escape sequences above.
NARROW_CHAR = u'\uFFF0'
NON_BREAKING_CHAR = u'\uFFF2'
# NOTE(review): PreProcess also references WIDE_CHAR, CR and LF, which are not
# defined in the visible part of this file -- confirm they are defined.
# Maps three-letter language codes to their two-letter equivalents.
# GetLanguageCode looks three-letter names up here when it validates
# RFC 1766 style language codes.
gLANG_CONV_TABLE = {
    'eng':'en', 'fra':'fr',
    'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am',
    'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az',
    'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm',
    'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce',
    'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv',
    'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee',
    'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa',
    'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga',
    'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha',
    'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu',
    'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig',
    'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu',
    'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj',
    'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr',
    'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la',
    'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt',
    'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk',
    'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my',
    'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl',
    'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc',
    'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi',
    'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn',
    'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd',
    'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm',
    'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st',
    'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg',
    'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to',
    'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug',
    'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo',
    'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za',
    'zho':'zh', 'zul':'zu',
}
## Convert a python unicode string to a normal string
#
# Returns the repr() of the input with the surrounding quotes (and any
# one-letter literal prefix such as u or b) removed, so non-ASCII characters
# appear the way repr() renders them.
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni:     The python unicode string
#
# @retval:        The formatted normal string
#
def UniToStr(Uni):
    Text = repr(Uni)
    # Python 2 renders unicode as u'...' while Python 3 renders it as '...'.
    # Slicing a fixed [2:-1] therefore loses the first character on Python 3,
    # so drop the leading prefix letter only when one is actually present.
    if Text[:1] not in ("'", '"'):
        Text = Text[1:]
    return Text[1:-1]
## Convert a unicode string to a Hex list
#
# Each character becomes two byte strings, low byte first, formatted as
# '0xNN' with upper-case hex digits.
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
# Characters above U+FFFF are emitted as a UTF-16 surrogate pair (four bytes).
#
# @param Uni:     The python unicode string
#
# @retval List:   The formatted hex list
#
def UniToHexList(Uni):
    List = []
    for Item in Uni:
        CodePoint = ord(Item)
        if CodePoint > 0xFFFF:
            # A single 16-bit unit cannot hold this code point; split it into
            # a high/low surrogate pair instead of slicing a 5+ digit hex
            # string, which produced wrong bytes.
            CodePoint -= 0x10000
            Units = (0xD800 + (CodePoint >> 10), 0xDC00 + (CodePoint & 0x3FF))
        else:
            Units = (CodePoint,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
## Convert special unicode characters
#
# Convert the copyright, registered and trademark signs to the ASCII
# spellings (c), (r) and (tm). All other characters are left untouched.
#
# @param Uni:     The python unicode string
#
# @retval NewUni: The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    NewUni = Uni
    for Symbol, Ascii in ((u'\u00A9', '(c)'),
                          (u'\u00AE', '(r)'),
                          (u'\u2122', '(tm)')):
        NewUni = NewUni.replace(Symbol, Ascii)
    return NewUni
## GetLanguageCode1766
# Check the language code read from a .UNI file and convert RFC 4646 codes to RFC 1766 codes.
# RFC 1766 language codes are supported in compatibility mode.
# RFC 4646 language codes are supported in native mode.
# @param LangName:   Language code read from the .UNI file
# @retval LangName:  Valid language code in RFC 1766 format, or None
137 def GetLanguageCode1766(LangName
, File
=None):
140 length
= len(LangName
)
142 if LangName
.isalpha():
143 for Key
in gLANG_CONV_TABLE
.keys():
144 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
147 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
150 EdkLogger
.Error("Unicode File Parser",
151 ToolError
.FORMAT_INVALID
,
152 "Invalid RFC 1766 language code : %s" % LangName
,
155 if LangName
[0:2].isalpha() and LangName
[2] == '-':
156 for Key
in gLANG_CONV_TABLE
.keys():
157 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
160 if LangName
[0:2].isalpha() and LangName
[2] == '-':
161 for Key
in gLANG_CONV_TABLE
.keys():
162 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
164 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
165 for Key
in gLANG_CONV_TABLE
.keys():
166 if Key
== LangName
[0:3].lower():
169 EdkLogger
.Error("Unicode File Parser",
170 ToolError
.FORMAT_INVALID
,
171 "Invalid RFC 4646 language code : %s" % LangName
,
# Check the language code read from a .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate.
# RFC 1766 language codes are supported in compatibility mode.
# RFC 4646 language codes are supported in native mode.
# @param LangName:   Language code read from the .UNI file
# @retval LangName:  Valid language code in RFC 4646 format, or None
184 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
185 length
= len(LangName
)
187 if length
== 3 and LangName
.isalpha():
188 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
189 if TempLangName
is not None:
193 EdkLogger
.Error("Unicode File Parser",
194 ToolError
.FORMAT_INVALID
,
195 "Invalid RFC 1766 language code : %s" % LangName
,
197 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
200 if LangName
.isalpha():
203 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None:
206 if LangName
[0:2].isalpha() and LangName
[2] == '-':
209 if LangName
[0:2].isalpha() and LangName
[2] == '-':
211 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
214 EdkLogger
.Error("Unicode File Parser",
215 ToolError
.FORMAT_INVALID
,
216 "Invalid RFC 4646 language code : %s" % LangName
,
# Format an entry for a Uni file.
# @param StrTokenName    Name of the string token.
# @param TokenValueList  A list of (language, value) pairs to be processed.
# @param ContainerFile   ContainerFile.
# @return The formatted entry
228 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
231 if len(StrTokenName
) > PreFormatLength
:
232 PreFormatLength
= len(StrTokenName
) + 1
233 for (Lang
, Value
) in TokenValueList
:
234 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
237 Lang
= DT
.TAB_LANGUAGE_EN_US
239 Lang
= DT
.TAB_LANGUAGE_EN_US
240 elif len(Lang
.split('-')[0]) == 3:
241 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
243 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
244 ValueList
= Value
.split('\n')
246 for SubValue
in ValueList
:
249 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\n'
250 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
252 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
254 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
258 ## StringDefClassObject
260 # A structure for language definition
262 class StringDefClassObject(object):
263 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
265 self
.StringNameByteList
= []
266 self
.StringValue
= ''
267 self
.StringValueByteList
= ''
269 self
.Referenced
= Referenced
270 self
.UseOtherLangDef
= UseOtherLangDef
274 self
.StringName
= Name
275 self
.StringNameByteList
= UniToHexList(Name
)
276 if Value
is not None:
277 self
.StringValue
= Value
278 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
279 self
.Length
= len(self
.StringValueByteList
)
280 if Token
is not None:
284 return repr(self
.StringName
) + ' ' + \
285 repr(self
.Token
) + ' ' + \
286 repr(self
.Referenced
) + ' ' + \
287 repr(self
.StringValue
) + ' ' + \
288 repr(self
.UseOtherLangDef
)
290 def UpdateValue(self
, Value
= None):
291 if Value
is not None:
293 self
.StringValue
= self
.StringValue
+ '\n' + Value
295 self
.StringValue
= Value
296 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
297 self
.Length
= len(self
.StringValueByteList
)
299 ## UniFileClassObject
301 # A structure for .uni file definition
303 class UniFileClassObject(object):
304 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
305 self
.FileList
= FileList
307 self
.IncFileList
= FileList
308 self
.UniFileHeader
= ''
310 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
311 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
312 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
313 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
314 self
.IsCompatibleMode
= IsCompatibleMode
315 if not IncludePathList
:
316 self
.IncludePathList
= []
318 self
.IncludePathList
= IncludePathList
319 if len(self
.FileList
) > 0:
320 self
.LoadUniFiles(FileList
)
323 # Get Language definition
325 def GetLangDef(self
, File
, Line
):
326 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
329 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
330 except UnicodeError as Xstr
:
331 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
332 except UnicodeError as Xstr
:
333 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
335 EdkLogger
.Error("Unicode File Parser",
336 ToolError
.FILE_OPEN_FAILURE
,
337 "File read failure: %s" % str(Xstr
),
339 LineNo
= GetLineNo(FileIn
, Line
, False)
340 EdkLogger
.Error("Unicode File Parser",
341 ToolError
.PARSER_ERROR
,
342 "Wrong language definition",
343 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
344 File
= File
, Line
= LineNo
)
346 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
347 LangPrintName
= Lang
[2]
350 for Item
in self
.LanguageDef
:
351 if Item
[0] == LangName
:
356 self
.LanguageDef
.append([LangName
, LangPrintName
])
359 # Add language string
361 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
362 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
366 # The found STRING tokens will be added into new language string list
367 # so that the unique STRING identifier is reserved for all languages in the package list.
369 FirstLangName
= self
.LanguageDef
[0][0]
370 if LangName
!= FirstLangName
:
371 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
372 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
373 if Item
.UseOtherLangDef
!= '':
374 OtherLang
= Item
.UseOtherLangDef
376 OtherLang
= FirstLangName
377 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
382 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
386 # Get String name and value
388 def GetStringObject(self
, Item
):
392 Name
= Item
.split()[1]
393 # Check the string name is the upper character
395 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
396 if MatchString
is None or MatchString
.end(0) != len(Name
):
397 EdkLogger
.Error("Unicode File Parser",
398 ToolError
.FORMAT_INVALID
,
399 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
400 LanguageList
= Item
.split(u
'#language ')
401 for IndexI
in range(len(LanguageList
)):
405 Language
= LanguageList
[IndexI
].split()[0]
406 #.replace(u'\r\n', u'')
408 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
409 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
410 self
.AddStringToList(Name
, Language
, Value
)
413 # Get include file list and load them
415 def GetIncludeFile(self
, Item
, Dir
= None):
418 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
419 self
.LoadUniFile(FileName
)
422 # Pre-process before parse .uni file
424 def PreProcess(self
, File
, IsIncludeFile
=False):
425 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
426 EdkLogger
.Error("Unicode File Parser",
427 ToolError
.FILE_NOT_FOUND
,
431 # Check file header of the Uni file
433 # if not CheckUTF16FileHeader(File.Path):
434 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
435 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
438 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
439 except UnicodeError as Xstr
:
440 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
442 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
444 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
448 # get the file header
453 if not self
.UniFileHeader
:
454 FirstGenHeader
= True
456 FirstGenHeader
= False
461 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
462 and not HeaderEnd
and not HeaderStart
:
464 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
466 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
467 self
.UniFileHeader
+= Line
+ '\n'
471 # Use unique identifier
475 MultiLineFeedExits
= False
# 1: a single String entry exists
# 2: a line feed exists under that single String entry
481 StringEntryExistsFlag
= 0
483 Line
= FileIn
[LineCount
]
487 # Ignore comment line and empty line
489 if Line
== u
'' or Line
.startswith(u
'//'):
491 # Change the single line String entry flag status
493 if StringEntryExistsFlag
== 1:
494 StringEntryExistsFlag
= 2
# If the '#string' line and the '#language' line are not on the same line,
# there should be only one line feed character between them
499 if MultiLineFeedExits
:
500 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
503 MultiLineFeedExits
= False
# Process comments embedded in string definition lines
507 FindFlag
= Line
.find(u
'//')
508 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
509 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
510 if FileIn
[LineCount
].strip().startswith('#language'):
511 Line
= Line
+ FileIn
[LineCount
]
512 FileIn
[LineCount
-1] = Line
513 FileIn
[LineCount
] = '\n'
515 for Index
in range (LineCount
+ 1, len (FileIn
) - 1):
516 if (Index
== len(FileIn
) -1):
519 FileIn
[Index
] = FileIn
[Index
+ 1]
521 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
523 if (len(Line
) - 1) > CommIndex
:
524 if Line
[CommIndex
+1] == u
'/':
525 Line
= Line
[:CommIndex
].strip()
527 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
529 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
531 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
532 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
533 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
535 Line
= Line
.replace(u
'\\\\', u
'\u0006')
536 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
537 Line
= Line
.replace(u
'\\n', CR
+ LF
)
538 Line
= Line
.replace(u
'\\r', CR
)
539 Line
= Line
.replace(u
'\\t', u
'\t')
540 Line
= Line
.replace(u
'''\"''', u
'''"''')
541 Line
= Line
.replace(u
'\t', u
' ')
542 Line
= Line
.replace(u
'\u0006', u
'\\')
545 # Check if single line has correct '"'
547 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
548 if not Line
.endswith('"'):
549 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
550 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
551 % (LineCount
, File
.Path
))
554 # Between Name entry and Language entry can not contain line feed
556 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
557 MultiLineFeedExits
= True
559 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
560 MultiLineFeedExits
= True
563 # Between Language entry and String entry can not contain line feed
565 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
566 MultiLineFeedExits
= True
569 # Between two String entry, can not contain line feed
571 if Line
.startswith(u
'"'):
572 if StringEntryExistsFlag
== 2:
573 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
574 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
576 StringEntryExistsFlag
= 1
577 if not Line
.endswith('"'):
578 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
579 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
580 % (LineCount
, File
.Path
))
581 elif Line
.startswith(u
'#language'):
582 if StringEntryExistsFlag
== 2:
583 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
584 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
585 StringEntryExistsFlag
= 0
587 StringEntryExistsFlag
= 0
592 # Convert string def format as below
594 # #string MY_STRING_1
596 # "My first English string line 1"
597 # "My first English string line 2"
598 # #string MY_STRING_1
600 # "Mi segunda secuencia 1"
601 # "Mi segunda secuencia 2"
604 if not IsIncludeFile
and not Lines
:
605 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
606 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
611 ExistStrNameList
= []
613 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
614 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
615 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
618 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
619 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
620 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
621 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
622 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
623 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
624 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
627 if Line
.count(u
'#language') > 1:
628 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
629 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
632 if Line
.startswith(u
'//'):
634 elif Line
.startswith(u
'#langdef'):
635 if len(Line
.split()) == 2:
636 NewLines
.append(Line
)
638 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
639 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
640 NewLines
.append(Line
[Line
.find(u
'"'):])
642 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
643 elif Line
.startswith(u
'#string'):
644 if len(Line
.split()) == 2:
647 if StrName
.split()[1] not in ExistStrNameList
:
648 ExistStrNameList
.append(StrName
.split()[1].strip())
649 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
650 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
651 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
652 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
653 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
654 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
657 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
658 if Line
[Line
.find(u
'#language')-1] != ' ' or \
659 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
660 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
662 if Line
.find(u
'"') > 0:
663 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
665 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
667 if StrName
.split()[1] not in ExistStrNameList
:
668 ExistStrNameList
.append(StrName
.split()[1].strip())
669 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
670 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
671 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
672 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
673 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
674 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
677 if StrName
not in NewLines
:
678 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
680 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
681 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
682 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
683 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
684 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
685 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
687 if Line
[Line
.find(u
'"')-1] != u
' ':
688 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
690 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
692 if StrName
.split()[1] not in ExistStrNameList
:
693 ExistStrNameList
.append(StrName
.split()[1].strip())
694 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
695 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
696 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
697 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
698 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
699 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
702 if StrName
not in NewLines
:
703 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
705 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
706 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
707 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
709 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
710 elif Line
.startswith(u
'#language'):
711 if len(Line
.split()) == 2:
713 if StrName
not in NewLines
:
714 NewLines
.append(StrName
)
716 NewLines
.append(StrName
)
717 NewLines
.append(Line
)
718 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
720 if StrName
not in NewLines
:
721 NewLines
.append(StrName
)
723 NewLines
.append(StrName
)
724 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
725 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
727 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
728 elif Line
.startswith(u
'"'):
729 if u
'#string' in Line
or u
'#language' in Line
:
730 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
731 NewLines
.append(Line
)
734 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
736 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
737 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
738 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
741 if StrName
and not NewLines
:
742 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
743 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
# Check the order of Abstract, Description, BinaryAbstract and BinaryDescription;
# it should be Abstract, Description, BinaryAbstract, BinaryDescription
749 AbstractPosition
= -1
750 DescriptionPosition
= -1
751 BinaryAbstractPosition
= -1
752 BinaryDescriptionPosition
= -1
753 for StrName
in ExistStrNameList
:
754 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
755 if 'BINARY' in StrName
:
756 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
758 AbstractPosition
= ExistStrNameList
.index(StrName
)
759 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
760 if 'BINARY' in StrName
:
761 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
763 DescriptionPosition
= ExistStrNameList
.index(StrName
)
765 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
766 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
769 BinaryMin
= BinaryOrderList
[0]
770 BinaryMax
= BinaryOrderList
[1]
771 if BinaryDescriptionPosition
> -1:
772 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
774 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
775 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
777 elif BinaryAbstractPosition
> -1:
778 if not(BinaryAbstractPosition
> Max
):
779 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
780 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
783 if DescriptionPosition
> -1:
784 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
785 DescriptionPosition
> AbstractPosition
):
786 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
787 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
790 if not self
.UniFileHeader
:
791 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
792 Message
= ST
.ERR_NO_SOURCE_HEADER
,
800 def LoadUniFile(self
, File
= None):
802 EdkLogger
.Error("Unicode File Parser",
803 ToolError
.PARSER_ERROR
,
804 Message
='No unicode file is given',
810 # Process special char in file
812 Lines
= self
.PreProcess(File
)
815 # Get Unicode Information
817 for IndexI
in range(len(Lines
)):
819 if (IndexI
+ 1) < len(Lines
):
820 SecondLine
= Lines
[IndexI
+ 1]
821 if (IndexI
+ 2) < len(Lines
):
822 ThirdLine
= Lines
[IndexI
+ 2]
825 # Get Language def information
827 if Line
.find(u
'#langdef ') >= 0:
828 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
836 # Get string def information format as below
838 # #string MY_STRING_1
840 # "My first English string line 1"
841 # "My first English string line 2"
842 # #string MY_STRING_1
844 # "Mi segunda secuencia 1"
845 # "Mi segunda secuencia 2"
847 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
848 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
849 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
850 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
851 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
852 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
855 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
856 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
857 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
858 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
859 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
860 if Lines
[IndexJ
][-2] == ' ':
863 if Lines
[IndexJ
].strip()[1:-1].strip():
864 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
866 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
869 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\n'
873 if Value
.endswith('\n'):
874 Value
= Value
[: Value
.rfind('\n')]
875 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
876 self
.AddStringToList(Name
, Language
, Value
)
#
# Load multiple .uni files
#
# Calls self.LoadUniFile for every entry whose path ends in ".uni"; entries
# with any other extension are ignored.
#
# @param FileList:  Sequence of file objects exposing a ``Path`` string.
#                   None or an empty sequence loads nothing.
#
def LoadUniFiles(self, FileList):
    for File in FileList or []:
        # Compare the extension case-insensitively so spellings other than
        # .uni/.UNI/.Uni (for example ".uNi") are not silently skipped.
        if File.Path.strip().lower().endswith('.uni'):
            self.LoadUniFile(File)
890 # Add a string to list
892 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
893 for LangNameItem
in self
.LanguageDef
:
894 if Language
== LangNameItem
[0]:
897 if Language
not in self
.OrderedStringList
:
898 self
.OrderedStringList
[Language
] = []
899 self
.OrderedStringDict
[Language
] = {}
902 if Name
in self
.OrderedStringDict
[Language
]:
904 if Value
is not None:
905 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
906 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
907 Item
.UpdateValue(Value
)
908 Item
.UseOtherLangDef
= ''
911 Token
= len(self
.OrderedStringList
[Language
])
913 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
918 self
.OrderedStringDict
[Language
][Name
] = Token
919 for LangName
in self
.LanguageDef
:
921 # New STRING token will be added into all language string lists.
922 # so that the unique STRING identifier is reserved for all languages in the package list.
924 if LangName
[0] != Language
:
925 if UseOtherLangDef
!= '':
926 OtherLangDef
= UseOtherLangDef
928 OtherLangDef
= Language
929 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
934 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
936 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
941 self
.OrderedStringDict
[Language
][Name
] = Index
#
# Set the string as referenced
#
# Marks the entry called Name in the first defined language as referenced.
# Nothing happens when no language is defined or the name is unknown.
#
# @param Name:  Name of the string token to mark.
#
def SetStringReferenced(self, Name):
    # Without any language definition there is no list to update; indexing
    # LanguageDef[0] would raise IndexError.
    if not self.LanguageDef:
        return
    #
    # String tokens are added in the same order in all language string lists.
    # So, only update the status of the string token in the first language string list.
    #
    Lang = self.LanguageDef[0][0]
    ItemIndexInList = self.OrderedStringDict[Lang].get(Name)
    if ItemIndexInList is not None:
        self.OrderedStringList[Lang][ItemIndexInList].Referenced = True
#
# Search the string in language definition by Name
#
# @param Name:  Name of the string token to look up.
# @param Lang:  Language identifier; it must already be a key of
#               self.OrderedStringDict, otherwise KeyError is raised.
#
# @retval:      The entry stored for Name in that language's ordered list,
#               or None when Name is not defined for the language.
#
def FindStringValue(self, Name, Lang):
    ItemIndexInList = self.OrderedStringDict[Lang].get(Name)
    # Index 0 is a valid position, so test against None rather than truthiness.
    if ItemIndexInList is None:
        return None
    return self.OrderedStringList[Lang][ItemIndexInList]
968 # Search the string in language definition by Token
970 def FindByToken(self
, Token
, Lang
):
971 for Item
in self
.OrderedStringList
[Lang
]:
972 if Item
.Token
== Token
:
978 # Re-order strings and re-generate tokens
981 if len(self
.LanguageDef
) == 0:
984 # Retoken all language strings according to the status of string stoken in the first language string.
986 FirstLangName
= self
.LanguageDef
[0][0]
# Convert the OrderedStringList to OrderedStringListByToken in order to facilitate future searches by token
989 for LangNameItem
in self
.LanguageDef
:
990 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
# Use small tokens for all referenced string tokens.
996 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
997 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
998 if FirstLangItem
.Referenced
== True:
999 for LangNameItem
in self
.LanguageDef
:
1000 LangName
= LangNameItem
[0]
1001 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1002 OtherLangItem
.Referenced
= True
1003 OtherLangItem
.Token
= RefToken
1004 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1005 RefToken
= RefToken
+ 1
# Use big tokens for all unreferenced string tokens.
1011 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1012 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1013 if FirstLangItem
.Referenced
== False:
1014 for LangNameItem
in self
.LanguageDef
:
1015 LangName
= LangNameItem
[0]
1016 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1017 OtherLangItem
.Token
= RefToken
+ UnRefToken
1018 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1019 UnRefToken
= UnRefToken
+ 1
1022 # Show the instance itself
1025 print(self
.LanguageDef
)
1026 #print self.OrderedStringList
1027 for Item
in self
.OrderedStringList
:
1029 for Member
in self
.OrderedStringList
[Item
]:
1033 # Read content from '!include' UNI file
1035 def ReadIncludeUNIfile(self
, FilaPath
):
1039 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1040 EdkLogger
.Error("Unicode File Parser",
1041 ToolError
.FILE_NOT_FOUND
,
1044 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1045 except UnicodeError as Xstr
:
1046 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1047 except UnicodeError:
1048 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1050 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)