2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
17 from __future__
import print_function
24 from Logger
import ToolError
25 from Logger
import Log
as EdkLogger
26 from Logger
import StringTable
as ST
27 from Library
.StringUtils
import GetLineNo
28 from Library
.Misc
import PathClass
29 from Library
.Misc
import GetCharIndexOutStr
30 from Library
import DataType
as DT
31 from Library
.ParserValidate
import CheckUTF16FileHeader
36 UNICODE_WIDE_CHAR
= u
'\\wide'
37 UNICODE_NARROW_CHAR
= u
'\\narrow'
38 UNICODE_NON_BREAKING_CHAR
= u
'\\nbr'
39 UNICODE_UNICODE_CR
= '\r'
40 UNICODE_UNICODE_LF
= '\n'
42 NARROW_CHAR
= u
'\uFFF0'
44 NON_BREAKING_CHAR
= u
'\uFFF2'
# Lookup table from three-letter language codes to their two-letter
# equivalents. The language-code helpers in this file use it in both
# directions: forward with .get(), and in reverse by scanning the keys.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84 ## Convert a python unicode string to a normal string
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
89 # @param Uni: The python unicode string
91 # @retval: The formatted normal string
def UniToStr(Uni):
    """Return the printable form of ``Uni`` without quotes or type prefix.

    UniToStr(u'I am a string') is 'I am a string'.

    @param Uni: The python unicode string
    @retval:    The formatted normal string
    """
    Text = repr(Uni)
    # repr() yields an optional type prefix (u'...' on Python 2, b'...' for
    # bytes) followed by the quoted text. A fixed [2:-1] slice assumed the
    # prefix was always present and ate the first character on Python 3,
    # so strip the prefix only when it is really there.
    return Text.lstrip('ubUB')[1:-1]
96 ## Convert a unicode string to a Hex list
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
101 # @param Uni: The python unicode string
103 # @retval List: The formatted hex list
def UniToHexList(Uni):
    """Convert a unicode string to a flat list of little-endian hex bytes.

    Each 16-bit unit contributes two entries, low byte first:
    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].

    @param Uni:   The python unicode string
    @retval List: The formatted hex list (empty for an empty string)
    """
    List = []
    for Item in Uni:
        CodePoint = ord(Item)
        if CodePoint > 0xFFFF:
            # A code point outside the BMP does not fit in four hex digits;
            # split it into a UTF-16 surrogate pair so that every unit
            # still yields exactly two bytes.
            CodePoint -= 0x10000
            Units = (0xD800 | (CodePoint >> 10), 0xDC00 | (CodePoint & 0x3FF))
        else:
            Units = (CodePoint,)
        for Unit in Units:
            Temp = '%04X' % Unit
            List.append('0x' + Temp[2:4])   # low byte
            List.append('0x' + Temp[0:2])   # high byte
    return List
113 ## Convert special unicode characters
115 # Convert special characters to (c), (r) and (tm).
117 # @param Uni: The python unicode string
119 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII.

    U+00A9 becomes '(c)', U+00AE becomes '(r)' and U+2122 becomes '(tm)';
    every other character is left untouched.

    @param Uni:     The python unicode string
    @retval NewUni: The converted unicode string
    """
    NewUni = Uni
    for Special, Replacement in ((u'\u00A9', '(c)'),
                                 (u'\u00AE', '(r)'),
                                 (u'\u2122', '(tm)')):
        NewUni = NewUni.replace(Special, Replacement)
    return NewUni
130 ## GetLanguageCode1766
132 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
133 # RFC 1766 language codes supported in compatibility mode
134 # RFC 4646 language codes supported in native mode
136 # @param LangName: Language codes read from .UNI file
138 # @retval LangName: Valid language code in RFC 1766 format or None
140 def GetLanguageCode1766(LangName
, File
=None):
143 length
= len(LangName
)
145 if LangName
.isalpha():
146 for Key
in gLANG_CONV_TABLE
.keys():
147 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
150 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
153 EdkLogger
.Error("Unicode File Parser",
154 ToolError
.FORMAT_INVALID
,
155 "Invalid RFC 1766 language code : %s" % LangName
,
158 if LangName
[0:2].isalpha() and LangName
[2] == '-':
159 for Key
in gLANG_CONV_TABLE
.keys():
160 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
163 if LangName
[0:2].isalpha() and LangName
[2] == '-':
164 for Key
in gLANG_CONV_TABLE
.keys():
165 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
167 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
168 for Key
in gLANG_CONV_TABLE
.keys():
169 if Key
== LangName
[0:3].lower():
172 EdkLogger
.Error("Unicode File Parser",
173 ToolError
.FORMAT_INVALID
,
174 "Invalid RFC 4646 language code : %s" % LangName
,
179 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
180 # RFC 1766 language codes supported in compatibility mode
181 # RFC 4646 language codes supported in native mode
183 # @param LangName: Language codes read from .UNI file
185 # @retval LangName: Valid language code in RFC 4646 format or None
187 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
188 length
= len(LangName
)
190 if length
== 3 and LangName
.isalpha():
191 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
192 if TempLangName
is not None:
196 EdkLogger
.Error("Unicode File Parser",
197 ToolError
.FORMAT_INVALID
,
198 "Invalid RFC 1766 language code : %s" % LangName
,
200 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
203 if LangName
.isalpha():
206 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None:
209 if LangName
[0:2].isalpha() and LangName
[2] == '-':
212 if LangName
[0:2].isalpha() and LangName
[2] == '-':
214 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
217 EdkLogger
.Error("Unicode File Parser",
218 ToolError
.FORMAT_INVALID
,
219 "Invalid RFC 4646 language code : %s" % LangName
,
224 # Format the entry in Uni file.
226 # @param StrTokenName StrTokenName.
227 # @param TokenValueList A list that needs to be processed.
228 # @param ContainerFile ContainerFile.
230 # @return formatted entry
231 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
234 if len(StrTokenName
) > PreFormatLength
:
235 PreFormatLength
= len(StrTokenName
) + 1
236 for (Lang
, Value
) in TokenValueList
:
237 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
240 Lang
= DT
.TAB_LANGUAGE_EN_US
242 Lang
= DT
.TAB_LANGUAGE_EN_US
243 elif len(Lang
.split('-')[0]) == 3:
244 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
246 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
247 ValueList
= Value
.split('\n')
249 for SubValue
in ValueList
:
252 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
253 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
255 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
257 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
261 ## StringDefClassObject
263 # A structure for language definition
265 class StringDefClassObject(object):
266 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
268 self
.StringNameByteList
= []
269 self
.StringValue
= ''
270 self
.StringValueByteList
= ''
272 self
.Referenced
= Referenced
273 self
.UseOtherLangDef
= UseOtherLangDef
277 self
.StringName
= Name
278 self
.StringNameByteList
= UniToHexList(Name
)
279 if Value
is not None:
280 self
.StringValue
= Value
281 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
282 self
.Length
= len(self
.StringValueByteList
)
283 if Token
is not None:
287 return repr(self
.StringName
) + ' ' + \
288 repr(self
.Token
) + ' ' + \
289 repr(self
.Referenced
) + ' ' + \
290 repr(self
.StringValue
) + ' ' + \
291 repr(self
.UseOtherLangDef
)
293 def UpdateValue(self
, Value
= None):
294 if Value
is not None:
296 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
298 self
.StringValue
= Value
299 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
300 self
.Length
= len(self
.StringValueByteList
)
302 ## UniFileClassObject
304 # A structure for .uni file definition
306 class UniFileClassObject(object):
307 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
308 self
.FileList
= FileList
310 self
.IncFileList
= FileList
311 self
.UniFileHeader
= ''
313 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
314 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
315 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
316 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
317 self
.IsCompatibleMode
= IsCompatibleMode
318 if not IncludePathList
:
319 self
.IncludePathList
= []
321 self
.IncludePathList
= IncludePathList
322 if len(self
.FileList
) > 0:
323 self
.LoadUniFiles(FileList
)
326 # Get Language definition
328 def GetLangDef(self
, File
, Line
):
329 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
332 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
333 except UnicodeError as Xstr
:
334 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
335 except UnicodeError as Xstr
:
336 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
338 EdkLogger
.Error("Unicode File Parser",
339 ToolError
.FILE_OPEN_FAILURE
,
340 "File read failure: %s" % str(Xstr
),
342 LineNo
= GetLineNo(FileIn
, Line
, False)
343 EdkLogger
.Error("Unicode File Parser",
344 ToolError
.PARSER_ERROR
,
345 "Wrong language definition",
346 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
347 File
= File
, Line
= LineNo
)
349 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
350 LangPrintName
= Lang
[2]
353 for Item
in self
.LanguageDef
:
354 if Item
[0] == LangName
:
359 self
.LanguageDef
.append([LangName
, LangPrintName
])
362 # Add language string
364 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
365 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
369 # The found STRING tokens will be added into new language string list
370 # so that the unique STRING identifier is reserved for all languages in the package list.
372 FirstLangName
= self
.LanguageDef
[0][0]
373 if LangName
!= FirstLangName
:
374 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
375 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
376 if Item
.UseOtherLangDef
!= '':
377 OtherLang
= Item
.UseOtherLangDef
379 OtherLang
= FirstLangName
380 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
385 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
389 # Get String name and value
391 def GetStringObject(self
, Item
):
395 Name
= Item
.split()[1]
396 # Check that the string name uses only upper-case letters, digits and underscores
398 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
399 if MatchString
is None or MatchString
.end(0) != len(Name
):
400 EdkLogger
.Error("Unicode File Parser",
401 ToolError
.FORMAT_INVALID
,
402 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
403 LanguageList
= Item
.split(u
'#language ')
404 for IndexI
in range(len(LanguageList
)):
408 Language
= LanguageList
[IndexI
].split()[0]
409 #.replace(u'\r\n', u'')
411 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
412 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
413 self
.AddStringToList(Name
, Language
, Value
)
416 # Get include file list and load them
418 def GetIncludeFile(self
, Item
, Dir
= None):
421 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
422 self
.LoadUniFile(FileName
)
425 # Pre-process before parse .uni file
427 def PreProcess(self
, File
, IsIncludeFile
=False):
428 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
429 EdkLogger
.Error("Unicode File Parser",
430 ToolError
.FILE_NOT_FOUND
,
434 # Check file header of the Uni file
436 # if not CheckUTF16FileHeader(File.Path):
437 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
438 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
441 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
442 except UnicodeError as Xstr
:
443 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
445 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
447 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
451 # get the file header
456 if not self
.UniFileHeader
:
457 FirstGenHeader
= True
459 FirstGenHeader
= False
464 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
465 and not HeaderEnd
and not HeaderStart
:
467 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
469 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
470 self
.UniFileHeader
+= Line
+ '\r\n'
474 # Use unique identifier
478 MultiLineFeedExits
= False
481 # 1: single String entry exists
482 # 2: line feed exists under the same single String entry
484 StringEntryExistsFlag
= 0
486 Line
= FileIn
[LineCount
]
490 # Ignore comment line and empty line
492 if Line
== u
'' or Line
.startswith(u
'//'):
494 # Change the single line String entry flag status
496 if StringEntryExistsFlag
== 1:
497 StringEntryExistsFlag
= 2
499 # If the '#string' line and the '#language' line are not in the same line,
500 # there should be only one line feed character between them
502 if MultiLineFeedExits
:
503 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
506 MultiLineFeedExits
= False
508 # Process comment embedded in string define lines
510 FindFlag
= Line
.find(u
'//')
511 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
512 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
513 if FileIn
[LineCount
].strip().startswith('#language'):
514 Line
= Line
+ FileIn
[LineCount
]
515 FileIn
[LineCount
-1] = Line
516 FileIn
[LineCount
] = '\r\n'
518 for Index
in range (LineCount
+ 1, len (FileIn
) - 1):
519 if (Index
== len(FileIn
) -1):
520 FileIn
[Index
] = '\r\n'
522 FileIn
[Index
] = FileIn
[Index
+ 1]
524 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
526 if (len(Line
) - 1) > CommIndex
:
527 if Line
[CommIndex
+1] == u
'/':
528 Line
= Line
[:CommIndex
].strip()
530 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
532 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
534 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
535 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
536 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
538 Line
= Line
.replace(u
'\\\\', u
'\u0006')
539 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
540 Line
= Line
.replace(u
'\\n', CR
+ LF
)
541 Line
= Line
.replace(u
'\\r', CR
)
542 Line
= Line
.replace(u
'\\t', u
'\t')
543 Line
= Line
.replace(u
'''\"''', u
'''"''')
544 Line
= Line
.replace(u
'\t', u
' ')
545 Line
= Line
.replace(u
'\u0006', u
'\\')
548 # Check if single line has correct '"'
550 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
551 if not Line
.endswith('"'):
552 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
553 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
554 % (LineCount
, File
.Path
))
557 # Between Name entry and Language entry can not contain line feed
559 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
560 MultiLineFeedExits
= True
562 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
563 MultiLineFeedExits
= True
566 # Between Language entry and String entry can not contain line feed
568 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
569 MultiLineFeedExits
= True
572 # Check the situation where there is only one '"' for the language entry
574 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.count(u
'"') == 1:
575 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
576 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
577 % (LineCount
, File
.Path
))
580 # Check the situation that there has more than 2 '"' for the language entry
582 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.replace(u
'\\"', '').count(u
'"') > 2:
583 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
584 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
585 % (LineCount
, File
.Path
))
588 # Between two String entry, can not contain line feed
590 if Line
.startswith(u
'"'):
591 if StringEntryExistsFlag
== 2:
592 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
593 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
595 StringEntryExistsFlag
= 1
596 if not Line
.endswith('"'):
597 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
598 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
599 % (LineCount
, File
.Path
))
600 elif Line
.startswith(u
'#language'):
601 if StringEntryExistsFlag
== 2:
602 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
603 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
604 StringEntryExistsFlag
= 0
606 StringEntryExistsFlag
= 0
611 # Convert string def format as below
613 # #string MY_STRING_1
615 # "My first English string line 1"
616 # "My first English string line 2"
617 # #string MY_STRING_1
619 # "Mi segunda secuencia 1"
620 # "Mi segunda secuencia 2"
623 if not IsIncludeFile
and not Lines
:
624 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
625 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
630 ExistStrNameList
= []
632 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
633 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
634 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
637 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
638 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
639 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
640 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
641 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
642 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
643 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
646 if Line
.count(u
'#language') > 1:
647 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
648 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
651 if Line
.startswith(u
'//'):
653 elif Line
.startswith(u
'#langdef'):
654 if len(Line
.split()) == 2:
655 NewLines
.append(Line
)
657 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
658 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
659 NewLines
.append(Line
[Line
.find(u
'"'):])
661 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
662 elif Line
.startswith(u
'#string'):
663 if len(Line
.split()) == 2:
666 if StrName
.split()[1] not in ExistStrNameList
:
667 ExistStrNameList
.append(StrName
.split()[1].strip())
668 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
669 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
670 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
671 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
672 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
673 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
676 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
677 if Line
[Line
.find(u
'#language')-1] != ' ' or \
678 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
679 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
681 if Line
.find(u
'"') > 0:
682 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
684 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
686 if StrName
.split()[1] not in ExistStrNameList
:
687 ExistStrNameList
.append(StrName
.split()[1].strip())
688 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
689 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
690 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
691 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
692 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
693 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
696 if StrName
not in NewLines
:
697 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
699 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
700 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
701 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
702 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
703 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
704 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
706 if Line
[Line
.find(u
'"')-1] != u
' ':
707 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
709 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
711 if StrName
.split()[1] not in ExistStrNameList
:
712 ExistStrNameList
.append(StrName
.split()[1].strip())
713 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
714 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
715 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
716 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
717 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
718 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
721 if StrName
not in NewLines
:
722 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
724 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
725 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
726 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
728 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
729 elif Line
.startswith(u
'#language'):
730 if len(Line
.split()) == 2:
732 if StrName
not in NewLines
:
733 NewLines
.append(StrName
)
735 NewLines
.append(StrName
)
736 NewLines
.append(Line
)
737 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
739 if StrName
not in NewLines
:
740 NewLines
.append(StrName
)
742 NewLines
.append(StrName
)
743 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
744 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
746 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
747 elif Line
.startswith(u
'"'):
749 # Check the situation that there has more than 2 '"' for the language entry
751 if Line
.replace(u
'\\"', '').count(u
'"') > 2:
752 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
753 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
754 % (LineCount
, File
.Path
))
755 if u
'#string' in Line
or u
'#language' in Line
:
756 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
757 NewLines
.append(Line
)
760 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
762 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
763 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
764 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
767 if StrName
and not NewLines
:
768 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
769 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
773 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
774 # should be Abstract, Description, BinaryAbstract, BinaryDescription
775 AbstractPosition
= -1
776 DescriptionPosition
= -1
777 BinaryAbstractPosition
= -1
778 BinaryDescriptionPosition
= -1
779 for StrName
in ExistStrNameList
:
780 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
781 if 'BINARY' in StrName
:
782 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
784 AbstractPosition
= ExistStrNameList
.index(StrName
)
785 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
786 if 'BINARY' in StrName
:
787 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
789 DescriptionPosition
= ExistStrNameList
.index(StrName
)
791 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
792 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
795 BinaryMin
= BinaryOrderList
[0]
796 BinaryMax
= BinaryOrderList
[1]
797 if BinaryDescriptionPosition
> -1:
798 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
800 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
801 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
803 elif BinaryAbstractPosition
> -1:
804 if not(BinaryAbstractPosition
> Max
):
805 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
806 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
809 if DescriptionPosition
> -1:
810 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
811 DescriptionPosition
> AbstractPosition
):
812 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
813 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
816 if not self
.UniFileHeader
:
817 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
818 Message
= ST
.ERR_NO_SOURCE_HEADER
,
826 def LoadUniFile(self
, File
= None):
828 EdkLogger
.Error("Unicode File Parser",
829 ToolError
.PARSER_ERROR
,
830 Message
='No unicode file is given',
836 # Process special char in file
838 Lines
= self
.PreProcess(File
)
841 # Get Unicode Information
843 for IndexI
in range(len(Lines
)):
845 if (IndexI
+ 1) < len(Lines
):
846 SecondLine
= Lines
[IndexI
+ 1]
847 if (IndexI
+ 2) < len(Lines
):
848 ThirdLine
= Lines
[IndexI
+ 2]
851 # Get Language def information
853 if Line
.find(u
'#langdef ') >= 0:
854 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
862 # Get string def information format as below
864 # #string MY_STRING_1
866 # "My first English string line 1"
867 # "My first English string line 2"
868 # #string MY_STRING_1
870 # "Mi segunda secuencia 1"
871 # "Mi segunda secuencia 2"
873 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
874 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
875 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
876 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
877 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
878 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
881 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
882 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
883 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
884 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
885 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
886 if Lines
[IndexJ
][-2] == ' ':
889 if Lines
[IndexJ
].strip()[1:-1].strip():
890 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
892 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
895 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
899 if Value
.endswith('\r\n'):
900 Value
= Value
[: Value
.rfind('\r\n')]
901 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
902 self
.AddStringToList(Name
, Language
, Value
)
906 # Load multiple .uni files
def LoadUniFiles(self, FileList):
    """Load every entry of ``FileList`` that has a .uni file extension.

    An entry is handed to self.LoadUniFile() when its stripped ``Path``
    ends with '.uni', '.UNI' or '.Uni'; all other entries are ignored.

    @param FileList: Sized collection of objects exposing a ``Path`` string
    """
    if len(FileList) == 0:
        return
    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for UniFile in FileList:
        if UniFile.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(UniFile)
916 # Add a string to list
918 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
919 for LangNameItem
in self
.LanguageDef
:
920 if Language
== LangNameItem
[0]:
923 if Language
not in self
.OrderedStringList
:
924 self
.OrderedStringList
[Language
] = []
925 self
.OrderedStringDict
[Language
] = {}
928 if Name
in self
.OrderedStringDict
[Language
]:
930 if Value
is not None:
931 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
932 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
933 Item
.UpdateValue(Value
)
934 Item
.UseOtherLangDef
= ''
937 Token
= len(self
.OrderedStringList
[Language
])
939 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
944 self
.OrderedStringDict
[Language
][Name
] = Token
945 for LangName
in self
.LanguageDef
:
947 # New STRING token will be added into all language string lists.
948 # so that the unique STRING identifier is reserved for all languages in the package list.
950 if LangName
[0] != Language
:
951 if UseOtherLangDef
!= '':
952 OtherLangDef
= UseOtherLangDef
954 OtherLangDef
= Language
955 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
960 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
962 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
967 self
.OrderedStringDict
[Language
][Name
] = Index
970 # Set the string as referenced
def SetStringReferenced(self, Name):
    """Flag the string called ``Name`` as referenced.

    Only the entry in the first defined language's list is updated; the
    string tokens are added in the same order in every language list, so
    the first list alone carries the status. Unknown names are ignored.

    @param Name: String token name to mark
    """
    FirstLang = self.LanguageDef[0][0]
    if Name not in self.OrderedStringDict[FirstLang]:
        return
    Position = self.OrderedStringDict[FirstLang][Name]
    self.OrderedStringList[FirstLang][Position].Referenced = True
984 # Search the string in language definition by Name
986 def FindStringValue(self
, Name
, Lang
):
987 if Name
in self
.OrderedStringDict
[Lang
]:
988 ItemIndexInList
= self
.OrderedStringDict
[Lang
][Name
]
989 return self
.OrderedStringList
[Lang
][ItemIndexInList
]
994 # Search the string in language definition by Token
996 def FindByToken(self
, Token
, Lang
):
997 for Item
in self
.OrderedStringList
[Lang
]:
998 if Item
.Token
== Token
:
1004 # Re-order strings and re-generate tokens
1007 if len(self
.LanguageDef
) == 0:
1010 # Retoken all language strings according to the status of the string tokens in the first language string list.
1012 FirstLangName
= self
.LanguageDef
[0][0]
1014 # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
1015 for LangNameItem
in self
.LanguageDef
:
1016 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
1019 # Use small token for all referred string tokens.
1022 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1023 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1024 if FirstLangItem
.Referenced
== True:
1025 for LangNameItem
in self
.LanguageDef
:
1026 LangName
= LangNameItem
[0]
1027 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1028 OtherLangItem
.Referenced
= True
1029 OtherLangItem
.Token
= RefToken
1030 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1031 RefToken
= RefToken
+ 1
1034 # Use big token for all unreferred string tokens.
1037 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1038 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1039 if FirstLangItem
.Referenced
== False:
1040 for LangNameItem
in self
.LanguageDef
:
1041 LangName
= LangNameItem
[0]
1042 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1043 OtherLangItem
.Token
= RefToken
+ UnRefToken
1044 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1045 UnRefToken
= UnRefToken
+ 1
1048 # Show the instance itself
1051 print(self
.LanguageDef
)
1052 #print self.OrderedStringList
1053 for Item
in self
.OrderedStringList
:
1055 for Member
in self
.OrderedStringList
[Item
]:
1059 # Read content from '!include' UNI file
1061 def ReadIncludeUNIfile(self
, FilaPath
):
1065 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1066 EdkLogger
.Error("Unicode File Parser",
1067 ToolError
.FILE_NOT_FOUND
,
1070 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1071 except UnicodeError as Xstr
:
1072 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1073 except UnicodeError:
1074 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1076 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)