2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2019, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
17 from __future__
import print_function
24 from Logger
import ToolError
25 from Logger
import Log
as EdkLogger
26 from Logger
import StringTable
as ST
27 from Library
.StringUtils
import GetLineNo
28 from Library
.Misc
import PathClass
29 from Library
.Misc
import GetCharIndexOutStr
30 from Library
import DataType
as DT
31 from Library
.ParserValidate
import CheckUTF16FileHeader
# Width / line-break control markers as they are spelled inside .uni text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Single-character stand-ins that PreProcess substitutes for the textual
# markers above before a line is parsed.
NARROW_CHAR = u'\uFFF0'
NON_BREAKING_CHAR = u'\uFFF2'
# Maps three-letter language codes to their two-letter equivalents.
# GetLanguageCode / GetLanguageCode1766 consult this table when converting
# between RFC 1766 style and RFC 4646 style language codes.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84 ## Convert a python unicode string to a normal string
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
89 # @param Uni: The python unicode string
91 # @retval: The formatted normal string
def UniToStr(Uni):
    """Return the printable body of *Uni* as produced by ``repr``.

    ``UniToStr(u'I am a string')`` is ``'I am a string'``.

    @param Uni: The python unicode string
    @retval: The text of ``repr(Uni)`` without its quoting
    """
    Text = repr(Uni)
    # Python 2 renders unicode objects as u'...', Python 3 as '...'.
    # Drop the optional prefix first so that exactly one quote character is
    # removed from each end on either interpreter.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    return Text[1:-1]
96 ## Convert a unicode string to a Hex list
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
101 # @param Uni: The python unicode string
103 # @retval List: The formatted hex list
def UniToHexList(Uni):
    """Convert a unicode string to a list of little-endian UTF-16 hex bytes.

    ``UniToHexList('ABC')`` is
    ``['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']``.

    @param Uni: The python unicode string
    @retval List: Two '0xNN' entries (low byte, then high byte) per 16-bit unit
    """
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # A code point outside the BMP does not fit in four hex digits;
            # emit it as a UTF-16 surrogate pair instead of slicing a
            # five-digit string at fixed offsets.
            Code -= 0x10000
            Units = (0xD800 + (Code >> 10), 0xDC00 + (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
113 ## Convert special unicode characters
115 # Convert special characters to (c), (r) and (tm).
117 # @param Uni: The python unicode string
119 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII text.

    U+00A9 becomes '(c)', U+00AE becomes '(r)' and U+2122 becomes '(tm)'.
    Every other character is left untouched.

    @param Uni: The python unicode string
    @retval NewUni: The converted unicode string
    """
    NewUni = Uni
    for Special, Plain in ((u'\u00A9', '(c)'),
                           (u'\u00AE', '(r)'),
                           (u'\u2122', '(tm)')):
        NewUni = NewUni.replace(Special, Plain)
    return NewUni
130 ## GetLanguageCode1766
132 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
133 # RFC 1766 language codes supported in compatibility mode
134 # RFC 4646 language codes supported in native mode
136 # @param LangName: Language codes read from .UNI file
138 # @retval LangName: Valid language code in RFC 1766 format or None
140 def GetLanguageCode1766(LangName
, File
=None):
143 length
= len(LangName
)
145 if LangName
.isalpha():
146 for Key
in gLANG_CONV_TABLE
.keys():
147 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
150 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
153 EdkLogger
.Error("Unicode File Parser",
154 ToolError
.FORMAT_INVALID
,
155 "Invalid RFC 1766 language code : %s" % LangName
,
158 if LangName
[0:2].isalpha() and LangName
[2] == '-':
159 for Key
in gLANG_CONV_TABLE
.keys():
160 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
163 if LangName
[0:2].isalpha() and LangName
[2] == '-':
164 for Key
in gLANG_CONV_TABLE
.keys():
165 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
167 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
168 for Key
in gLANG_CONV_TABLE
.keys():
169 if Key
== LangName
[0:3].lower():
172 EdkLogger
.Error("Unicode File Parser",
173 ToolError
.FORMAT_INVALID
,
174 "Invalid RFC 4646 language code : %s" % LangName
,
179 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
180 # RFC 1766 language codes supported in compatibility mode
181 # RFC 4646 language codes supported in native mode
183 # @param LangName: Language codes read from .UNI file
185 # @retval LangName: Valid language code in RFC 4646 format or None
187 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
188 length
= len(LangName
)
190 if length
== 3 and LangName
.isalpha():
191 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
192 if TempLangName
is not None:
196 EdkLogger
.Error("Unicode File Parser",
197 ToolError
.FORMAT_INVALID
,
198 "Invalid RFC 1766 language code : %s" % LangName
,
200 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
203 if LangName
.isalpha():
206 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None:
209 if LangName
[0:2].isalpha() and LangName
[2] == '-':
212 if LangName
[0:2].isalpha() and LangName
[2] == '-':
214 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
217 EdkLogger
.Error("Unicode File Parser",
218 ToolError
.FORMAT_INVALID
,
219 "Invalid RFC 4646 language code : %s" % LangName
,
224 # Format the entry in the Uni file.
226 # @param StrTokenName StrTokenName.
227 # @param TokenValueList A list that needs to be processed.
228 # @param ContainerFile ContainerFile.
230 # @return formatted entry
231 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
234 if len(StrTokenName
) > PreFormatLength
:
235 PreFormatLength
= len(StrTokenName
) + 1
236 for (Lang
, Value
) in TokenValueList
:
237 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
240 Lang
= DT
.TAB_LANGUAGE_EN_US
242 Lang
= DT
.TAB_LANGUAGE_EN_US
243 elif len(Lang
.split('-')[0]) == 3:
244 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
246 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
247 ValueList
= Value
.split('\n')
249 for SubValue
in ValueList
:
252 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
253 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
255 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
257 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
261 ## StringDefClassObject
263 # A structure for language definition
265 class StringDefClassObject(object):
266 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
268 self
.StringNameByteList
= []
269 self
.StringValue
= ''
270 self
.StringValueByteList
= ''
272 self
.Referenced
= Referenced
273 self
.UseOtherLangDef
= UseOtherLangDef
277 self
.StringName
= Name
278 self
.StringNameByteList
= UniToHexList(Name
)
279 if Value
is not None:
280 self
.StringValue
= Value
281 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
282 self
.Length
= len(self
.StringValueByteList
)
283 if Token
is not None:
287 return repr(self
.StringName
) + ' ' + \
288 repr(self
.Token
) + ' ' + \
289 repr(self
.Referenced
) + ' ' + \
290 repr(self
.StringValue
) + ' ' + \
291 repr(self
.UseOtherLangDef
)
293 def UpdateValue(self
, Value
= None):
294 if Value
is not None:
296 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
298 self
.StringValue
= Value
299 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
300 self
.Length
= len(self
.StringValueByteList
)
302 ## UniFileClassObject
304 # A structure for .uni file definition
306 class UniFileClassObject(object):
307 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
308 self
.FileList
= FileList
310 self
.IncFileList
= FileList
311 self
.UniFileHeader
= ''
313 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
314 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
315 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
316 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
317 self
.IsCompatibleMode
= IsCompatibleMode
318 if not IncludePathList
:
319 self
.IncludePathList
= []
321 self
.IncludePathList
= IncludePathList
322 if len(self
.FileList
) > 0:
323 self
.LoadUniFiles(FileList
)
326 # Get Language definition
328 def GetLangDef(self
, File
, Line
):
329 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
332 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
333 except UnicodeError as Xstr
:
334 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
335 except UnicodeError as Xstr
:
336 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
338 EdkLogger
.Error("Unicode File Parser",
339 ToolError
.FILE_OPEN_FAILURE
,
340 "File read failure: %s" % str(Xstr
),
342 LineNo
= GetLineNo(FileIn
, Line
, False)
343 EdkLogger
.Error("Unicode File Parser",
344 ToolError
.PARSER_ERROR
,
345 "Wrong language definition",
346 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
347 File
= File
, Line
= LineNo
)
349 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
350 LangPrintName
= Lang
[2]
353 for Item
in self
.LanguageDef
:
354 if Item
[0] == LangName
:
359 self
.LanguageDef
.append([LangName
, LangPrintName
])
362 # Add language string
364 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
365 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
369 # The found STRING tokens will be added into new language string list
370 # so that the unique STRING identifier is reserved for all languages in the package list.
372 FirstLangName
= self
.LanguageDef
[0][0]
373 if LangName
!= FirstLangName
:
374 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
375 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
376 if Item
.UseOtherLangDef
!= '':
377 OtherLang
= Item
.UseOtherLangDef
379 OtherLang
= FirstLangName
380 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
385 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
389 # Get String name and value
391 def GetStringObject(self
, Item
):
395 Name
= Item
.split()[1]
396 # Check that the string name consists of upper-case characters only
398 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
399 if MatchString
is None or MatchString
.end(0) != len(Name
):
400 EdkLogger
.Error("Unicode File Parser",
401 ToolError
.FORMAT_INVALID
,
402 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
403 LanguageList
= Item
.split(u
'#language ')
404 for IndexI
in range(len(LanguageList
)):
408 Language
= LanguageList
[IndexI
].split()[0]
409 #.replace(u'\r\n', u'')
411 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
412 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
413 self
.AddStringToList(Name
, Language
, Value
)
416 # Get include file list and load them
418 def GetIncludeFile(self
, Item
, Dir
= None):
421 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
422 self
.LoadUniFile(FileName
)
425 # Pre-process before parse .uni file
427 def PreProcess(self
, File
, IsIncludeFile
=False):
428 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
429 EdkLogger
.Error("Unicode File Parser",
430 ToolError
.FILE_NOT_FOUND
,
434 # Check file header of the Uni file
436 # if not CheckUTF16FileHeader(File.Path):
437 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
438 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
441 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
442 except UnicodeError as Xstr
:
443 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
445 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
447 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
451 # get the file header
456 if not self
.UniFileHeader
:
457 FirstGenHeader
= True
459 FirstGenHeader
= False
464 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
465 and not HeaderEnd
and not HeaderStart
:
467 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
469 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
470 self
.UniFileHeader
+= Line
+ '\r\n'
474 # Use unique identifier
478 MultiLineFeedExits
= False
481 # 1: single String entry exist
482 # 2: a line feed exists under the same single String entry
484 StringEntryExistsFlag
= 0
486 Line
= FileIn
[LineCount
]
490 # Ignore comment line and empty line
492 if Line
== u
'' or Line
.startswith(u
'//'):
494 # Change the single line String entry flag status
496 if StringEntryExistsFlag
== 1:
497 StringEntryExistsFlag
= 2
499 # If the '#string' line and the '#language' line are not in the same line,
500 # there should be only one line feed character between them
502 if MultiLineFeedExits
:
503 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
506 MultiLineFeedExits
= False
508 # Process comment embedded in string define lines
510 FindFlag
= Line
.find(u
'//')
511 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
512 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
513 if FileIn
[LineCount
].strip().startswith('#language'):
514 Line
= Line
+ FileIn
[LineCount
]
515 FileIn
[LineCount
-1] = Line
516 FileIn
[LineCount
] = '\r\n'
518 for Index
in range (LineCount
+ 1, len (FileIn
) - 1):
519 if (Index
== len(FileIn
) -1):
520 FileIn
[Index
] = '\r\n'
522 FileIn
[Index
] = FileIn
[Index
+ 1]
524 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
526 if (len(Line
) - 1) > CommIndex
:
527 if Line
[CommIndex
+1] == u
'/':
528 Line
= Line
[:CommIndex
].strip()
530 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
532 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
534 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
535 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
536 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
538 Line
= Line
.replace(u
'\\\\', u
'\u0006')
539 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
540 Line
= Line
.replace(u
'\\n', CR
+ LF
)
541 Line
= Line
.replace(u
'\\r', CR
)
542 Line
= Line
.replace(u
'\\t', u
'\t')
543 Line
= Line
.replace(u
'''\"''', u
'''"''')
544 Line
= Line
.replace(u
'\t', u
' ')
545 Line
= Line
.replace(u
'\u0006', u
'\\')
548 # Check if single line has correct '"'
550 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
551 if not Line
.endswith('"'):
552 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
553 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
554 % (LineCount
, File
.Path
))
557 # Between Name entry and Language entry can not contain line feed
559 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
560 MultiLineFeedExits
= True
562 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
563 MultiLineFeedExits
= True
566 # Between Language entry and String entry can not contain line feed
568 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
569 MultiLineFeedExits
= True
572 # Check the situation that there only has one '"' for the language entry
574 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.count(u
'"') == 1:
575 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
576 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
577 % (LineCount
, File
.Path
))
580 # Check the situation that there has more than 2 '"' for the language entry
582 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.replace(u
'\\"', '').count(u
'"') > 2:
583 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
584 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
585 % (LineCount
, File
.Path
))
588 # Between two String entry, can not contain line feed
590 if Line
.startswith(u
'"'):
591 if StringEntryExistsFlag
== 2:
592 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
593 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
595 StringEntryExistsFlag
= 1
596 if not Line
.endswith('"'):
597 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
598 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
599 % (LineCount
, File
.Path
))
602 # Check the situation that there has more than 2 '"' for the language entry
604 if Line
.strip() and Line
.replace(u
'\\"', '').count(u
'"') > 2:
605 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
606 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
607 % (LineCount
, File
.Path
))
609 elif Line
.startswith(u
'#language'):
610 if StringEntryExistsFlag
== 2:
611 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
612 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
613 StringEntryExistsFlag
= 0
615 StringEntryExistsFlag
= 0
620 # Convert string def format as below
622 # #string MY_STRING_1
624 # "My first English string line 1"
625 # "My first English string line 2"
626 # #string MY_STRING_1
628 # "Mi segunda secuencia 1"
629 # "Mi segunda secuencia 2"
632 if not IsIncludeFile
and not Lines
:
633 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
634 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
639 ExistStrNameList
= []
641 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
642 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
643 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
646 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
647 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
648 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
649 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
650 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
651 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
652 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
655 if Line
.count(u
'#language') > 1:
656 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
657 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
660 if Line
.startswith(u
'//'):
662 elif Line
.startswith(u
'#langdef'):
663 if len(Line
.split()) == 2:
664 NewLines
.append(Line
)
666 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
667 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
668 NewLines
.append(Line
[Line
.find(u
'"'):])
670 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
671 elif Line
.startswith(u
'#string'):
672 if len(Line
.split()) == 2:
675 if StrName
.split()[1] not in ExistStrNameList
:
676 ExistStrNameList
.append(StrName
.split()[1].strip())
677 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
678 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
679 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
680 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
681 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
682 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
685 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
686 if Line
[Line
.find(u
'#language')-1] != ' ' or \
687 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
688 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
690 if Line
.find(u
'"') > 0:
691 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
693 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
695 if StrName
.split()[1] not in ExistStrNameList
:
696 ExistStrNameList
.append(StrName
.split()[1].strip())
697 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
698 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
699 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
700 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
701 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
702 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
705 if StrName
not in NewLines
:
706 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
708 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
709 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
710 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
711 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
712 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
713 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
715 if Line
[Line
.find(u
'"')-1] != u
' ':
716 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
718 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
720 if StrName
.split()[1] not in ExistStrNameList
:
721 ExistStrNameList
.append(StrName
.split()[1].strip())
722 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
723 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
724 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
725 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
726 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
727 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
730 if StrName
not in NewLines
:
731 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
733 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
734 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
735 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
737 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
738 elif Line
.startswith(u
'#language'):
739 if len(Line
.split()) == 2:
741 if StrName
not in NewLines
:
742 NewLines
.append(StrName
)
744 NewLines
.append(StrName
)
745 NewLines
.append(Line
)
746 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
748 if StrName
not in NewLines
:
749 NewLines
.append(StrName
)
751 NewLines
.append(StrName
)
752 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
753 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
755 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
756 elif Line
.startswith(u
'"'):
757 if u
'#string' in Line
or u
'#language' in Line
:
758 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
759 NewLines
.append(Line
)
762 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
764 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
765 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
766 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
769 if StrName
and not NewLines
:
770 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
771 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
775 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
776 # should be Abstract, Description, BinaryAbstract, BinaryDescription
777 AbstractPosition
= -1
778 DescriptionPosition
= -1
779 BinaryAbstractPosition
= -1
780 BinaryDescriptionPosition
= -1
781 for StrName
in ExistStrNameList
:
782 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
783 if 'BINARY' in StrName
:
784 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
786 AbstractPosition
= ExistStrNameList
.index(StrName
)
787 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
788 if 'BINARY' in StrName
:
789 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
791 DescriptionPosition
= ExistStrNameList
.index(StrName
)
793 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
794 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
797 BinaryMin
= BinaryOrderList
[0]
798 BinaryMax
= BinaryOrderList
[1]
799 if BinaryDescriptionPosition
> -1:
800 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
802 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
803 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
805 elif BinaryAbstractPosition
> -1:
806 if not(BinaryAbstractPosition
> Max
):
807 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
808 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
811 if DescriptionPosition
> -1:
812 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
813 DescriptionPosition
> AbstractPosition
):
814 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
815 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
818 if not self
.UniFileHeader
:
819 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
820 Message
= ST
.ERR_NO_SOURCE_HEADER
,
828 def LoadUniFile(self
, File
= None):
830 EdkLogger
.Error("Unicode File Parser",
831 ToolError
.PARSER_ERROR
,
832 Message
='No unicode file is given',
838 # Process special char in file
840 Lines
= self
.PreProcess(File
)
843 # Get Unicode Information
845 for IndexI
in range(len(Lines
)):
847 if (IndexI
+ 1) < len(Lines
):
848 SecondLine
= Lines
[IndexI
+ 1]
849 if (IndexI
+ 2) < len(Lines
):
850 ThirdLine
= Lines
[IndexI
+ 2]
853 # Get Language def information
855 if Line
.find(u
'#langdef ') >= 0:
856 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
864 # Get string def information format as below
866 # #string MY_STRING_1
868 # "My first English string line 1"
869 # "My first English string line 2"
870 # #string MY_STRING_1
872 # "Mi segunda secuencia 1"
873 # "Mi segunda secuencia 2"
875 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
876 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
877 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
878 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
879 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
880 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
883 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
884 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
885 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
886 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
887 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
888 if Lines
[IndexJ
][-2] == ' ':
891 if Lines
[IndexJ
].strip()[1:-1].strip():
892 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
894 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
897 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
901 if Value
.endswith('\r\n'):
902 Value
= Value
[: Value
.rfind('\r\n')]
903 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
904 self
.AddStringToList(Name
, Language
, Value
)
908 # Load multiple .uni files
def LoadUniFiles(self, FileList):
    """Load every .uni file found in *FileList*.

    Each entry must expose a ``Path`` attribute.  An entry is handed to
    ``self.LoadUniFile`` only when its whitespace-stripped path ends in
    '.uni', '.UNI' or '.Uni'; all other entries are ignored.

    @param FileList: Iterable of file objects carrying a ``Path`` string
    """
    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for File in FileList:
        if File.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(File)
918 # Add a string to list
920 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
921 for LangNameItem
in self
.LanguageDef
:
922 if Language
== LangNameItem
[0]:
925 if Language
not in self
.OrderedStringList
:
926 self
.OrderedStringList
[Language
] = []
927 self
.OrderedStringDict
[Language
] = {}
930 if Name
in self
.OrderedStringDict
[Language
]:
932 if Value
is not None:
933 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
934 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
935 Item
.UpdateValue(Value
)
936 Item
.UseOtherLangDef
= ''
939 Token
= len(self
.OrderedStringList
[Language
])
941 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
946 self
.OrderedStringDict
[Language
][Name
] = Token
947 for LangName
in self
.LanguageDef
:
949 # New STRING token will be added into all language string lists.
950 # so that the unique STRING identifier is reserved for all languages in the package list.
952 if LangName
[0] != Language
:
953 if UseOtherLangDef
!= '':
954 OtherLangDef
= UseOtherLangDef
956 OtherLangDef
= Language
957 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
962 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
964 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
969 self
.OrderedStringDict
[Language
][Name
] = Index
972 # Set the string as referenced
def SetStringReferenced(self, Name):
    """Mark the string called *Name* as referenced.

    String tokens are added in the same order to every language's string
    list, so only the entry in the first defined language is updated.
    A name that is not present in that language is silently ignored.

    @param Name: The string token name to flag
    """
    Lang = self.LanguageDef[0][0]
    NameToIndex = self.OrderedStringDict[Lang]
    if Name in NameToIndex:
        self.OrderedStringList[Lang][NameToIndex[Name]].Referenced = True
986 # Search the string in language definition by Name
def FindStringValue(self, Name, Lang):
    """Look up the string object called *Name* in language *Lang*.

    @param Name: The string token name to search for
    @param Lang: The language identifier whose list is searched
    @retval: The matching entry of ``self.OrderedStringList[Lang]``, or
             None when either the language or the name is unknown
    """
    # An unknown language is reported the same way as an unknown name
    # (None) instead of surfacing as a KeyError.
    NameToIndex = self.OrderedStringDict.get(Lang)
    if NameToIndex is None or Name not in NameToIndex:
        return None
    return self.OrderedStringList[Lang][NameToIndex[Name]]
996 # Search the string in language definition by Token
998 def FindByToken(self
, Token
, Lang
):
999 for Item
in self
.OrderedStringList
[Lang
]:
1000 if Item
.Token
== Token
:
1006 # Re-order strings and re-generate tokens
1009 if len(self
.LanguageDef
) == 0:
1012 # Retoken all language strings according to the status of string stoken in the first language string.
1014 FirstLangName
= self
.LanguageDef
[0][0]
1016 # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
1017 for LangNameItem
in self
.LanguageDef
:
1018 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
1021 # Use small token for all referred string stoken.
1024 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1025 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1026 if FirstLangItem
.Referenced
== True:
1027 for LangNameItem
in self
.LanguageDef
:
1028 LangName
= LangNameItem
[0]
1029 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1030 OtherLangItem
.Referenced
= True
1031 OtherLangItem
.Token
= RefToken
1032 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1033 RefToken
= RefToken
+ 1
1036 # Use big token for all unreferred string stoken.
1039 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1040 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1041 if FirstLangItem
.Referenced
== False:
1042 for LangNameItem
in self
.LanguageDef
:
1043 LangName
= LangNameItem
[0]
1044 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1045 OtherLangItem
.Token
= RefToken
+ UnRefToken
1046 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1047 UnRefToken
= UnRefToken
+ 1
1050 # Show the instance itself
1053 print(self
.LanguageDef
)
1054 #print self.OrderedStringList
1055 for Item
in self
.OrderedStringList
:
1057 for Member
in self
.OrderedStringList
[Item
]:
1061 # Read content from '!include' UNI file
1063 def ReadIncludeUNIfile(self
, FilaPath
):
1067 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1068 EdkLogger
.Error("Unicode File Parser",
1069 ToolError
.FILE_NOT_FOUND
,
1072 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1073 except UnicodeError as Xstr
:
1074 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1075 except UnicodeError:
1076 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1078 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)