2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
17 from __future__
import print_function
24 from Logger
import ToolError
25 from Logger
import Log
as EdkLogger
26 from Logger
import StringTable
as ST
27 from Library
.StringUtils
import GetLineNo
28 from Library
.Misc
import PathClass
29 from Library
.Misc
import GetCharIndexOutStr
30 from Library
import DataType
as DT
31 from Library
.ParserValidate
import CheckUTF16FileHeader
36 UNICODE_WIDE_CHAR
= u
'\\wide'
37 UNICODE_NARROW_CHAR
= u
'\\narrow'
38 UNICODE_NON_BREAKING_CHAR
= u
'\\nbr'
39 UNICODE_UNICODE_CR
= '\r'
40 UNICODE_UNICODE_LF
= '\n'
42 NARROW_CHAR
= u
'\uFFF0'
44 NON_BREAKING_CHAR
= u
'\uFFF2'
# Three-letter language code -> two-letter language code.
# Insertion order is kept as in the original literal because callers walk the
# keys to perform reverse (two-letter -> three-letter) lookups.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84 ## Convert a python unicode string to a normal string
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
89 # @param Uni: The python unicode string
91 # @retval: The formatted normal string
94 return repr(Uni
)[2:-1]
96 ## Convert a unicode string to a Hex list
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
101 # @param Uni: The python unicode string
103 # @retval List: The formatted hex list
def UniToHexList(Uni):
    """Convert a unicode string to a list of little-endian UTF-16 byte strings.

    Every 16-bit code unit contributes two entries, low byte first, each
    formatted as '0x' followed by two upper-case hex digits, e.g.
    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].

    @param Uni:   The python unicode string
    @retval List: The formatted hex list (empty for an empty string)
    """
    List = []
    for Item in Uni:
        CodePoint = ord(Item)
        if CodePoint > 0xFFFF:
            # A code point outside the BMP does not fit in four hex digits;
            # split it into a UTF-16 surrogate pair so that each unit still
            # yields exactly two bytes.
            CodePoint -= 0x10000
            Units = (0xD800 | (CodePoint >> 10), 0xDC00 | (CodePoint & 0x3FF))
        else:
            Units = (CodePoint,)
        for Unit in Units:
            # Low byte first, then high byte (little-endian).
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
113 ## Convert special unicode characters
115 # Convert special characters to (c), (r) and (tm).
117 # @param Uni: The python unicode string
119 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII text.

    @param Uni:      The python unicode string
    @retval NewUni:  A copy of Uni with U+00A9 -> '(c)', U+00AE -> '(r)'
                     and U+2122 -> '(tm)'
    """
    # Applied in this fixed order, one full pass per symbol.
    Substitutions = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, AsciiText in Substitutions:
        Converted = Converted.replace(Symbol, AsciiText)
    return Converted
128 ## GetLanguageCode1766
130 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
131 # RFC 1766 language codes supported in compatibility mode
132 # RFC 4646 language codes supported in native mode
134 # @param LangName: Language codes read from .UNI file
136 # @retval LangName: Valid language code in RFC 1766 format or None
138 def GetLanguageCode1766(LangName
, File
=None):
141 length
= len(LangName
)
143 if LangName
.isalpha():
144 for Key
in gLANG_CONV_TABLE
.keys():
145 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
148 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
151 EdkLogger
.Error("Unicode File Parser",
152 ToolError
.FORMAT_INVALID
,
153 "Invalid RFC 1766 language code : %s" % LangName
,
156 if LangName
[0:2].isalpha() and LangName
[2] == '-':
157 for Key
in gLANG_CONV_TABLE
.keys():
158 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
161 if LangName
[0:2].isalpha() and LangName
[2] == '-':
162 for Key
in gLANG_CONV_TABLE
.keys():
163 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
165 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
166 for Key
in gLANG_CONV_TABLE
.keys():
167 if Key
== LangName
[0:3].lower():
170 EdkLogger
.Error("Unicode File Parser",
171 ToolError
.FORMAT_INVALID
,
172 "Invalid RFC 4646 language code : %s" % LangName
,
177 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
178 # RFC 1766 language codes supported in compatibility mode
179 # RFC 4646 language codes supported in native mode
181 # @param LangName: Language codes read from .UNI file
183 # @retval LangName: Valid language code in RFC 4646 format or None
185 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
186 length
= len(LangName
)
188 if length
== 3 and LangName
.isalpha():
189 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
190 if TempLangName
is not None:
194 EdkLogger
.Error("Unicode File Parser",
195 ToolError
.FORMAT_INVALID
,
196 "Invalid RFC 1766 language code : %s" % LangName
,
198 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
201 if LangName
.isalpha():
204 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None:
207 if LangName
[0:2].isalpha() and LangName
[2] == '-':
210 if LangName
[0:2].isalpha() and LangName
[2] == '-':
212 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
215 EdkLogger
.Error("Unicode File Parser",
216 ToolError
.FORMAT_INVALID
,
217 "Invalid RFC 4646 language code : %s" % LangName
,
222 # Format the entry in the Uni file.
224 # @param StrTokenName StrTokenName.
225 # @param TokenValueList A list that needs to be processed.
226 # @param ContainerFile ContainerFile.
228 # @return formatted entry
229 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
232 if len(StrTokenName
) > PreFormatLength
:
233 PreFormatLength
= len(StrTokenName
) + 1
234 for (Lang
, Value
) in TokenValueList
:
235 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
238 Lang
= DT
.TAB_LANGUAGE_EN_US
240 Lang
= DT
.TAB_LANGUAGE_EN_US
241 elif len(Lang
.split('-')[0]) == 3:
242 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
244 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
245 ValueList
= Value
.split('\n')
247 for SubValue
in ValueList
:
250 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
251 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
253 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
255 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
259 ## StringDefClassObject
261 # A structure for language definition
263 class StringDefClassObject(object):
264 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
266 self
.StringNameByteList
= []
267 self
.StringValue
= ''
268 self
.StringValueByteList
= ''
270 self
.Referenced
= Referenced
271 self
.UseOtherLangDef
= UseOtherLangDef
275 self
.StringName
= Name
276 self
.StringNameByteList
= UniToHexList(Name
)
277 if Value
is not None:
278 self
.StringValue
= Value
279 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
280 self
.Length
= len(self
.StringValueByteList
)
281 if Token
is not None:
285 return repr(self
.StringName
) + ' ' + \
286 repr(self
.Token
) + ' ' + \
287 repr(self
.Referenced
) + ' ' + \
288 repr(self
.StringValue
) + ' ' + \
289 repr(self
.UseOtherLangDef
)
291 def UpdateValue(self
, Value
= None):
292 if Value
is not None:
294 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
296 self
.StringValue
= Value
297 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
298 self
.Length
= len(self
.StringValueByteList
)
300 ## UniFileClassObject
302 # A structure for .uni file definition
304 class UniFileClassObject(object):
305 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
306 self
.FileList
= FileList
308 self
.IncFileList
= FileList
309 self
.UniFileHeader
= ''
311 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
312 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
313 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
314 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
315 self
.IsCompatibleMode
= IsCompatibleMode
316 if not IncludePathList
:
317 self
.IncludePathList
= []
319 self
.IncludePathList
= IncludePathList
320 if len(self
.FileList
) > 0:
321 self
.LoadUniFiles(FileList
)
324 # Get Language definition
326 def GetLangDef(self
, File
, Line
):
327 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
330 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
331 except UnicodeError as Xstr
:
332 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
333 except UnicodeError as Xstr
:
334 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
336 EdkLogger
.Error("Unicode File Parser",
337 ToolError
.FILE_OPEN_FAILURE
,
338 "File read failure: %s" % str(Xstr
),
340 LineNo
= GetLineNo(FileIn
, Line
, False)
341 EdkLogger
.Error("Unicode File Parser",
342 ToolError
.PARSER_ERROR
,
343 "Wrong language definition",
344 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
345 File
= File
, Line
= LineNo
)
347 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
348 LangPrintName
= Lang
[2]
351 for Item
in self
.LanguageDef
:
352 if Item
[0] == LangName
:
357 self
.LanguageDef
.append([LangName
, LangPrintName
])
360 # Add language string
362 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
363 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
367 # The found STRING tokens will be added into new language string list
368 # so that the unique STRING identifier is reserved for all languages in the package list.
370 FirstLangName
= self
.LanguageDef
[0][0]
371 if LangName
!= FirstLangName
:
372 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
373 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
374 if Item
.UseOtherLangDef
!= '':
375 OtherLang
= Item
.UseOtherLangDef
377 OtherLang
= FirstLangName
378 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
383 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
387 # Get String name and value
389 def GetStringObject(self
, Item
):
393 Name
= Item
.split()[1]
394 # Check the string name is the upper character
396 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
397 if MatchString
is None or MatchString
.end(0) != len(Name
):
398 EdkLogger
.Error("Unicode File Parser",
399 ToolError
.FORMAT_INVALID
,
400 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
401 LanguageList
= Item
.split(u
'#language ')
402 for IndexI
in range(len(LanguageList
)):
406 Language
= LanguageList
[IndexI
].split()[0]
407 #.replace(u'\r\n', u'')
409 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
410 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
411 self
.AddStringToList(Name
, Language
, Value
)
414 # Get include file list and load them
416 def GetIncludeFile(self
, Item
, Dir
= None):
419 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
420 self
.LoadUniFile(FileName
)
423 # Pre-process before parse .uni file
425 def PreProcess(self
, File
, IsIncludeFile
=False):
426 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
427 EdkLogger
.Error("Unicode File Parser",
428 ToolError
.FILE_NOT_FOUND
,
432 # Check file header of the Uni file
434 # if not CheckUTF16FileHeader(File.Path):
435 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
436 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
439 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
440 except UnicodeError as Xstr
:
441 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
443 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
445 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
449 # get the file header
454 if not self
.UniFileHeader
:
455 FirstGenHeader
= True
457 FirstGenHeader
= False
462 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
463 and not HeaderEnd
and not HeaderStart
:
465 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
467 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
468 self
.UniFileHeader
+= Line
+ '\r\n'
472 # Use unique identifier
476 MultiLineFeedExits
= False
479 # 1: single String entry exists
480 # 2: line feed exists under the same single String entry
482 StringEntryExistsFlag
= 0
484 Line
= FileIn
[LineCount
]
488 # Ignore comment line and empty line
490 if Line
== u
'' or Line
.startswith(u
'//'):
492 # Change the single line String entry flag status
494 if StringEntryExistsFlag
== 1:
495 StringEntryExistsFlag
= 2
497 # If the '#string' line and the '#language' line are not in the same line,
498 # there should be only one line feed character between them
500 if MultiLineFeedExits
:
501 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
504 MultiLineFeedExits
= False
506 # Process comment embedded in string define lines
508 FindFlag
= Line
.find(u
'//')
509 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
510 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
511 if FileIn
[LineCount
].strip().startswith('#language'):
512 Line
= Line
+ FileIn
[LineCount
]
513 FileIn
[LineCount
-1] = Line
514 FileIn
[LineCount
] = '\r\n'
516 for Index
in xrange (LineCount
+ 1, len (FileIn
) - 1):
517 if (Index
== len(FileIn
) -1):
518 FileIn
[Index
] = '\r\n'
520 FileIn
[Index
] = FileIn
[Index
+ 1]
522 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
524 if (len(Line
) - 1) > CommIndex
:
525 if Line
[CommIndex
+1] == u
'/':
526 Line
= Line
[:CommIndex
].strip()
528 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
530 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
532 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
533 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
534 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
536 Line
= Line
.replace(u
'\\\\', u
'\u0006')
537 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
538 Line
= Line
.replace(u
'\\n', CR
+ LF
)
539 Line
= Line
.replace(u
'\\r', CR
)
540 Line
= Line
.replace(u
'\\t', u
'\t')
541 Line
= Line
.replace(u
'''\"''', u
'''"''')
542 Line
= Line
.replace(u
'\t', u
' ')
543 Line
= Line
.replace(u
'\u0006', u
'\\')
546 # Check if single line has correct '"'
548 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
549 if not Line
.endswith('"'):
550 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
551 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
552 % (LineCount
, File
.Path
))
555 # Between Name entry and Language entry can not contain line feed
557 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
558 MultiLineFeedExits
= True
560 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
561 MultiLineFeedExits
= True
564 # Between Language entry and String entry can not contain line feed
566 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
567 MultiLineFeedExits
= True
570 # Between two String entry, can not contain line feed
572 if Line
.startswith(u
'"'):
573 if StringEntryExistsFlag
== 2:
574 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
575 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
577 StringEntryExistsFlag
= 1
578 if not Line
.endswith('"'):
579 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
580 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
581 % (LineCount
, File
.Path
))
582 elif Line
.startswith(u
'#language'):
583 if StringEntryExistsFlag
== 2:
584 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
585 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
586 StringEntryExistsFlag
= 0
588 StringEntryExistsFlag
= 0
593 # Convert string def format as below
595 # #string MY_STRING_1
597 # "My first English string line 1"
598 # "My first English string line 2"
599 # #string MY_STRING_1
601 # "Mi segunda secuencia 1"
602 # "Mi segunda secuencia 2"
605 if not IsIncludeFile
and not Lines
:
606 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
607 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
612 ExistStrNameList
= []
614 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
615 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
616 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
619 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
620 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
621 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
622 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
623 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
624 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
625 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
628 if Line
.count(u
'#language') > 1:
629 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
630 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
633 if Line
.startswith(u
'//'):
635 elif Line
.startswith(u
'#langdef'):
636 if len(Line
.split()) == 2:
637 NewLines
.append(Line
)
639 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
640 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
641 NewLines
.append(Line
[Line
.find(u
'"'):])
643 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
644 elif Line
.startswith(u
'#string'):
645 if len(Line
.split()) == 2:
648 if StrName
.split()[1] not in ExistStrNameList
:
649 ExistStrNameList
.append(StrName
.split()[1].strip())
650 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
651 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
652 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
653 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
654 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
655 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
658 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
659 if Line
[Line
.find(u
'#language')-1] != ' ' or \
660 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
661 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
663 if Line
.find(u
'"') > 0:
664 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
666 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
668 if StrName
.split()[1] not in ExistStrNameList
:
669 ExistStrNameList
.append(StrName
.split()[1].strip())
670 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
671 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
672 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
673 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
674 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
675 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
678 if StrName
not in NewLines
:
679 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
681 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
682 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
683 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
684 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
685 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
686 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
688 if Line
[Line
.find(u
'"')-1] != u
' ':
689 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
691 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
693 if StrName
.split()[1] not in ExistStrNameList
:
694 ExistStrNameList
.append(StrName
.split()[1].strip())
695 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
696 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
697 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
698 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
699 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
700 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
703 if StrName
not in NewLines
:
704 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
706 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
707 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
708 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
710 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
711 elif Line
.startswith(u
'#language'):
712 if len(Line
.split()) == 2:
714 if StrName
not in NewLines
:
715 NewLines
.append(StrName
)
717 NewLines
.append(StrName
)
718 NewLines
.append(Line
)
719 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
721 if StrName
not in NewLines
:
722 NewLines
.append(StrName
)
724 NewLines
.append(StrName
)
725 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
726 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
728 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
729 elif Line
.startswith(u
'"'):
730 if u
'#string' in Line
or u
'#language' in Line
:
731 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
732 NewLines
.append(Line
)
735 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
737 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
738 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
739 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
742 if StrName
and not NewLines
:
743 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
744 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
748 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
749 # should be Abstract, Description, BinaryAbstract, BinaryDescription
750 AbstractPosition
= -1
751 DescriptionPosition
= -1
752 BinaryAbstractPosition
= -1
753 BinaryDescriptionPosition
= -1
754 for StrName
in ExistStrNameList
:
755 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
756 if 'BINARY' in StrName
:
757 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
759 AbstractPosition
= ExistStrNameList
.index(StrName
)
760 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
761 if 'BINARY' in StrName
:
762 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
764 DescriptionPosition
= ExistStrNameList
.index(StrName
)
766 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
767 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
770 BinaryMin
= BinaryOrderList
[0]
771 BinaryMax
= BinaryOrderList
[1]
772 if BinaryDescriptionPosition
> -1:
773 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
775 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
776 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
778 elif BinaryAbstractPosition
> -1:
779 if not(BinaryAbstractPosition
> Max
):
780 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
781 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
784 if DescriptionPosition
> -1:
785 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
786 DescriptionPosition
> AbstractPosition
):
787 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
788 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
791 if not self
.UniFileHeader
:
792 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
793 Message
= ST
.ERR_NO_SOURCE_HEADER
,
801 def LoadUniFile(self
, File
= None):
803 EdkLogger
.Error("Unicode File Parser",
804 ToolError
.PARSER_ERROR
,
805 Message
='No unicode file is given',
811 # Process special char in file
813 Lines
= self
.PreProcess(File
)
816 # Get Unicode Information
818 for IndexI
in range(len(Lines
)):
820 if (IndexI
+ 1) < len(Lines
):
821 SecondLine
= Lines
[IndexI
+ 1]
822 if (IndexI
+ 2) < len(Lines
):
823 ThirdLine
= Lines
[IndexI
+ 2]
826 # Get Language def information
828 if Line
.find(u
'#langdef ') >= 0:
829 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
837 # Get string def information format as below
839 # #string MY_STRING_1
841 # "My first English string line 1"
842 # "My first English string line 2"
843 # #string MY_STRING_1
845 # "Mi segunda secuencia 1"
846 # "Mi segunda secuencia 2"
848 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
849 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
850 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
851 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
852 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
853 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
856 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
857 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
858 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
859 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
860 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
861 if Lines
[IndexJ
][-2] == ' ':
864 if Lines
[IndexJ
].strip()[1:-1].strip():
865 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
867 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
870 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
874 if Value
.endswith('\r\n'):
875 Value
= Value
[: Value
.rfind('\r\n')]
876 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
877 self
.AddStringToList(Name
, Language
, Value
)
881 # Load multiple .uni files
def LoadUniFiles(self, FileList):
    """Load every file in FileList whose path ends with a .uni extension.

    Each entry's Path attribute is stripped of surrounding whitespace and
    compared against the suffixes '.uni', '.UNI' and '.Uni'; matching entries
    are passed unchanged to self.LoadUniFile. Other entries are ignored.

    @param FileList: A sized sequence of objects exposing a Path string
    """
    if len(FileList) == 0:
        return
    for UniFile in FileList:
        if UniFile.Path.strip().endswith(('.uni', '.UNI', '.Uni')):
            self.LoadUniFile(UniFile)
891 # Add a string to list
893 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
894 for LangNameItem
in self
.LanguageDef
:
895 if Language
== LangNameItem
[0]:
898 if Language
not in self
.OrderedStringList
:
899 self
.OrderedStringList
[Language
] = []
900 self
.OrderedStringDict
[Language
] = {}
903 if Name
in self
.OrderedStringDict
[Language
]:
905 if Value
is not None:
906 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
907 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
908 Item
.UpdateValue(Value
)
909 Item
.UseOtherLangDef
= ''
912 Token
= len(self
.OrderedStringList
[Language
])
914 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
919 self
.OrderedStringDict
[Language
][Name
] = Token
920 for LangName
in self
.LanguageDef
:
922 # New STRING token will be added into all language string lists.
923 # so that the unique STRING identifier is reserved for all languages in the package list.
925 if LangName
[0] != Language
:
926 if UseOtherLangDef
!= '':
927 OtherLangDef
= UseOtherLangDef
929 OtherLangDef
= Language
930 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
935 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
937 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
942 self
.OrderedStringDict
[Language
][Name
] = Index
945 # Set the string as referenced
def SetStringReferenced(self, Name):
    """Flag the string called Name as referenced.

    Only the entry in the first language of self.LanguageDef is updated.
    A Name that is not present in that language is silently ignored.

    @param Name: The string token name to mark
    """
    #
    # String tokens are added in the same order in all language string lists.
    # So, only update the status of the string token in the first language string list.
    #
    FirstLang = self.LanguageDef[0][0]
    NameToIndex = self.OrderedStringDict[FirstLang]
    if Name not in NameToIndex:
        return
    self.OrderedStringList[FirstLang][NameToIndex[Name]].Referenced = True
959 # Search the string in language definition by Name
def FindStringValue(self, Name, Lang):
    """Look up the string object called Name in language Lang.

    @param Name: The string token name
    @param Lang: The language identifier; must be a key of
                 self.OrderedStringDict (KeyError otherwise)
    @retval:     The entry stored in self.OrderedStringList[Lang] for Name,
                 or None when Name is not defined for that language
    """
    NameToIndex = self.OrderedStringDict[Lang]
    if Name not in NameToIndex:
        return None
    return self.OrderedStringList[Lang][NameToIndex[Name]]
969 # Search the string in language definition by Token
971 def FindByToken(self
, Token
, Lang
):
972 for Item
in self
.OrderedStringList
[Lang
]:
973 if Item
.Token
== Token
:
979 # Re-order strings and re-generate tokens
982 if len(self
.LanguageDef
) == 0:
985 # Retoken all language strings according to the status of the string token in the first language string.
987 FirstLangName
= self
.LanguageDef
[0][0]
989 # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
990 for LangNameItem
in self
.LanguageDef
:
991 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
994 # Use small token for all referred string tokens.
997 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
998 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
999 if FirstLangItem
.Referenced
== True:
1000 for LangNameItem
in self
.LanguageDef
:
1001 LangName
= LangNameItem
[0]
1002 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1003 OtherLangItem
.Referenced
= True
1004 OtherLangItem
.Token
= RefToken
1005 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1006 RefToken
= RefToken
+ 1
1009 # Use big token for all unreferred string tokens.
1012 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1013 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1014 if FirstLangItem
.Referenced
== False:
1015 for LangNameItem
in self
.LanguageDef
:
1016 LangName
= LangNameItem
[0]
1017 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1018 OtherLangItem
.Token
= RefToken
+ UnRefToken
1019 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1020 UnRefToken
= UnRefToken
+ 1
1023 # Show the instance itself
1026 print(self
.LanguageDef
)
1027 #print self.OrderedStringList
1028 for Item
in self
.OrderedStringList
:
1030 for Member
in self
.OrderedStringList
[Item
]:
1034 # Read content from '!include' UNI file
1036 def ReadIncludeUNIfile(self
, FilaPath
):
1040 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1041 EdkLogger
.Error("Unicode File Parser",
1042 ToolError
.FILE_NOT_FOUND
,
1045 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1046 except UnicodeError as Xstr
:
1047 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1048 except UnicodeError:
1049 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1051 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)