2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
17 from __future__
import print_function
24 from Logger
import ToolError
25 from Logger
import Log
as EdkLogger
26 from Logger
import StringTable
as ST
27 from Library
.StringUtils
import GetLineNo
28 from Library
.Misc
import PathClass
29 from Library
.Misc
import GetCharIndexOutStr
30 from Library
import DataType
as DT
31 from Library
.ParserValidate
import CheckUTF16FileHeader
36 UNICODE_WIDE_CHAR
= u
'\\wide'
37 UNICODE_NARROW_CHAR
= u
'\\narrow'
38 UNICODE_NON_BREAKING_CHAR
= u
'\\nbr'
39 UNICODE_UNICODE_CR
= '\r'
40 UNICODE_UNICODE_LF
= '\n'
42 NARROW_CHAR
= u
'\uFFF0'
44 NON_BREAKING_CHAR
= u
'\uFFF2'
# Lookup table from lower-case three-letter language codes to their
# two-letter equivalents.  Callers lower-case the language name before
# looking it up, so every key here is lower case.  Line continuations are
# not needed inside the braces.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84 ## Convert a python unicode string to a normal string
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
89 # @param Uni: The python unicode string
91 # @retval: The formatted normal string
94 return repr(Uni
)[2:-1]
96 ## Convert a unicode string to a Hex list
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
101 # @param Uni: The python unicode string
103 # @retval List: The formatted hex list
105 def UniToHexList(Uni
):
108 Temp
= '%04X' % ord(Item
)
109 List
.append('0x' + Temp
[2:4])
110 List
.append('0x' + Temp
[0:2])
113 ## Convert special unicode characters
115 # Convert special characters to (c), (r) and (tm).
117 # @param Uni: The python unicode string
119 # @retval NewUni: The converted unicode string
121 def ConvertSpecialUnicodes(Uni
):
123 NewUni
= NewUni
.replace(u
'\u00A9', '(c)')
124 NewUni
= NewUni
.replace(u
'\u00AE', '(r)')
125 NewUni
= NewUni
.replace(u
'\u2122', '(tm)')
128 ## GetLanguageCode1766
130 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
131 # RFC 1766 language codes supported in compatibility mode
132 # RFC 4646 language codes supported in native mode
134 # @param LangName: Language codes read from .UNI file
136 # @retval LangName: Valid language code in RFC 1766 format or None
138 def GetLanguageCode1766(LangName
, File
=None):
141 length
= len(LangName
)
143 if LangName
.isalpha():
144 for Key
in gLANG_CONV_TABLE
.keys():
145 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
148 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
151 EdkLogger
.Error("Unicode File Parser",
152 ToolError
.FORMAT_INVALID
,
153 "Invalid RFC 1766 language code : %s" % LangName
,
156 if LangName
[0:2].isalpha() and LangName
[2] == '-':
157 for Key
in gLANG_CONV_TABLE
.keys():
158 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
161 if LangName
[0:2].isalpha() and LangName
[2] == '-':
162 for Key
in gLANG_CONV_TABLE
.keys():
163 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
165 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
166 for Key
in gLANG_CONV_TABLE
.keys():
167 if Key
== LangName
[0:3].lower():
170 EdkLogger
.Error("Unicode File Parser",
171 ToolError
.FORMAT_INVALID
,
172 "Invalid RFC 4646 language code : %s" % LangName
,
177 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
178 # RFC 1766 language codes supported in compatibility mode
179 # RFC 4646 language codes supported in native mode
181 # @param LangName: Language codes read from .UNI file
183 # @retval LangName: Valid language code in RFC 4646 format or None
185 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
186 length
= len(LangName
)
188 if length
== 3 and LangName
.isalpha():
189 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
190 if TempLangName
is not None:
194 EdkLogger
.Error("Unicode File Parser",
195 ToolError
.FORMAT_INVALID
,
196 "Invalid RFC 1766 language code : %s" % LangName
,
198 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
201 if LangName
.isalpha():
204 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None:
207 if LangName
[0:2].isalpha() and LangName
[2] == '-':
210 if LangName
[0:2].isalpha() and LangName
[2] == '-':
212 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) is None and LangName
[3] == '-':
215 EdkLogger
.Error("Unicode File Parser",
216 ToolError
.FORMAT_INVALID
,
217 "Invalid RFC 4646 language code : %s" % LangName
,
222 # Format the entry in a Uni file.
224 # @param StrTokenName StrTokenName.
225 # @param TokenValueList A list that needs to be processed.
226 # @param ContainerFile ContainerFile.
228 # @return formatted entry
229 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
232 if len(StrTokenName
) > PreFormatLength
:
233 PreFormatLength
= len(StrTokenName
) + 1
234 for (Lang
, Value
) in TokenValueList
:
235 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
238 Lang
= DT
.TAB_LANGUAGE_EN_US
240 Lang
= DT
.TAB_LANGUAGE_EN_US
241 elif len(Lang
.split('-')[0]) == 3:
242 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
244 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
245 ValueList
= Value
.split('\n')
247 for SubValue
in ValueList
:
250 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
251 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
253 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
255 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
259 ## StringDefClassObject
261 # A structure for language definition
263 class StringDefClassObject(object):
264 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
266 self
.StringNameByteList
= []
267 self
.StringValue
= ''
268 self
.StringValueByteList
= ''
270 self
.Referenced
= Referenced
271 self
.UseOtherLangDef
= UseOtherLangDef
275 self
.StringName
= Name
276 self
.StringNameByteList
= UniToHexList(Name
)
277 if Value
is not None:
278 self
.StringValue
= Value
279 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
280 self
.Length
= len(self
.StringValueByteList
)
281 if Token
is not None:
285 return repr(self
.StringName
) + ' ' + \
286 repr(self
.Token
) + ' ' + \
287 repr(self
.Referenced
) + ' ' + \
288 repr(self
.StringValue
) + ' ' + \
289 repr(self
.UseOtherLangDef
)
291 def UpdateValue(self
, Value
= None):
292 if Value
is not None:
294 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
296 self
.StringValue
= Value
297 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
298 self
.Length
= len(self
.StringValueByteList
)
300 ## UniFileClassObject
302 # A structure for .uni file definition
304 class UniFileClassObject(object):
305 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
306 self
.FileList
= FileList
308 self
.IncFileList
= FileList
309 self
.UniFileHeader
= ''
311 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
312 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
313 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
314 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
315 self
.IsCompatibleMode
= IsCompatibleMode
316 if not IncludePathList
:
317 self
.IncludePathList
= []
319 self
.IncludePathList
= IncludePathList
320 if len(self
.FileList
) > 0:
321 self
.LoadUniFiles(FileList
)
324 # Get Language definition
326 def GetLangDef(self
, File
, Line
):
327 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
330 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
331 except UnicodeError as Xstr
:
332 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
333 except UnicodeError as Xstr
:
334 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
336 EdkLogger
.Error("Unicode File Parser",
337 ToolError
.FILE_OPEN_FAILURE
,
338 "File read failure: %s" % str(Xstr
),
340 LineNo
= GetLineNo(FileIn
, Line
, False)
341 EdkLogger
.Error("Unicode File Parser",
342 ToolError
.PARSER_ERROR
,
343 "Wrong language definition",
344 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
345 File
= File
, Line
= LineNo
)
347 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
348 LangPrintName
= Lang
[2]
351 for Item
in self
.LanguageDef
:
352 if Item
[0] == LangName
:
357 self
.LanguageDef
.append([LangName
, LangPrintName
])
360 # Add language string
362 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
363 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
367 # The found STRING tokens will be added into new language string list
368 # so that the unique STRING identifier is reserved for all languages in the package list.
370 FirstLangName
= self
.LanguageDef
[0][0]
371 if LangName
!= FirstLangName
:
372 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
373 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
374 if Item
.UseOtherLangDef
!= '':
375 OtherLang
= Item
.UseOtherLangDef
377 OtherLang
= FirstLangName
378 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
383 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
387 # Get String name and value
389 def GetStringObject(self
, Item
):
393 Name
= Item
.split()[1]
394 # Check that the string name contains only upper-case letters, digits and underscores
396 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
397 if MatchString
is None or MatchString
.end(0) != len(Name
):
398 EdkLogger
.Error("Unicode File Parser",
399 ToolError
.FORMAT_INVALID
,
400 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
401 LanguageList
= Item
.split(u
'#language ')
402 for IndexI
in range(len(LanguageList
)):
406 Language
= LanguageList
[IndexI
].split()[0]
407 #.replace(u'\r\n', u'')
409 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
410 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
411 self
.AddStringToList(Name
, Language
, Value
)
414 # Get include file list and load them
416 def GetIncludeFile(self
, Item
, Dir
= None):
419 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
420 self
.LoadUniFile(FileName
)
423 # Pre-process before parse .uni file
425 def PreProcess(self
, File
, IsIncludeFile
=False):
426 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
427 EdkLogger
.Error("Unicode File Parser",
428 ToolError
.FILE_NOT_FOUND
,
432 # Check file header of the Uni file
434 # if not CheckUTF16FileHeader(File.Path):
435 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
436 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
439 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
440 except UnicodeError as Xstr
:
441 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
443 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
445 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
449 # get the file header
454 if not self
.UniFileHeader
:
455 FirstGenHeader
= True
457 FirstGenHeader
= False
462 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
463 and not HeaderEnd
and not HeaderStart
:
465 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
467 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
468 self
.UniFileHeader
+= Line
+ '\r\n'
472 # Use unique identifier
476 MultiLineFeedExits
= False
479 # 1: a single String entry exists
480 # 2: a line feed exists under the same single String entry
482 StringEntryExistsFlag
= 0
484 Line
= FileIn
[LineCount
]
488 # Ignore comment line and empty line
490 if Line
== u
'' or Line
.startswith(u
'//'):
492 # Change the single line String entry flag status
494 if StringEntryExistsFlag
== 1:
495 StringEntryExistsFlag
= 2
497 # If the '#string' line and the '#language' line are not in the same line,
498 # there should be only one line feed character between them
500 if MultiLineFeedExits
:
501 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
504 MultiLineFeedExits
= False
506 # Process comments embedded in string definition lines
508 FindFlag
= Line
.find(u
'//')
509 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
510 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
511 if FileIn
[LineCount
].strip().startswith('#language'):
512 Line
= Line
+ FileIn
[LineCount
]
513 FileIn
[LineCount
-1] = Line
514 FileIn
[LineCount
] = '\r\n'
516 for Index
in xrange (LineCount
+ 1, len (FileIn
) - 1):
517 if (Index
== len(FileIn
) -1):
518 FileIn
[Index
] = '\r\n'
520 FileIn
[Index
] = FileIn
[Index
+ 1]
522 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
524 if (len(Line
) - 1) > CommIndex
:
525 if Line
[CommIndex
+1] == u
'/':
526 Line
= Line
[:CommIndex
].strip()
528 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
530 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
532 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
533 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
534 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
536 Line
= Line
.replace(u
'\\\\', u
'\u0006')
537 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
538 Line
= Line
.replace(u
'\\n', CR
+ LF
)
539 Line
= Line
.replace(u
'\\r', CR
)
540 Line
= Line
.replace(u
'\\t', u
'\t')
541 Line
= Line
.replace(u
'''\"''', u
'''"''')
542 Line
= Line
.replace(u
'\t', u
' ')
543 Line
= Line
.replace(u
'\u0006', u
'\\')
546 # Check if single line has correct '"'
548 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
549 if not Line
.endswith('"'):
550 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
551 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
552 % (LineCount
, File
.Path
))
555 # A line feed is not allowed between the Name entry and the Language entry
557 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
558 MultiLineFeedExits
= True
560 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
561 MultiLineFeedExits
= True
564 # A line feed is not allowed between the Language entry and the String entry
566 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
567 MultiLineFeedExits
= True
570 # Check the situation where the language entry has only one '"'
572 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.count(u
'"') == 1:
573 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
574 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
575 % (LineCount
, File
.Path
))
578 # Check the situation where the language entry has more than 2 '"'
580 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.replace(u
'\\"', '').count(u
'"') > 2:
581 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
582 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
583 % (LineCount
, File
.Path
))
586 # A line feed is not allowed between two String entries
588 if Line
.startswith(u
'"'):
589 if StringEntryExistsFlag
== 2:
590 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
591 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
593 StringEntryExistsFlag
= 1
594 if not Line
.endswith('"'):
595 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
596 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
597 % (LineCount
, File
.Path
))
598 elif Line
.startswith(u
'#language'):
599 if StringEntryExistsFlag
== 2:
600 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
601 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
602 StringEntryExistsFlag
= 0
604 StringEntryExistsFlag
= 0
609 # Convert string def format as below
611 # #string MY_STRING_1
613 # "My first English string line 1"
614 # "My first English string line 2"
615 # #string MY_STRING_1
617 # "Mi segunda secuencia 1"
618 # "Mi segunda secuencia 2"
621 if not IsIncludeFile
and not Lines
:
622 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
623 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
628 ExistStrNameList
= []
630 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
631 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
632 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
635 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
636 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
637 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
638 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
639 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
640 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
641 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
644 if Line
.count(u
'#language') > 1:
645 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
646 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
649 if Line
.startswith(u
'//'):
651 elif Line
.startswith(u
'#langdef'):
652 if len(Line
.split()) == 2:
653 NewLines
.append(Line
)
655 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
656 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
657 NewLines
.append(Line
[Line
.find(u
'"'):])
659 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
660 elif Line
.startswith(u
'#string'):
661 if len(Line
.split()) == 2:
664 if StrName
.split()[1] not in ExistStrNameList
:
665 ExistStrNameList
.append(StrName
.split()[1].strip())
666 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
667 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
668 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
669 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
670 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
671 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
674 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
675 if Line
[Line
.find(u
'#language')-1] != ' ' or \
676 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
677 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
679 if Line
.find(u
'"') > 0:
680 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
682 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
684 if StrName
.split()[1] not in ExistStrNameList
:
685 ExistStrNameList
.append(StrName
.split()[1].strip())
686 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
687 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
688 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
689 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
690 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
691 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
694 if StrName
not in NewLines
:
695 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
697 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
698 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
699 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
700 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
701 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
702 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
704 if Line
[Line
.find(u
'"')-1] != u
' ':
705 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
707 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
709 if StrName
.split()[1] not in ExistStrNameList
:
710 ExistStrNameList
.append(StrName
.split()[1].strip())
711 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
712 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
713 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
714 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
715 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
716 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
719 if StrName
not in NewLines
:
720 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
722 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
723 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
724 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
726 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
727 elif Line
.startswith(u
'#language'):
728 if len(Line
.split()) == 2:
730 if StrName
not in NewLines
:
731 NewLines
.append(StrName
)
733 NewLines
.append(StrName
)
734 NewLines
.append(Line
)
735 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
737 if StrName
not in NewLines
:
738 NewLines
.append(StrName
)
740 NewLines
.append(StrName
)
741 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
742 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
744 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
745 elif Line
.startswith(u
'"'):
747 # Check the situation where the language entry has more than 2 '"'
749 if Line
.replace(u
'\\"', '').count(u
'"') > 2:
750 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
751 ExtraData
='''The line %s has more than 2 '"' for language entry in file %s'''
752 % (LineCount
, File
.Path
))
753 if u
'#string' in Line
or u
'#language' in Line
:
754 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
755 NewLines
.append(Line
)
758 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
760 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
761 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
762 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
765 if StrName
and not NewLines
:
766 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
767 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
771 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
772 # should be Abstract, Description, BinaryAbstract, BinaryDescription
773 AbstractPosition
= -1
774 DescriptionPosition
= -1
775 BinaryAbstractPosition
= -1
776 BinaryDescriptionPosition
= -1
777 for StrName
in ExistStrNameList
:
778 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
779 if 'BINARY' in StrName
:
780 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
782 AbstractPosition
= ExistStrNameList
.index(StrName
)
783 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
784 if 'BINARY' in StrName
:
785 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
787 DescriptionPosition
= ExistStrNameList
.index(StrName
)
789 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
790 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
793 BinaryMin
= BinaryOrderList
[0]
794 BinaryMax
= BinaryOrderList
[1]
795 if BinaryDescriptionPosition
> -1:
796 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
798 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
799 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
801 elif BinaryAbstractPosition
> -1:
802 if not(BinaryAbstractPosition
> Max
):
803 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
804 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
807 if DescriptionPosition
> -1:
808 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
809 DescriptionPosition
> AbstractPosition
):
810 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
811 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
814 if not self
.UniFileHeader
:
815 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
816 Message
= ST
.ERR_NO_SOURCE_HEADER
,
824 def LoadUniFile(self
, File
= None):
826 EdkLogger
.Error("Unicode File Parser",
827 ToolError
.PARSER_ERROR
,
828 Message
='No unicode file is given',
834 # Process special char in file
836 Lines
= self
.PreProcess(File
)
839 # Get Unicode Information
841 for IndexI
in range(len(Lines
)):
843 if (IndexI
+ 1) < len(Lines
):
844 SecondLine
= Lines
[IndexI
+ 1]
845 if (IndexI
+ 2) < len(Lines
):
846 ThirdLine
= Lines
[IndexI
+ 2]
849 # Get Language def information
851 if Line
.find(u
'#langdef ') >= 0:
852 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
860 # Get string def information format as below
862 # #string MY_STRING_1
864 # "My first English string line 1"
865 # "My first English string line 2"
866 # #string MY_STRING_1
868 # "Mi segunda secuencia 1"
869 # "Mi segunda secuencia 2"
871 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
872 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
873 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
874 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
875 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
876 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
879 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
880 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
881 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
882 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
883 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
884 if Lines
[IndexJ
][-2] == ' ':
887 if Lines
[IndexJ
].strip()[1:-1].strip():
888 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
890 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
893 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
897 if Value
.endswith('\r\n'):
898 Value
= Value
[: Value
.rfind('\r\n')]
899 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
900 self
.AddStringToList(Name
, Language
, Value
)
904 # Load multiple .uni files
906 def LoadUniFiles(self
, FileList
):
907 if len(FileList
) > 0:
908 for File
in FileList
:
909 FilePath
= File
.Path
.strip()
910 if FilePath
.endswith('.uni') or FilePath
.endswith('.UNI') or FilePath
.endswith('.Uni'):
911 self
.LoadUniFile(File
)
914 # Add a string to list
916 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
917 for LangNameItem
in self
.LanguageDef
:
918 if Language
== LangNameItem
[0]:
921 if Language
not in self
.OrderedStringList
:
922 self
.OrderedStringList
[Language
] = []
923 self
.OrderedStringDict
[Language
] = {}
926 if Name
in self
.OrderedStringDict
[Language
]:
928 if Value
is not None:
929 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
930 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
931 Item
.UpdateValue(Value
)
932 Item
.UseOtherLangDef
= ''
935 Token
= len(self
.OrderedStringList
[Language
])
937 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
942 self
.OrderedStringDict
[Language
][Name
] = Token
943 for LangName
in self
.LanguageDef
:
945 # New STRING token will be added into all language string lists.
946 # so that the unique STRING identifier is reserved for all languages in the package list.
948 if LangName
[0] != Language
:
949 if UseOtherLangDef
!= '':
950 OtherLangDef
= UseOtherLangDef
952 OtherLangDef
= Language
953 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
958 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
960 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
965 self
.OrderedStringDict
[Language
][Name
] = Index
968 # Set the string as referenced
970 def SetStringReferenced(self
, Name
):
972 # String tokens are added in the same order in all language string lists.
973 # So, only update the status of the string token in the first language string list.
975 Lang
= self
.LanguageDef
[0][0]
976 if Name
in self
.OrderedStringDict
[Lang
]:
977 ItemIndexInList
= self
.OrderedStringDict
[Lang
][Name
]
978 Item
= self
.OrderedStringList
[Lang
][ItemIndexInList
]
979 Item
.Referenced
= True
982 # Search the string in language definition by Name
984 def FindStringValue(self
, Name
, Lang
):
985 if Name
in self
.OrderedStringDict
[Lang
]:
986 ItemIndexInList
= self
.OrderedStringDict
[Lang
][Name
]
987 return self
.OrderedStringList
[Lang
][ItemIndexInList
]
992 # Search the string in language definition by Token
994 def FindByToken(self
, Token
, Lang
):
995 for Item
in self
.OrderedStringList
[Lang
]:
996 if Item
.Token
== Token
:
1002 # Re-order strings and re-generate tokens
1005 if len(self
.LanguageDef
) == 0:
1008 # Retoken all language strings according to the status of the string tokens in the first language string list.
1010 FirstLangName
= self
.LanguageDef
[0][0]
1012 # Convert the OrderedStringList to OrderedStringListByToken in order to facilitate future searches by token
1013 for LangNameItem
in self
.LanguageDef
:
1014 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
1017 # Use small tokens for all referenced string tokens.
1020 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1021 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1022 if FirstLangItem
.Referenced
== True:
1023 for LangNameItem
in self
.LanguageDef
:
1024 LangName
= LangNameItem
[0]
1025 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1026 OtherLangItem
.Referenced
= True
1027 OtherLangItem
.Token
= RefToken
1028 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1029 RefToken
= RefToken
+ 1
1032 # Use big tokens for all unreferenced string tokens.
1035 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1036 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1037 if FirstLangItem
.Referenced
== False:
1038 for LangNameItem
in self
.LanguageDef
:
1039 LangName
= LangNameItem
[0]
1040 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1041 OtherLangItem
.Token
= RefToken
+ UnRefToken
1042 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1043 UnRefToken
= UnRefToken
+ 1
1046 # Show the instance itself
1049 print(self
.LanguageDef
)
1050 #print self.OrderedStringList
1051 for Item
in self
.OrderedStringList
:
1053 for Member
in self
.OrderedStringList
[Item
]:
1057 # Read content from '!include' UNI file
1059 def ReadIncludeUNIfile(self
, FilaPath
):
1063 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1064 EdkLogger
.Error("Unicode File Parser",
1065 ToolError
.FILE_NOT_FOUND
,
1068 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1069 except UnicodeError as Xstr
:
1070 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1071 except UnicodeError:
1072 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1074 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)