2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
23 from Logger
import ToolError
24 from Logger
import Log
as EdkLogger
25 from Logger
import StringTable
as ST
26 from Library
.String
import GetLineNo
27 from Library
.Misc
import PathClass
28 from Library
.Misc
import GetCharIndexOutStr
29 from Library
import DataType
as DT
34 UNICODE_WIDE_CHAR
= u
'\\wide'
35 UNICODE_NARROW_CHAR
= u
'\\narrow'
36 UNICODE_NON_BREAKING_CHAR
= u
'\\nbr'
37 UNICODE_UNICODE_CR
= '\r'
38 UNICODE_UNICODE_LF
= '\n'
40 NARROW_CHAR
= u
'\uFFF0'
42 NON_BREAKING_CHAR
= u
'\uFFF2'
# Matches a whole line of the form "!include <token>" and captures the
# non-whitespace token that follows the directive.
# The pattern is written as a raw string so that the "\s" and "\S" classes are
# handed to the regex engine verbatim rather than relying on Python passing
# unrecognised string escapes through unchanged.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
# Conversion table from three-letter language codes (keys) to their two-letter
# equivalents (values). The lookups elsewhere in this file use it in both
# directions: by key, and by scanning for a matching value.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84 ## Convert a python unicode string to a normal string
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
89 # @param Uni: The python unicode string
91 # @retval: The formatted normal string
94 return repr(Uni
)[2:-1]
96 ## Convert a unicode string to a Hex list
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
101 # @param Uni: The python unicode string
103 # @retval List: The formatted hex list
def UniToHexList(Uni):
    """Convert a string to a list of hex byte strings in UTF-16 little-endian order.

    Each 16-bit code unit contributes two entries, low byte first, so
    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].

    @param Uni: The python unicode string
    @retval List: The formatted hex list (empty for an empty string)
    """
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # A code point outside the BMP does not fit in one 16-bit unit;
            # formatting it with '%04X' and slicing fixed positions would emit
            # wrong bytes. Split it into a UTF-16 surrogate pair instead.
            Code -= 0x10000
            Units = (0xD800 | (Code >> 10), 0xDC00 | (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            # Low byte first, then high byte (little-endian).
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
113 ## Convert special unicode characters
115 # Convert special characters to (c), (r) and (tm).
117 # @param Uni: The python unicode string
119 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII text.

    U+00A9 becomes '(c)', U+00AE becomes '(r)' and U+2122 becomes '(tm)'.

    @param Uni: The python unicode string
    @retval NewUni: The converted unicode string
    """
    Substitutions = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, AsciiForm in Substitutions:
        Converted = Converted.replace(Symbol, AsciiForm)
    return Converted
128 ## GetLanguageCode1766
130 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
131 # RFC 1766 language codes supported in compatibility mode
132 # RFC 4646 language codes supported in native mode
134 # @param LangName: Language codes read from .UNI file
136 # @retval LangName: Valid language code in RFC 1766 format or None
138 def GetLanguageCode1766(LangName
, File
=None):
139 length
= len(LangName
)
141 if LangName
.isalpha():
142 for Key
in gLANG_CONV_TABLE
.keys():
143 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
146 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
149 EdkLogger
.Error("Unicode File Parser",
150 ToolError
.FORMAT_INVALID
,
151 "Invalid RFC 1766 language code : %s" % LangName
,
154 if LangName
[0:2].isalpha() and LangName
[2] == '-':
155 for Key
in gLANG_CONV_TABLE
.keys():
156 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
159 if LangName
[0:2].isalpha() and LangName
[2] == '-':
160 for Key
in gLANG_CONV_TABLE
.keys():
161 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
163 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None and LangName
[3] == '-':
164 for Key
in gLANG_CONV_TABLE
.keys():
165 if Key
== LangName
[0:3].lower():
168 EdkLogger
.Error("Unicode File Parser",
169 ToolError
.FORMAT_INVALID
,
170 "Invalid RFC 4646 language code : %s" % LangName
,
175 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
176 # RFC 1766 language codes supported in compatibility mode
177 # RFC 4646 language codes supported in native mode
179 # @param LangName: Language codes read from .UNI file
181 # @retval LangName: Valid language code in RFC 4646 format or None
183 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
184 length
= len(LangName
)
186 if length
== 3 and LangName
.isalpha():
187 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
188 if TempLangName
!= None:
192 EdkLogger
.Error("Unicode File Parser",
193 ToolError
.FORMAT_INVALID
,
194 "Invalid RFC 1766 language code : %s" % LangName
,
196 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
199 if LangName
.isalpha():
202 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None:
205 if LangName
[0:2].isalpha() and LangName
[2] == '-':
208 if LangName
[0:2].isalpha() and LangName
[2] == '-':
210 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None and LangName
[3] == '-':
213 EdkLogger
.Error("Unicode File Parser",
214 ToolError
.FORMAT_INVALID
,
215 "Invalid RFC 4646 language code : %s" % LangName
,
220 # Format the entry in the Uni file.
222 # @param StrTokenName StrTokenName.
223 # @param TokenValueList A list that needs to be processed.
224 # @param ContainerFile ContainerFile.
226 # @return formatted entry
227 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
230 if len(StrTokenName
) > PreFormatLength
:
231 PreFormatLength
= len(StrTokenName
) + 1
232 for (Lang
, Value
) in TokenValueList
:
233 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
236 Lang
= DT
.TAB_LANGUAGE_EN_US
238 Lang
= DT
.TAB_LANGUAGE_EN_US
239 elif len(Lang
.split('-')[0]) == 3:
240 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
242 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
243 ValueList
= Value
.split('\n')
245 for SubValue
in ValueList
:
248 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + os
.linesep
249 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
251 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
253 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
257 ## StringDefClassObject
259 # A structure for language definition
261 class StringDefClassObject(object):
262 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
264 self
.StringNameByteList
= []
265 self
.StringValue
= ''
266 self
.StringValueByteList
= ''
268 self
.Referenced
= Referenced
269 self
.UseOtherLangDef
= UseOtherLangDef
273 self
.StringName
= Name
274 self
.StringNameByteList
= UniToHexList(Name
)
276 self
.StringValue
= Value
277 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
278 self
.Length
= len(self
.StringValueByteList
)
283 return repr(self
.StringName
) + ' ' + \
284 repr(self
.Token
) + ' ' + \
285 repr(self
.Referenced
) + ' ' + \
286 repr(self
.StringValue
) + ' ' + \
287 repr(self
.UseOtherLangDef
)
289 def UpdateValue(self
, Value
= None):
292 self
.StringValue
= self
.StringValue
+ os
.linesep
+ Value
294 self
.StringValue
= Value
295 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
296 self
.Length
= len(self
.StringValueByteList
)
298 ## UniFileClassObject
300 # A structure for .uni file definition
302 class UniFileClassObject(object):
303 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
304 self
.FileList
= FileList
306 self
.IncFileList
= FileList
307 self
.UniFileHeader
= ''
309 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
310 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
311 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
312 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
313 self
.IsCompatibleMode
= IsCompatibleMode
314 if not IncludePathList
:
315 self
.IncludePathList
= []
317 self
.IncludePathList
= IncludePathList
318 if len(self
.FileList
) > 0:
319 self
.LoadUniFiles(FileList
)
322 # Get Language definition
324 def GetLangDef(self
, File
, Line
):
325 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
328 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').read()
329 except UnicodeError, Xstr
:
330 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').read()
332 EdkLogger
.Error("Unicode File Parser",
333 ToolError
.FILE_OPEN_FAILURE
,
334 "File read failure: %s" % str(Xstr
),
336 LineNo
= GetLineNo(FileIn
, Line
, False)
337 EdkLogger
.Error("Unicode File Parser",
338 ToolError
.PARSER_ERROR
,
339 "Wrong language definition",
340 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
341 File
= File
, Line
= LineNo
)
343 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
344 LangPrintName
= Lang
[2]
347 for Item
in self
.LanguageDef
:
348 if Item
[0] == LangName
:
353 self
.LanguageDef
.append([LangName
, LangPrintName
])
356 # Add language string
358 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
359 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
363 # The found STRING tokens will be added into new language string list
364 # so that the unique STRING identifier is reserved for all languages in the package list.
366 FirstLangName
= self
.LanguageDef
[0][0]
367 if LangName
!= FirstLangName
:
368 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
369 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
370 if Item
.UseOtherLangDef
!= '':
371 OtherLang
= Item
.UseOtherLangDef
373 OtherLang
= FirstLangName
374 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
379 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
383 # Get String name and value
385 def GetStringObject(self
, Item
):
389 Name
= Item
.split()[1]
390 # Check that the string name consists only of upper-case letters, digits and underscores
392 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
393 if MatchString
== None or MatchString
.end(0) != len(Name
):
394 EdkLogger
.Error("Unicode File Parser",
395 ToolError
.FORMAT_INVALID
,
396 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
397 LanguageList
= Item
.split(u
'#language ')
398 for IndexI
in range(len(LanguageList
)):
402 Language
= LanguageList
[IndexI
].split()[0]
403 #.replace(u'\r\n', u'')
405 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
406 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
407 self
.AddStringToList(Name
, Language
, Value
)
410 # Get include file list and load them
412 def GetIncludeFile(self
, Item
, Dir
= None):
415 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
416 self
.LoadUniFile(FileName
)
419 # Pre-process before parse .uni file
421 def PreProcess(self
, File
, IsIncludeFile
=False):
422 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
423 EdkLogger
.Error("Unicode File Parser",
424 ToolError
.FILE_NOT_FOUND
,
428 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
430 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
432 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
436 # get the file header
441 if not self
.UniFileHeader
:
442 FirstGenHeader
= True
444 FirstGenHeader
= False
449 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
450 and not HeaderEnd
and not HeaderStart
:
452 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
454 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
455 self
.UniFileHeader
+= Line
+ os
.linesep
459 # Use unique identifier
463 MultiLineFeedExits
= False
466 # 1: a single String entry exists
467 # 2: a line feed exists under some single String entry
469 StringEntryExistsFlag
= 0
471 Line
= FileIn
[LineCount
]
475 # Ignore comment line and empty line
477 if Line
== u
'' or Line
.startswith(u
'//'):
479 # Change the single line String entry flag status
481 if StringEntryExistsFlag
== 1:
482 StringEntryExistsFlag
= 2
484 # If the '#string' line and the '#language' line are not on the same line,
485 # there should be only one line feed character between them
487 if MultiLineFeedExits
:
488 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
491 MultiLineFeedExits
= False
493 # Process comments embedded in string definition lines
495 FindFlag
= Line
.find(u
'//')
496 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
497 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
498 if FileIn
[LineCount
].strip().startswith('#language'):
499 Line
= Line
+ FileIn
[LineCount
]
500 FileIn
[LineCount
-1] = Line
501 FileIn
[LineCount
] = os
.linesep
503 for Index
in xrange (LineCount
+ 1, len (FileIn
) - 1):
504 if (Index
== len(FileIn
) -1):
505 FileIn
[Index
] = os
.linesep
507 FileIn
[Index
] = FileIn
[Index
+ 1]
509 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
511 if (len(Line
) - 1) > CommIndex
:
512 if Line
[CommIndex
+1] == u
'/':
513 Line
= Line
[:CommIndex
].strip()
515 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
517 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
519 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
520 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
521 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
523 Line
= Line
.replace(u
'\\\\', u
'\u0006')
524 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
525 Line
= Line
.replace(u
'\\n', CR
+ LF
)
526 Line
= Line
.replace(u
'\\r', CR
)
527 Line
= Line
.replace(u
'\\t', u
'\t')
528 Line
= Line
.replace(u
'''\"''', u
'''"''')
529 Line
= Line
.replace(u
'\t', u
' ')
530 Line
= Line
.replace(u
'\u0006', u
'\\')
532 IncList
= gINCLUDE_PATTERN
.findall(Line
)
533 if len(IncList
) == 1:
534 for Dir
in [File
.Dir
] + self
.IncludePathList
:
535 IncFile
= PathClass(str(IncList
[0]), Dir
)
536 self
.IncFileList
.append(IncFile
)
537 if os
.path
.isfile(IncFile
.Path
):
538 Lines
.extend(self
.PreProcess(IncFile
, True))
541 EdkLogger
.Error("Unicode File Parser",
542 ToolError
.FILE_NOT_FOUND
,
543 Message
="Cannot find include file",
544 ExtraData
=str(IncList
[0]))
548 # Between Name entry and Language entry can not contain line feed
550 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
551 MultiLineFeedExits
= True
553 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
554 MultiLineFeedExits
= True
557 # Between Language entry and String entry can not contain line feed
559 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
560 MultiLineFeedExits
= True
563 # Between two String entry, can not contain line feed
565 if Line
.startswith(u
'"'):
566 if StringEntryExistsFlag
== 2:
567 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
568 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
570 StringEntryExistsFlag
= 1
571 if not Line
.endswith('"'):
572 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
573 elif Line
.startswith(u
'#language'):
574 if StringEntryExistsFlag
== 2:
575 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
576 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
577 StringEntryExistsFlag
= 0
579 StringEntryExistsFlag
= 0
584 # Convert string def format as below
586 # #string MY_STRING_1
588 # "My first English string line 1"
589 # "My first English string line 2"
590 # #string MY_STRING_1
592 # "Mi segunda secuencia 1"
593 # "Mi segunda secuencia 2"
596 if not IsIncludeFile
and not Lines
:
597 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
598 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
603 ExistStrNameList
= []
605 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
606 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
607 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
610 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
611 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
612 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
613 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
614 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
615 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
616 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
619 if Line
.count(u
'#language') > 1:
620 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
621 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
624 if Line
.startswith(u
'//'):
626 elif Line
.startswith(u
'#langdef'):
627 if len(Line
.split()) == 2:
628 NewLines
.append(Line
)
630 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
631 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
632 NewLines
.append(Line
[Line
.find(u
'"'):])
634 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
635 elif Line
.startswith(u
'#string'):
636 if len(Line
.split()) == 2:
639 if StrName
.split()[1] not in ExistStrNameList
:
640 ExistStrNameList
.append(StrName
.split()[1].strip())
641 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
642 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
643 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
644 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
645 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
646 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
649 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
650 if Line
[Line
.find(u
'#language')-1] != ' ' or \
651 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
652 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
654 if Line
.find(u
'"') > 0:
655 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
657 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
659 if StrName
.split()[1] not in ExistStrNameList
:
660 ExistStrNameList
.append(StrName
.split()[1].strip())
661 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
662 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
663 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
664 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
665 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
666 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
669 if StrName
not in NewLines
:
670 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
672 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
673 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
674 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
675 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
676 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
677 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
679 if Line
[Line
.find(u
'"')-1] != u
' ':
680 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
682 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
684 if StrName
.split()[1] not in ExistStrNameList
:
685 ExistStrNameList
.append(StrName
.split()[1].strip())
686 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
687 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
688 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
689 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
690 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
691 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
694 if StrName
not in NewLines
:
695 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
697 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
698 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
699 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
701 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
702 elif Line
.startswith(u
'#language'):
703 if len(Line
.split()) == 2:
705 if StrName
not in NewLines
:
706 NewLines
.append(StrName
)
708 NewLines
.append(StrName
)
709 NewLines
.append(Line
)
710 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
712 if StrName
not in NewLines
:
713 NewLines
.append(StrName
)
715 NewLines
.append(StrName
)
716 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
717 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
719 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
720 elif Line
.startswith(u
'"'):
721 if u
'#string' in Line
or u
'#language' in Line
:
722 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
723 NewLines
.append(Line
)
726 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
728 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
729 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
730 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
733 if StrName
and not NewLines
:
734 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
735 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
739 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
740 # should be Abstract, Description, BinaryAbstract, BinaryDescription
741 AbstractPosition
= -1
742 DescriptionPosition
= -1
743 BinaryAbstractPosition
= -1
744 BinaryDescriptionPosition
= -1
745 for StrName
in ExistStrNameList
:
746 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
747 if 'BINARY' in StrName
:
748 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
750 AbstractPosition
= ExistStrNameList
.index(StrName
)
751 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
752 if 'BINARY' in StrName
:
753 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
755 DescriptionPosition
= ExistStrNameList
.index(StrName
)
757 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
758 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
761 BinaryMin
= BinaryOrderList
[0]
762 BinaryMax
= BinaryOrderList
[1]
763 if BinaryDescriptionPosition
> -1:
764 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
766 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
767 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
769 elif BinaryAbstractPosition
> -1:
770 if not(BinaryAbstractPosition
> Max
):
771 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
772 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
775 if DescriptionPosition
> -1:
776 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
777 DescriptionPosition
> AbstractPosition
):
778 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
779 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
782 if not self
.UniFileHeader
:
783 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
784 Message
= ST
.ERR_NO_SOURCE_HEADER
,
792 def LoadUniFile(self
, File
= None):
794 EdkLogger
.Error("Unicode File Parser",
795 ToolError
.PARSER_ERROR
,
796 Message
='No unicode file is given',
802 # Process special char in file
804 Lines
= self
.PreProcess(File
)
807 # Get Unicode Information
809 for IndexI
in range(len(Lines
)):
811 if (IndexI
+ 1) < len(Lines
):
812 SecondLine
= Lines
[IndexI
+ 1]
813 if (IndexI
+ 2) < len(Lines
):
814 ThirdLine
= Lines
[IndexI
+ 2]
817 # Get Language def information
819 if Line
.find(u
'#langdef ') >= 0:
820 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
828 # Get string def information format as below
830 # #string MY_STRING_1
832 # "My first English string line 1"
833 # "My first English string line 2"
834 # #string MY_STRING_1
836 # "Mi segunda secuencia 1"
837 # "Mi segunda secuencia 2"
839 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
840 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
841 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
842 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
843 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
844 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
847 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
848 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
849 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
850 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
851 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
852 if Lines
[IndexJ
][-2] == ' ':
855 if Lines
[IndexJ
].strip()[1:-1].strip():
856 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
858 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
861 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + os
.linesep
865 if Value
.endswith(os
.linesep
):
866 Value
= Value
[: Value
.rfind(os
.linesep
)]
867 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
868 self
.AddStringToList(Name
, Language
, Value
)
872 # Load multiple .uni files
874 def LoadUniFiles(self
, FileList
):
875 if len(FileList
) > 0:
876 for File
in FileList
:
877 FilePath
= File
.Path
.strip()
878 if FilePath
.endswith('.uni') or FilePath
.endswith('.UNI') or FilePath
.endswith('.Uni'):
879 self
.LoadUniFile(File
)
882 # Add a string to list
884 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
885 for LangNameItem
in self
.LanguageDef
:
886 if Language
== LangNameItem
[0]:
889 if Language
not in self
.OrderedStringList
:
890 self
.OrderedStringList
[Language
] = []
891 self
.OrderedStringDict
[Language
] = {}
894 if Name
in self
.OrderedStringDict
[Language
]:
897 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
898 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
899 Item
.UpdateValue(Value
)
900 Item
.UseOtherLangDef
= ''
903 Token
= len(self
.OrderedStringList
[Language
])
905 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
910 self
.OrderedStringDict
[Language
][Name
] = Token
911 for LangName
in self
.LanguageDef
:
913 # New STRING token will be added into all language string lists.
914 # so that the unique STRING identifier is reserved for all languages in the package list.
916 if LangName
[0] != Language
:
917 if UseOtherLangDef
!= '':
918 OtherLangDef
= UseOtherLangDef
920 OtherLangDef
= Language
921 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
926 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
928 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
933 self
.OrderedStringDict
[Language
][Name
] = Index
936 # Set the string as referenced
938 def SetStringReferenced(self
, Name
):
940 # String tokens are added in the same order in all language string lists.
941 # So, only update the status of the string token in the first language string list.
943 Lang
= self
.LanguageDef
[0][0]
944 if Name
in self
.OrderedStringDict
[Lang
]:
945 ItemIndexInList
= self
.OrderedStringDict
[Lang
][Name
]
946 Item
= self
.OrderedStringList
[Lang
][ItemIndexInList
]
947 Item
.Referenced
= True
950 # Search the string in language definition by Name
952 def FindStringValue(self
, Name
, Lang
):
953 if Name
in self
.OrderedStringDict
[Lang
]:
954 ItemIndexInList
= self
.OrderedStringDict
[Lang
][Name
]
955 return self
.OrderedStringList
[Lang
][ItemIndexInList
]
960 # Search the string in language definition by Token
962 def FindByToken(self
, Token
, Lang
):
963 for Item
in self
.OrderedStringList
[Lang
]:
964 if Item
.Token
== Token
:
970 # Re-order strings and re-generate tokens
973 if len(self
.LanguageDef
) == 0:
976 # Retoken all language strings according to the status of the string tokens in the first language string list.
978 FirstLangName
= self
.LanguageDef
[0][0]
980 # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
981 for LangNameItem
in self
.LanguageDef
:
982 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
985 # Use small tokens for all referenced string tokens.
988 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
989 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
990 if FirstLangItem
.Referenced
== True:
991 for LangNameItem
in self
.LanguageDef
:
992 LangName
= LangNameItem
[0]
993 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
994 OtherLangItem
.Referenced
= True
995 OtherLangItem
.Token
= RefToken
996 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
997 RefToken
= RefToken
+ 1
1000 # Use big tokens for all unreferenced string tokens.
1003 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1004 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1005 if FirstLangItem
.Referenced
== False:
1006 for LangNameItem
in self
.LanguageDef
:
1007 LangName
= LangNameItem
[0]
1008 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1009 OtherLangItem
.Token
= RefToken
+ UnRefToken
1010 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1011 UnRefToken
= UnRefToken
+ 1
1014 # Show the instance itself
1017 print self
.LanguageDef
1018 #print self.OrderedStringList
1019 for Item
in self
.OrderedStringList
:
1021 for Member
in self
.OrderedStringList
[Item
]:
1025 # Read content from '!include' UNI file
1027 def ReadIncludeUNIfile(self
, FilaPath
):
1031 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1032 EdkLogger
.Error("Unicode File Parser",
1033 ToolError
.FILE_NOT_FOUND
,
1036 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1037 except UnicodeError:
1038 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1040 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)