2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
23 from Logger
import ToolError
24 from Logger
import Log
as EdkLogger
25 from Logger
import StringTable
as ST
26 from Library
.String
import GetLineNo
27 from Library
.Misc
import PathClass
28 from Library
.Misc
import GetCharIndexOutStr
29 from Library
import DataType
as DT
# Escape sequences as they are written in .uni source text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Code points substituted for the escape sequences above.
NARROW_CHAR = u'\uFFF0'
NON_BREAKING_CHAR = u'\uFFF2'
# Matches a whole "!include <file>" line; group 1 is the included file name.
# Raw string so that \s and \S reach the regex engine without relying on
# Python passing unknown string escapes through.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
# Maps three-letter language codes to their two-letter equivalents.
gLANG_CONV_TABLE = {
    'eng':'en', 'fra':'fr',
    'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am',
    'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az',
    'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm',
    'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce',
    'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv',
    'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee',
    'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa',
    'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga',
    'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha',
    'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu',
    'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig',
    'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu',
    'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj',
    'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr',
    'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la',
    'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt',
    'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk',
    'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my',
    'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl',
    'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc',
    'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi',
    'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn',
    'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd',
    'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm',
    'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st',
    'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg',
    'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to',
    'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug',
    'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo',
    'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za',
    'zho':'zh', 'zul':'zu',
}
84 ## Convert a python unicode string to a normal string
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
89 # @param Uni: The python unicode string
91 # @retval: The formatted normal string
def UniToStr(Uni):
    # repr() yields u'...' for a Python 2 unicode object but '...' with no
    # prefix for a plain str (and for every str on Python 3). Strip a string
    # prefix only when one is present, then drop the surrounding quotes, so
    # the first character of the text is never lost.
    Quoted = repr(Uni).lstrip('uUbB')
    return Quoted[1:-1]
96 ## Convert a unicode string to a Hex list
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
101 # @param Uni: The python unicode string
103 # @retval List: The formatted hex list
def UniToHexList(Uni):
    # Returns a flat list of '0xNN' strings: the UTF-16 little-endian bytes of
    # Uni, low byte first for every 16-bit code unit.
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # A code point outside the BMP needs two 16-bit units (a surrogate
            # pair); slicing a 5-digit hex string would yield wrong bytes.
            Code -= 0x10000
            Units = (0xD800 | (Code >> 10), 0xDC00 | (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
113 ## Convert special unicode characters
115 # Convert special characters to (c), (r) and (tm).
117 # @param Uni: The python unicode string
119 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    # Returns a copy of Uni in which the copyright, registered and trademark
    # signs are replaced by their ASCII spellings; other text is unchanged.
    NewUni = Uni
    for Special, Ascii in ((u'\u00A9', '(c)'),
                           (u'\u00AE', '(r)'),
                           (u'\u2122', '(tm)')):
        NewUni = NewUni.replace(Special, Ascii)
    return NewUni
128 ## GetLanguageCode1766
130 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
# RFC 1766 language codes supported in compatibility mode
132 # RFC 4646 language codes supported in native mode
134 # @param LangName: Language codes read from .UNI file
# @retval LangName: Valid language code in RFC 1766 format or None
138 def GetLanguageCode1766(LangName
, File
=None):
139 length
= len(LangName
)
141 if LangName
.isalpha():
142 for Key
in gLANG_CONV_TABLE
.keys():
143 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
146 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
149 EdkLogger
.Error("Unicode File Parser",
150 ToolError
.FORMAT_INVALID
,
151 "Invalid RFC 1766 language code : %s" % LangName
,
154 if LangName
[0:2].isalpha() and LangName
[2] == '-':
155 for Key
in gLANG_CONV_TABLE
.keys():
156 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
159 if LangName
[0:2].isalpha() and LangName
[2] == '-':
160 for Key
in gLANG_CONV_TABLE
.keys():
161 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
163 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None and LangName
[3] == '-':
164 for Key
in gLANG_CONV_TABLE
.keys():
165 if Key
== LangName
[0:3].lower():
168 EdkLogger
.Error("Unicode File Parser",
169 ToolError
.FORMAT_INVALID
,
170 "Invalid RFC 4646 language code : %s" % LangName
,
175 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
177 # RFC 4646 language codes supported in native mode
179 # @param LangName: Language codes read from .UNI file
# @retval LangName: Valid language code in RFC 4646 format or None
183 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
184 length
= len(LangName
)
186 if length
== 3 and LangName
.isalpha():
187 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
188 if TempLangName
!= None:
192 EdkLogger
.Error("Unicode File Parser",
193 ToolError
.FORMAT_INVALID
,
194 "Invalid RFC 1766 language code : %s" % LangName
,
196 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
199 if LangName
.isalpha():
202 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None:
205 if LangName
[0:2].isalpha() and LangName
[2] == '-':
208 if LangName
[0:2].isalpha() and LangName
[2] == '-':
210 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None and LangName
[3] == '-':
213 EdkLogger
.Error("Unicode File Parser",
214 ToolError
.FORMAT_INVALID
,
215 "Invalid RFC 4646 language code : %s" % LangName
,
# Format the entry in a Uni file.
222 # @param StrTokenName StrTokenName.
223 # @param TokenValueList A list need to be processed.
224 # @param ContainerFile ContainerFile.
# @return formatted entry
227 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
230 if len(StrTokenName
) > PreFormatLength
:
231 PreFormatLength
= len(StrTokenName
) + 1
232 for (Lang
, Value
) in TokenValueList
:
233 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
236 Lang
= DT
.TAB_LANGUAGE_EN_US
238 Lang
= DT
.TAB_LANGUAGE_EN_US
239 elif len(Lang
.split('-')[0]) == 3:
240 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
242 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
243 ValueList
= Value
.split('\n')
245 for SubValue
in ValueList
:
248 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
249 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
251 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
253 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
257 ## StringDefClassObject
259 # A structure for language definition
261 class StringDefClassObject(object):
262 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
264 self
.StringNameByteList
= []
265 self
.StringValue
= ''
266 self
.StringValueByteList
= ''
268 self
.Referenced
= Referenced
269 self
.UseOtherLangDef
= UseOtherLangDef
273 self
.StringName
= Name
274 self
.StringNameByteList
= UniToHexList(Name
)
276 self
.StringValue
= Value
277 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
278 self
.Length
= len(self
.StringValueByteList
)
283 return repr(self
.StringName
) + ' ' + \
284 repr(self
.Token
) + ' ' + \
285 repr(self
.Referenced
) + ' ' + \
286 repr(self
.StringValue
) + ' ' + \
287 repr(self
.UseOtherLangDef
)
289 def UpdateValue(self
, Value
= None):
292 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
294 self
.StringValue
= Value
295 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
296 self
.Length
= len(self
.StringValueByteList
)
298 ## UniFileClassObject
300 # A structure for .uni file definition
302 class UniFileClassObject(object):
303 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
304 self
.FileList
= FileList
306 self
.IncFileList
= FileList
307 self
.UniFileHeader
= ''
309 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
310 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
311 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
312 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
313 self
.IsCompatibleMode
= IsCompatibleMode
314 if not IncludePathList
:
315 self
.IncludePathList
= []
317 self
.IncludePathList
= IncludePathList
318 if len(self
.FileList
) > 0:
319 self
.LoadUniFiles(FileList
)
322 # Get Language definition
324 def GetLangDef(self
, File
, Line
):
325 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
328 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').read()
329 except UnicodeError, Xstr
:
330 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').read()
332 EdkLogger
.Error("Unicode File Parser",
333 ToolError
.FILE_OPEN_FAILURE
,
334 "File read failure: %s" % str(Xstr
),
336 LineNo
= GetLineNo(FileIn
, Line
, False)
337 EdkLogger
.Error("Unicode File Parser",
338 ToolError
.PARSER_ERROR
,
339 "Wrong language definition",
340 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
341 File
= File
, Line
= LineNo
)
343 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
344 LangPrintName
= Lang
[2]
347 for Item
in self
.LanguageDef
:
348 if Item
[0] == LangName
:
353 self
.LanguageDef
.append([LangName
, LangPrintName
])
356 # Add language string
358 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
359 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
363 # The found STRING tokens will be added into new language string list
364 # so that the unique STRING identifier is reserved for all languages in the package list.
366 FirstLangName
= self
.LanguageDef
[0][0]
367 if LangName
!= FirstLangName
:
368 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
369 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
370 if Item
.UseOtherLangDef
!= '':
371 OtherLang
= Item
.UseOtherLangDef
373 OtherLang
= FirstLangName
374 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
379 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
383 # Get String name and value
385 def GetStringObject(self
, Item
):
389 Name
= Item
.split()[1]
390 # Check the string name is the upper character
392 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
393 if MatchString
== None or MatchString
.end(0) != len(Name
):
394 EdkLogger
.Error("Unicode File Parser",
395 ToolError
.FORMAT_INVALID
,
396 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
397 LanguageList
= Item
.split(u
'#language ')
398 for IndexI
in range(len(LanguageList
)):
402 Language
= LanguageList
[IndexI
].split()[0]
403 #.replace(u'\r\n', u'')
405 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
406 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
407 self
.AddStringToList(Name
, Language
, Value
)
410 # Get include file list and load them
412 def GetIncludeFile(self
, Item
, Dir
= None):
415 FileName
= Item
[Item
.find(u
'!include ') + len(u
'!include ') :Item
.find(u
' ', len(u
'!include '))][1:-1]
416 self
.LoadUniFile(FileName
)
419 # Pre-process before parse .uni file
421 def PreProcess(self
, File
, IsIncludeFile
=False):
422 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
423 EdkLogger
.Error("Unicode File Parser",
424 ToolError
.FILE_NOT_FOUND
,
428 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
430 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
432 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
436 # get the file header
441 if not self
.UniFileHeader
:
442 FirstGenHeader
= True
444 FirstGenHeader
= False
449 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
450 and not HeaderEnd
and not HeaderStart
:
452 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
454 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
455 self
.UniFileHeader
+= Line
+ '\r\n'
459 # Use unique identifier
463 MultiLineFeedExits
= False
# 1: single String entry exists
# 2: line feed exists under the same single String entry
469 StringEntryExistsFlag
= 0
471 Line
= FileIn
[LineCount
]
475 # Ignore comment line and empty line
477 if Line
== u
'' or Line
.startswith(u
'//'):
479 # Change the single line String entry flag status
481 if StringEntryExistsFlag
== 1:
482 StringEntryExistsFlag
= 2
484 # If the '#string' line and the '#language' line are not in the same line,
# there should be only one line feed character between them
487 if MultiLineFeedExits
:
488 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
491 MultiLineFeedExits
= False
# Process comment embedded in string define lines
495 FindFlag
= Line
.find(u
'//')
496 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
497 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
498 if FileIn
[LineCount
].strip().startswith('#language'):
499 Line
= Line
+ FileIn
[LineCount
]
500 FileIn
[LineCount
-1] = Line
501 FileIn
[LineCount
] = '\r\n'
503 for Index
in xrange (LineCount
+ 1, len (FileIn
) - 1):
504 if (Index
== len(FileIn
) -1):
505 FileIn
[Index
] = '\r\n'
507 FileIn
[Index
] = FileIn
[Index
+ 1]
509 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
511 if (len(Line
) - 1) > CommIndex
:
512 if Line
[CommIndex
+1] == u
'/':
513 Line
= Line
[:CommIndex
].strip()
515 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
517 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
519 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
520 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
521 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
523 Line
= Line
.replace(u
'\\\\', u
'\u0006')
524 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
525 Line
= Line
.replace(u
'\\n', CR
+ LF
)
526 Line
= Line
.replace(u
'\\r', CR
)
527 Line
= Line
.replace(u
'\\t', u
'\t')
528 Line
= Line
.replace(u
'''\"''', u
'''"''')
529 Line
= Line
.replace(u
'\t', u
' ')
530 Line
= Line
.replace(u
'\u0006', u
'\\')
532 # IncList = gINCLUDE_PATTERN.findall(Line)
534 if len(IncList
) == 1:
535 for Dir
in [File
.Dir
] + self
.IncludePathList
:
536 IncFile
= PathClass(str(IncList
[0]), Dir
)
537 self
.IncFileList
.append(IncFile
)
538 if os
.path
.isfile(IncFile
.Path
):
539 Lines
.extend(self
.PreProcess(IncFile
, True))
542 EdkLogger
.Error("Unicode File Parser",
543 ToolError
.FILE_NOT_FOUND
,
544 Message
="Cannot find include file",
545 ExtraData
=str(IncList
[0]))
549 # Between Name entry and Language entry can not contain line feed
551 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
552 MultiLineFeedExits
= True
554 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
555 MultiLineFeedExits
= True
558 # Between Language entry and String entry can not contain line feed
560 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
561 MultiLineFeedExits
= True
564 # Between two String entry, can not contain line feed
566 if Line
.startswith(u
'"'):
567 if StringEntryExistsFlag
== 2:
568 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
569 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
571 StringEntryExistsFlag
= 1
572 if not Line
.endswith('"'):
573 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
574 elif Line
.startswith(u
'#language'):
575 if StringEntryExistsFlag
== 2:
576 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
577 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
578 StringEntryExistsFlag
= 0
580 StringEntryExistsFlag
= 0
585 # Convert string def format as below
587 # #string MY_STRING_1
589 # "My first English string line 1"
590 # "My first English string line 2"
591 # #string MY_STRING_1
593 # "Mi segunda secuencia 1"
594 # "Mi segunda secuencia 2"
597 if not IsIncludeFile
and not Lines
:
598 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
599 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
604 ExistStrNameList
= []
606 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
607 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
608 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
611 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
612 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
613 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
614 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
615 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
616 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
617 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
620 if Line
.count(u
'#language') > 1:
621 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
622 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
625 if Line
.startswith(u
'//'):
627 elif Line
.startswith(u
'#langdef'):
628 if len(Line
.split()) == 2:
629 NewLines
.append(Line
)
631 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
632 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
633 NewLines
.append(Line
[Line
.find(u
'"'):])
635 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
636 elif Line
.startswith(u
'#string'):
637 if len(Line
.split()) == 2:
640 if StrName
.split()[1] not in ExistStrNameList
:
641 ExistStrNameList
.append(StrName
.split()[1].strip())
642 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
643 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
644 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
645 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
646 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
647 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
650 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
651 if Line
[Line
.find(u
'#language')-1] != ' ' or \
652 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
653 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
655 if Line
.find(u
'"') > 0:
656 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
658 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
660 if StrName
.split()[1] not in ExistStrNameList
:
661 ExistStrNameList
.append(StrName
.split()[1].strip())
662 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
663 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
664 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
665 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
666 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
667 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
670 if StrName
not in NewLines
:
671 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
673 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
674 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
675 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
676 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
677 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
678 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
680 if Line
[Line
.find(u
'"')-1] != u
' ':
681 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
683 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
685 if StrName
.split()[1] not in ExistStrNameList
:
686 ExistStrNameList
.append(StrName
.split()[1].strip())
687 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
688 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
689 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
690 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
691 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
692 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
695 if StrName
not in NewLines
:
696 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
698 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
699 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
700 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
702 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
703 elif Line
.startswith(u
'#language'):
704 if len(Line
.split()) == 2:
706 if StrName
not in NewLines
:
707 NewLines
.append(StrName
)
709 NewLines
.append(StrName
)
710 NewLines
.append(Line
)
711 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
713 if StrName
not in NewLines
:
714 NewLines
.append(StrName
)
716 NewLines
.append(StrName
)
717 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
718 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
720 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
721 elif Line
.startswith(u
'"'):
722 if u
'#string' in Line
or u
'#language' in Line
:
723 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
724 NewLines
.append(Line
)
727 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
729 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
730 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
731 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
734 if StrName
and not NewLines
:
735 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
736 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
740 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
# should be Abstract, Description, BinaryAbstract, BinaryDescription
742 AbstractPosition
= -1
743 DescriptionPosition
= -1
744 BinaryAbstractPosition
= -1
745 BinaryDescriptionPosition
= -1
746 for StrName
in ExistStrNameList
:
747 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
748 if 'BINARY' in StrName
:
749 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
751 AbstractPosition
= ExistStrNameList
.index(StrName
)
752 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
753 if 'BINARY' in StrName
:
754 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
756 DescriptionPosition
= ExistStrNameList
.index(StrName
)
758 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
759 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
762 BinaryMin
= BinaryOrderList
[0]
763 BinaryMax
= BinaryOrderList
[1]
764 if BinaryDescriptionPosition
> -1:
765 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
767 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
768 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
770 elif BinaryAbstractPosition
> -1:
771 if not(BinaryAbstractPosition
> Max
):
772 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
773 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
776 if DescriptionPosition
> -1:
777 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
778 DescriptionPosition
> AbstractPosition
):
779 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
780 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
783 if not self
.UniFileHeader
:
784 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
785 Message
= ST
.ERR_NO_SOURCE_HEADER
,
793 def LoadUniFile(self
, File
= None):
795 EdkLogger
.Error("Unicode File Parser",
796 ToolError
.PARSER_ERROR
,
797 Message
='No unicode file is given',
803 # Process special char in file
805 Lines
= self
.PreProcess(File
)
808 # Get Unicode Information
810 for IndexI
in range(len(Lines
)):
812 if (IndexI
+ 1) < len(Lines
):
813 SecondLine
= Lines
[IndexI
+ 1]
814 if (IndexI
+ 2) < len(Lines
):
815 ThirdLine
= Lines
[IndexI
+ 2]
818 # Get Language def information
820 if Line
.find(u
'#langdef ') >= 0:
821 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
829 # Get string def information format as below
831 # #string MY_STRING_1
833 # "My first English string line 1"
834 # "My first English string line 2"
835 # #string MY_STRING_1
837 # "Mi segunda secuencia 1"
838 # "Mi segunda secuencia 2"
840 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
841 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
842 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
843 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
844 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
845 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
848 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
849 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
850 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
851 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
852 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
853 if Lines
[IndexJ
][-2] == ' ':
856 if Lines
[IndexJ
].strip()[1:-1].strip():
857 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
859 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
862 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
866 if Value
.endswith('\r\n'):
867 Value
= Value
[: Value
.rfind('\r\n')]
868 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
869 self
.AddStringToList(Name
, Language
, Value
)
873 # Load multiple .uni files
def LoadUniFiles(self, FileList):
    # Method of UniFileClassObject: calls self.LoadUniFile() for every entry
    # of FileList whose stripped Path ends in ".uni".
    #
    # @param FileList: iterable of objects exposing a Path string; None or
    #                  an empty list loads nothing
    #
    for File in FileList or []:
        # Compare case-insensitively so every casing of the extension is
        # accepted, not only ".uni", ".UNI" and ".Uni".
        if File.Path.strip().lower().endswith('.uni'):
            self.LoadUniFile(File)
883 # Add a string to list
885 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
886 for LangNameItem
in self
.LanguageDef
:
887 if Language
== LangNameItem
[0]:
890 if Language
not in self
.OrderedStringList
:
891 self
.OrderedStringList
[Language
] = []
892 self
.OrderedStringDict
[Language
] = {}
895 if Name
in self
.OrderedStringDict
[Language
]:
898 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
899 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
900 Item
.UpdateValue(Value
)
901 Item
.UseOtherLangDef
= ''
904 Token
= len(self
.OrderedStringList
[Language
])
906 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
911 self
.OrderedStringDict
[Language
][Name
] = Token
912 for LangName
in self
.LanguageDef
:
914 # New STRING token will be added into all language string lists.
915 # so that the unique STRING identifier is reserved for all languages in the package list.
917 if LangName
[0] != Language
:
918 if UseOtherLangDef
!= '':
919 OtherLangDef
= UseOtherLangDef
921 OtherLangDef
= Language
922 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
927 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
929 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
934 self
.OrderedStringDict
[Language
][Name
] = Index
937 # Set the string as referenced
def SetStringReferenced(self, Name):
    # Method of UniFileClassObject: flags the string called Name as referenced.
    #
    # String tokens are added in the same order in all language string lists.
    # So, only update the status of the string token in the first language
    # string list.
    #
    if not self.LanguageDef:
        # No language defined yet, so there is no string list to update.
        return
    Lang = self.LanguageDef[0][0]
    ItemIndexInList = self.OrderedStringDict.get(Lang, {}).get(Name)
    # Index 0 is a valid position, so test against None rather than truthiness.
    if ItemIndexInList is not None:
        self.OrderedStringList[Lang][ItemIndexInList].Referenced = True
951 # Search the string in language definition by Name
def FindStringValue(self, Name, Lang):
    # Method of UniFileClassObject: looks up the string called Name in the
    # string table of language Lang.
    #
    # @param Name: string token name
    # @param Lang: language identifier used as key of OrderedStringDict
    #
    # @retval the entry stored in OrderedStringList for that name, or None
    #         when the language or the name is unknown
    #
    ItemIndexInList = self.OrderedStringDict.get(Lang, {}).get(Name)
    if ItemIndexInList is None:
        return None
    return self.OrderedStringList[Lang][ItemIndexInList]
961 # Search the string in language definition by Token
963 def FindByToken(self
, Token
, Lang
):
964 for Item
in self
.OrderedStringList
[Lang
]:
965 if Item
.Token
== Token
:
971 # Re-order strings and re-generate tokens
974 if len(self
.LanguageDef
) == 0:
# Retoken all language strings according to the status of the string token in the first language string.
979 FirstLangName
= self
.LanguageDef
[0][0]
# Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
982 for LangNameItem
in self
.LanguageDef
:
983 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
# Use small tokens for all referenced string tokens.
989 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
990 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
991 if FirstLangItem
.Referenced
== True:
992 for LangNameItem
in self
.LanguageDef
:
993 LangName
= LangNameItem
[0]
994 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
995 OtherLangItem
.Referenced
= True
996 OtherLangItem
.Token
= RefToken
997 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
998 RefToken
= RefToken
+ 1
# Use big tokens for all unreferenced string tokens.
1004 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1005 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1006 if FirstLangItem
.Referenced
== False:
1007 for LangNameItem
in self
.LanguageDef
:
1008 LangName
= LangNameItem
[0]
1009 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1010 OtherLangItem
.Token
= RefToken
+ UnRefToken
1011 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1012 UnRefToken
= UnRefToken
+ 1
1015 # Show the instance itself
1018 print self
.LanguageDef
1019 #print self.OrderedStringList
1020 for Item
in self
.OrderedStringList
:
1022 for Member
in self
.OrderedStringList
[Item
]:
1026 # Read content from '!include' UNI file
1028 def ReadIncludeUNIfile(self
, FilaPath
):
1032 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1033 EdkLogger
.Error("Unicode File Parser",
1034 ToolError
.FILE_NOT_FOUND
,
1037 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1038 except UnicodeError:
1039 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1041 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)