2 # Collect all defined strings in multiple uni files.
4 # Copyright (c) 2014 - 2017, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 Collect all defined strings in multiple uni files
23 from Logger
import ToolError
24 from Logger
import Log
as EdkLogger
25 from Logger
import StringTable
as ST
26 from Library
.String
import GetLineNo
27 from Library
.Misc
import PathClass
28 from Library
.Misc
import GetCharIndexOutStr
29 from Library
import DataType
as DT
30 from Library
.ParserValidate
import CheckUTF16FileHeader
# Escape sequences as they are written in the text of a .uni file.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Code points that replace the escape sequences above while a file is pre-processed.
# WIDE_CHAR, CR and LF are referenced by that pre-processing code, so they must be
# defined here together with their siblings.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
# Matches one complete "!include <operand>" line; group 1 captures the operand.
# Written as a raw string so that \s and \S reach the regex engine untouched instead of
# being parsed as (invalid) string-literal escapes.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
# Three-letter language code -> two-letter language code.
# Consulted by the GetLanguageCode helpers when a language name read from a .uni file
# has to be converted or validated.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
85 ## Convert a python unicode string to a normal string
87 # Convert a python unicode string to a normal string
88 # UniToStr(u'I am a string') is 'I am a string'
90 # @param Uni: The python unicode string
92 # @retval: The formatted normal string
def UniToStr(Uni):
    """Return the repr() text of Uni without its quoting.

    UniToStr(u'I am a string') is 'I am a string'.

    @param Uni:  The python unicode string
    @retval:     The formatted normal string
    """
    Text = repr(Uni)
    # Only some interpreters render a leading "u" marker; drop it when it is there so the
    # following slice always removes exactly the opening and closing quote.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    return Text[1:-1]
97 ## Convert a unicode string to a Hex list
99 # Convert a unicode string to a Hex list
100 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
102 # @param Uni: The python unicode string
104 # @retval List: The formatted hex list
def UniToHexList(Uni):
    """Convert a unicode string to a flat list of per-byte hex strings.

    Every 16-bit unit contributes two entries, low byte first, so
    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].
    An empty string gives an empty list.

    @param Uni:   The python unicode string
    @retval List: The formatted hex list
    """
    List = []
    for Item in Uni:
        CodePoint = ord(Item)
        if CodePoint > 0xFFFF:
            # '%04X' would produce more than four digits here and the byte slices below
            # would pick the wrong ones, so emit the UTF-16 surrogate pair instead.
            CodePoint -= 0x10000
            Units = (0xD800 + (CodePoint >> 10), 0xDC00 + (CodePoint & 0x3FF))
        else:
            Units = (CodePoint,)
        for Unit in Units:
            Temp = '%04X' % Unit
            List.append('0x' + Temp[2:4])
            List.append('0x' + Temp[0:2])
    return List
114 ## Convert special unicode characters
116 # Convert special characters to (c), (r) and (tm).
118 # @param Uni: The python unicode string
120 # @retval NewUni: The converted unicode string
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs by (c), (r) and (tm).

    @param Uni:     The python unicode string
    @retval NewUni: The converted unicode string
    """
    Replacements = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    NewUni = Uni
    for Symbol, Ascii in Replacements:
        NewUni = NewUni.replace(Symbol, Ascii)
    return NewUni
129 ## GetLanguageCode1766
131 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
132 # RFC 1766 language codes supported in compatibility mode
133 # RFC 4646 language codes supported in native mode
135 # @param LangName: Language codes read from .UNI file
137 # @retval LangName: Valid language code in RFC 1766 format or None
139 def GetLanguageCode1766(LangName
, File
=None):
142 length
= len(LangName
)
144 if LangName
.isalpha():
145 for Key
in gLANG_CONV_TABLE
.keys():
146 if gLANG_CONV_TABLE
.get(Key
) == LangName
.lower():
149 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()):
152 EdkLogger
.Error("Unicode File Parser",
153 ToolError
.FORMAT_INVALID
,
154 "Invalid RFC 1766 language code : %s" % LangName
,
157 if LangName
[0:2].isalpha() and LangName
[2] == '-':
158 for Key
in gLANG_CONV_TABLE
.keys():
159 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
162 if LangName
[0:2].isalpha() and LangName
[2] == '-':
163 for Key
in gLANG_CONV_TABLE
.keys():
164 if gLANG_CONV_TABLE
.get(Key
) == LangName
[0:2].lower():
166 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None and LangName
[3] == '-':
167 for Key
in gLANG_CONV_TABLE
.keys():
168 if Key
== LangName
[0:3].lower():
171 EdkLogger
.Error("Unicode File Parser",
172 ToolError
.FORMAT_INVALID
,
173 "Invalid RFC 4646 language code : %s" % LangName
,
178 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
179 # RFC 1766 language codes supported in compatibility mode
180 # RFC 4646 language codes supported in native mode
182 # @param LangName: Language codes read from .UNI file
184 # @retval LangName: Valid language code in RFC 4646 format or None
186 def GetLanguageCode(LangName
, IsCompatibleMode
, File
):
187 length
= len(LangName
)
189 if length
== 3 and LangName
.isalpha():
190 TempLangName
= gLANG_CONV_TABLE
.get(LangName
.lower())
191 if TempLangName
!= None:
195 EdkLogger
.Error("Unicode File Parser",
196 ToolError
.FORMAT_INVALID
,
197 "Invalid RFC 1766 language code : %s" % LangName
,
199 if (LangName
[0] == 'X' or LangName
[0] == 'x') and LangName
[1] == '-':
202 if LangName
.isalpha():
205 if LangName
.isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None:
208 if LangName
[0:2].isalpha() and LangName
[2] == '-':
211 if LangName
[0:2].isalpha() and LangName
[2] == '-':
213 if LangName
[0:3].isalpha() and gLANG_CONV_TABLE
.get(LangName
.lower()) == None and LangName
[3] == '-':
216 EdkLogger
.Error("Unicode File Parser",
217 ToolError
.FORMAT_INVALID
,
218 "Invalid RFC 4646 language code : %s" % LangName
,
223 # Format the entry in a Uni file.
225 # @param StrTokenName StrTokenName.
226 # @param TokenValueList A list that needs to be processed.
227 # @param ContainerFile ContainerFile.
229 # @return formatted entry
230 def FormatUniEntry(StrTokenName
, TokenValueList
, ContainerFile
):
233 if len(StrTokenName
) > PreFormatLength
:
234 PreFormatLength
= len(StrTokenName
) + 1
235 for (Lang
, Value
) in TokenValueList
:
236 if not Value
or Lang
== DT
.TAB_LANGUAGE_EN_X
:
239 Lang
= DT
.TAB_LANGUAGE_EN_US
241 Lang
= DT
.TAB_LANGUAGE_EN_US
242 elif len(Lang
.split('-')[0]) == 3:
243 Lang
= GetLanguageCode(Lang
.split('-')[0], True, ContainerFile
)
245 Lang
= GetLanguageCode(Lang
, False, ContainerFile
)
246 ValueList
= Value
.split('\n')
248 for SubValue
in ValueList
:
251 ' ' * (PreFormatLength
+ len('#language en-US ')) + '\"%s\\n\"' % SubValue
.strip() + '\r\n'
252 SubValueContent
= SubValueContent
[(PreFormatLength
+ len('#language en-US ')):SubValueContent
.rfind('\\n')] \
254 SubContent
+= ' '*PreFormatLength
+ '#language %-5s ' % Lang
+ SubValueContent
256 SubContent
= StrTokenName
+ ' '*(PreFormatLength
- len(StrTokenName
)) + SubContent
[PreFormatLength
:]
260 ## StringDefClassObject
262 # A structure for language definition
264 class StringDefClassObject(object):
265 def __init__(self
, Name
= None, Value
= None, Referenced
= False, Token
= None, UseOtherLangDef
= ''):
267 self
.StringNameByteList
= []
268 self
.StringValue
= ''
269 self
.StringValueByteList
= ''
271 self
.Referenced
= Referenced
272 self
.UseOtherLangDef
= UseOtherLangDef
276 self
.StringName
= Name
277 self
.StringNameByteList
= UniToHexList(Name
)
279 self
.StringValue
= Value
280 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
281 self
.Length
= len(self
.StringValueByteList
)
286 return repr(self
.StringName
) + ' ' + \
287 repr(self
.Token
) + ' ' + \
288 repr(self
.Referenced
) + ' ' + \
289 repr(self
.StringValue
) + ' ' + \
290 repr(self
.UseOtherLangDef
)
292 def UpdateValue(self
, Value
= None):
295 self
.StringValue
= self
.StringValue
+ '\r\n' + Value
297 self
.StringValue
= Value
298 self
.StringValueByteList
= UniToHexList(self
.StringValue
)
299 self
.Length
= len(self
.StringValueByteList
)
301 ## UniFileClassObject
303 # A structure for .uni file definition
305 class UniFileClassObject(object):
306 def __init__(self
, FileList
= None, IsCompatibleMode
= False, IncludePathList
= None):
307 self
.FileList
= FileList
309 self
.IncFileList
= FileList
310 self
.UniFileHeader
= ''
312 self
.LanguageDef
= [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
313 self
.OrderedStringList
= {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
314 self
.OrderedStringDict
= {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
315 self
.OrderedStringListByToken
= {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
316 self
.IsCompatibleMode
= IsCompatibleMode
317 if not IncludePathList
:
318 self
.IncludePathList
= []
320 self
.IncludePathList
= IncludePathList
321 if len(self
.FileList
) > 0:
322 self
.LoadUniFiles(FileList
)
325 # Get Language definition
327 def GetLangDef(self
, File
, Line
):
328 Lang
= distutils
.util
.split_quoted((Line
.split(u
"//")[0]))
331 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
332 except UnicodeError, Xstr
:
333 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
334 except UnicodeError, Xstr
:
335 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
337 EdkLogger
.Error("Unicode File Parser",
338 ToolError
.FILE_OPEN_FAILURE
,
339 "File read failure: %s" % str(Xstr
),
341 LineNo
= GetLineNo(FileIn
, Line
, False)
342 EdkLogger
.Error("Unicode File Parser",
343 ToolError
.PARSER_ERROR
,
344 "Wrong language definition",
345 ExtraData
="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line
,
346 File
= File
, Line
= LineNo
)
348 LangName
= GetLanguageCode(Lang
[1], self
.IsCompatibleMode
, self
.File
)
349 LangPrintName
= Lang
[2]
352 for Item
in self
.LanguageDef
:
353 if Item
[0] == LangName
:
358 self
.LanguageDef
.append([LangName
, LangPrintName
])
361 # Add language string
363 self
.AddStringToList(u
'$LANGUAGE_NAME', LangName
, LangName
, 0, True, Index
=0)
364 self
.AddStringToList(u
'$PRINTABLE_LANGUAGE_NAME', LangName
, LangPrintName
, 1, True, Index
=1)
368 # The found STRING tokens will be added into new language string list
369 # so that the unique STRING identifier is reserved for all languages in the package list.
371 FirstLangName
= self
.LanguageDef
[0][0]
372 if LangName
!= FirstLangName
:
373 for Index
in range (2, len (self
.OrderedStringList
[FirstLangName
])):
374 Item
= self
.OrderedStringList
[FirstLangName
][Index
]
375 if Item
.UseOtherLangDef
!= '':
376 OtherLang
= Item
.UseOtherLangDef
378 OtherLang
= FirstLangName
379 self
.OrderedStringList
[LangName
].append (StringDefClassObject(Item
.StringName
,
384 self
.OrderedStringDict
[LangName
][Item
.StringName
] = len(self
.OrderedStringList
[LangName
]) - 1
388 # Get String name and value
390 def GetStringObject(self
, Item
):
394 Name
= Item
.split()[1]
395 # Check the string name is the upper character
397 MatchString
= re
.match('[A-Z0-9_]+', Name
, re
.UNICODE
)
398 if MatchString
== None or MatchString
.end(0) != len(Name
):
399 EdkLogger
.Error("Unicode File Parser",
400 ToolError
.FORMAT_INVALID
,
401 'The string token name %s in UNI file %s must be upper case character.' %(Name
, self
.File
))
402 LanguageList
= Item
.split(u
'#language ')
403 for IndexI
in range(len(LanguageList
)):
407 Language
= LanguageList
[IndexI
].split()[0]
408 #.replace(u'\r\n', u'')
410 LanguageList
[IndexI
][LanguageList
[IndexI
].find(u
'\"') + len(u
'\"') : LanguageList
[IndexI
].rfind(u
'\"')]
411 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
412 self
.AddStringToList(Name
, Language
, Value
)
415 # Get include file list and load them
def GetIncludeFile(self, Item, Dir = None):
    """Cut the file operand out of an '!include ' line and load that file.

    @param Item: Text containing the '!include ' directive
    @param Dir:  Not used by the statements of this method
    """
    Directive = u'!include '
    OperandStart = Item.find(Directive) + len(Directive)
    OperandEnd = Item.find(u' ', len(Directive))
    Operand = Item[OperandStart:OperandEnd]
    # The first and last character of the operand are dropped - presumably the quotes
    # around the file name.
    FileName = Operand[1:-1]
    self.LoadUniFile(FileName)
424 # Pre-process before parse .uni file
426 def PreProcess(self
, File
, IsIncludeFile
=False):
427 if not os
.path
.exists(File
.Path
) or not os
.path
.isfile(File
.Path
):
428 EdkLogger
.Error("Unicode File Parser",
429 ToolError
.FILE_NOT_FOUND
,
433 # Check file header of the Uni file
435 # if not CheckUTF16FileHeader(File.Path):
436 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
437 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
440 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_8').readlines()
441 except UnicodeError, Xstr
:
442 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16').readlines()
444 FileIn
= codecs
.open(File
.Path
, mode
='rb', encoding
='utf_16_le').readlines()
446 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=File
.Path
)
450 # get the file header
455 if not self
.UniFileHeader
:
456 FirstGenHeader
= True
458 FirstGenHeader
= False
463 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and (Line
.find(DT
.TAB_HEADER_COMMENT
) > -1) \
464 and not HeaderEnd
and not HeaderStart
:
466 if not Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
:
468 if Line
.startswith(DT
.TAB_COMMENT_EDK1_SPLIT
) and HeaderStart
and not HeaderEnd
and FirstGenHeader
:
469 self
.UniFileHeader
+= Line
+ '\r\n'
473 # Use unique identifier
477 MultiLineFeedExits
= False
480 # 1: single String entry exists
481 # 2: line feed exists under the same single String entry
483 StringEntryExistsFlag
= 0
485 Line
= FileIn
[LineCount
]
489 # Ignore comment line and empty line
491 if Line
== u
'' or Line
.startswith(u
'//'):
493 # Change the single line String entry flag status
495 if StringEntryExistsFlag
== 1:
496 StringEntryExistsFlag
= 2
498 # If the '#string' line and the '#language' line are not in the same line,
499 # there should be only one line feed character between them
501 if MultiLineFeedExits
:
502 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
505 MultiLineFeedExits
= False
507 # Process comment embedded in string define lines
509 FindFlag
= Line
.find(u
'//')
510 if FindFlag
!= -1 and Line
.find(u
'//') < Line
.find(u
'"'):
511 Line
= Line
.replace(Line
[FindFlag
:], u
' ')
512 if FileIn
[LineCount
].strip().startswith('#language'):
513 Line
= Line
+ FileIn
[LineCount
]
514 FileIn
[LineCount
-1] = Line
515 FileIn
[LineCount
] = '\r\n'
517 for Index
in xrange (LineCount
+ 1, len (FileIn
) - 1):
518 if (Index
== len(FileIn
) -1):
519 FileIn
[Index
] = '\r\n'
521 FileIn
[Index
] = FileIn
[Index
+ 1]
523 CommIndex
= GetCharIndexOutStr(u
'/', Line
)
525 if (len(Line
) - 1) > CommIndex
:
526 if Line
[CommIndex
+1] == u
'/':
527 Line
= Line
[:CommIndex
].strip()
529 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
531 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
533 Line
= Line
.replace(UNICODE_WIDE_CHAR
, WIDE_CHAR
)
534 Line
= Line
.replace(UNICODE_NARROW_CHAR
, NARROW_CHAR
)
535 Line
= Line
.replace(UNICODE_NON_BREAKING_CHAR
, NON_BREAKING_CHAR
)
537 Line
= Line
.replace(u
'\\\\', u
'\u0006')
538 Line
= Line
.replace(u
'\\r\\n', CR
+ LF
)
539 Line
= Line
.replace(u
'\\n', CR
+ LF
)
540 Line
= Line
.replace(u
'\\r', CR
)
541 Line
= Line
.replace(u
'\\t', u
'\t')
542 Line
= Line
.replace(u
'''\"''', u
'''"''')
543 Line
= Line
.replace(u
'\t', u
' ')
544 Line
= Line
.replace(u
'\u0006', u
'\\')
546 # IncList = gINCLUDE_PATTERN.findall(Line)
548 if len(IncList
) == 1:
549 for Dir
in [File
.Dir
] + self
.IncludePathList
:
550 IncFile
= PathClass(str(IncList
[0]), Dir
)
551 self
.IncFileList
.append(IncFile
)
552 if os
.path
.isfile(IncFile
.Path
):
553 Lines
.extend(self
.PreProcess(IncFile
, True))
556 EdkLogger
.Error("Unicode File Parser",
557 ToolError
.FILE_NOT_FOUND
,
558 Message
="Cannot find include file",
559 ExtraData
=str(IncList
[0]))
563 # Check if single line has correct '"'
565 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > -1 and Line
.find('"') > Line
.find(u
'#language'):
566 if not Line
.endswith('"'):
567 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
568 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
569 % (LineCount
, File
.Path
))
572 # Between Name entry and Language entry can not contain line feed
574 if Line
.startswith(u
'#string') and Line
.find(u
'#language') == -1:
575 MultiLineFeedExits
= True
577 if Line
.startswith(u
'#string') and Line
.find(u
'#language') > 0 and Line
.find(u
'"') < 0:
578 MultiLineFeedExits
= True
581 # Between Language entry and String entry can not contain line feed
583 if Line
.startswith(u
'#language') and len(Line
.split()) == 2:
584 MultiLineFeedExits
= True
587 # Between two String entry, can not contain line feed
589 if Line
.startswith(u
'"'):
590 if StringEntryExistsFlag
== 2:
591 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
592 Message
=ST
.ERR_UNIPARSE_LINEFEED_UP_EXIST
% Line
, ExtraData
=File
.Path
)
594 StringEntryExistsFlag
= 1
595 if not Line
.endswith('"'):
596 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
597 ExtraData
='''The line %s misses '"' at the end of it in file %s'''
598 % (LineCount
, File
.Path
))
599 elif Line
.startswith(u
'#language'):
600 if StringEntryExistsFlag
== 2:
601 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
602 Message
=ST
.ERR_UNI_MISS_STRING_ENTRY
% Line
, ExtraData
=File
.Path
)
603 StringEntryExistsFlag
= 0
605 StringEntryExistsFlag
= 0
610 # Convert string def format as below
612 # #string MY_STRING_1
614 # "My first English string line 1"
615 # "My first English string line 2"
616 # #string MY_STRING_1
618 # "Mi segunda secuencia 1"
619 # "Mi segunda secuencia 2"
622 if not IsIncludeFile
and not Lines
:
623 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
624 Message
=ST
.ERR_UNIPARSE_NO_SECTION_EXIST
, \
629 ExistStrNameList
= []
631 if StrName
and not StrName
.split()[1].startswith(DT
.TAB_STR_TOKENCNAME
+ DT
.TAB_UNDERLINE_SPLIT
):
632 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
633 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
636 if StrName
and len(StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)) == 4:
637 StringTokenList
= StrName
.split()[1].split(DT
.TAB_UNDERLINE_SPLIT
)
638 if (StringTokenList
[3].upper() in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
] and \
639 StringTokenList
[3] not in [DT
.TAB_STR_TOKENPROMPT
, DT
.TAB_STR_TOKENHELP
]) or \
640 (StringTokenList
[2].upper() == DT
.TAB_STR_TOKENERR
and StringTokenList
[2] != DT
.TAB_STR_TOKENERR
):
641 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
642 Message
=ST
.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR
% StrName
.split()[1], \
645 if Line
.count(u
'#language') > 1:
646 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
647 Message
=ST
.ERR_UNIPARSE_SEP_LANGENTRY_LINE
% Line
, \
650 if Line
.startswith(u
'//'):
652 elif Line
.startswith(u
'#langdef'):
653 if len(Line
.split()) == 2:
654 NewLines
.append(Line
)
656 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
657 NewLines
.append(Line
[:Line
.find(u
'"')].strip())
658 NewLines
.append(Line
[Line
.find(u
'"'):])
660 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
661 elif Line
.startswith(u
'#string'):
662 if len(Line
.split()) == 2:
665 if StrName
.split()[1] not in ExistStrNameList
:
666 ExistStrNameList
.append(StrName
.split()[1].strip())
667 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
668 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
669 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
670 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
671 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
672 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
675 elif len(Line
.split()) == 4 and Line
.find(u
'#language') > 0:
676 if Line
[Line
.find(u
'#language')-1] != ' ' or \
677 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
678 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
680 if Line
.find(u
'"') > 0:
681 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
683 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
685 if StrName
.split()[1] not in ExistStrNameList
:
686 ExistStrNameList
.append(StrName
.split()[1].strip())
687 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
688 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
689 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
690 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
691 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
692 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
695 if StrName
not in NewLines
:
696 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
698 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
699 NewLines
.append((Line
[Line
.find(u
'#language'):]).strip())
700 elif len(Line
.split()) > 4 and Line
.find(u
'#language') > 0 and Line
.find(u
'"') > 0:
701 if Line
[Line
.find(u
'#language')-1] != u
' ' or \
702 Line
[Line
.find(u
'#language')+len(u
'#language')] != u
' ':
703 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
705 if Line
[Line
.find(u
'"')-1] != u
' ':
706 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
708 StrName
= Line
.split()[0] + u
' ' + Line
.split()[1]
710 if StrName
.split()[1] not in ExistStrNameList
:
711 ExistStrNameList
.append(StrName
.split()[1].strip())
712 elif StrName
.split()[1] in [DT
.TAB_INF_ABSTRACT
, DT
.TAB_INF_DESCRIPTION
, \
713 DT
.TAB_INF_BINARY_ABSTRACT
, DT
.TAB_INF_BINARY_DESCRIPTION
, \
714 DT
.TAB_DEC_PACKAGE_ABSTRACT
, DT
.TAB_DEC_PACKAGE_DESCRIPTION
, \
715 DT
.TAB_DEC_BINARY_ABSTRACT
, DT
.TAB_DEC_BINARY_DESCRIPTION
]:
716 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
717 Message
=ST
.ERR_UNIPARSE_MULTI_ENTRY_EXIST
% StrName
.split()[1], \
720 if StrName
not in NewLines
:
721 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
723 NewLines
.append((Line
[:Line
.find(u
'#language')]).strip())
724 NewLines
.append((Line
[Line
.find(u
'#language'):Line
.find(u
'"')]).strip())
725 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
727 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
728 elif Line
.startswith(u
'#language'):
729 if len(Line
.split()) == 2:
731 if StrName
not in NewLines
:
732 NewLines
.append(StrName
)
734 NewLines
.append(StrName
)
735 NewLines
.append(Line
)
736 elif len(Line
.split()) > 2 and Line
.find(u
'"') > 0:
738 if StrName
not in NewLines
:
739 NewLines
.append(StrName
)
741 NewLines
.append(StrName
)
742 NewLines
.append((Line
[:Line
.find(u
'"')]).strip())
743 NewLines
.append((Line
[Line
.find(u
'"'):]).strip())
745 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
746 elif Line
.startswith(u
'"'):
747 if u
'#string' in Line
or u
'#language' in Line
:
748 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
749 NewLines
.append(Line
)
752 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, ExtraData
=File
.Path
)
754 if StrName
and not StrName
.split()[1].startswith(u
'STR_'):
755 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
756 Message
=ST
.ERR_UNIPARSE_STRNAME_FORMAT_ERROR
% StrName
.split()[1], \
759 if StrName
and not NewLines
:
760 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
761 Message
=ST
.ERR_UNI_MISS_LANGENTRY
% StrName
, \
765 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
766 # should be Abstract, Description, BinaryAbstract, BinaryDescription
767 AbstractPosition
= -1
768 DescriptionPosition
= -1
769 BinaryAbstractPosition
= -1
770 BinaryDescriptionPosition
= -1
771 for StrName
in ExistStrNameList
:
772 if DT
.TAB_HEADER_ABSTRACT
.upper() in StrName
:
773 if 'BINARY' in StrName
:
774 BinaryAbstractPosition
= ExistStrNameList
.index(StrName
)
776 AbstractPosition
= ExistStrNameList
.index(StrName
)
777 if DT
.TAB_HEADER_DESCRIPTION
.upper() in StrName
:
778 if 'BINARY' in StrName
:
779 BinaryDescriptionPosition
= ExistStrNameList
.index(StrName
)
781 DescriptionPosition
= ExistStrNameList
.index(StrName
)
783 OrderList
= sorted([AbstractPosition
, DescriptionPosition
])
784 BinaryOrderList
= sorted([BinaryAbstractPosition
, BinaryDescriptionPosition
])
787 BinaryMin
= BinaryOrderList
[0]
788 BinaryMax
= BinaryOrderList
[1]
789 if BinaryDescriptionPosition
> -1:
790 if not(BinaryDescriptionPosition
== BinaryMax
and BinaryAbstractPosition
== BinaryMin
and \
792 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
793 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
795 elif BinaryAbstractPosition
> -1:
796 if not(BinaryAbstractPosition
> Max
):
797 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
798 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
801 if DescriptionPosition
> -1:
802 if not(DescriptionPosition
== Max
and AbstractPosition
== Min
and \
803 DescriptionPosition
> AbstractPosition
):
804 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
, \
805 Message
=ST
.ERR_UNIPARSE_ENTRY_ORDER_WRONG
, \
808 if not self
.UniFileHeader
:
809 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
810 Message
= ST
.ERR_NO_SOURCE_HEADER
,
818 def LoadUniFile(self
, File
= None):
820 EdkLogger
.Error("Unicode File Parser",
821 ToolError
.PARSER_ERROR
,
822 Message
='No unicode file is given',
828 # Process special char in file
830 Lines
= self
.PreProcess(File
)
833 # Get Unicode Information
835 for IndexI
in range(len(Lines
)):
837 if (IndexI
+ 1) < len(Lines
):
838 SecondLine
= Lines
[IndexI
+ 1]
839 if (IndexI
+ 2) < len(Lines
):
840 ThirdLine
= Lines
[IndexI
+ 2]
843 # Get Language def information
845 if Line
.find(u
'#langdef ') >= 0:
846 self
.GetLangDef(File
, Line
+ u
' ' + SecondLine
)
854 # Get string def information format as below
856 # #string MY_STRING_1
858 # "My first English string line 1"
859 # "My first English string line 2"
860 # #string MY_STRING_1
862 # "Mi segunda secuencia 1"
863 # "Mi segunda secuencia 2"
865 if Line
.find(u
'#string ') >= 0 and Line
.find(u
'#language ') < 0 and \
866 SecondLine
.find(u
'#string ') < 0 and SecondLine
.find(u
'#language ') >= 0 and \
867 ThirdLine
.find(u
'#string ') < 0 and ThirdLine
.find(u
'#language ') < 0:
868 if Line
.find('"') > 0 or SecondLine
.find('"') > 0:
869 EdkLogger
.Error("Unicode File Parser", ToolError
.FORMAT_INVALID
,
870 Message
=ST
.ERR_UNIPARSE_DBLQUOTE_UNMATCHED
,
873 Name
= Line
[Line
.find(u
'#string ') + len(u
'#string ') : ].strip(' ')
874 Language
= SecondLine
[SecondLine
.find(u
'#language ') + len(u
'#language ') : ].strip(' ')
875 for IndexJ
in range(IndexI
+ 2, len(Lines
)):
876 if Lines
[IndexJ
].find(u
'#string ') < 0 and Lines
[IndexJ
].find(u
'#language ') < 0 and \
877 Lines
[IndexJ
].strip().startswith(u
'"') and Lines
[IndexJ
].strip().endswith(u
'"'):
878 if Lines
[IndexJ
][-2] == ' ':
881 if Lines
[IndexJ
].strip()[1:-1].strip():
882 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1].rstrip() + ' '
884 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1]
887 Value
= Value
+ Lines
[IndexJ
].strip()[1:-1] + '\r\n'
891 if Value
.endswith('\r\n'):
892 Value
= Value
[: Value
.rfind('\r\n')]
893 Language
= GetLanguageCode(Language
, self
.IsCompatibleMode
, self
.File
)
894 self
.AddStringToList(Name
, Language
, Value
)
898 # Load multiple .uni files
def LoadUniFiles(self, FileList):
    """Load every entry of FileList whose path names a .uni file.

    An entry qualifies when its stripped Path ends with '.uni', '.UNI' or '.Uni';
    all other entries are ignored.

    @param FileList: Sequence of objects exposing a Path attribute
    """
    if len(FileList) == 0:
        return
    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for File in FileList:
        if File.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(File)
908 # Add a string to list
910 def AddStringToList(self
, Name
, Language
, Value
, Token
= 0, Referenced
= False, UseOtherLangDef
= '', Index
= -1):
911 for LangNameItem
in self
.LanguageDef
:
912 if Language
== LangNameItem
[0]:
915 if Language
not in self
.OrderedStringList
:
916 self
.OrderedStringList
[Language
] = []
917 self
.OrderedStringDict
[Language
] = {}
920 if Name
in self
.OrderedStringDict
[Language
]:
923 ItemIndexInList
= self
.OrderedStringDict
[Language
][Name
]
924 Item
= self
.OrderedStringList
[Language
][ItemIndexInList
]
925 Item
.UpdateValue(Value
)
926 Item
.UseOtherLangDef
= ''
929 Token
= len(self
.OrderedStringList
[Language
])
931 self
.OrderedStringList
[Language
].append(StringDefClassObject(Name
,
936 self
.OrderedStringDict
[Language
][Name
] = Token
937 for LangName
in self
.LanguageDef
:
939 # New STRING token will be added into all language string lists.
940 # so that the unique STRING identifier is reserved for all languages in the package list.
942 if LangName
[0] != Language
:
943 if UseOtherLangDef
!= '':
944 OtherLangDef
= UseOtherLangDef
946 OtherLangDef
= Language
947 self
.OrderedStringList
[LangName
[0]].append(StringDefClassObject(Name
,
952 self
.OrderedStringDict
[LangName
[0]][Name
] = len(self
.OrderedStringList
[LangName
[0]]) - 1
954 self
.OrderedStringList
[Language
].insert(Index
, StringDefClassObject(Name
,
959 self
.OrderedStringDict
[Language
][Name
] = Index
962 # Set the string as referenced
def SetStringReferenced(self, Name):
    """Flag the string called Name as referenced.

    Does nothing when Name is not defined for the first language.

    @param Name: String name to look up
    """
    #
    # String tokens are added in the same order in all language string lists,
    # so only the entry of the first defined language is updated here.
    #
    FirstLang = self.LanguageDef[0][0]
    NameToIndex = self.OrderedStringDict[FirstLang]
    if Name not in NameToIndex:
        return
    self.OrderedStringList[FirstLang][NameToIndex[Name]].Referenced = True
976 # Search the string in language definition by Name
def FindStringValue(self, Name, Lang):
    """Look up the string entry called Name in language Lang.

    @param Name: String name to search for
    @param Lang: Language identifier; must be a key of OrderedStringDict
    @retval:     The entry stored in OrderedStringList for Name, or None when Name is unknown
    """
    NameToIndex = self.OrderedStringDict[Lang]
    if Name not in NameToIndex:
        return None
    return self.OrderedStringList[Lang][NameToIndex[Name]]
986 # Search the string in language definition by Token
def FindByToken(self, Token, Lang):
    """Search the string list of language Lang for an entry with the given token.

    @param Token: Token value compared against each entry's Token attribute
    @param Lang:  Language identifier; must be a key of OrderedStringList
    @retval:      The first entry whose Token equals Token, or None when there is none
    """
    for Item in self.OrderedStringList[Lang]:
        if Item.Token == Token:
            return Item
    return None
996 # Re-order strings and re-generate tokens
999 if len(self
.LanguageDef
) == 0:
1002 # Retoken all language strings according to the status of string stoken in the first language string.
1004 FirstLangName
= self
.LanguageDef
[0][0]
1006 # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
1007 for LangNameItem
in self
.LanguageDef
:
1008 self
.OrderedStringListByToken
[LangNameItem
[0]] = {}
1011 # Use small token for all referred string stoken.
1014 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1015 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1016 if FirstLangItem
.Referenced
== True:
1017 for LangNameItem
in self
.LanguageDef
:
1018 LangName
= LangNameItem
[0]
1019 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1020 OtherLangItem
.Referenced
= True
1021 OtherLangItem
.Token
= RefToken
1022 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1023 RefToken
= RefToken
+ 1
1026 # Use big token for all unreferred string stoken.
1029 for Index
in range (0, len (self
.OrderedStringList
[FirstLangName
])):
1030 FirstLangItem
= self
.OrderedStringList
[FirstLangName
][Index
]
1031 if FirstLangItem
.Referenced
== False:
1032 for LangNameItem
in self
.LanguageDef
:
1033 LangName
= LangNameItem
[0]
1034 OtherLangItem
= self
.OrderedStringList
[LangName
][Index
]
1035 OtherLangItem
.Token
= RefToken
+ UnRefToken
1036 self
.OrderedStringListByToken
[LangName
][OtherLangItem
.Token
] = OtherLangItem
1037 UnRefToken
= UnRefToken
+ 1
1040 # Show the instance itself
1043 print self
.LanguageDef
1044 #print self.OrderedStringList
1045 for Item
in self
.OrderedStringList
:
1047 for Member
in self
.OrderedStringList
[Item
]:
1051 # Read content from '!include' UNI file
1053 def ReadIncludeUNIfile(self
, FilaPath
):
1057 if not os
.path
.exists(FilaPath
) or not os
.path
.isfile(FilaPath
):
1058 EdkLogger
.Error("Unicode File Parser",
1059 ToolError
.FILE_NOT_FOUND
,
1062 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_8').readlines()
1063 except UnicodeError, Xstr
:
1064 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16').readlines()
1065 except UnicodeError:
1066 FileIn
= codecs
.open(FilaPath
, mode
='rb', encoding
='utf_16_le').readlines()
1068 EdkLogger
.Error("Unicode File Parser", ToolError
.FILE_OPEN_FAILURE
, ExtraData
=FilaPath
)