]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/UPT/Library/UniClassObject.py
BaseTools/UPT: Update the import statement to use StringUtils
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / UniClassObject.py
1 ## @file
2 # Collect all defined strings in multiple uni files.
3 #
4 # Copyright (c) 2014 - 2018, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14 """
15 Collect all defined strings in multiple uni files
16 """
17
18 ##
19 # Import Modules
20 #
21 import os, codecs, re
22 import distutils.util
23 from Logger import ToolError
24 from Logger import Log as EdkLogger
25 from Logger import StringTable as ST
26 from Library.StringUtils import GetLineNo
27 from Library.Misc import PathClass
28 from Library.Misc import GetCharIndexOutStr
29 from Library import DataType as DT
30 from Library.ParserValidate import CheckUTF16FileHeader
31
32 ##
33 # Static definitions
34 #
# Textual control markers as they are written inside .uni sources. PreProcess
# replaces each one with the single-character placeholder defined further below.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
# Plain (non-unicode-literal) carriage return and line feed characters.
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Single-character placeholders substituted for the \narrow, \wide and \nbr markers.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
# Commonly used single characters.
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'
49
# Lookup table from three-letter language codes to their two-letter
# equivalents (for example 'eng' -> 'en'). The braces already allow the
# literal to span lines, so no backslash continuations are needed.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
82
83 ## Convert a python unicode string to a normal string
84 #
85 # Convert a python unicode string to a normal string
86 # UniToStr(u'I am a string') is 'I am a string'
87 #
88 # @param Uni: The python unicode string
89 #
90 # @retval: The formatted normal string
91 #
def UniToStr(Uni):
    """Return the repr() text of a string without its quoting.

    repr() renders a Python 2 unicode object as u'...' but a plain string
    as '...'. The type prefix is therefore removed only when it is actually
    present, so the first character of an un-prefixed value is not lost.

    @param Uni: The python unicode string

    @retval: The escaped text between the surrounding quotes
    """
    Text = repr(Uni)
    # Drop a one-letter type prefix (u'...' or b'...') if repr() emitted one.
    if Text[:1] in ('u', 'b'):
        Text = Text[1:]
    # Strip the opening and closing quote characters.
    return Text[1:-1]
94
95 ## Convert a unicode string to a Hex list
96 #
97 # Convert a unicode string to a Hex list
98 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
99 #
100 # @param Uni: The python unicode string
101 #
102 # @retval List: The formatted hex list
103 #
def UniToHexList(Uni):
    """Expand every character into two '0xNN' strings, low byte first.

    Each character is formatted as an upper-case hex number padded to at
    least four digits; digits 3-4 are emitted first, then digits 1-2.
    NOTE(review): only the first four hex digits are looked at, so this
    presumably expects 16-bit code units - confirm for wider code points.

    @param Uni: The python unicode string

    @retval List: The formatted hex list
    """
    HexList = []
    for Char in Uni:
        Digits = '%04X' % ord(Char)
        HexList.extend(['0x' + Digits[2:4], '0x' + Digits[0:2]])
    return HexList
111
112 ## Convert special unicode characters
113 #
114 # Convert special characters to (c), (r) and (tm).
115 #
116 # @param Uni: The python unicode string
117 #
118 # @retval NewUni: The converted unicode string
119 #
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs by ASCII text.

    @param Uni: The python unicode string

    @retval NewUni: The string with U+00A9, U+00AE and U+2122 replaced by
                    '(c)', '(r)' and '(tm)' respectively
    """
    Replacements = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, AsciiText in Replacements:
        Converted = Converted.replace(Symbol, AsciiText)
    return Converted
126
## GetLanguageCode1766
#
# Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# @param LangName: Language codes read from .UNI file
#
# @retval LangName: Valid language code in RFC 1766 format or None
#
def GetLanguageCode1766(LangName, File=None):
    """Return LangName unchanged.

    No validation or conversion is performed: the language name is handed
    back exactly as received. The conversion logic that used to follow an
    unconditional early return could never execute and has been removed,
    so the observable behaviour is unchanged.

    @param LangName: Language code read from .UNI file
    @param File:     Accepted for interface compatibility; not used

    @retval LangName: The language code that was passed in
    """
    return LangName
173
## GetLanguageCode
#
# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# @param LangName: Language codes read from .UNI file
#
# @retval LangName: Valid language code in RFC 4646 format or None
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    """Validate a language code and return its accepted form.

    In compatible mode a purely alphabetic three-letter code is translated
    through gLANG_CONV_TABLE (falling back to the code itself when it is
    not in the table); anything else is reported as an invalid RFC 1766
    code. Outside compatible mode the code is accepted when it is a
    private-use tag ('x-...' / 'X-...'), two letters, three letters that
    are not a gLANG_CONV_TABLE key, or starts with a two- or three-letter
    primary subtag followed by '-'. All other input is reported through
    EdkLogger.Error as an invalid RFC 4646 code.

    @param LangName:         Language code read from .UNI file
    @param IsCompatibleMode: True to apply the RFC 1766 rules
    @param File:             File object/path passed on to the error report

    @retval LangName: The accepted (possibly converted) language code
    """
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)

    # Slicing (rather than indexing) keeps empty and one-character names on
    # the regular "invalid code" path instead of raising IndexError.
    if LangName[0:2] in ('x-', 'X-'):
        return LangName

    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
218
## FormatUniEntry
#
# Format the entry in Uni file.
#
# @param StrTokenName StrTokenName.
# @param TokenValueList A list that needs to be processed.
# @param ContainerFile ContainerFile.
#
# @return formatted entry
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    """Build the text of one string entry for a .uni file.

    The token name occupies a left column (40 characters wide, or the name
    length plus one when the name is longer). Every (language, value) pair
    with a non-empty value adds a '#language <code>' clause followed by the
    value split on newlines, one quoted fragment per non-blank line. Pairs
    whose language equals DT.TAB_LANGUAGE_EN_X are skipped.

    @param StrTokenName:   Token name placed in the first column
    @param TokenValueList: Iterable of (Lang, Value) pairs
    @param ContainerFile:  File passed to GetLanguageCode for error reports

    @return The formatted entry, or '' when no pair contributed any text
    """
    NameLength = len(StrTokenName)
    ColumnWidth = NameLength + 1 if NameLength > 40 else 40
    # Fragments are aligned beneath the text following '#language en-US '.
    ValueIndent = ColumnWidth + len('#language en-US ')

    Clauses = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        elif len(Lang.split('-')[0]) == 3:
            Lang = GetLanguageCode(Lang.split('-')[0], True, ContainerFile)
        else:
            Lang = GetLanguageCode(Lang, False, ContainerFile)

        Fragments = [
            ' ' * ValueIndent + '\"%s\\n\"' % Piece.strip() + '\r\n'
            for Piece in Value.split('\n')
            if Piece.strip()
        ]
        Joined = ''.join(Fragments)
        # Drop the indent of the first fragment (it follows the '#language'
        # clause on the same line) and cut at the last literal '\n' marker
        # so the final fragment is closed without one.
        Joined = Joined[ValueIndent:Joined.rfind('\\n')] + '\"' + '\r\n'
        Clauses.append(' ' * ColumnWidth + '#language %-5s ' % Lang + Joined)

    SubContent = ''.join(Clauses)
    if SubContent:
        # The first clause shares its line with the token name.
        SubContent = StrTokenName + ' ' * (ColumnWidth - NameLength) + SubContent[ColumnWidth:]
    return SubContent
256
257
258 ## StringDefClassObject
259 #
260 # A structure for language definition
261 #
class StringDefClassObject(object):
    """One string definition: name, value, token number and bookkeeping flags."""

    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        """Store the given fields and derive the hex byte lists.

        Name and Value default to '' when None; their byte lists are built
        with UniToHexList only when a value was supplied. Token defaults to 0.
        """
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        self.Token = 0 if Token is None else Token

        if Name is None:
            self.StringName = ''
            self.StringNameByteList = []
        else:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)

        if Value is None:
            self.StringValue = ''
            self.StringValueByteList = ''
            self.Length = 0
        else:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(Value)
            self.Length = len(self.StringValueByteList)

    def __str__(self):
        """Return the repr() of name, token, referenced flag, value and other-language marker, space separated."""
        Fields = (self.StringName, self.Token, self.Referenced, self.StringValue, self.UseOtherLangDef)
        return ' '.join([repr(Field) for Field in Fields])

    def UpdateValue(self, Value = None):
        """Append Value to the stored string (joined by CR LF) and refresh the byte list and length.

        A Value of None leaves the object untouched.
        """
        if Value is None:
            return
        if self.StringValue:
            self.StringValue = self.StringValue + '\r\n' + Value
        else:
            self.StringValue = Value
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
298
299 ## UniFileClassObject
300 #
301 # A structure for .uni file definition
302 #
303 class UniFileClassObject(object):
304 def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
305 self.FileList = FileList
306 self.File = None
307 self.IncFileList = FileList
308 self.UniFileHeader = ''
309 self.Token = 2
310 self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
311 self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
312 self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
313 self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
314 self.IsCompatibleMode = IsCompatibleMode
315 if not IncludePathList:
316 self.IncludePathList = []
317 else:
318 self.IncludePathList = IncludePathList
319 if len(self.FileList) > 0:
320 self.LoadUniFiles(FileList)
321
322 #
323 # Get Language definition
324 #
def GetLangDef(self, File, Line):
    """Register the language declared by a '#langdef' line.

    The text before any '//' is split into quoted tokens and must yield
    exactly three of them (keyword, language code, printable name). The
    language is appended to self.LanguageDef if new, its $LANGUAGE_NAME and
    $PRINTABLE_LANGUAGE_NAME strings are added, and - for a new language
    other than the first - every string already known for the first
    language from index 2 on is mirrored with an empty value.

    @param File: File object whose Path is re-read to locate a bad line
    @param Line: The '#langdef ...' text to parse

    @retval True on success; False only if a malformed line was reported
            and EdkLogger.Error returned instead of raising
    """
    Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
    if len(Lang) != 3:
        # Re-read the file only to find the line number for the report.
        # Encodings are tried in turn; chained 'except UnicodeError' clauses
        # on a single try cannot do that because only the first one matches.
        FileIn = None
        ReadError = None
        for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
            try:
                UniFile = codecs.open(File.Path, mode='rb', encoding=Encoding)
                try:
                    FileIn = UniFile.readlines()
                finally:
                    UniFile.close()
                break
            except UnicodeError as Err:
                ReadError = Err
            except Exception as Err:
                # Not a decoding problem: another encoding will not help.
                ReadError = Err
                break
        if FileIn is None:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_OPEN_FAILURE,
                            "File read failure: %s" % str(ReadError),
                            ExtraData=File)
        LineNo = GetLineNo(FileIn, Line, False)
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        "Wrong language definition",
                        ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                        File = File, Line = LineNo)
        # NOTE(review): presumably EdkLogger.Error raises; this return only
        # matters if it does not, where no language name would be available.
        return False

    LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
    LangPrintName = Lang[2]

    IsLangInDef = False
    for Item in self.LanguageDef:
        if Item[0] == LangName:
            IsLangInDef = True
            break

    if not IsLangInDef:
        self.LanguageDef.append([LangName, LangPrintName])

    #
    # Add language string
    #
    self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
    self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

    if not IsLangInDef:
        #
        # The found STRING tokens will be added into new language string list
        # so that the unique STRING identifier is reserved for all languages in the package list.
        #
        FirstLangName = self.LanguageDef[0][0]
        if LangName != FirstLangName:
            for Index in range (2, len (self.OrderedStringList[FirstLangName])):
                Item = self.OrderedStringList[FirstLangName][Index]
                if Item.UseOtherLangDef != '':
                    OtherLang = Item.UseOtherLangDef
                else:
                    OtherLang = FirstLangName
                self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
                                                                              '',
                                                                              Item.Referenced,
                                                                              Item.Token,
                                                                              OtherLang))
                self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
    return True
384
385 #
386 # Get String name and value
387 #
def GetStringObject(self, Item):
    """Parse a '#string NAME #language xx "value" ...' item and store each value.

    The second whitespace-separated token is the string name; it must
    consist only of A-Z, 0-9 and '_'. Every segment following a
    '#language ' marker supplies a language code (its first token) and a
    value (the text between its first and last double quote), which are
    passed to self.AddStringToList.

    @param Item: The text of one string definition
    """
    Name = Item.split()[1]
    # Check the string name is the upper character
    if Name != '':
        Matched = re.match('[A-Z0-9_]+', Name, re.UNICODE)
        if Matched is None or Matched.end(0) != len(Name):
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))

    # The piece before the first '#language ' holds only the name, so skip it.
    for Segment in Item.split(u'#language ')[1:]:
        LangToken = Segment.split()[0]
        ValueStart = Segment.find(u'\"') + len(u'\"')
        ValueEnd = Segment.rfind(u'\"')
        Value = Segment[ValueStart : ValueEnd]
        Language = GetLanguageCode(LangToken, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
411
412 #
413 # Get include file list and load them
414 #
def GetIncludeFile(self, Item, Dir = None):
    """Extract the quoted file name of an '!include' line and load that file.

    The name is taken from just after '!include ' up to the first space
    found at or after offset len('!include '), then stripped of its first
    and last character (the quotes) and handed to self.LoadUniFile.

    @param Item: The '!include "<file>" ' text
    @param Dir:  Not used
    """
    Keyword = u'!include '
    NameStart = Item.find(Keyword) + len(Keyword)
    NameEnd = Item.find(u' ', len(Keyword))
    QuotedName = Item[NameStart :NameEnd]
    self.LoadUniFile(QuotedName[1:-1])
420
421 #
422 # Pre-process before parse .uni file
423 #
def PreProcess(self, File, IsIncludeFile=False):
    """Read a .uni file, validate its layout and return normalized lines.

    Steps performed:
      1. Read File.Path, trying utf_8, utf_16 and utf_16_le in turn.
      2. Collect the leading header comment lines into self.UniFileHeader
         (only when that header is still empty on entry).
      3. Strip comments, translate escape sequences and enforce the blank
         line rules between '#string', '#language' and quoted value lines.
      4. Split every entry so that '#langdef <code>', '#string <name>',
         '#language <code>' and each quoted text is a separate list item.
      5. Check the relative order of the abstract/description entries.

    Format violations are reported through EdkLogger.Error.

    @param File:          File object; only its Path attribute is used
    @param IsIncludeFile: True when File is an included file; an empty file
                          is then allowed and a '#string' line already in
                          the result is not repeated

    @retval NewLines: The normalized list of lines
    """
    if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=File.Path)

    #
    # Check file header of the Uni file
    #
    # if not CheckUTF16FileHeader(File.Path):
    #     EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
    #                     ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)

    #
    # Try each encoding in turn. Several 'except UnicodeError' clauses on one
    # try statement cannot express this because only the first one can match.
    #
    FileIn = None
    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            UniFile = codecs.open(File.Path, mode='rb', encoding=Encoding)
            try:
                FileIn = UniFile.readlines()
            finally:
                UniFile.close()
            break
        except UnicodeError:
            continue
        except Exception:
            # Not a decoding problem: another encoding will not help.
            break
    if FileIn is None:
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)

    #
    # get the file header
    #
    Lines = []
    HeaderStart = False
    HeaderEnd = False
    if not self.UniFileHeader:
        FirstGenHeader = True
    else:
        FirstGenHeader = False
    for Line in FileIn:
        Line = Line.strip()
        if Line == u'':
            continue
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
            and not HeaderEnd and not HeaderStart:
            HeaderStart = True
        if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
            HeaderEnd = True
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
            self.UniFileHeader += Line + '\r\n'
            continue

    #
    # Use unique identifier
    #
    FindFlag = -1
    LineCount = 0
    MultiLineFeedExits = False
    #
    # 0: initial value
    # 1: single String entry exist
    # 2: line feed exist under the same single String entry
    #
    StringEntryExistsFlag = 0
    # The loop variable only fixes the number of iterations; the line that is
    # actually processed is FileIn[LineCount], because a merge below steps
    # LineCount back to re-process the combined line.
    for Line in FileIn:
        Line = FileIn[LineCount]
        LineCount += 1
        Line = Line.strip()
        #
        # Ignore comment line and empty line
        #
        if Line == u'' or Line.startswith(u'//'):
            #
            # Change the single line String entry flag status
            #
            if StringEntryExistsFlag == 1:
                StringEntryExistsFlag = 2
            #
            # If the '#string' line and the '#language' line are not in the same line,
            # there should be only one line feed character between them
            #
            if MultiLineFeedExits:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            continue

        MultiLineFeedExits = False
        #
        # Process comment embedded in string define lines
        #
        FindFlag = Line.find(u'//')
        if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
            Line = Line.replace(Line[FindFlag:], u' ')
            # Guard the look-ahead: on the last line there is no next line.
            if LineCount < len(FileIn) and FileIn[LineCount].strip().startswith('#language'):
                Line = Line + FileIn[LineCount]
                FileIn[LineCount-1] = Line
                FileIn[LineCount] = '\r\n'
                LineCount -= 1
                # Shift the following lines up by one to close the gap left
                # by the merged '#language' line.
                for Index in range (LineCount + 1, len (FileIn) - 1):
                    FileIn[Index] = FileIn[Index + 1]
                continue
        CommIndex = GetCharIndexOutStr(u'/', Line)
        if CommIndex > -1:
            if (len(Line) - 1) > CommIndex:
                if Line[CommIndex+1] == u'/':
                    Line = Line[:CommIndex].strip()
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

        Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
        Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
        Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

        # Escaped backslashes are parked as U+0006 so the replacements below
        # do not mistake them for the start of another escape sequence.
        Line = Line.replace(u'\\\\', u'\u0006')
        Line = Line.replace(u'\\r\\n', CR + LF)
        Line = Line.replace(u'\\n', CR + LF)
        Line = Line.replace(u'\\r', CR)
        Line = Line.replace(u'\\t', u'\t')
        Line = Line.replace(u'''\"''', u'''"''')
        Line = Line.replace(u'\t', u' ')
        Line = Line.replace(u'\u0006', u'\\')

        #
        # Check if single line has correct '"'
        #
        if Line.startswith(u'#string') and Line.find(u'#language') > -1 and Line.find('"') > Line.find(u'#language'):
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                % (LineCount, File.Path))

        #
        # Between Name entry and Language entry can not contain line feed
        #
        if Line.startswith(u'#string') and Line.find(u'#language') == -1:
            MultiLineFeedExits = True

        if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
            MultiLineFeedExits = True

        #
        # Between Language entry and String entry can not contain line feed
        #
        if Line.startswith(u'#language') and len(Line.split()) == 2:
            MultiLineFeedExits = True

        #
        # Between two String entry, can not contain line feed
        #
        if Line.startswith(u'"'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)

            StringEntryExistsFlag = 1
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                % (LineCount, File.Path))
        elif Line.startswith(u'#language'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
            StringEntryExistsFlag = 0
        else:
            StringEntryExistsFlag = 0

        Lines.append(Line)

    #
    # Convert string def format as below
    #
    #     #string MY_STRING_1
    #     #language eng
    #     "My first English string line 1"
    #     "My first English string line 2"
    #     #string MY_STRING_1
    #     #language spa
    #     "Mi segunda secuencia 1"
    #     "Mi segunda secuencia 2"
    #

    if not IsIncludeFile and not Lines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
            Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
            ExtraData=File.Path)

    NewLines = []
    StrName = u''
    ExistStrNameList = []
    for Line in Lines:
        if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
            StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
            if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
                StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
                (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
                                ExtraData=File.Path)

        if Line.count(u'#language') > 1:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
                            ExtraData=File.Path)

        if Line.startswith(u'//'):
            continue
        elif Line.startswith(u'#langdef'):
            if len(Line.split()) == 2:
                NewLines.append(Line)
                continue
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                NewLines.append(Line[:Line.find(u'"')].strip())
                NewLines.append(Line[Line.find(u'"'):])
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#string'):
            if len(Line.split()) == 2:
                StrName = Line
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                continue
            elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
                if Line[Line.find(u'#language')-1] != ' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line.find(u'"') > 0:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):]).strip())
            elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
                if Line[Line.find(u'#language')-1] != u' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line[Line.find(u'"')-1] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#language'):
            if len(Line.split()) == 2:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append(Line)
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append((Line[:Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'"'):
            if u'#string' in Line or u'#language' in Line:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            NewLines.append(Line)
        else:
            # Echo the offending line; the parenthesised form is accepted by
            # both the Python 2 print statement and the Python 3 function.
            print(Line)
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

    if StrName and not StrName.split()[1].startswith(u'STR_'):
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                        ExtraData=File.Path)

    if StrName and not NewLines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
                        ExtraData=File.Path)

    #
    # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
    # should be Abstract, Description, BinaryAbstract, BinaryDescription
    AbstractPosition = -1
    DescriptionPosition = -1
    BinaryAbstractPosition = -1
    BinaryDescriptionPosition = -1
    for StrName in ExistStrNameList:
        if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryAbstractPosition = ExistStrNameList.index(StrName)
            else:
                AbstractPosition = ExistStrNameList.index(StrName)
        if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryDescriptionPosition = ExistStrNameList.index(StrName)
            else:
                DescriptionPosition = ExistStrNameList.index(StrName)

    OrderList = sorted([AbstractPosition, DescriptionPosition])
    BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
    Min = OrderList[0]
    Max = OrderList[1]
    BinaryMin = BinaryOrderList[0]
    BinaryMax = BinaryOrderList[1]
    if BinaryDescriptionPosition > -1:
        if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
               BinaryMax > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)
    elif BinaryAbstractPosition > -1:
        if not(BinaryAbstractPosition > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if DescriptionPosition > -1:
        if not(DescriptionPosition == Max and AbstractPosition == Min and \
               DescriptionPosition > AbstractPosition):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if not self.UniFileHeader:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        Message = ST.ERR_NO_SOURCE_HEADER,
                        ExtraData=File.Path)

    return NewLines
796
797 #
798 # Load a .uni file
799 #
def LoadUniFile(self, File = None):
    """Parse one .uni file and record its language and string definitions.

    The file is normalized by self.PreProcess; each '#langdef' line is then
    passed (joined with the line that follows it) to self.GetLangDef, and
    each '#string' / '#language' / quoted-text group is collected into one
    value and stored through self.AddStringToList.

    @param File: File object to load; None is reported as a parser error
    """
    if File is None:
        # There is no File object here, so no path can be put in ExtraData.
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given')
        # NOTE(review): presumably EdkLogger.Error raises; returning keeps
        # the method from dereferencing None if it does not.
        return

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)

    #
    # Get Unicode Information
    #
    # Start the look-ahead lines as empty text so they are always bound, even
    # for a one-line file. Near the end of the list they keep the value of
    # the previous iteration.
    SecondLine = u''
    ThirdLine = u''
    for IndexI in range(len(Lines)):
        Line = Lines[IndexI]
        if (IndexI + 1) < len(Lines):
            SecondLine = Lines[IndexI + 1]
        if (IndexI + 2) < len(Lines):
            ThirdLine = Lines[IndexI + 2]

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        Name = ''
        Language = ''
        Value = ''
        CombineToken = False
        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
            SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
            ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
            if Line.find('"') > 0 or SecondLine.find('"') > 0:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                                ExtraData=File.Path)

            Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
            Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
            for IndexJ in range(IndexI + 2, len(Lines)):
                if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
                    Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
                    # A space just before the closing quote means the text
                    # continues on the next quoted line. The slice avoids an
                    # IndexError on a line made of a single quote character.
                    if Lines[IndexJ][-2:-1] == ' ':
                        CombineToken = True
                    if CombineToken:
                        if Lines[IndexJ].strip()[1:-1].strip():
                            Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
                        else:
                            Value = Value + Lines[IndexJ].strip()[1:-1]
                        CombineToken = False
                    else:
                        Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
                else:
                    # First line that is not a quoted text ends this value.
                    break
            if Value.endswith('\r\n'):
                Value = Value[: Value.rfind('\r\n')]
            Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
            self.AddStringToList(Name, Language, Value)
            continue
878
879 #
880 # Load multiple .uni files
881 #
def LoadUniFiles(self, FileList):
    """Load every entry of FileList whose path ends in a .uni suffix.

    The stripped Path of each entry is matched against '.uni', '.UNI' and
    '.Uni'; matching entries are passed to self.LoadUniFile(), all other
    entries are skipped.

    @param FileList: sized collection of objects exposing a Path attribute.
    """
    if len(FileList) == 0:
        return

    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for Candidate in FileList:
        if Candidate.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(Candidate)
888
889 #
890 # Add a string to list
891 #
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Add a string definition for Language, or update an existing one.

    If Name is already defined for Language, its value is replaced (unless
    Value is None) and its UseOtherLangDef marker is cleared.  Otherwise a
    new StringDefClassObject is created whose token is the current number
    of strings of that language; the Token argument is not used for it.

    With Index == -1 the new string is appended, and an empty-valued
    definition with the same token is appended to the list of every other
    language in self.LanguageDef.  With any other Index the string is only
    inserted at that position of the list of Language.

    @param Name:            string identifier
    @param Language:        language code the value belongs to
    @param Value:           string value; None leaves an existing value alone
    @param Token:           accepted for interface compatibility; overwritten
                            for newly added strings
    @param Referenced:      initial referenced flag of a new string
    @param UseOtherLangDef: language recorded on the new string and, when
                            not empty, on the entries of the other languages
    @param Index:           insert position, or -1 to append
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    LangStrings = self.OrderedStringList[Language]
    LangIndexByName = self.OrderedStringDict[Language]

    #
    # Known name: only refresh the stored value.
    #
    if Name in LangIndexByName:
        if Value is not None:
            Item = LangStrings[LangIndexByName[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(LangStrings)

    if Index != -1:
        #
        # NOTE(review): the indexes recorded in OrderedStringDict for the
        # entries after the insert position are not adjusted here.
        #
        LangStrings.insert(Index, StringDefClassObject(Name,
                                                       Value,
                                                       Referenced,
                                                       Token,
                                                       UseOtherLangDef))
        LangIndexByName[Name] = Index
        return

    LangStrings.append(StringDefClassObject(Name,
                                            Value,
                                            Referenced,
                                            Token,
                                            UseOtherLangDef))
    LangIndexByName[Name] = Token

    #
    # New STRING token will be added into all language string lists.
    # so that the unique STRING identifier is reserved for all languages in the package list.
    #
    if UseOtherLangDef != '':
        OtherLangDef = UseOtherLangDef
    else:
        OtherLangDef = Language
    for LangName in self.LanguageDef:
        if LangName[0] != Language:
            self.OrderedStringList[LangName[0]].append(StringDefClassObject(Name,
                                                                            '',
                                                                            Referenced,
                                                                            Token,
                                                                            OtherLangDef))
            self.OrderedStringDict[LangName[0]][Name] = len(self.OrderedStringList[LangName[0]]) - 1
942
943 #
944 # Set the string as referenced
945 #
def SetStringReferenced(self, Name):
    """Mark the string called Name as referenced.

    String tokens are added in the same order to every language string
    list, so only the entry in the list of the first defined language is
    flagged.  Unknown names are ignored.

    @param Name: string identifier to flag
    """
    FirstLang = self.LanguageDef[0][0]
    IndexByName = self.OrderedStringDict[FirstLang]
    if Name not in IndexByName:
        return

    self.OrderedStringList[FirstLang][IndexByName[Name]].Referenced = True
956
957 #
958 # Search the string in language definition by Name
959 #
def FindStringValue(self, Name, Lang):
    """Look up the string definition called Name in language Lang.

    @param Name: string identifier
    @param Lang: language code selecting the string list to search
    @return: the entry of self.OrderedStringList[Lang] recorded for Name,
             or None when Name is not defined for Lang
    """
    IndexByName = self.OrderedStringDict[Lang]
    if Name not in IndexByName:
        return None

    return self.OrderedStringList[Lang][IndexByName[Name]]
966
967 #
968 # Search the string in language definition by Token
969 #
def FindByToken(self, Token, Lang):
    """Return the first string definition of Lang whose Token equals Token.

    @param Token: token value to search for
    @param Lang:  language code selecting the string list to scan
    @return: the first matching entry in list order, or None if none matches
    """
    Matches = (Entry for Entry in self.OrderedStringList[Lang] if Entry.Token == Token)
    return next(Matches, None)
976
977 #
978 # Re-order strings and re-generate tokens
979 #
def ReToken(self):
    """Re-number the tokens of all languages, referenced strings first.

    The referenced state is read from the string list of the first defined
    language.  Strings referenced there get the small tokens 0..R-1 (and
    are flagged as referenced in every language); the strings not
    referenced there get the tokens that follow.  Within each group the
    list order is kept.  self.OrderedStringListByToken is rebuilt per
    language as a token -> string definition mapping.

    Nothing is done when no language is defined.
    """
    if len(self.LanguageDef) == 0:
        return None

    #
    # Retoken all language strings according to the status of string token in the first language string.
    #
    LangNames = [LangEntry[0] for LangEntry in self.LanguageDef]
    FirstLangItems = self.OrderedStringList[LangNames[0]]

    # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
    for LangName in LangNames:
        self.OrderedStringListByToken[LangName] = {}

    #
    # Use small token for all referred string token.
    #
    NextToken = 0
    for Position, FirstLangItem in enumerate(FirstLangItems):
        if not (FirstLangItem.Referenced == True):
            continue
        for LangName in LangNames:
            Entry = self.OrderedStringList[LangName][Position]
            Entry.Referenced = True
            Entry.Token = NextToken
            self.OrderedStringListByToken[LangName][Entry.Token] = Entry
        NextToken += 1

    #
    # Use big token for all unreferred string token: numbering simply
    # continues after the last referred one.
    #
    for Position, FirstLangItem in enumerate(FirstLangItems):
        if not (FirstLangItem.Referenced == False):
            continue
        for LangName in LangNames:
            Entry = self.OrderedStringList[LangName][Position]
            Entry.Token = NextToken
            self.OrderedStringListByToken[LangName][Entry.Token] = Entry
        NextToken += 1
1020
1021 #
1022 # Show the instance itself
1023 #
def ShowMe(self):
    """Print the language definitions and every string definition.

    Debug helper: writes self.LanguageDef, then for each key of
    self.OrderedStringList the key followed by str() of each of its members.
    """
    #
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    #
    print(self.LanguageDef)
    for Item in self.OrderedStringList:
        print(Item)
        for Member in self.OrderedStringList[Item]:
            print(str(Member))
1031
1032 #
1033 # Read content from '!include' UNI file
1034 #
def ReadIncludeUNIfile(self, FilaPath):
    """Read the lines of a UNI file pulled in through '!include'.

    The file is decoded as UTF-8 first; if that fails with a Unicode error
    UTF-16 is tried, and finally UTF-16 little endian.  Previously the last
    fallback could never run, because an error raised inside the first
    'except UnicodeError' handler is not caught by its sibling clauses.

    @param FilaPath: path of the file to read
    @return: list of decoded lines, line endings included
    """
    if not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)

    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            #
            # The with-statement closes the handle again, also when
            # decoding fails half way through.
            #
            with codecs.open(FilaPath, mode='rb', encoding=Encoding) as Stream:
                return Stream.readlines()
        except UnicodeError:
            #
            # Wrong guess - try the next encoding.
            #
            continue
        except (IOError, OSError):
            break

    #
    # Either the file could not be opened/read or none of the encodings
    # decoded it.
    # NOTE(review): presumably EdkLogger.Error raises here; if it returns,
    # this method returns None - confirm against the Logger module.
    #
    EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
1052