]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/UPT/Library/UniClassObject.py
UefiCpuPkg: Move AsmRelocateApLoopStart from Mpfuncs.nasm to AmdSev.nasm
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / UniClassObject.py
1 ## @file
2 # Collect all defined strings in multiple uni files.
3 #
4 # Copyright (c) 2014 - 2019, Intel Corporation. All rights reserved.<BR>
5 #
6 # SPDX-License-Identifier: BSD-2-Clause-Patent
7 #
8 """
9 Collect all defined strings in multiple uni files
10 """
11 from __future__ import print_function
12
13 ##
14 # Import Modules
15 #
16 import os, codecs, re
17 import shlex
18 from Logger import ToolError
19 from Logger import Log as EdkLogger
20 from Logger import StringTable as ST
21 from Library.StringUtils import GetLineNo
22 from Library.Misc import PathClass
23 from Library.Misc import GetCharIndexOutStr
24 from Library import DataType as DT
25 from Library.ParserValidate import CheckUTF16FileHeader
26
27 ##
28 # Static definitions
29 #
# Textual markers as they are written inside .uni source lines.
30 UNICODE_WIDE_CHAR = u'\\wide'
31 UNICODE_NARROW_CHAR = u'\\narrow'
32 UNICODE_NON_BREAKING_CHAR = u'\\nbr'
33 UNICODE_UNICODE_CR = '\r'
34 UNICODE_UNICODE_LF = '\n'
35
# Single code points that PreProcess substitutes for the \wide, \narrow and
# \nbr markers above.
36 NARROW_CHAR = u'\uFFF0'
37 WIDE_CHAR = u'\uFFF1'
38 NON_BREAKING_CHAR = u'\uFFF2'
# Control characters used when escape sequences in string values are expanded.
39 CR = u'\u000D'
40 LF = u'\u000A'
41 NULL = u'\u0000'
42 TAB = u'\t'
# A single backslash character.
43 BACK_SPLASH = u'\\'
44
# Maps three-letter language codes to two-letter codes. GetLanguageCode uses it
# in compatible mode to translate a three-letter code, and in native mode to
# reject three-letter codes that have a two-letter equivalent.
# NOTE(review): presumably ISO 639-2 -> ISO 639-1 -- confirm against the spec.
45 gLANG_CONV_TABLE = {'eng':'en', 'fra':'fr', \
46 'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am', \
47 'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az', \
48 'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm', \
49 'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce', \
50 'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv', \
51 'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee', \
52 'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa', \
53 'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga', \
54 'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha', \
55 'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu', \
56 'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig', \
57 'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu', \
58 'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj', \
59 'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr', \
60 'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la', \
61 'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt', \
62 'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk', \
63 'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my', \
64 'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl', \
65 'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc', \
66 'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi', \
67 'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn', \
68 'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd', \
69 'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm', \
70 'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st', \
71 'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg', \
72 'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to', \
73 'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug', \
74 'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo', \
75 'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za', \
76 'zho':'zh', 'zul':'zu'}
77
78 ## Convert a python unicode string to a normal string
79 #
80 # Convert a python unicode string to a normal string
81 # UniToStr(u'I am a string') is 'I am a string'
82 #
83 # @param Uni: The python unicode string
84 #
85 # @retval: The formatted normal string
86 #
def UniToStr(Uni):
    """Return the escaped text of Uni as repr() shows it, without quotes.

    repr() yields u'...' on Python 2 but '...' on Python 3. The optional
    one-letter prefix is therefore removed on its own instead of always
    cutting two leading characters, which dropped the first character of
    the string on Python 3.

    @param Uni: The python unicode string

    @retval: The formatted normal string
    """
    Text = repr(Uni)
    # Only treat a leading 'u' as a prefix when a quote character follows it.
    if Text[:1] == 'u' and Text[1:2] in ('"', "'"):
        Text = Text[1:]
    # Drop the surrounding quote characters.
    return Text[1:-1]
89
90 ## Convert a unicode string to a Hex list
91 #
92 # Convert a unicode string to a Hex list
93 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
94 #
95 # @param Uni: The python unicode string
96 #
97 # @retval List: The formatted hex list
98 #
def UniToHexList(Uni):
    """Convert a unicode string to a list of UTF-16 little-endian hex bytes.

    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].
    Each 16-bit unit contributes its low byte first, then its high byte.
    Code points above U+FFFF are split into a surrogate pair; the previous
    fixed-width slicing produced wrong bytes for them.

    @param Uni: The python unicode string

    @retval List: The formatted hex list
    """
    List = []
    for Item in Uni:
        CodePoint = ord(Item)
        if CodePoint > 0xFFFF:
            # Encode as a high/low surrogate pair.
            CodePoint -= 0x10000
            Units = (0xD800 | (CodePoint >> 10), 0xDC00 | (CodePoint & 0x3FF))
        else:
            Units = (CodePoint,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
106
107 ## Convert special unicode characters
108 #
109 # Convert special characters to (c), (r) and (tm).
110 #
111 # @param Uni: The python unicode string
112 #
113 # @retval NewUni: The converted unicode string
114 #
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII text.

    @param Uni: The python unicode string

    @retval: The string with the three symbols spelled as (c), (r) and (tm)
    """
    Substitutions = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, Spelling in Substitutions:
        Converted = Converted.replace(Symbol, Spelling)
    return Converted
123
124 ## GetLanguageCode1766
125 #
126 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
127 # RFC 1766 language codes supported in compatibility mode
128 # RFC 4646 language codes supported in native mode
129 #
130 # @param LangName: Language codes read from .UNI file
131 #
132 # @retval LangName: Valid language code in RFC 1766 format or None
133 #
def GetLanguageCode1766(LangName, File=None):
    """Return LangName unchanged.

    The function has always returned its argument on the first statement,
    so the RFC 4646 to RFC 1766 conversion that used to follow could never
    run. That unreachable code is removed; the observable behaviour is the
    same as before.

    @param LangName: Language code read from the .UNI file
    @param File: Unused; kept so existing callers keep working

    @retval LangName: The language code exactly as passed in
    """
    return LangName
170
171 ## GetLanguageCode
172 #
173 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
174 # RFC 1766 language codes supported in compatibility mode
175 # RFC 4646 language codes supported in native mode
176 #
177 # @param LangName: Language codes read from .UNI file
178 #
179 # @retval LangName: Valid language code in RFC 4646 format or None
180 #
def GetLanguageCode(LangName, IsCompatibleMode, File):
    """Validate a language code read from a .UNI file and return it.

    In compatible mode a three-letter alphabetic code is translated through
    gLANG_CONV_TABLE when it has an entry there and returned as-is otherwise;
    any other input is reported as an invalid RFC 1766 code. In native mode
    the code is returned unchanged when it has one of the accepted shapes
    ('x-...', two letters, three letters without a table entry, 'll-...',
    'lll-...'). Everything else is reported as an invalid RFC 4646 code.

    Empty and one-character names used to raise IndexError on the private-use
    test; they now reach the regular format error.

    @param LangName: Language code read from the .UNI file
    @param IsCompatibleMode: True to accept RFC 1766 three-letter codes
    @param File: Passed to EdkLogger.Error when the code is rejected

    @retval LangName: The accepted (possibly converted) language code
    """
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)

    # Private-use codes ('x-...' / 'X-...') are accepted verbatim. Slicing
    # keeps this safe for names shorter than two characters.
    if LangName[:2] in ('x-', 'X-'):
        return LangName

    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table is queried with the whole name, which is at
        # least six characters here, while every key has three; the lookup
        # therefore always yields None. Confirm whether LangName[0:3] was meant.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
215
216 ## FormatUniEntry
217 #
218 # Format an entry for a Uni file.
219 #
220 # @param StrTokenName StrTokenName.
221 # @param TokenValueList A list need to be processed.
222 # @param ContainerFile ContainerFile.
223 #
224 # @return formatted entry
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    """Build the text of one string entry for a Uni file.

    @param StrTokenName: Token name written at the start of the entry
    @param TokenValueList: Iterable of (Lang, Value) pairs; pairs with an
                           empty Value or with Lang equal to
                           DT.TAB_LANGUAGE_EN_X are skipped
    @param ContainerFile: Passed on to GetLanguageCode for error reporting

    @return: The formatted entry, or '' when no pair produced any output
    """
    # Column where '#language' starts; widened for long token names.
    NameWidth = 40
    if len(StrTokenName) > NameWidth:
        NameWidth = len(StrTokenName) + 1
    # Column where continuation lines of a multi-line value start.
    ValueColumn = NameWidth + len('#language en-US ')

    Entries = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        elif len(Lang.split('-')[0]) == 3:
            Lang = GetLanguageCode(Lang.split('-')[0], True, ContainerFile)
        else:
            Lang = GetLanguageCode(Lang, False, ContainerFile)

        # One quoted line per non-blank line of the value.
        Quoted = ''.join(
            ' ' * ValueColumn + '\"%s\\n\"' % Part.strip() + '\r\n'
            for Part in Value.split('\n') if Part.strip())
        # Drop the indent of the first line and the trailing '\n' escape of
        # the last one, then close the quote again.
        Quoted = Quoted[ValueColumn:Quoted.rfind('\\n')] + '\"' + '\r\n'
        Entries.append(' ' * NameWidth + '#language %-5s ' % Lang + Quoted)

    SubContent = ''.join(Entries)
    if SubContent:
        # The first line carries the token name in place of its indent.
        SubContent = StrTokenName + ' ' * (NameWidth - len(StrTokenName)) + SubContent[NameWidth:]
    return SubContent
253
254
255 ## StringDefClassObject
256 #
257 # A structure for language definition
258 #
class StringDefClassObject(object):
    """One string definition: its name, value, token and the hex byte lists."""

    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        # Name and its byte list ('' / [] when no name is given).
        self.StringName = '' if Name is None else Name
        self.StringNameByteList = [] if Name is None else UniToHexList(Name)
        # Value and its byte list; the byte list stays '' when no value is given.
        self.StringValue = '' if Value is None else Value
        self.StringValueByteList = '' if Value is None else UniToHexList(self.StringValue)
        self.Token = 0 if Token is None else Token
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        # Number of entries in the value byte list.
        self.Length = len(self.StringValueByteList)

    def __str__(self):
        Fields = (self.StringName, self.Token, self.Referenced,
                  self.StringValue, self.UseOtherLangDef)
        return ' '.join(repr(Field) for Field in Fields)

    def UpdateValue(self, Value = None):
        """Append Value to the stored value (CR LF separated) and refresh the byte list."""
        if Value is None:
            return
        if self.StringValue:
            self.StringValue = self.StringValue + '\r\n' + Value
        else:
            self.StringValue = Value
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
295
296 ## UniFileClassObject
297 #
298 # A structure for .uni file definition
299 #
300 class UniFileClassObject(object):
301 def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
302 self.FileList = FileList
303 self.File = None
304 self.IncFileList = FileList
305 self.UniFileHeader = ''
306 self.Token = 2
307 self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
308 self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
309 self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
310 self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
311 self.IsCompatibleMode = IsCompatibleMode
312 if not IncludePathList:
313 self.IncludePathList = []
314 else:
315 self.IncludePathList = IncludePathList
316 if len(self.FileList) > 0:
317 self.LoadUniFiles(FileList)
318
319 #
320 # Get Language definition
321 #
def GetLangDef(self, File, Line):
    """Register the language declared by a '#langdef' line.

    Line is split shell-style after removing a trailing '//' comment and must
    yield exactly three tokens: the keyword, the language code and the
    printable name. A malformed line is reported through EdkLogger.Error
    together with its line number in File.

    @param File: File object (its .Path is read to locate the faulty line)
    @param Line: The complete '#langdef' statement

    @retval True
    """
    Lang = shlex.split(Line.split(u"//")[0])
    if len(Lang) != 3:
        # Re-read the file only to find the line number for the message. The
        # encodings are tried one after another; the earlier chain of sibling
        # except clauses could never reach the later fallbacks, referenced an
        # unbound name in its last clause and left the files open.
        FileIn = None
        ReadError = None
        for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
            try:
                with codecs.open(File.Path, mode='rb', encoding=Encoding) as UniFile:
                    FileIn = UniFile.readlines()
                break
            except UnicodeError as Xstr:
                # Wrong encoding guess: try the next one.
                ReadError = Xstr
                FileIn = None
            except Exception as Xstr:
                # Anything else (e.g. the file cannot be opened) is final.
                ReadError = Xstr
                break
        if FileIn is None:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_OPEN_FAILURE,
                            "File read failure: %s" % str(ReadError),
                            ExtraData=File)
        LineNo = GetLineNo(FileIn, Line, False)
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        "Wrong language definition",
                        ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                        File = File, Line = LineNo)
    else:
        LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
        LangPrintName = Lang[2]

    IsLangInDef = any(Item[0] == LangName for Item in self.LanguageDef)
    if not IsLangInDef:
        self.LanguageDef.append([LangName, LangPrintName])

    #
    # Add language string
    #
    self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
    self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

    if not IsLangInDef:
        #
        # The found STRING tokens will be added into new language string list
        # so that the unique STRING identifier is reserved for all languages in the package list.
        #
        FirstLangName = self.LanguageDef[0][0]
        if LangName != FirstLangName:
            # Entries 0 and 1 are the two language-name strings added above.
            for Item in self.OrderedStringList[FirstLangName][2:]:
                if Item.UseOtherLangDef != '':
                    OtherLang = Item.UseOtherLangDef
                else:
                    OtherLang = FirstLangName
                self.OrderedStringList[LangName].append(StringDefClassObject(Item.StringName,
                                                                             '',
                                                                             Item.Referenced,
                                                                             Item.Token,
                                                                             OtherLang))
                self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
    return True
381
382 #
383 # Get String name and value
384 #
def GetStringObject(self, Item):
    """Add every '#language' value found in a '#string' statement.

    The second whitespace-separated token of Item is the string name; it must
    consist only of A-Z, 0-9 and '_'. Each part after a '#language ' marker
    supplies a language code (its first token) and a value (the text between
    its first and last double quote).

    @param Item: The complete '#string' statement
    """
    Name = Item.split()[1]
    # Check the string name is the upper character
    if Name != '':
        Matched = re.match('[A-Z0-9_]+', Name, re.UNICODE)
        if Matched is None or Matched.end(0) != len(Name):
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))

    # The part before the first '#language ' marker holds only the name.
    for Segment in Item.split(u'#language ')[1:]:
        Language = Segment.split()[0]
        FirstQuote = Segment.find(u'\"') + len(u'\"')
        LastQuote = Segment.rfind(u'\"')
        Value = Segment[FirstQuote:LastQuote]
        Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
408
409 #
410 # Get include file list and load them
411 #
def GetIncludeFile(self, Item, Dir = None):
    """Extract the quoted file name of an '!include' statement and load it.

    The name is the text after '!include ' up to the next space, or up to the
    end of the statement when no space follows. Previously a missing space
    made the slice end at -1, which cut the last character off the name, and
    the space was searched from a fixed offset instead of from the name.

    NOTE(review): the extracted name is a plain string, while LoadUniFile
    elsewhere receives file objects -- confirm that this is intended.

    @param Item: The '!include "file"' statement
    @param Dir: Unused; kept for interface compatibility
    """
    Keyword = u'!include '
    Start = Item.find(Keyword) + len(Keyword)
    End = Item.find(u' ', Start)
    if End < 0:
        End = len(Item)
    # Remove surrounding whitespace, then the two quote characters.
    FileName = Item[Start:End].strip()[1:-1]
    self.LoadUniFile(FileName)
417
418 #
419 # Pre-process before parse .uni file
420 #
421 def PreProcess(self, File, IsIncludeFile=False):
# Read the .uni file at File.Path, collect its leading header comment into
# self.UniFileHeader (only while that attribute is still empty), validate the
# layout of the '#langdef' / '#string' / '#language' / quoted-value lines and
# return NewLines, in which the keyword parts and the quoted values of a
# statement are separate list elements.
#
# @param File: Object exposing a .Path attribute (the file to read)
# @param IsIncludeFile: When true, a file without any content lines is
#                       accepted, and a string name is only appended when it
#                       is not already in NewLines
#
# @retval NewLines: The list of normalized lines
#
# Every problem is reported through EdkLogger.Error.
422 if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
423 EdkLogger.Error("Unicode File Parser",
424 ToolError.FILE_NOT_FOUND,
425 ExtraData=File.Path)
426
427 #
428 # Check file header of the Uni file
429 #
430 # if not CheckUTF16FileHeader(File.Path):
431 # EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
432 # ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)
433
# Read the file as UTF-8 first and fall back to UTF-16 when decoding fails.
# NOTE(review): a try statement selects the first matching except clause, so
# the second 'except UnicodeError' can never be chosen, and an error raised by
# the utf_16 read inside the first handler is not caught by the clauses after
# it. The opened files are also never closed explicitly.
434 try:
435 FileIn = codecs.open(File.Path, mode='rb', encoding='utf_8').readlines()
436 except UnicodeError as Xstr:
437 FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines()
438 except UnicodeError:
439 FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines()
440 except:
441 EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)
442
443
444 #
445 # get the file header
446 #
447 Lines = []
448 HeaderStart = False
449 HeaderEnd = False
450 if not self.UniFileHeader:
451 FirstGenHeader = True
452 else:
453 FirstGenHeader = False
# First pass: the header starts at a comment line that contains
# DT.TAB_HEADER_COMMENT and ends at the first non-blank line that is not a
# comment; the comment lines in between are appended to self.UniFileHeader
# when FirstGenHeader is set.
454 for Line in FileIn:
455 Line = Line.strip()
456 if Line == u'':
457 continue
458 if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
459 and not HeaderEnd and not HeaderStart:
460 HeaderStart = True
461 if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
462 HeaderEnd = True
463 if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
464 self.UniFileHeader += Line + '\r\n'
465 continue
466
467 #
468 # Use unique identifier
469 #
470 FindFlag = -1
471 LineCount = 0
472 MultiLineFeedExits = False
473 #
474 # 0: initial value
475 # 1: single String entry exist
476 # 2: line feed exist under the some single String entry
477 #
478 StringEntryExistsFlag = 0
# Second pass: validate and normalize every line. The loop variable is
# overwritten from FileIn[LineCount] right away, so LineCount is the effective
# cursor; it is also moved back by one when two lines are merged below.
479 for Line in FileIn:
480 Line = FileIn[LineCount]
481 LineCount += 1
482 Line = Line.strip()
483 #
484 # Ignore comment line and empty line
485 #
486 if Line == u'' or Line.startswith(u'//'):
487 #
488 # Change the single line String entry flag status
489 #
490 if StringEntryExistsFlag == 1:
491 StringEntryExistsFlag = 2
492 #
493 # If the '#string' line and the '#language' line are not in the same line,
494 # there should be only one line feed character between them
495 #
496 if MultiLineFeedExits:
497 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
498 continue
499
500 MultiLineFeedExits = False
501 #
502 # Process comment embedded in string define lines
503 #
504 FindFlag = Line.find(u'//')
505 if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
506 Line = Line.replace(Line[FindFlag:], u' ')
# NOTE(review): LineCount was already incremented, so FileIn[LineCount] is the
# line after the current one; when the current line is the last line of the
# file this index looks out of range -- confirm.
507 if FileIn[LineCount].strip().startswith('#language'):
508 Line = Line + FileIn[LineCount]
509 FileIn[LineCount-1] = Line
510 FileIn[LineCount] = '\r\n'
511 LineCount -= 1
512 for Index in range (LineCount + 1, len (FileIn) - 1):
513 if (Index == len(FileIn) -1):
514 FileIn[Index] = '\r\n'
515 else:
516 FileIn[Index] = FileIn[Index + 1]
517 continue
518 CommIndex = GetCharIndexOutStr(u'/', Line)
519 if CommIndex > -1:
520 if (len(Line) - 1) > CommIndex:
521 if Line[CommIndex+1] == u'/':
522 Line = Line[:CommIndex].strip()
523 else:
524 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
525 else:
526 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
527
528 Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
529 Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
530 Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)
531
# An escaped backslash is parked as the placeholder U+0006 while the other
# escape sequences are expanded; the last replace turns it into one backslash.
532 Line = Line.replace(u'\\\\', u'\u0006')
533 Line = Line.replace(u'\\r\\n', CR + LF)
534 Line = Line.replace(u'\\n', CR + LF)
535 Line = Line.replace(u'\\r', CR)
536 Line = Line.replace(u'\\t', u'\t')
# NOTE(review): both literals on the next line evaluate to a lone '"', so this
# replace appears to change nothing.
537 Line = Line.replace(u'''\"''', u'''"''')
538 Line = Line.replace(u'\t', u' ')
539 Line = Line.replace(u'\u0006', u'\\')
540
541 #
542 # Check if single line has correct '"'
543 #
544 if Line.startswith(u'#string') and Line.find(u'#language') > -1 and Line.find('"') > Line.find(u'#language'):
545 if not Line.endswith('"'):
546 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
547 ExtraData='''The line %s misses '"' at the end of it in file %s'''
548 % (LineCount, File.Path))
549
550 #
551 # Between Name entry and Language entry can not contain line feed
552 #
553 if Line.startswith(u'#string') and Line.find(u'#language') == -1:
554 MultiLineFeedExits = True
555
556 if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
557 MultiLineFeedExits = True
558
559 #
560 # Between Language entry and String entry can not contain line feed
561 #
562 if Line.startswith(u'#language') and len(Line.split()) == 2:
563 MultiLineFeedExits = True
564
565 #
566 # Check the situation that there only has one '"' for the language entry
567 #
568 if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.count(u'"') == 1:
569 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
570 ExtraData='''The line %s misses '"' at the end of it in file %s'''
571 % (LineCount, File.Path))
572
573 #
574 # Check the situation that there has more than 2 '"' for the language entry
575 #
576 if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.replace(u'\\"', '').count(u'"') > 2:
577 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
578 ExtraData='''The line %s has more than 2 '"' for language entry in file %s'''
579 % (LineCount, File.Path))
580
581 #
582 # Between two String entry, can not contain line feed
583 #
584 if Line.startswith(u'"'):
585 if StringEntryExistsFlag == 2:
586 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
587 Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)
588
589 StringEntryExistsFlag = 1
590 if not Line.endswith('"'):
591 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
592 ExtraData='''The line %s misses '"' at the end of it in file %s'''
593 % (LineCount, File.Path))
594
595 #
596 # Check the situation that there has more than 2 '"' for the language entry
597 #
598 if Line.strip() and Line.replace(u'\\"', '').count(u'"') > 2:
599 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
600 ExtraData='''The line %s has more than 2 '"' for language entry in file %s'''
601 % (LineCount, File.Path))
602
603 elif Line.startswith(u'#language'):
604 if StringEntryExistsFlag == 2:
605 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
606 Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
607 StringEntryExistsFlag = 0
608 else:
609 StringEntryExistsFlag = 0
610
611 Lines.append(Line)
612
613 #
614 # Convert string def format as below
615 #
616 # #string MY_STRING_1
617 # #language eng
618 # "My first English string line 1"
619 # "My first English string line 2"
620 # #string MY_STRING_1
621 # #language spa
622 # "Mi segunda secuencia 1"
623 # "Mi segunda secuencia 2"
624 #
625
626 if not IsIncludeFile and not Lines:
627 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
628 Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
629 ExtraData=File.Path)
630
# Third pass: split the validated lines into NewLines and record every string
# name in ExistStrNameList, which the order check further down relies on.
631 NewLines = []
632 StrName = u''
633 ExistStrNameList = []
634 for Line in Lines:
635 if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
636 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
637 Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
638 ExtraData=File.Path)
639
640 if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
641 StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
642 if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
643 StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
644 (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
645 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
646 Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
647 ExtraData=File.Path)
648
649 if Line.count(u'#language') > 1:
650 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
651 Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
652 ExtraData=File.Path)
653
654 if Line.startswith(u'//'):
655 continue
656 elif Line.startswith(u'#langdef'):
657 if len(Line.split()) == 2:
658 NewLines.append(Line)
659 continue
660 elif len(Line.split()) > 2 and Line.find(u'"') > 0:
661 NewLines.append(Line[:Line.find(u'"')].strip())
662 NewLines.append(Line[Line.find(u'"'):])
663 else:
664 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
665 elif Line.startswith(u'#string'):
666 if len(Line.split()) == 2:
667 StrName = Line
668 if StrName:
669 if StrName.split()[1] not in ExistStrNameList:
670 ExistStrNameList.append(StrName.split()[1].strip())
671 elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
672 DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
673 DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
674 DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
675 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
676 Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
677 ExtraData=File.Path)
678 continue
679 elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
680 if Line[Line.find(u'#language')-1] != ' ' or \
681 Line[Line.find(u'#language')+len(u'#language')] != u' ':
682 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
683
684 if Line.find(u'"') > 0:
685 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
686
687 StrName = Line.split()[0] + u' ' + Line.split()[1]
688 if StrName:
689 if StrName.split()[1] not in ExistStrNameList:
690 ExistStrNameList.append(StrName.split()[1].strip())
691 elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
692 DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
693 DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
694 DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
695 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
696 Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
697 ExtraData=File.Path)
698 if IsIncludeFile:
699 if StrName not in NewLines:
700 NewLines.append((Line[:Line.find(u'#language')]).strip())
701 else:
702 NewLines.append((Line[:Line.find(u'#language')]).strip())
703 NewLines.append((Line[Line.find(u'#language'):]).strip())
704 elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
705 if Line[Line.find(u'#language')-1] != u' ' or \
706 Line[Line.find(u'#language')+len(u'#language')] != u' ':
707 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
708
709 if Line[Line.find(u'"')-1] != u' ':
710 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
711
712 StrName = Line.split()[0] + u' ' + Line.split()[1]
713 if StrName:
714 if StrName.split()[1] not in ExistStrNameList:
715 ExistStrNameList.append(StrName.split()[1].strip())
716 elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
717 DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
718 DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
719 DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
720 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
721 Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
722 ExtraData=File.Path)
723 if IsIncludeFile:
724 if StrName not in NewLines:
725 NewLines.append((Line[:Line.find(u'#language')]).strip())
726 else:
727 NewLines.append((Line[:Line.find(u'#language')]).strip())
728 NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
729 NewLines.append((Line[Line.find(u'"'):]).strip())
730 else:
731 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
732 elif Line.startswith(u'#language'):
733 if len(Line.split()) == 2:
734 if IsIncludeFile:
735 if StrName not in NewLines:
736 NewLines.append(StrName)
737 else:
738 NewLines.append(StrName)
739 NewLines.append(Line)
740 elif len(Line.split()) > 2 and Line.find(u'"') > 0:
741 if IsIncludeFile:
742 if StrName not in NewLines:
743 NewLines.append(StrName)
744 else:
745 NewLines.append(StrName)
746 NewLines.append((Line[:Line.find(u'"')]).strip())
747 NewLines.append((Line[Line.find(u'"'):]).strip())
748 else:
749 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
750 elif Line.startswith(u'"'):
751 if u'#string' in Line or u'#language' in Line:
752 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
753 NewLines.append(Line)
754 else:
755 print(Line)
756 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
757
758 if StrName and not StrName.split()[1].startswith(u'STR_'):
759 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
760 Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
761 ExtraData=File.Path)
762
763 if StrName and not NewLines:
764 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
765 Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
766 ExtraData=File.Path)
767
768 #
769 # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
770 # should be Abstract, Description, BinaryAbstract, BinaryDescription
771 AbstractPosition = -1
772 DescriptionPosition = -1
773 BinaryAbstractPosition = -1
774 BinaryDescriptionPosition = -1
775 for StrName in ExistStrNameList:
776 if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
777 if 'BINARY' in StrName:
778 BinaryAbstractPosition = ExistStrNameList.index(StrName)
779 else:
780 AbstractPosition = ExistStrNameList.index(StrName)
781 if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
782 if 'BINARY' in StrName:
783 BinaryDescriptionPosition = ExistStrNameList.index(StrName)
784 else:
785 DescriptionPosition = ExistStrNameList.index(StrName)
786
# A position of -1 marks an entry that was not seen; sorting each pair puts
# the smaller position into Min / BinaryMin.
787 OrderList = sorted([AbstractPosition, DescriptionPosition])
788 BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
789 Min = OrderList[0]
790 Max = OrderList[1]
791 BinaryMin = BinaryOrderList[0]
792 BinaryMax = BinaryOrderList[1]
793 if BinaryDescriptionPosition > -1:
794 if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
795 BinaryMax > Max):
796 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
797 Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
798 ExtraData=File.Path)
799 elif BinaryAbstractPosition > -1:
800 if not(BinaryAbstractPosition > Max):
801 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
802 Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
803 ExtraData=File.Path)
804
805 if DescriptionPosition > -1:
806 if not(DescriptionPosition == Max and AbstractPosition == Min and \
807 DescriptionPosition > AbstractPosition):
808 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
809 Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
810 ExtraData=File.Path)
811
# self.UniFileHeader is an instance attribute, so a header collected by an
# earlier call also satisfies this check.
812 if not self.UniFileHeader:
813 EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
814 Message = ST.ERR_NO_SOURCE_HEADER,
815 ExtraData=File.Path)
816
817 return NewLines
818
819 #
820 # Load a .uni file
821 #
def LoadUniFile(self, File = None):
    """Pre-process one .uni file and register its languages and strings.

    The lines returned by PreProcess are scanned for '#langdef' statements
    (handed to GetLangDef together with the following line) and for
    '#string' / '#language' / quoted-value groups (handed to
    AddStringToList).

    Fixes compared with the previous version: the missing-file error no
    longer dereferences File, the look-ahead lines are always bound, and a
    value line consisting of a single quote character no longer raises
    IndexError.

    @param File: File object to load; None is reported as a parser error
    """
    if File is None:
        # There is no File object here, hence no path to attach.
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given')

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)

    # Look-ahead lines. Near the end of the list they keep the value from the
    # previous iteration, exactly as before; starting from u'' only prevents
    # an UnboundLocalError when fewer than three lines exist.
    SecondLine = u''
    ThirdLine = u''
    LineTotal = len(Lines)

    #
    # Get Unicode Information
    #
    for IndexI, Line in enumerate(Lines):
        if (IndexI + 1) < LineTotal:
            SecondLine = Lines[IndexI + 1]
        if (IndexI + 2) < LineTotal:
            ThirdLine = Lines[IndexI + 2]

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        IsStringEntry = \
            Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
            SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
            ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0
        if not IsStringEntry:
            continue

        if Line.find('"') > 0 or SecondLine.find('"') > 0:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                            ExtraData=File.Path)

        Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
        Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')

        # Concatenate the quoted lines that follow. The first line that is not
        # a quoted value ends the entry. (The old code also assigned the outer
        # loop index here, which has no effect on a range-driven loop.)
        Value = ''
        for Candidate in Lines[IndexI + 2:]:
            Stripped = Candidate.strip()
            IsValueLine = \
                Candidate.find(u'#string ') < 0 and Candidate.find(u'#language ') < 0 and \
                Stripped.startswith(u'"') and Stripped.endswith(u'"')
            if not IsValueLine:
                break
            Text = Stripped[1:-1]
            if len(Candidate) >= 2 and Candidate[-2] == ' ':
                # A blank before the closing quote joins this line with the
                # next one using a single space instead of CR LF.
                if Text.strip():
                    Value = Value + Text.rstrip() + ' '
                else:
                    Value = Value + Text
            else:
                Value = Value + Text + '\r\n'

        if Value.endswith('\r\n'):
            Value = Value[: Value.rfind('\r\n')]
        Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
900
901 #
902 # Load multiple .uni files
903 #
def LoadUniFiles(self, FileList):
    """Load every entry of FileList whose path names a .uni file.

    An entry qualifies when its Path attribute, stripped of surrounding
    whitespace, ends with '.uni', '.UNI' or '.Uni'; qualifying entries are
    passed to self.LoadUniFile in list order.

    @param FileList: sized collection of objects with a Path attribute
    """
    if len(FileList) == 0:
        return

    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for Candidate in FileList:
        if Candidate.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(Candidate)
910
911 #
912 # Add a string to list
913 #
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Add the string Name to the list of Language, or update it if present.

    If Name already exists for Language, its value is updated (unless
    Value is None) and its UseOtherLangDef is cleared; nothing is added.

    Otherwise a new StringDefClassObject is created.  With Index == -1 it
    is appended, and a placeholder with an empty value is appended to the
    list of every other language in self.LanguageDef.  With any other
    Index it is inserted at that position of the Language list only.

    @param Name:            string identifier
    @param Language:        language key of the list to modify
    @param Value:           string value; None leaves an existing value as is
    @param Token:           accepted for compatibility; a new entry always
                            gets the current length of the Language list
    @param Referenced:      referenced flag stored in new entries
    @param UseOtherLangDef: stored in the new entry; when not '', it is
                            also stored in the placeholders instead of
                            Language
    @param Index:           -1 to append, otherwise the insert position
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    LangStrings = self.OrderedStringList[Language]
    LangIndexByName = self.OrderedStringDict[Language]

    #
    # Known name: refresh the existing entry and stop.
    #
    if Name in LangIndexByName:
        if Value is not None:
            Item = LangStrings[LangIndexByName[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(LangStrings)

    if Index != -1:
        # NOTE(review): entries already at or after Index shift by one, but
        # their positions recorded in OrderedStringDict are not adjusted
        # here - confirm callers only insert where that is harmless.
        LangStrings.insert(Index, StringDefClassObject(Name,
                                                       Value,
                                                       Referenced,
                                                       Token,
                                                       UseOtherLangDef))
        LangIndexByName[Name] = Index
        return

    LangStrings.append(StringDefClassObject(Name,
                                            Value,
                                            Referenced,
                                            Token,
                                            UseOtherLangDef))
    LangIndexByName[Name] = Token

    #
    # New STRING token will be added into all language string lists.
    # so that the unique STRING identifier is reserved for all languages in the package list.
    #
    if UseOtherLangDef != '':
        OtherLangDef = UseOtherLangDef
    else:
        OtherLangDef = Language
    for LangDefItem in self.LanguageDef:
        OtherLang = LangDefItem[0]
        if OtherLang != Language:
            OtherStrings = self.OrderedStringList[OtherLang]
            OtherStrings.append(StringDefClassObject(Name,
                                                     '',
                                                     Referenced,
                                                     Token,
                                                     OtherLangDef))
            self.OrderedStringDict[OtherLang][Name] = len(OtherStrings) - 1
964
965 #
966 # Set the string as referenced
967 #
def SetStringReferenced(self, Name):
    """Mark the string Name as referenced.

    String tokens are added in the same order in all language string
    lists, so only the entry in the first language of self.LanguageDef is
    updated.  Nothing happens when no language is defined yet or when Name
    is unknown in that first language.

    @param Name: string identifier to mark
    """
    if not self.LanguageDef:
        # No language, hence no string list to update (ReToken applies
        # the same guard).
        return

    Lang = self.LanguageDef[0][0]
    IndexByName = self.OrderedStringDict[Lang]
    if Name in IndexByName:
        self.OrderedStringList[Lang][IndexByName[Name]].Referenced = True
978
979 #
980 # Search the string in language definition by Name
981 #
def FindStringValue(self, Name, Lang):
    """Look up the string entry called Name in the language Lang.

    @param Name: string identifier
    @param Lang: language key into OrderedStringDict / OrderedStringList

    @retval the entry stored for Name, or None when Lang has no such name
    """
    IndexByName = self.OrderedStringDict[Lang]
    if Name not in IndexByName:
        return None

    return self.OrderedStringList[Lang][IndexByName[Name]]
988
989 #
990 # Search the string in language definition by Token
991 #
def FindByToken(self, Token, Lang):
    """Return the first entry of language Lang whose Token equals Token.

    @param Token: token value to search for
    @param Lang:  language key into OrderedStringList

    @retval the first matching entry in list order, or None if none matches
    """
    Matches = (Entry for Entry in self.OrderedStringList[Lang] if Entry.Token == Token)
    return next(Matches, None)
998
999 #
1000 # Re-order strings and re-generate tokens
1001 #
def ReToken(self):
    """Re-number the tokens of all languages and rebuild the by-token index.

    The Referenced flags of the first language in self.LanguageDef decide
    the order: entries whose flag equals True get the small tokens
    0..R-1 (and are marked referenced in every language), entries whose
    flag equals False get the following tokens.  Within each group the
    list order is kept.  The entry at the same list position in every
    language receives the same token, and self.OrderedStringListByToken
    is refilled to map token -> entry per language.

    Does nothing when no language is defined.
    """
    if len(self.LanguageDef) == 0:
        return None

    LangNames = [LangDefItem[0] for LangDefItem in self.LanguageDef]

    # Start from an empty token index for every defined language.
    for LangName in LangNames:
        self.OrderedStringListByToken[LangName] = {}

    FirstLangStrings = self.OrderedStringList[LangNames[0]]

    #
    # First pass hands out the small tokens to referenced strings, second
    # pass continues the numbering for the unreferenced ones.
    #
    NextToken = 0
    for WantReferenced in (True, False):
        for Position, FirstLangItem in enumerate(FirstLangStrings):
            if FirstLangItem.Referenced == WantReferenced:
                for LangName in LangNames:
                    Entry = self.OrderedStringList[LangName][Position]
                    if WantReferenced:
                        # Propagate the flag of the first language.
                        Entry.Referenced = True
                    Entry.Token = NextToken
                    self.OrderedStringListByToken[LangName][Entry.Token] = Entry
                NextToken = NextToken + 1
1042
1043 #
1044 # Show the instance itself
1045 #
def ShowMe(self):
    """Print the instance for debugging.

    Output is self.LanguageDef, followed for every key of
    self.OrderedStringList by the key itself and the str() of each entry
    stored under it.
    """
    print(self.LanguageDef)
    StringLists = self.OrderedStringList
    for LangName in StringLists:
        print(LangName)
        for Entry in StringLists[LangName]:
            print(str(Entry))
1053
1054 #
1055 # Read content from '!include' UNI file
1056 #
def ReadIncludeUNIfile(self, FilaPath):
    """Read the lines of a UNI file named by an '!include' statement.

    The file is decoded as UTF-8 first, then as UTF-16, then as UTF-16-LE;
    the first encoding that decodes the whole file wins.  The 'utf_16'
    codec reader needs a byte order mark, so the 'utf_16_le' attempt is
    what makes BOM-less little-endian files readable.

    @param FilaPath: path of the file to read

    @retval list of decoded lines, line endings kept

    Reports ToolError.FILE_NOT_FOUND through EdkLogger.Error when FilaPath
    is not an existing regular file, and ToolError.FILE_OPEN_FAILURE when
    the file cannot be opened or none of the encodings can decode it.
    """
    if not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)

    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            # The with-block closes the handle whether or not decoding works.
            with codecs.open(FilaPath, mode='rb', encoding=Encoding) as Stream:
                return Stream.readlines()
        except UnicodeError:
            # Wrong guess - fall through to the next candidate encoding.
            continue
        except (IOError, OSError):
            # Another encoding cannot fix an I/O failure.
            break

    EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
    # Only reached if EdkLogger.Error returns instead of raising.
    return []
1074