]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/UPT/Library/UniClassObject.py
BaseTools: Use absolute import in UPT
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / UniClassObject.py
1 ## @file
2 # Collect all defined strings in multiple uni files.
3 #
4 # Copyright (c) 2014 - 2018, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14 """
15 Collect all defined strings in multiple uni files
16 """
17 from __future__ import print_function
18
19 ##
20 # Import Modules
21 #
22 import os, codecs, re
23 import distutils.util
24 from Logger import ToolError
25 from Logger import Log as EdkLogger
26 from Logger import StringTable as ST
27 from Library.StringUtils import GetLineNo
28 from Library.Misc import PathClass
29 from Library.Misc import GetCharIndexOutStr
30 from Library import DataType as DT
31 from Library.ParserValidate import CheckUTF16FileHeader
32
33 ##
34 # Static definitions
35 #
36 UNICODE_WIDE_CHAR = u'\\wide'
37 UNICODE_NARROW_CHAR = u'\\narrow'
38 UNICODE_NON_BREAKING_CHAR = u'\\nbr'
39 UNICODE_UNICODE_CR = '\r'
40 UNICODE_UNICODE_LF = '\n'
41
42 NARROW_CHAR = u'\uFFF0'
43 WIDE_CHAR = u'\uFFF1'
44 NON_BREAKING_CHAR = u'\uFFF2'
45 CR = u'\u000D'
46 LF = u'\u000A'
47 NULL = u'\u0000'
48 TAB = u'\t'
49 BACK_SPLASH = u'\\'
50
51 gLANG_CONV_TABLE = {'eng':'en', 'fra':'fr', \
52 'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am', \
53 'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az', \
54 'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm', \
55 'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce', \
56 'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv', \
57 'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee', \
58 'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa', \
59 'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga', \
60 'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha', \
61 'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu', \
62 'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig', \
63 'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu', \
64 'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj', \
65 'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr', \
66 'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la', \
67 'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt', \
68 'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk', \
69 'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my', \
70 'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl', \
71 'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc', \
72 'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi', \
73 'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn', \
74 'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd', \
75 'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm', \
76 'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st', \
77 'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg', \
78 'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to', \
79 'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug', \
80 'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo', \
81 'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za', \
82 'zho':'zh', 'zul':'zu'}
83
84 ## Convert a python unicode string to a normal string
85 #
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
88 #
89 # @param Uni: The python unicode string
90 #
91 # @retval: The formatted normal string
92 #
def UniToStr(Uni):
    """Return the escaped text of a unicode string without quotes or prefix.

    The result is the body of ``repr(Uni)``: the surrounding quote characters
    are dropped, and so is the ``u`` prefix that Python 2 adds for unicode
    objects. Python 3 emits no such prefix, so it must only be stripped when
    it is actually present.

    @param Uni: The python unicode string

    @retval: The formatted normal string
    """
    Text = repr(Uni)
    # Python 2 renders u'...'; Python 3 renders '...'.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    # Drop the opening and closing quote characters.
    return Text[1:-1]
95
96 ## Convert a unicode string to a Hex list
97 #
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
100 #
101 # @param Uni: The python unicode string
102 #
103 # @retval List: The formatted hex list
104 #
def UniToHexList(Uni):
    """Convert a unicode string to a list of little-endian UTF-16 byte strings.

    Each 16-bit code unit contributes two entries, low byte first, formatted
    as '0xNN'. UniToHexList('ABC') is
    ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].

    Code points above 0xFFFF are split into a surrogate pair so that every
    code unit still produces exactly two bytes.

    @param Uni: The python unicode string

    @retval List: The formatted hex list
    """
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            # Encode a supplementary-plane character as high + low surrogate.
            Code -= 0x10000
            Units = (0xD800 | (Code >> 10), 0xDC00 | (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List
112
113 ## Convert special unicode characters
114 #
115 # Convert special characters to (c), (r) and (tm).
116 #
117 # @param Uni: The python unicode string
118 #
119 # @retval NewUni: The converted unicode string
120 #
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII text.

    U+00A9 becomes '(c)', U+00AE becomes '(r)' and U+2122 becomes '(tm)'.

    @param Uni: The python unicode string

    @retval NewUni: The converted unicode string
    """
    Replacements = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    Converted = Uni
    for Symbol, AsciiForm in Replacements:
        Converted = Converted.replace(Symbol, AsciiForm)
    return Converted
127
128 ## GetLanguageCode1766
129 #
130 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
131 # RFC 1766 language codes supported in compatibility mode
132 # RFC 4646 language codes supported in native mode
133 #
134 # @param LangName: Language codes read from .UNI file
135 #
136 # @retval LangName: Valid language code in RFC 1766 format or None
137 #
def GetLanguageCode1766(LangName, File=None):
    """Return the language code unchanged.

    This function returned LangName as its very first statement; the
    RFC 4646 -> RFC 1766 conversion and validation code that followed was
    unreachable and has been removed. The behaviour seen by callers is
    unchanged: no validation, no conversion, no error reporting.

    @param LangName: Language code read from .UNI file
    @param File:     Unused; kept so existing call sites remain valid

    @retval LangName: The language code exactly as passed in
    """
    return LangName
174
175 ## GetLanguageCode
176 #
177 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
178 # RFC 1766 language codes supported in compatibility mode
179 # RFC 4646 language codes supported in native mode
180 #
181 # @param LangName: Language codes read from .UNI file
182 #
183 # @retval LangName: Valid language code in RFC 4646 format or None
184 #
def GetLanguageCode(LangName, IsCompatibleMode, File):
    """Validate a language code and convert RFC 1766 codes where applicable.

    In compatible mode only three-letter alphabetic codes are accepted; a
    code found in gLANG_CONV_TABLE is returned as its two-letter equivalent,
    any other three-letter code is returned unchanged.

    In native mode the code is returned unchanged when it has one of the
    accepted shapes: an 'x-'/'X-' private-use prefix, two letters, three
    letters with no entry in gLANG_CONV_TABLE, or a two- or three-letter
    primary tag followed by '-'.

    Anything else is reported through EdkLogger.Error with FORMAT_INVALID.

    @param LangName:         Language code read from .UNI file
    @param IsCompatibleMode: True to accept RFC 1766 three-letter codes only
    @param File:             File reference passed on to the error report

    @retval LangName: Valid language code in RFC 4646 format
    """
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)
    # Slicing instead of indexing: an empty or one-character code must reach
    # the error report below rather than raise IndexError here.
    if LangName[:2] in ('X-', 'x-'):
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table lookup uses the whole code, not just the
        # three-letter prefix, so it is always None here - confirm intent.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
219
220 ## FormatUniEntry
221 #
222 # Format the entry in a Uni file.
223 #
224 # @param StrTokenName StrTokenName.
225 # @param TokenValueList A list need to be processed.
226 # @param ContainerFile ContainerFile.
227 #
228 # @return formated entry
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    """Build the text of one string token entry for a Uni file.

    The token name is padded to a fixed column (40, or one past the name
    when the name is longer than 40 characters), followed by one
    '#language' clause per (Lang, Value) pair. Each non-blank line of a
    value becomes its own quoted line; all but the last keep a trailing
    '\\n' escape inside the quotes. Pairs with an empty value or with the
    language DT.TAB_LANGUAGE_EN_X are skipped.

    @param StrTokenName    Name of the string token.
    @param TokenValueList  List of (Lang, Value) pairs to format.
    @param ContainerFile   File reference handed to GetLanguageCode.

    @return the formatted entry, or '' when no pair produced output
    """
    NameLength = len(StrTokenName)
    NameColumn = NameLength + 1 if NameLength > 40 else 40
    ValueColumn = NameColumn + len('#language en-US ')
    ValueIndent = ' ' * ValueColumn

    Clauses = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        else:
            PrimaryTag = Lang.split('-')[0]
            if len(PrimaryTag) == 3:
                Lang = GetLanguageCode(PrimaryTag, True, ContainerFile)
            else:
                Lang = GetLanguageCode(Lang, False, ContainerFile)

        QuotedLines = ''.join(
            ValueIndent + '\"%s\\n\"' % Piece.strip() + '\r\n'
            for Piece in Value.split('\n')
            if Piece.strip()
        )
        # Drop the indent of the first line and the '\n' escape of the last
        # line, then close the final quote again.
        QuotedLines = QuotedLines[ValueColumn:QuotedLines.rfind('\\n')] + '\"' + '\r\n'
        Clauses.append(' ' * NameColumn + '#language %-5s ' % Lang + QuotedLines)

    SubContent = ''.join(Clauses)
    if SubContent:
        # Replace the padding in front of the first clause with the token name.
        SubContent = StrTokenName + ' ' * (NameColumn - NameLength) + SubContent[NameColumn:]
    return SubContent
257
258
259 ## StringDefClassObject
260 #
261 # A structure for language definition
262 #
class StringDefClassObject(object):
    """One string definition: name, value, token number and byte encodings.

    StringNameByteList and StringValueByteList hold the output of
    UniToHexList for the name and the value; Length is the number of
    entries in StringValueByteList.
    """

    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        """Initialise the definition; arguments left as None keep the defaults.

        @param Name:            String token name
        @param Value:           String value
        @param Referenced:      Stored as-is in self.Referenced
        @param Token:           Token number, 0 when not given
        @param UseOtherLangDef: Stored as-is in self.UseOtherLangDef
        """
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        # A list like StringNameByteList; it was previously initialised to ''
        # although every later assignment stores a list.
        self.StringValueByteList = []
        self.Token = 0
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        self.Length = 0

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
        if Token is not None:
            self.Token = Token

    def __str__(self):
        """Return name, token, referenced flag, value and fallback language."""
        return repr(self.StringName) + ' ' + \
               repr(self.Token) + ' ' + \
               repr(self.Referenced) + ' ' + \
               repr(self.StringValue) + ' ' + \
               repr(self.UseOtherLangDef)

    def UpdateValue(self, Value = None):
        """Append Value to the stored string and refresh the byte list.

        A non-empty existing value is joined to the new one with '\\r\\n'.
        Passing None leaves the object untouched.
        """
        if Value is not None:
            if self.StringValue:
                self.StringValue = self.StringValue + '\r\n' + Value
            else:
                self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
299
300 ## UniFileClassObject
301 #
302 # A structure for .uni file definition
303 #
304 class UniFileClassObject(object):
305 def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
306 self.FileList = FileList
307 self.File = None
308 self.IncFileList = FileList
309 self.UniFileHeader = ''
310 self.Token = 2
311 self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
312 self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
313 self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
314 self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
315 self.IsCompatibleMode = IsCompatibleMode
316 if not IncludePathList:
317 self.IncludePathList = []
318 else:
319 self.IncludePathList = IncludePathList
320 if len(self.FileList) > 0:
321 self.LoadUniFiles(FileList)
322
323 #
324 # Get Language definition
325 #
def GetLangDef(self, File, Line):
    """Parse a '#langdef <code> "<printable name>"' line and register it.

    The language is appended to self.LanguageDef when not already present,
    its $LANGUAGE_NAME and $PRINTABLE_LANGUAGE_NAME strings are added, and,
    for a language other than the first one defined, an empty placeholder
    is created for every string (from index 2 on) of the first language.

    A line that does not split into exactly three tokens is reported as a
    parser error together with its line number in File.

    @param File: File object whose .Path is re-read to locate the line
    @param Line: Text of the language definition

    @retval True
    """
    Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
    if len(Lang) != 3:
        # Re-read the file only to find the line number for the report.
        # Each encoding is tried in turn; chained 'except UnicodeError'
        # clauses on a single try never reached the later fallbacks.
        FileIn = None
        ReadError = None
        for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
            try:
                with codecs.open(File.Path, mode='rb', encoding=Encoding) as Stream:
                    FileIn = Stream.readlines()
                break
            except UnicodeError as Xstr:
                ReadError = Xstr
            except Exception as Xstr:
                EdkLogger.Error("Unicode File Parser",
                                ToolError.FILE_OPEN_FAILURE,
                                "File read failure: %s" % str(Xstr),
                                ExtraData=File)
        if FileIn is None:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_OPEN_FAILURE,
                            "File read failure: %s" % str(ReadError),
                            ExtraData=File)
        LineNo = GetLineNo(FileIn, Line, False)
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        "Wrong language definition",
                        ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                        File = File, Line = LineNo)
    else:
        LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
        LangPrintName = Lang[2]

    IsLangInDef = False
    for Item in self.LanguageDef:
        if Item[0] == LangName:
            IsLangInDef = True
            break

    if not IsLangInDef:
        self.LanguageDef.append([LangName, LangPrintName])

    #
    # Add language string
    #
    self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
    self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

    if not IsLangInDef:
        #
        # The found STRING tokens will be added into new language string list
        # so that the unique STRING identifier is reserved for all languages in the package list.
        #
        FirstLangName = self.LanguageDef[0][0]
        if LangName != FirstLangName:
            for Index in range (2, len (self.OrderedStringList[FirstLangName])):
                Item = self.OrderedStringList[FirstLangName][Index]
                if Item.UseOtherLangDef != '':
                    OtherLang = Item.UseOtherLangDef
                else:
                    OtherLang = FirstLangName
                self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
                                                                              '',
                                                                              Item.Referenced,
                                                                              Item.Token,
                                                                              OtherLang))
                self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
    return True
385
386 #
387 # Get String name and value
388 #
def GetStringObject(self, Item):
    """Split a '#string NAME #language LANG "value" ...' item and store it.

    The token name is the second whitespace-separated word and must consist
    only of A-Z, 0-9 and '_'. Every '#language ' clause contributes one
    AddStringToList call; its value is the text between the first and the
    last double quote of the clause.

    @param Item: Text of the complete string definition
    """
    Name = Item.split()[1]
    # Check the string name is the upper character
    if Name != '':
        Matched = re.match('[A-Z0-9_]+', Name, re.UNICODE)
        CoversWholeName = Matched is not None and Matched.end(0) == len(Name)
        if not CoversWholeName:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))

    Quote = u'\"'
    # The text before the first '#language ' holds only the name; skip it.
    for Clause in Item.split(u'#language ')[1:]:
        RawLanguage = Clause.split()[0]
        Value = Clause[Clause.find(Quote) + len(Quote) : Clause.rfind(Quote)]
        Language = GetLanguageCode(RawLanguage, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
412
413 #
414 # Get include file list and load them
415 #
416 def GetIncludeFile(self, Item, Dir = None):
417 if Dir:
418 pass
419 FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
420 self.LoadUniFile(FileName)
421
422 #
423 # Pre-process before parse .uni file
424 #
def PreProcess(self, File, IsIncludeFile=False):
    """Read a .uni file, validate its layout and return normalised lines.

    Steps, in order:
      1. Read File.Path, trying utf_8, utf_16 and utf_16_le in turn.
      2. Collect the leading header comment into self.UniFileHeader (only
         when no header has been collected yet).
      3. Strip comments, translate escape sequences and check the line-feed
         rules between '#string', '#language' and quoted value lines.
      4. Split every definition so that '#string NAME', '#language LANG' and
         each quoted value sit on their own output line.
      5. Check the relative order of the Abstract/Description entries.

    Every violation is reported through EdkLogger.Error.

    @param File:          File object; only File.Path is used
    @param IsIncludeFile: When True, an empty file is accepted and a
                          '#string' line is not appended again if the same
                          text is already in the output

    @retval NewLines: List of normalised lines
    """
    if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=File.Path)

    #
    # Check file header of the Uni file
    #
    # if not CheckUTF16FileHeader(File.Path):
    #     EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
    #                     ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)

    #
    # Try each encoding in turn. Sibling 'except UnicodeError' clauses on one
    # try statement never reached the utf_16_le fallback, and the file
    # handles were never closed.
    #
    FileIn = None
    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            with codecs.open(File.Path, mode='rb', encoding=Encoding) as Stream:
                FileIn = Stream.readlines()
            break
        except UnicodeError:
            continue
        except Exception:
            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)
    if FileIn is None:
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)

    #
    # get the file header
    #
    Lines = []
    HeaderStart = False
    HeaderEnd = False
    if not self.UniFileHeader:
        FirstGenHeader = True
    else:
        FirstGenHeader = False
    for Line in FileIn:
        Line = Line.strip()
        if Line == u'':
            continue
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
            and not HeaderEnd and not HeaderStart:
            HeaderStart = True
        if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
            HeaderEnd = True
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
            self.UniFileHeader += Line + '\r\n'
            continue

    #
    # Use unique identifier
    #
    FindFlag = -1
    LineCount = 0
    MultiLineFeedExits = False
    #
    # 0: initial value
    # 1: single String entry exist
    # 2: line feed exist under the some single String entry
    #
    StringEntryExistsFlag = 0
    # The for statement only fixes the number of iterations; the line that
    # is processed is selected through LineCount, which is stepped back when
    # two physical lines are merged below.
    for Line in FileIn:
        Line = FileIn[LineCount]
        LineCount += 1
        Line = Line.strip()
        #
        # Ignore comment line and empty line
        #
        if Line == u'' or Line.startswith(u'//'):
            #
            # Change the single line String entry flag status
            #
            if StringEntryExistsFlag == 1:
                StringEntryExistsFlag = 2
            #
            # If the '#string' line and the '#language' line are not in the same line,
            # there should be only one line feed character between them
            #
            if MultiLineFeedExits:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            continue

        MultiLineFeedExits = False
        #
        # Process comment embedded in string define lines
        #
        FindFlag = Line.find(u'//')
        if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
            Line = Line.replace(Line[FindFlag:], u' ')
            # LineCount already points one past the current line; on the last
            # line of the file there is no following line to inspect.
            if LineCount < len(FileIn) and FileIn[LineCount].strip().startswith('#language'):
                #
                # Merge the following '#language' line into this one, shift the
                # remaining lines up by one and process the merged line again.
                #
                Line = Line + FileIn[LineCount]
                FileIn[LineCount-1] = Line
                FileIn[LineCount] = '\r\n'
                LineCount -= 1
                for Index in range (LineCount + 1, len (FileIn) - 1):
                    if (Index == len(FileIn) -1):
                        FileIn[Index] = '\r\n'
                    else:
                        FileIn[Index] = FileIn[Index + 1]
                continue
        CommIndex = GetCharIndexOutStr(u'/', Line)
        if CommIndex > -1:
            if (len(Line) - 1) > CommIndex:
                if Line[CommIndex+1] == u'/':
                    Line = Line[:CommIndex].strip()
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

        Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
        Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
        Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

        # U+0006 temporarily stands in for an escaped backslash so that the
        # replacements below do not treat it as the start of another escape.
        Line = Line.replace(u'\\\\', u'\u0006')
        Line = Line.replace(u'\\r\\n', CR + LF)
        Line = Line.replace(u'\\n', CR + LF)
        Line = Line.replace(u'\\r', CR)
        Line = Line.replace(u'\\t', u'\t')
        Line = Line.replace(u'''\"''', u'''"''')
        Line = Line.replace(u'\t', u' ')
        Line = Line.replace(u'\u0006', u'\\')

        #
        # Check if single line has correct '"'
        #
        if Line.startswith(u'#string') and Line.find(u'#language') > -1 and Line.find('"') > Line.find(u'#language'):
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                % (LineCount, File.Path))

        #
        # Between Name entry and Language entry can not contain line feed
        #
        if Line.startswith(u'#string') and Line.find(u'#language') == -1:
            MultiLineFeedExits = True

        if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
            MultiLineFeedExits = True

        #
        # Between Language entry and String entry can not contain line feed
        #
        if Line.startswith(u'#language') and len(Line.split()) == 2:
            MultiLineFeedExits = True

        #
        # Between two String entry, can not contain line feed
        #
        if Line.startswith(u'"'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)

            StringEntryExistsFlag = 1
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                % (LineCount, File.Path))
        elif Line.startswith(u'#language'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
            StringEntryExistsFlag = 0
        else:
            StringEntryExistsFlag = 0

        Lines.append(Line)

    #
    # Convert string def format as below
    #
    #     #string MY_STRING_1
    #     #language eng
    #     "My first English string line 1"
    #     "My first English string line 2"
    #     #string MY_STRING_1
    #     #language spa
    #     "Mi segunda secuencia 1"
    #     "Mi segunda secuencia 2"
    #

    if not IsIncludeFile and not Lines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
            Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
            ExtraData=File.Path)

    NewLines = []
    StrName = u''
    ExistStrNameList = []
    for Line in Lines:
        # StrName still holds the '#string NAME' text seen most recently, so
        # these checks validate the previous definition's name.
        if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
            StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
            if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
                StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
                (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if Line.count(u'#language') > 1:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
                            ExtraData=File.Path)

        if Line.startswith(u'//'):
            continue
        elif Line.startswith(u'#langdef'):
            if len(Line.split()) == 2:
                NewLines.append(Line)
                continue
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                NewLines.append(Line[:Line.find(u'"')].strip())
                NewLines.append(Line[Line.find(u'"'):])
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#string'):
            if len(Line.split()) == 2:
                # '#string NAME' alone: remember the name, emit nothing yet.
                StrName = Line
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                continue
            elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
                # '#string NAME #language LANG' with the value on later lines.
                if Line[Line.find(u'#language')-1] != ' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line.find(u'"') > 0:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):]).strip())
            elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
                # '#string NAME #language LANG "value"' all on one line.
                if Line[Line.find(u'#language')-1] != u' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line[Line.find(u'"')-1] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#language'):
            # A '#language' line on its own repeats the remembered '#string'.
            if len(Line.split()) == 2:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append(Line)
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append((Line[:Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'"'):
            if u'#string' in Line  or u'#language' in Line:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            NewLines.append(Line)
        else:
            print(Line)
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

    if StrName and not StrName.split()[1].startswith(u'STR_'):
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                        ExtraData=File.Path)

    if StrName and not NewLines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
                        ExtraData=File.Path)

    #
    # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
    # should be Abstract, Description, BinaryAbstract, BinaryDescription
    AbstractPosition = -1
    DescriptionPosition = -1
    BinaryAbstractPosition = -1
    BinaryDescriptionPosition = -1
    for StrName in ExistStrNameList:
        if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryAbstractPosition = ExistStrNameList.index(StrName)
            else:
                AbstractPosition = ExistStrNameList.index(StrName)
        if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryDescriptionPosition = ExistStrNameList.index(StrName)
            else:
                DescriptionPosition = ExistStrNameList.index(StrName)

    OrderList = sorted([AbstractPosition, DescriptionPosition])
    BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
    Min = OrderList[0]
    Max = OrderList[1]
    BinaryMin = BinaryOrderList[0]
    BinaryMax = BinaryOrderList[1]
    if BinaryDescriptionPosition > -1:
        if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
               BinaryMax > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)
    elif BinaryAbstractPosition > -1:
        if not(BinaryAbstractPosition > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if DescriptionPosition > -1:
        if not(DescriptionPosition == Max and AbstractPosition == Min and \
               DescriptionPosition > AbstractPosition):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if not self.UniFileHeader:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        Message = ST.ERR_NO_SOURCE_HEADER,
                        ExtraData=File.Path)

    return NewLines
797
798 #
799 # Load a .uni file
800 #
def LoadUniFile(self, File = None):
    """Pre-process one .uni file and register its languages and strings.

    The lines returned by self.PreProcess are scanned in order:
      * a line containing '#langdef ' is joined with the line after it and
        handed to self.GetLangDef;
      * a '#string NAME' line followed by a '#language LANG' line and at
        least one further line starts a string definition. The quoted lines
        that follow are joined with '\\r\\n'; a quoted line whose text ends
        in a space is instead joined to the next one with a single space.

    @param File: File object to load; None is reported as a parser error
    """
    if File is None:
        # File.Path cannot be used for ExtraData here: there is no File.
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given')
        return

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)
    LineTotal = len(Lines)

    #
    # Get Unicode Information
    #
    # SecondLine keeps its previous value when there is no following line
    # (unchanged for longer inputs); the initial value only prevents an
    # unbound local for a one-line input.
    SecondLine = u''
    ThirdLine = u''
    for IndexI in range(LineTotal):
        Line = Lines[IndexI]
        if (IndexI + 1) < LineTotal:
            SecondLine = Lines[IndexI + 1]
        if (IndexI + 2) < LineTotal:
            ThirdLine = Lines[IndexI + 2]

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        Name = ''
        Language = ''
        Value = ''
        CombineToken = False
        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        # The explicit bound makes the existing requirement visible: with a
        # stale ThirdLine the test could never pass on the last two lines.
        if (IndexI + 2) < LineTotal and \
           Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
           SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
           ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
            if Line.find('"') > 0 or SecondLine.find('"') > 0:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                            ExtraData=File.Path)

            Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
            Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
            for IndexJ in range(IndexI + 2, LineTotal):
                if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
                Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
                    # A space right before the closing quote joins this line
                    # to the next one. The length test keeps a lone '"' from
                    # raising IndexError.
                    if len(Lines[IndexJ]) >= 2 and Lines[IndexJ][-2] == ' ':
                        CombineToken = True
                    if CombineToken:
                        if Lines[IndexJ].strip()[1:-1].strip():
                            Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
                        else:
                            Value = Value + Lines[IndexJ].strip()[1:-1]
                        CombineToken = False
                    else:
                        Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
                else:
                    # First line that is not a quoted value ends the entry.
                    break
            if Value.endswith('\r\n'):
                Value = Value[: Value.rfind('\r\n')]
            Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
            self.AddStringToList(Name, Language, Value)
            continue
878 continue
879
880 #
881 # Load multiple .uni files
882 #
def LoadUniFiles(self, FileList):
    """Load every entry of FileList whose path has a .uni extension.

    Only the spellings '.uni', '.UNI' and '.Uni' are accepted; the path is
    stripped of surrounding whitespace before the check. Matching entries
    are handed to self.LoadUniFile() in list order.

    @param FileList: sized collection of objects exposing a Path attribute
    """
    if len(FileList) == 0:
        return

    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for Candidate in FileList:
        if Candidate.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(Candidate)
889
890 #
891 # Add a string to list
892 #
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Add a string definition for Language, or update it if Name exists.

    self.OrderedStringList[Language] holds the string objects in order and
    self.OrderedStringDict[Language] maps each name to its position in that
    list.

    @param Name:            string identifier
    @param Language:        language the value belongs to
    @param Value:           string value; None leaves an existing entry as is
    @param Token:           ignored for new entries, which always get the
                            current length of the language list as token
    @param Referenced:      initial referenced flag of a new entry
    @param UseOtherLangDef: language recorded on a new entry as the one
                            that actually defines the string
    @param Index:           -1 appends the new entry; any other value is
                            used as the list.insert() position
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    LangList = self.OrderedStringList[Language]
    LangDict = self.OrderedStringDict[Language]

    #
    # Existing name: only refresh the value, never add a second entry.
    #
    if Name in LangDict:
        if Value is not None:
            Item = LangList[LangDict[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(LangList)
    if Index == -1:
        LangList.append(StringDefClassObject(Name,
                                             Value,
                                             Referenced,
                                             Token,
                                             UseOtherLangDef))
        LangDict[Name] = Token

        if UseOtherLangDef != '':
            OtherLangDef = UseOtherLangDef
        else:
            OtherLangDef = Language
        for LangName in self.LanguageDef:
            #
            # New STRING token will be added into all language string lists.
            # so that the unique STRING identifier is reserved for all languages in the package list.
            #
            if LangName[0] != Language:
                OtherList = self.OrderedStringList[LangName[0]]
                OtherList.append(StringDefClassObject(Name,
                                                      '',
                                                      Referenced,
                                                      Token,
                                                      OtherLangDef))
                self.OrderedStringDict[LangName[0]][Name] = len(OtherList) - 1
    else:
        #
        # list.insert() clamps out-of-range positions and counts negative
        # ones from the end; work out where the entry really lands so the
        # name-to-position map stays correct.
        #
        Length = len(LangList)
        if Index < 0:
            Position = max(Length + Index, 0)
        else:
            Position = min(Index, Length)

        LangList.insert(Index, StringDefClassObject(Name,
                                                    Value,
                                                    Referenced,
                                                    Token,
                                                    UseOtherLangDef))
        #
        # Every entry at or after the insertion point moved one slot down.
        #
        for OtherName, OtherPosition in list(LangDict.items()):
            if OtherPosition >= Position:
                LangDict[OtherName] = OtherPosition + 1
        LangDict[Name] = Position
943
944 #
945 # Set the string as referenced
946 #
def SetStringReferenced(self, Name):
    """Flag the string called Name as referenced.

    String tokens are added in the same order in all language string
    lists, so only the entry in the first defined language is updated.
    Unknown names are ignored.

    @param Name: string identifier to mark
    """
    FirstLang = self.LanguageDef[0][0]
    NameToIndex = self.OrderedStringDict[FirstLang]
    if Name not in NameToIndex:
        return

    self.OrderedStringList[FirstLang][NameToIndex[Name]].Referenced = True
957
958 #
959 # Search the string in language definition by Name
960 #
def FindStringValue(self, Name, Lang):
    """Look up the string object called Name in language Lang.

    @param Name: string identifier
    @param Lang: language key of the string tables

    @return: the entry from self.OrderedStringList[Lang], or None when the
             name is not defined for that language
    """
    NameToIndex = self.OrderedStringDict[Lang]
    if Name not in NameToIndex:
        return None

    return self.OrderedStringList[Lang][NameToIndex[Name]]
967
968 #
969 # Search the string in language definition by Token
970 #
def FindByToken(self, Token, Lang):
    """Return the first string object of language Lang carrying Token.

    @param Token: token value to search for
    @param Lang:  language key of the string tables

    @return: the first matching entry of self.OrderedStringList[Lang], or
             None when no entry has that token
    """
    Matches = (Entry for Entry in self.OrderedStringList[Lang] if Entry.Token == Token)
    return next(Matches, None)
977
978 #
979 # Re-order strings and re-generate tokens
980 #
def ReToken(self):
    """Re-generate the tokens of all strings in every defined language.

    The referenced flag of each string in the first defined language
    decides the numbering: referenced strings receive the small tokens
    (starting at 0, in list order) and unreferenced strings receive the
    tokens that follow. The same position in every language list gets the
    same token, and self.OrderedStringListByToken[<lang>] is rebuilt as a
    token -> string object mapping to facilitate later lookups by token.

    Does nothing when no language is defined.
    """
    if len(self.LanguageDef) == 0:
        return None

    LangNames = [LangDef[0] for LangDef in self.LanguageDef]
    FirstLangStrings = self.OrderedStringList[LangNames[0]]

    #
    # Start from an empty token table for every language.
    #
    for LangName in LangNames:
        self.OrderedStringListByToken[LangName] = {}

    NextToken = 0

    #
    # Use small tokens for all referenced strings, and propagate the
    # referenced flag to the same string in every language.
    #
    for Position, FirstLangItem in enumerate(FirstLangStrings):
        if FirstLangItem.Referenced == True:
            for LangName in LangNames:
                Entry = self.OrderedStringList[LangName][Position]
                Entry.Referenced = True
                Entry.Token = NextToken
                self.OrderedStringListByToken[LangName][Entry.Token] = Entry
            NextToken += 1

    #
    # Use big tokens for all unreferenced strings; numbering continues
    # where the referenced strings stopped.
    #
    for Position, FirstLangItem in enumerate(FirstLangStrings):
        if FirstLangItem.Referenced == False:
            for LangName in LangNames:
                Entry = self.OrderedStringList[LangName][Position]
                Entry.Token = NextToken
                self.OrderedStringListByToken[LangName][Entry.Token] = Entry
            NextToken += 1
1021
1022 #
1023 # Show the instance itself
1024 #
def ShowMe(self):
    """Print the language definitions and every string, grouped by language.

    Output order: self.LanguageDef, then for each key of
    self.OrderedStringList the key itself followed by str() of each of its
    members, one per line.
    """
    print(self.LanguageDef)
    for LangName in self.OrderedStringList:
        print(LangName)
        Members = self.OrderedStringList[LangName]
        for Entry in Members:
            print(str(Entry))
1032
1033 #
1034 # Read content from '!include' UNI file
1035 #
def ReadIncludeUNIfile(self, FilaPath):
    """Read the content of a UNI file pulled in through '!include'.

    The file is decoded as UTF-8 first, then as UTF-16, then as UTF-16 LE;
    the first encoding that decodes the whole file wins. The file handle is
    closed before returning.

    @param FilaPath: path of the included UNI file

    @return: list of decoded lines, line endings preserved

    Reports ToolError.FILE_NOT_FOUND through EdkLogger.Error when the path
    is not an existing regular file, and ToolError.FILE_OPEN_FAILURE when
    the file cannot be opened or decoded with any of the encodings.
    """
    if not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)

    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            with codecs.open(FilaPath, mode='rb', encoding=Encoding) as FileObj:
                return FileObj.readlines()
        except UnicodeError:
            #
            # Wrong guess; fall through to the next candidate encoding.
            #
            continue
        except (IOError, OSError):
            #
            # The file itself cannot be read; another encoding will not help.
            #
            break

    EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
1053