]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/UPT/Library/UniClassObject.py
332ae273c78eb0eb3ef7cd04f206ac68f69361c3
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / UniClassObject.py
1 ## @file
2 # Collect all defined strings in multiple uni files.
3 #
4 # Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14 """
15 Collect all defined strings in multiple uni files
16 """
17
18 ##
19 # Import Modules
20 #
21 import os, codecs, re
22 import distutils.util
23 from Logger import ToolError
24 from Logger import Log as EdkLogger
25 from Logger import StringTable as ST
26 from Library.String import GetLineNo
27 from Library.Misc import PathClass
28 from Library.Misc import GetCharIndexOutStr
29 from Library import DataType as DT
30
31 ##
32 # Static definitions
33 #
# Escape spellings that may appear in .uni source text.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Single code points that the escape spellings above are replaced with
# during pre-processing, plus a few frequently used characters.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'

# Matches a whole "!include <name>" line and captures <name>.
# Written as a raw string so that "\s" and "\S" reach the regex engine as
# written instead of relying on Python passing unknown string escapes through.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
50
# Maps three-letter language codes (keys) to their two-letter equivalents
# (values).  Apart from the first row, rows are ordered by the two-letter code.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
83
84 ## Convert a python unicode string to a normal string
85 #
86 # Convert a python unicode string to a normal string
87 # UniToStr(u'I am a string') is 'I am a string'
88 #
89 # @param Uni: The python unicode string
90 #
91 # @retval: The formatted normal string
92 #
def UniToStr(Uni):
    """Return the repr() text of a string without its prefix and quotes.

    UniToStr(u'I am a string') is 'I am a string'.

    @param Uni: The python unicode string

    @retval: The text between the quotes of repr(Uni)
    """
    Text = repr(Uni)
    # repr() only carries a leading "u" for Python 2 unicode objects.  Strip
    # it when present rather than blindly dropping two characters, which
    # would eat the first character of the text whenever there is no prefix.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    # Drop the surrounding quote characters.
    return Text[1:-1]
95
96 ## Convert a unicode string to a Hex list
97 #
98 # Convert a unicode string to a Hex list
99 # UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
100 #
101 # @param Uni: The python unicode string
102 #
103 # @retval List: The formatted hex list
104 #
def UniToHexList(Uni):
    """Convert a string into a flat list of hex byte strings, low byte first.

    UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00'].

    @param Uni: The python unicode string

    @retval List: Two '0xNN' entries per character
    """
    HexBytes = []
    for Char in Uni:
        # Four upper-case hex digits, zero padded: "HHLL".
        Digits = format(ord(Char), '04X')
        # Emit the low byte (digits 2-3) before the high byte (digits 0-1).
        HexBytes += ['0x' + Digits[2:4], '0x' + Digits[:2]]
    return HexBytes
112
113 ## Convert special unicode characters
114 #
115 # Convert special characters to (c), (r) and (tm).
116 #
117 # @param Uni: The python unicode string
118 #
119 # @retval NewUni: The converted unicode string
120 #
def ConvertSpecialUnicodes(Uni):
    """Replace the copyright, registered and trademark signs with ASCII text.

    @param Uni: The python unicode string

    @retval NewUni: The string with U+00A9, U+00AE and U+2122 replaced by
                    '(c)', '(r)' and '(tm)' respectively
    """
    Substitutions = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    NewUni = Uni
    for Symbol, Ascii in Substitutions:
        NewUni = NewUni.replace(Symbol, Ascii)
    return NewUni
127
128 ## GetLanguageCode1766
129 #
130 # Check the language code read from .UNI file and convert RFC 4646 codes to RFC 1766 codes
131 # RFC 1766 language codes supported in compatibility mode
132 # RFC 4646 language codes supported in native mode
133 #
134 # @param LangName: Language codes read from .UNI file
135 #
136 # @retval LangName: Valid language code in RFC 1766 format or None
137 #
def GetLanguageCode1766(LangName, File=None):
    """Map a language code read from a .UNI file to a three-letter code.

    Two-letter codes, and codes of five or more characters that begin with a
    two-letter tag followed by '-', are translated through gLANG_CONV_TABLE.
    Three-letter codes are returned unchanged when the table knows them.
    Anything else is reported through EdkLogger.Error.

    @param LangName: Language code read from .UNI file
    @param File:     File argument forwarded to EdkLogger.Error

    @retval: The matching three-letter code
    """
    def _ThreeLetterFor(TwoLetter):
        # Reverse lookup: first table key whose value is the given code.
        for Iso3 in gLANG_CONV_TABLE:
            if gLANG_CONV_TABLE[Iso3] == TwoLetter:
                return Iso3
        return None

    NameLen = len(LangName)
    Found = None

    if NameLen == 2:
        if LangName.isalpha():
            Found = _ThreeLetterFor(LangName.lower())
    elif NameLen == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()):
            return LangName
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FORMAT_INVALID,
                        "Invalid RFC 1766 language code : %s" % LangName,
                        File)
    elif NameLen == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            Found = _ThreeLetterFor(LangName[0:2].lower())
    elif NameLen >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            Found = _ThreeLetterFor(LangName[0:2].lower())
        # Only fall back to a three-letter primary tag ("xxx-...") when the
        # two-letter form did not produce a result.
        if Found is None and LangName[0:3].isalpha() and \
           gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            Primary = LangName[0:3].lower()
            if Primary in gLANG_CONV_TABLE:
                Found = Primary

    if Found is not None:
        return Found

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
172
173 ## GetLanguageCode
174 #
175 # Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
176 # RFC 1766 language codes supported in compatibility mode
177 # RFC 4646 language codes supported in native mode
178 #
179 # @param LangName: Language codes read from .UNI file
180 #
181 # @retval LangName: Valid language code in RFC 4646 format or None
182 #
def GetLanguageCode(LangName, IsCompatibleMode, File):
    """Validate a language code read from a .UNI file.

    In compatible mode only three-letter alphabetic codes are accepted; they
    are translated through gLANG_CONV_TABLE when the table has an entry and
    returned unchanged otherwise.  In native mode the code is returned
    unchanged when it has one of the accepted shapes ("x-...", two letters,
    three letters without a table entry, "ll-..." or "lll-...").  Everything
    else is reported through EdkLogger.Error.

    @param LangName:         Language code read from .UNI file
    @param IsCompatibleMode: True to apply the RFC 1766 rules
    @param File:             File argument forwarded to EdkLogger.Error

    @retval LangName: The accepted (possibly converted) language code
    """
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)

    # Slicing (rather than indexing [0] and [1]) keeps empty and
    # one-character names from raising IndexError; they fall through to the
    # FORMAT_INVALID report at the bottom instead.
    if LangName[0:2] in ('x-', 'X-'):
        return LangName

    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
217
218 ## FormatUniEntry
219 #
220 # Format the entry in a Uni file.
221 #
222 # @param StrTokenName StrTokenName.
223 # @param TokenValueList A list need to be processed.
224 # @param ContainerFile ContainerFile.
225 #
226 # @return formatted entry
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    """Render one string token and its per-language values as .uni text.

    @param StrTokenName    Text placed at the start of the first line.
    @param TokenValueList  Iterable of (Lang, Value) pairs.  Pairs with an
                           empty Value, or whose Lang is
                           DT.TAB_LANGUAGE_EN_X, are skipped.
    @param ContainerFile   Passed to GetLanguageCode for error reporting.

    @return The formatted entry, or '' when no pair produced output.
    """
    NameLen = len(StrTokenName)
    # The "#language" column starts at 40, or one past a longer token name.
    NameColumn = NameLen + 1 if NameLen > 40 else 40
    # Quoted value lines are aligned after a "#language en-US " sized field.
    ValueColumn = NameColumn + len('#language en-US ')
    Pad = ' ' * NameColumn

    Entries = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        else:
            Primary = Lang.split('-')[0]
            if len(Primary) == 3:
                Lang = GetLanguageCode(Primary, True, ContainerFile)
            else:
                Lang = GetLanguageCode(Lang, False, ContainerFile)

        # One indented, quoted, "\n"-terminated line per non-blank value line.
        Quoted = ''.join(' ' * ValueColumn + '\"%s\\n\"' % Piece.strip() + '\r\n'
                         for Piece in Value.split('\n') if Piece.strip())
        # Drop the first line's indent (it follows the "#language" field) and
        # cut at the last literal "\n" so the final line ends with a bare quote.
        Quoted = Quoted[ValueColumn:Quoted.rfind('\\n')] + '\"' + '\r\n'
        Entries.append(Pad + '#language %-5s ' % Lang + Quoted)

    SubContent = ''.join(Entries)
    if SubContent:
        # Replace the first line's padding with the token name.
        SubContent = StrTokenName + ' ' * (NameColumn - NameLen) + SubContent[NameColumn:]
    return SubContent
255
256
257 ## StringDefClassObject
258 #
259 # A structure for language definition
260 #
class StringDefClassObject(object):
    """Holds one string definition: name, value, token and their byte lists."""

    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        # Defaults used for every argument that is left as None.
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        self.StringValueByteList = ''
        self.Token = 0
        self.Length = 0
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(Value)
            self.Length = len(self.StringValueByteList)
        if Token is not None:
            self.Token = Token

    def __str__(self):
        # Space-separated repr() of the identifying fields.
        Fields = (self.StringName, self.Token, self.Referenced,
                  self.StringValue, self.UseOtherLangDef)
        return ' '.join([repr(Field) for Field in Fields])

    def UpdateValue(self, Value = None):
        # Append Value to the current value (CR/LF separated when a value is
        # already present) and refresh the derived byte list and length.
        if Value is None:
            return
        if self.StringValue:
            self.StringValue = self.StringValue + '\r\n' + Value
        else:
            self.StringValue = Value
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
297
298 ## UniFileClassObject
299 #
300 # A structure for .uni file definition
301 #
302 class UniFileClassObject(object):
303 def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
304 self.FileList = FileList
305 self.File = None
306 self.IncFileList = FileList
307 self.UniFileHeader = ''
308 self.Token = 2
309 self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
310 self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
311 self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
312 self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
313 self.IsCompatibleMode = IsCompatibleMode
314 if not IncludePathList:
315 self.IncludePathList = []
316 else:
317 self.IncludePathList = IncludePathList
318 if len(self.FileList) > 0:
319 self.LoadUniFiles(FileList)
320
321 #
322 # Get Language definition
323 #
def GetLangDef(self, File, Line):
    """Register the language described by a "#langdef" line.

    The text before any "//" is split with distutils.util.split_quoted and
    must yield exactly three tokens; the second is the language code and the
    third the printable name.  Otherwise the file is re-read to find the
    line number and a PARSER_ERROR is reported.

    @param File: Object with a Path attribute naming the .uni file.
    @param Line: Text of the language definition.

    @retval True
    """
    Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
    if len(Lang) != 3:
        try:
            try:
                with codecs.open(File.Path, mode='rb', encoding='utf_16') as Stream:
                    FileIn = Stream.read()
            except UnicodeError:
                # Decoding as utf_16 failed; retry as little-endian UTF-16.
                with codecs.open(File.Path, mode='rb', encoding='utf_16_le') as Stream:
                    FileIn = Stream.read()
        except Exception as Xstr:
            # Covers failures of either attempt; the exception is bound here
            # so the message below can always be formatted.
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_OPEN_FAILURE,
                            "File read failure: %s" % str(Xstr),
                            ExtraData=File)
        LineNo = GetLineNo(FileIn, Line, False)
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        "Wrong language definition",
                        ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                        File = File, Line = LineNo)
    else:
        LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
        LangPrintName = Lang[2]

    IsLangInDef = False
    for Item in self.LanguageDef:
        if Item[0] == LangName:
            IsLangInDef = True
            break

    if not IsLangInDef:
        self.LanguageDef.append([LangName, LangPrintName])

    #
    # Add language string
    #
    self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
    self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

    if not IsLangInDef:
        #
        # The found STRING tokens will be added into new language string list
        # so that the unique STRING identifier is reserved for all languages in the package list.
        #
        FirstLangName = self.LanguageDef[0][0]
        if LangName != FirstLangName:
            # Entries 0 and 1 are the two language-name strings added above.
            for Index in range(2, len(self.OrderedStringList[FirstLangName])):
                Item = self.OrderedStringList[FirstLangName][Index]
                if Item.UseOtherLangDef != '':
                    OtherLang = Item.UseOtherLangDef
                else:
                    OtherLang = FirstLangName
                self.OrderedStringList[LangName].append(StringDefClassObject(Item.StringName,
                                                                             '',
                                                                             Item.Referenced,
                                                                             Item.Token,
                                                                             OtherLang))
                self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
    return True
381
382 #
383 # Get String name and value
384 #
def GetStringObject(self, Item):
    """Parse a "#string NAME #language LANG "value" ..." item.

    The second whitespace-separated word is the string name, which must
    consist only of A-Z, 0-9 and '_'.  Each "#language " section contributes
    one AddStringToList call with the text between its first and last
    double quote.

    @param Item: Text of the string definition.
    """
    Name = Item.split()[1]
    # Check the string name is the upper character
    if Name != '':
        Matched = re.match('[A-Z0-9_]+', Name, re.UNICODE)
        if Matched == None or Matched.end(0) != len(Name):
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))

    # Everything before the first "#language " is the "#string NAME" part.
    Sections = Item.split(u'#language ')
    for Section in Sections[1:]:
        LangToken = Section.split()[0]
        QuoteStart = Section.find(u'\"') + len(u'\"')
        QuoteEnd = Section.rfind(u'\"')
        Value = Section[QuoteStart:QuoteEnd]
        Language = GetLanguageCode(LangToken, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
408
409 #
410 # Get include file list and load them
411 #
412 def GetIncludeFile(self, Item, Dir = None):
413 if Dir:
414 pass
415 FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
416 self.LoadUniFile(FileName)
417
418 #
419 # Pre-process before parse .uni file
420 #
def PreProcess(self, File, IsIncludeFile=False):
    """Read a .uni file, validate its layout and normalise its lines.

    The file is decoded as UTF-16 (falling back to little-endian UTF-16).
    The first pass collects the leading header comment into
    self.UniFileHeader.  The second pass strips comments, translates escape
    sequences and checks the blank-line rules.  The third pass splits every
    definition so that "#langdef"/"#string", "#language" and each quoted
    value end up on separate entries of the returned list, and checks naming
    and ordering rules.

    @param File:          Object with Path (and Dir) attributes.
    @param IsIncludeFile: True when called for an included file; then an
                          empty result is allowed and a "#string" line is
                          not added twice.

    @retval NewLines: List of normalised lines.
    """
    if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=File.Path)

    try:
        try:
            with codecs.open(File.Path, mode='rb', encoding='utf_16') as Stream:
                FileIn = Stream.readlines()
        except UnicodeError:
            # Decoding as utf_16 failed; retry as little-endian UTF-16.
            with codecs.open(File.Path, mode='rb', encoding='utf_16_le') as Stream:
                FileIn = Stream.readlines()
    except Exception:
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)

    #
    # get the file header
    #
    Lines = []
    HeaderStart = False
    HeaderEnd = False
    if not self.UniFileHeader:
        FirstGenHeader = True
    else:
        FirstGenHeader = False
    for Line in FileIn:
        Line = Line.strip()
        if Line == u'':
            continue
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
            and not HeaderEnd and not HeaderStart:
            HeaderStart = True
        if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
            HeaderEnd = True
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
            self.UniFileHeader += Line + '\r\n'
        continue

    #
    # Use unique identifier
    #
    FindFlag = -1
    LineCount = 0
    MultiLineFeedExits = False
    #
    # 0: initial value
    # 1: single String entry exist
    # 2: line feed exist under the same single String entry
    #
    StringEntryExistsFlag = 0
    # The loop runs once per element of FileIn, but the line actually
    # examined is FileIn[LineCount]; LineCount is stepped back after a merge
    # so the merged line is examined again on the next iteration.
    for Line in FileIn:
        Line = FileIn[LineCount]
        LineCount += 1
        Line = Line.strip()
        #
        # Ignore comment line and empty line
        #
        if Line == u'' or Line.startswith(u'//'):
            #
            # Change the single line String entry flag status
            #
            if StringEntryExistsFlag == 1:
                StringEntryExistsFlag = 2
            #
            # If the '#string' line and the '#language' line are not in the same line,
            # there should be only one line feed character between them
            #
            if MultiLineFeedExits:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            continue

        MultiLineFeedExits = False
        #
        # Process comment embedded in string define lines
        #
        FindFlag = Line.find(u'//')
        if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
            Line = Line.replace(Line[FindFlag:], u' ')
            # Look ahead only when a following line exists; indexing past the
            # end of FileIn would raise IndexError on the last line.
            if LineCount < len(FileIn) and FileIn[LineCount].strip().startswith('#language'):
                # Merge the following "#language" line into this one and
                # shift the remaining lines up by one.
                Line = Line + FileIn[LineCount]
                FileIn[LineCount-1] = Line
                FileIn[LineCount] = '\r\n'
                LineCount -= 1
                for Index in range(LineCount + 1, len(FileIn) - 1):
                    if (Index == len(FileIn) -1):
                        FileIn[Index] = '\r\n'
                    else:
                        FileIn[Index] = FileIn[Index + 1]
                continue
        CommIndex = GetCharIndexOutStr(u'/', Line)
        if CommIndex > -1:
            if (len(Line) - 1) > CommIndex:
                if Line[CommIndex+1] == u'/':
                    Line = Line[:CommIndex].strip()
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

        Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
        Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
        Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

        # Park escaped backslashes on U+0006 so the replacements below do not
        # see them, then restore them as single backslashes at the end.
        Line = Line.replace(u'\\\\', u'\u0006')
        Line = Line.replace(u'\\r\\n', CR + LF)
        Line = Line.replace(u'\\n', CR + LF)
        Line = Line.replace(u'\\r', CR)
        Line = Line.replace(u'\\t', u'\t')
        Line = Line.replace(u'''\"''', u'''"''')
        Line = Line.replace(u'\t', u' ')
        Line = Line.replace(u'\u0006', u'\\')

        # IncList = gINCLUDE_PATTERN.findall(Line)
        # Include handling is switched off: IncList is always empty, so the
        # branch below never runs.
        IncList = []
        if len(IncList) == 1:
            for Dir in [File.Dir] + self.IncludePathList:
                IncFile = PathClass(str(IncList[0]), Dir)
                self.IncFileList.append(IncFile)
                if os.path.isfile(IncFile.Path):
                    Lines.extend(self.PreProcess(IncFile, True))
                    break
            else:
                EdkLogger.Error("Unicode File Parser",
                                ToolError.FILE_NOT_FOUND,
                                Message="Cannot find include file",
                                ExtraData=str(IncList[0]))
            continue

        #
        # Between Name entry and Language entry can not contain line feed
        #
        if Line.startswith(u'#string') and Line.find(u'#language') == -1:
            MultiLineFeedExits = True

        if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
            MultiLineFeedExits = True

        #
        # Between Language entry and String entry can not contain line feed
        #
        if Line.startswith(u'#language') and len(Line.split()) == 2:
            MultiLineFeedExits = True

        #
        # Between two String entry, can not contain line feed
        #
        if Line.startswith(u'"'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)

            StringEntryExistsFlag = 1
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#language'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)
            StringEntryExistsFlag = 0
        else:
            StringEntryExistsFlag = 0

        Lines.append(Line)

    #
    # Convert string def format as below
    #
    #     #string MY_STRING_1
    #     #language eng
    #     "My first English string line 1"
    #     "My first English string line 2"
    #     #string MY_STRING_1
    #     #language spa
    #     "Mi segunda secuencia 1"
    #     "Mi segunda secuencia 2"
    #

    if not IsIncludeFile and not Lines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
                        ExtraData=File.Path)

    NewLines = []
    StrName = u''
    ExistStrNameList = []

    def _RecordStrName(TokenName):
        # Remember a token name the first time it is seen; a repeated name
        # is an error only for the abstract/description entries listed here.
        if TokenName not in ExistStrNameList:
            ExistStrNameList.append(TokenName.strip())
        elif TokenName in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                           DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                           DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                           DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % TokenName, \
                            ExtraData=File.Path)

    for Line in Lines:
        # These checks look at the StrName set by an earlier iteration.
        if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
            StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
            if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
                StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
                (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
                                ExtraData=File.Path)

        if Line.count(u'#language') > 1:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
                            ExtraData=File.Path)

        if Line.startswith(u'//'):
            continue
        elif Line.startswith(u'#langdef'):
            if len(Line.split()) == 2:
                NewLines.append(Line)
                continue
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                NewLines.append(Line[:Line.find(u'"')].strip())
                NewLines.append(Line[Line.find(u'"'):])
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#string'):
            if len(Line.split()) == 2:
                # "#string NAME" on its own line.
                StrName = Line
                if StrName:
                    _RecordStrName(StrName.split()[1])
                continue
            elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
                # "#string NAME #language LANG" without a value.
                if Line[Line.find(u'#language')-1] != ' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line.find(u'"') > 0:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    _RecordStrName(StrName.split()[1])
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):]).strip())
            elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
                # "#string NAME #language LANG "value"" on one line.
                if Line[Line.find(u'#language')-1] != u' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line[Line.find(u'"')-1] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    _RecordStrName(StrName.split()[1])
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#language'):
            if len(Line.split()) == 2:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append(Line)
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append((Line[:Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'"'):
            if u'#string' in Line or u'#language' in Line:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            NewLines.append(Line)
        else:
            # Show the offending line before reporting the error.  The
            # parenthesised form prints the same text under Python 2 and 3.
            print(Line)
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

    if StrName and not StrName.split()[1].startswith(u'STR_'):
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                        ExtraData=File.Path)

    if StrName and not NewLines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
                        ExtraData=File.Path)

    #
    # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
    # should be Abstract, Description, BinaryAbstract, BinaryDescription
    AbstractPosition = -1
    DescriptionPosition = -1
    BinaryAbstractPosition = -1
    BinaryDescriptionPosition = -1
    for StrName in ExistStrNameList:
        if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryAbstractPosition = ExistStrNameList.index(StrName)
            else:
                AbstractPosition = ExistStrNameList.index(StrName)
        if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryDescriptionPosition = ExistStrNameList.index(StrName)
            else:
                DescriptionPosition = ExistStrNameList.index(StrName)

    OrderList = sorted([AbstractPosition, DescriptionPosition])
    BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
    Min = OrderList[0]
    Max = OrderList[1]
    BinaryMin = BinaryOrderList[0]
    BinaryMax = BinaryOrderList[1]
    if BinaryDescriptionPosition > -1:
        if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
               BinaryMax > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)
    elif BinaryAbstractPosition > -1:
        if not(BinaryAbstractPosition > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if DescriptionPosition > -1:
        if not(DescriptionPosition == Max and AbstractPosition == Min and \
               DescriptionPosition > AbstractPosition):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if not self.UniFileHeader:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        Message = ST.ERR_NO_SOURCE_HEADER,
                        ExtraData=File.Path)

    return NewLines
789
790 #
791 # Load a .uni file
792 #
def LoadUniFile(self, File = None):
    """Pre-process one .uni file and register its definitions.

    "#langdef" lines are handed to GetLangDef together with the line that
    follows them.  A "#string" line followed by a "#language" line and one
    or more quoted value lines is collected into a single value and handed
    to AddStringToList.

    @param File: Object with a Path attribute naming the .uni file.
    """
    if File is None:
        # There is no File object to take a path from, so ExtraData is left
        # at its default rather than dereferencing None.
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given')

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)

    #
    # Get Unicode Information
    #
    # Look-ahead lines.  They start out empty so they are always bound; near
    # the end of Lines they keep the value from the previous iteration.
    SecondLine = u''
    ThirdLine = u''
    for IndexI in range(len(Lines)):
        Line = Lines[IndexI]
        if (IndexI + 1) < len(Lines):
            SecondLine = Lines[IndexI + 1]
        if (IndexI + 2) < len(Lines):
            ThirdLine = Lines[IndexI + 2]

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        Name = ''
        Language = ''
        Value = ''
        CombineToken = False
        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        # The IndexI + 2 bound makes sure all three lines really exist, so a
        # file of fewer than three lines is skipped instead of touching
        # look-ahead lines that were never read.
        if (IndexI + 2) < len(Lines) and \
           Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
           SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
           ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
            if Line.find('"') > 0 or SecondLine.find('"') > 0:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                                ExtraData=File.Path)

            Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
            Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
            for IndexJ in range(IndexI + 2, len(Lines)):
                if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0 and \
                   Lines[IndexJ].strip().startswith(u'"') and Lines[IndexJ].strip().endswith(u'"'):
                    # A space right before the closing quote joins this line
                    # to the next one with a space instead of CR/LF.  The
                    # slice is used so a one-character line cannot raise.
                    if Lines[IndexJ][-2:-1] == ' ':
                        CombineToken = True
                    if CombineToken:
                        if Lines[IndexJ].strip()[1:-1].strip():
                            Value = Value + Lines[IndexJ].strip()[1:-1].rstrip() + ' '
                        else:
                            Value = Value + Lines[IndexJ].strip()[1:-1]
                        CombineToken = False
                    else:
                        Value = Value + Lines[IndexJ].strip()[1:-1] + '\r\n'
                else:
                    # First line that is not a quoted value ends this entry.
                    break
            if Value.endswith('\r\n'):
                Value = Value[: Value.rfind('\r\n')]
            Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
            self.AddStringToList(Name, Language, Value)
            continue
871
872 #
873 # Load multiple .uni files
874 #
def LoadUniFiles(self, FileList):
    """Load every .uni file found in FileList.

    Each entry of FileList must expose a ``Path`` string attribute.  The
    path is stripped of surrounding whitespace and the entry is passed to
    ``self.LoadUniFile`` only when the path ends with one of the exact
    suffixes '.uni', '.UNI' or '.Uni'; any other entry is ignored.
    """
    UniSuffixes = ('.uni', '.UNI', '.Uni')
    if len(FileList) == 0:
        return
    for Candidate in FileList:
        if Candidate.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(Candidate)
881
882 #
883 # Add a string to list
884 #
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Add a string definition, or update the value of an existing one.

    @param Name             Identifier of the string.
    @param Language         Language code the value belongs to.  A string
                            list/dict pair is created for it on first use.
    @param Value            String value.  When Name already exists for
                            Language and Value is None, nothing is changed.
    @param Token            Ignored on input; a new entry always gets the
                            current length of the language list as token.
    @param Referenced       Passed through to the new StringDefClassObject.
    @param UseOtherLangDef  Passed through to the new StringDefClassObject.
    @param Index            -1 appends the entry (and adds a placeholder to
                            every other language in self.LanguageDef); any
                            other value inserts the entry at that position
                            in the list of Language only.
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    LangList = self.OrderedStringList[Language]
    LangDict = self.OrderedStringDict[Language]

    #
    # The string already exists for this language: only refresh its value.
    #
    if Name in LangDict:
        if Value is not None:
            Item = LangList[LangDict[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(LangList)
    if Index == -1:
        LangList.append(StringDefClassObject(Name,
                                             Value,
                                             Referenced,
                                             Token,
                                             UseOtherLangDef))
        LangDict[Name] = Token
        if UseOtherLangDef != '':
            OtherLangDef = UseOtherLangDef
        else:
            OtherLangDef = Language
        for LangName in self.LanguageDef:
            #
            # New STRING token will be added into all language string lists.
            # so that the unique STRING identifier is reserved for all languages in the package list.
            #
            if LangName[0] != Language:
                OtherList = self.OrderedStringList[LangName[0]]
                OtherList.append(StringDefClassObject(Name,
                                                      '',
                                                      Referenced,
                                                      Token,
                                                      OtherLangDef))
                self.OrderedStringDict[LangName[0]][Name] = len(OtherList) - 1
    else:
        #
        # Work out where list.insert() will really place the item, so the
        # index recorded in the dictionary is valid even for a negative or
        # too large Index.
        #
        Position = Index
        if Position < 0:
            Position = max(Position + len(LangList), 0)
        elif Position > len(LangList):
            Position = len(LangList)
        LangList.insert(Position, StringDefClassObject(Name,
                                                       Value,
                                                       Referenced,
                                                       Token,
                                                       UseOtherLangDef))
        #
        # Every item at or after Position moved one slot to the right, so the
        # indices stored for them must follow or name lookups would return
        # the wrong item.
        #
        for Key in LangDict:
            if LangDict[Key] >= Position:
                LangDict[Key] = LangDict[Key] + 1
        LangDict[Name] = Position
935
936 #
937 # Set the string as referenced
938 #
def SetStringReferenced(self, Name):
    """Mark the string token called Name as referenced.

    String tokens are added in the same order in all language string lists,
    so only the status of the token in the first language string list is
    updated here.  Nothing happens when no language has been defined yet or
    when Name is unknown in the first language.
    """
    #
    # Without any language definition there is no first language to update.
    #
    if len(self.LanguageDef) == 0:
        return

    Lang = self.LanguageDef[0][0]
    if Name in self.OrderedStringDict[Lang]:
        ItemIndexInList = self.OrderedStringDict[Lang][Name]
        Item = self.OrderedStringList[Lang][ItemIndexInList]
        Item.Referenced = True
949
950 #
951 # Search the string in language definition by Name
952 #
def FindStringValue(self, Name, Lang):
    """Return the string item called Name in language Lang.

    The position of the item is taken from self.OrderedStringDict[Lang] and
    the item itself from self.OrderedStringList[Lang].  None is returned
    when Name is not defined for Lang.
    """
    NameToIndex = self.OrderedStringDict[Lang]
    if Name not in NameToIndex:
        return None

    Position = NameToIndex[Name]
    return self.OrderedStringList[Lang][Position]
959
960 #
961 # Search the string in language definition by Token
962 #
def FindByToken(self, Token, Lang):
    """Return the first item of language Lang whose Token equals Token.

    self.OrderedStringList[Lang] is scanned in order; None is returned when
    no item carries the requested token.
    """
    Matches = (Entry for Entry in self.OrderedStringList[Lang] if Entry.Token == Token)
    return next(Matches, None)
969
970 #
971 # Re-order strings and re-generate tokens
972 #
def ReToken(self):
    """Re-generate the tokens of all strings in all defined languages.

    The Referenced status of each string in the first defined language
    decides the numbering: referenced strings receive the small tokens
    (starting at 0, in list order) and all remaining strings receive the
    tokens that follow.  The same token is given to the item at the same
    list position in every language, and self.OrderedStringListByToken is
    rebuilt so each language maps token -> item.  Returns None; nothing is
    done when no language is defined.
    """
    if not len(self.LanguageDef):
        return None

    #
    # Start from an empty token lookup table for every defined language.
    #
    LangNames = [LangDef[0] for LangDef in self.LanguageDef]
    for LangName in LangNames:
        self.OrderedStringListByToken[LangName] = {}

    FirstLangItems = self.OrderedStringList[LangNames[0]]
    NextToken = 0

    #
    # First pass: referenced strings get the small tokens and are flagged as
    # referenced in every language.
    #
    for Position, FirstLangItem in enumerate(FirstLangItems):
        if FirstLangItem.Referenced == True:
            for LangName in LangNames:
                LangItem = self.OrderedStringList[LangName][Position]
                LangItem.Referenced = True
                LangItem.Token = NextToken
                self.OrderedStringListByToken[LangName][LangItem.Token] = LangItem
            NextToken += 1

    #
    # Second pass: unreferenced strings continue the numbering after the
    # last referenced token.
    #
    for Position, FirstLangItem in enumerate(FirstLangItems):
        if FirstLangItem.Referenced == False:
            for LangName in LangNames:
                LangItem = self.OrderedStringList[LangName][Position]
                LangItem.Token = NextToken
                self.OrderedStringListByToken[LangName][LangItem.Token] = LangItem
            NextToken += 1
1013
1014 #
1015 # Show the instance itself
1016 #
def ShowMe(self):
    """Print the language definitions and every stored string.

    self.LanguageDef is printed first; then, for each key of
    self.OrderedStringList, the key followed by str() of each of its members.
    """
    print(self.LanguageDef)
    for LangName in self.OrderedStringList:
        print(LangName)
        for Entry in self.OrderedStringList[LangName]:
            print(str(Entry))
1024
1025 #
1026 # Read content from '!include' UNI file
1027 #
def ReadIncludeUNIfile(self, FilaPath):
    """Read the lines of a UNI file named by an '!include' statement.

    @param FilaPath  Path of the UNI file to read.

    @return List of decoded lines (line endings kept) as produced by
            readlines().

    A missing path, or a path that is not a regular file, is reported
    through EdkLogger.Error with ToolError.FILE_NOT_FOUND.  The file is
    decoded as 'utf_16' first; when that fails with UnicodeError it is read
    again as 'utf_16_le'.  Any other failure of the first attempt is
    reported with ToolError.FILE_OPEN_FAILURE.
    NOTE(review): presumably EdkLogger.Error raises and does not return -
    confirm against Logger.Log.
    """
    #
    # isfile() is already False for a path that does not exist.
    #
    if not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)
    #
    # Use 'with' so the file handle is closed even when decoding fails.
    #
    try:
        with codecs.open(FilaPath, mode='rb', encoding='utf_16') as Reader:
            return Reader.readlines()
    except UnicodeError:
        with codecs.open(FilaPath, mode='rb', encoding='utf_16_le') as Reader:
            return Reader.readlines()
    except Exception:
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
1043