]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/UPT/Library/UniClassObject.py
BaseTools/Upt: Add a BOM check for UNI file and fix some help message error
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / UniClassObject.py
1 ## @file
2 # Collect all defined strings in multiple uni files.
3 #
4 # Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14 """
15 Collect all defined strings in multiple uni files
16 """
17
18 ##
19 # Import Modules
20 #
21 import os, codecs, re
22 import distutils.util
23 from Logger import ToolError
24 from Logger import Log as EdkLogger
25 from Logger import StringTable as ST
26 from Library.String import GetLineNo
27 from Library.Misc import PathClass
28 from Library.Misc import GetCharIndexOutStr
29 from Library import DataType as DT
30 from Library.ParserValidate import CheckUTF16FileHeader
31
32 ##
33 # Static definitions
34 #
# Escape sequences as they are spelled inside the text of a .uni file.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Code points substituted for the \narrow, \wide and \nbr escapes above when
# a line is pre-processed.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'

# Matches a whole '!include <file>' line and captures the file name.
# Written as a raw string so that \s and \S reach the regex engine untouched
# instead of relying on Python passing unknown string escapes through.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
51
# Maps three-letter language codes to their two-letter equivalents.
# The table is kept as whitespace separated 'xxx:yy' pairs and turned into a
# dictionary once at import time.
gLANG_CONV_TABLE = dict(Pair.split(':') for Pair in """
    eng:en fra:fr
    aar:aa abk:ab ave:ae afr:af aka:ak amh:am
    arg:an ara:ar asm:as ava:av aym:ay aze:az
    bak:ba bel:be bul:bg bih:bh bis:bi bam:bm
    ben:bn bod:bo bre:br bos:bs cat:ca che:ce
    cha:ch cos:co cre:cr ces:cs chu:cu chv:cv
    cym:cy dan:da deu:de div:dv dzo:dz ewe:ee
    ell:el epo:eo spa:es est:et eus:eu fas:fa
    ful:ff fin:fi fij:fj fao:fo fry:fy gle:ga
    gla:gd glg:gl grn:gn guj:gu glv:gv hau:ha
    heb:he hin:hi hmo:ho hrv:hr hat:ht hun:hu
    hye:hy her:hz ina:ia ind:id ile:ie ibo:ig
    iii:ii ipk:ik ido:io isl:is ita:it iku:iu
    jpn:ja jav:jv kat:ka kon:kg kik:ki kua:kj
    kaz:kk kal:kl khm:km kan:kn kor:ko kau:kr
    kas:ks kur:ku kom:kv cor:kw kir:ky lat:la
    ltz:lb lug:lg lim:li lin:ln lao:lo lit:lt
    lub:lu lav:lv mlg:mg mah:mh mri:mi mkd:mk
    mal:ml mon:mn mar:mr msa:ms mlt:mt mya:my
    nau:na nob:nb nde:nd nep:ne ndo:ng nld:nl
    nno:nn nor:no nbl:nr nav:nv nya:ny oci:oc
    oji:oj orm:om ori:or oss:os pan:pa pli:pi
    pol:pl pus:ps por:pt que:qu roh:rm run:rn
    ron:ro rus:ru kin:rw san:sa srd:sc snd:sd
    sme:se sag:sg sin:si slk:sk slv:sl smo:sm
    sna:sn som:so sqi:sq srp:sr ssw:ss sot:st
    sun:su swe:sv swa:sw tam:ta tel:te tgk:tg
    tha:th tir:ti tuk:tk tgl:tl tsn:tn ton:to
    tur:tr tso:ts tat:tt twi:tw tah:ty uig:ug
    ukr:uk urd:ur uzb:uz ven:ve vie:vi vol:vo
    wln:wa wol:wo xho:xh yid:yi yor:yo zha:za
    zho:zh zul:zu
""".split())
84
## Convert a python unicode string to a normal string
#
# Returns the repr() text of the input without its surrounding quotes and
# without the string-type prefix (u/U/b/B) that repr() may put in front.
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni: The python unicode string
#
# @retval: The formatted normal string
#
def UniToStr(Uni):
    Text = repr(Uni)
    # repr() only emits a prefix for some string types (u'...' for Python 2
    # unicode objects); skip it when present instead of blindly dropping two
    # characters, which would eat the first real character otherwise.
    if Text[:1] in ('u', 'U', 'b', 'B'):
        Text = Text[1:]
    return Text[1:-1]
96
## Convert a unicode string to a Hex list
#
# Every character contributes two entries: the low byte of its code point
# followed by the high byte, each formatted as '0xNN'.
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
#
# @param Uni: The python unicode string
#
# @retval List: The formatted hex list
#
def UniToHexList(Uni):
    HexList = []
    for Char in Uni:
        Digits = '%04X' % ord(Char)
        # Low byte first, then high byte.
        HexList.extend(['0x' + Digits[2:4], '0x' + Digits[:2]])
    return HexList
113
## Convert special unicode characters
#
# Replace the copyright, registered and trademark signs with the ASCII
# spellings (c), (r) and (tm).
#
# @param Uni: The python unicode string
#
# @retval NewUni: The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    Converted = Uni
    for Symbol, Ascii in ((u'\u00A9', '(c)'), (u'\u00AE', '(r)'), (u'\u2122', '(tm)')):
        Converted = Converted.replace(Symbol, Ascii)
    return Converted
128
## GetLanguageCode1766
#
# Return the language code read from a .UNI file unchanged.
#
# This function used to carry an RFC 4646 -> RFC 1766 conversion behind an
# unconditional 'return LangName', so that code could never run. The
# unreachable part has been removed; the effective behavior (pass-through,
# no validation) is what callers have been getting all along.
#
# @param LangName: Language codes read from .UNI file
# @param File:     Accepted for interface compatibility; not used
#
# @retval LangName: The language code exactly as it was passed in
#
def GetLanguageCode1766(LangName, File=None):
    return LangName
175
## GetLanguageCode
#
# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# Any code that is not accepted is reported through EdkLogger.Error.
#
# @param LangName:         Language codes read from .UNI file
# @param IsCompatibleMode: True to expect a three-letter RFC 1766 code
# @param File:             Passed on to EdkLogger.Error as the file being parsed
#
# @retval LangName: Valid language code in RFC 4646 format or None
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            # Prefer the two-letter equivalent when the table has one.
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)

    # Private-use codes ('x-...') are accepted as they are. Slices are used
    # instead of indexing so that an empty or one-character code falls
    # through to the error report below rather than raising IndexError.
    if LangName[:1] in ('x', 'X') and LangName[1:2] == '-':
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        # A three-letter code is only accepted when it has no two-letter form.
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table is queried with the whole code, which can
        # never be a three-letter key here, so this lookup always yields None.
        # Possibly LangName[0:3] was intended - confirm before changing, as
        # that would start rejecting codes such as 'eng-US'.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
220
## FormatUniEntry
#
# Format the entry of one string token for a Uni file.
#
# The token name is padded to a fixed column (40, or one more than the name
# length for longer names). Each language adds a '#language <code>' part
# followed by one quoted line per non-blank line of its value; all quoted
# lines but the last end in a literal \n. Pairs with an empty value or with
# the language DT.TAB_LANGUAGE_EN_X are skipped.
#
# @param StrTokenName      StrTokenName.
# @param TokenValueList    A list of (Lang, Value) pairs to be processed.
# @param ContainerFile     ContainerFile, handed to GetLanguageCode.
#
# @return formatted entry, or '' when nothing was emitted
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    NameWidth = 40
    if len(StrTokenName) > NameWidth:
        NameWidth = len(StrTokenName) + 1
    # Column where the quoted value text starts.
    ValueIndent = NameWidth + len('#language en-US ')

    Entries = []
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        PrimaryTag = Lang.split('-')[0]
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        elif len(PrimaryTag) == 3:
            Lang = GetLanguageCode(PrimaryTag, True, ContainerFile)
        else:
            Lang = GetLanguageCode(Lang, False, ContainerFile)

        QuotedLines = [' ' * ValueIndent + '\"%s\\n\"' % Part.strip() + '\r\n'
                       for Part in Value.split('\n') if Part.strip()]
        ValueText = ''.join(QuotedLines)
        # The first line shares the row with '#language', so its indent is
        # cut off; the trailing \n escape of the last line is replaced by the
        # closing quote.
        ValueText = ValueText[ValueIndent:ValueText.rfind('\\n')] + '\"' + '\r\n'
        Entries.append(' ' * NameWidth + '#language %-5s ' % Lang + ValueText)

    SubContent = ''.join(Entries)
    if SubContent:
        # The very first row starts with the padded token name instead of blanks.
        SubContent = StrTokenName + ' ' * (NameWidth - len(StrTokenName)) + SubContent[NameWidth:]
    return SubContent
258
259
## StringDefClassObject
#
# A structure for language definition: one string token together with its
# value and the byte lists produced by UniToHexList for name and value.
#
class StringDefClassObject(object):
    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        # Defaults used for every argument that was left as None.
        self.StringName = ''
        self.StringNameByteList = []
        self.StringValue = ''
        self.StringValueByteList = ''
        self.Length = 0
        self.Token = 0

        if Name is not None:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)
        if Value is not None:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(self.StringValue)
            self.Length = len(self.StringValueByteList)
        if Token is not None:
            self.Token = Token

    def __str__(self):
        Fields = (self.StringName, self.Token, self.Referenced, self.StringValue, self.UseOtherLangDef)
        return ' '.join([repr(Field) for Field in Fields])

    # Append Value to the current string value (separated by CR LF when a
    # value is already present) and refresh the byte list and length.
    # A Value of None leaves the object untouched.
    def UpdateValue(self, Value = None):
        if Value is None:
            return
        if self.StringValue:
            self.StringValue = self.StringValue + '\r\n' + Value
        else:
            self.StringValue = Value
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
300
301 ## UniFileClassObject
302 #
303 # A structure for .uni file definition
304 #
305 class UniFileClassObject(object):
306 def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
307 self.FileList = FileList
308 self.File = None
309 self.IncFileList = FileList
310 self.UniFileHeader = ''
311 self.Token = 2
312 self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
313 self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
314 self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
315 self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
316 self.IsCompatibleMode = IsCompatibleMode
317 if not IncludePathList:
318 self.IncludePathList = []
319 else:
320 self.IncludePathList = IncludePathList
321 if len(self.FileList) > 0:
322 self.LoadUniFiles(FileList)
323
#
# Get Language definition
#
# Parses a '#langdef <code> "<printable name>"' line, records the language in
# self.LanguageDef and adds the two built-in strings $LANGUAGE_NAME and
# $PRINTABLE_LANGUAGE_NAME for it. For a language that was not defined before
# and is not the first one, every string already known for the first language
# (from index 2 on) is mirrored with an empty value so that token numbers stay
# aligned across languages.
#
# @param File:  Object exposing .Path; only read again to locate the line
#               number of a malformed definition
# @param Line:  The text of the language definition
#
# @retval True on success; False when a malformed definition was reported and
#         EdkLogger.Error returned instead of raising
#
def GetLangDef(self, File, Line):
    Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
    if len(Lang) != 3:
        FileIn = None
        try:
            try:
                with codecs.open(File.Path, mode='rb', encoding='utf_16') as Stream:
                    FileIn = Stream.read()
            except UnicodeError:
                # No usable BOM: retry as little-endian UTF-16.
                with codecs.open(File.Path, mode='rb', encoding='utf_16_le') as Stream:
                    FileIn = Stream.read()
        except Exception as Xstr:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_OPEN_FAILURE,
                            "File read failure: %s" % str(Xstr),
                            ExtraData=File)
        if FileIn is None:
            return False
        LineNo = GetLineNo(FileIn, Line, False)
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        "Wrong language definition",
                        ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                        File = File, Line = LineNo)
        # Without a valid definition there is no language name to register.
        return False

    LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
    LangPrintName = Lang[2]

    IsLangInDef = any(Item[0] == LangName for Item in self.LanguageDef)
    if not IsLangInDef:
        self.LanguageDef.append([LangName, LangPrintName])

    #
    # Add language string
    #
    self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
    self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

    if not IsLangInDef:
        #
        # The found STRING tokens will be added into new language string list
        # so that the unique STRING identifier is reserved for all languages in the package list.
        #
        FirstLangName = self.LanguageDef[0][0]
        if LangName != FirstLangName:
            for Item in self.OrderedStringList[FirstLangName][2:]:
                if Item.UseOtherLangDef != '':
                    OtherLang = Item.UseOtherLangDef
                else:
                    OtherLang = FirstLangName
                self.OrderedStringList[LangName].append(StringDefClassObject(Item.StringName,
                                                                             '',
                                                                             Item.Referenced,
                                                                             Item.Token,
                                                                             OtherLang))
                self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
    return True
384
#
# Get String name and value
#
# Item is a complete string definition: '#string <NAME>' followed by any
# number of '#language <code> "<value>"' parts. The name must consist of
# A-Z, 0-9 and '_' only. Each language part is registered through
# AddStringToList with the text found between its first and last double quote.
#
def GetStringObject(self, Item):
    Name = Item.split()[1]
    # Check the string name is the upper character
    if Name != '':
        Matched = re.match('[A-Z0-9_]+', Name, re.UNICODE)
        if Matched is None or Matched.end(0) != len(Name):
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))

    # Everything in front of the first '#language ' is the '#string' part.
    for LangPart in Item.split(u'#language ')[1:]:
        Language = LangPart.split()[0]
        ValueStart = LangPart.find(u'\"') + len(u'\"')
        ValueEnd = LangPart.rfind(u'\"')
        Value = LangPart[ValueStart:ValueEnd]
        Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
411
#
# Get include file list and load them
#
# Takes the text between '!include ' and the next blank, drops its first and
# last character (the surrounding quotes) and loads the result.
# Dir is accepted but not used.
#
# NOTE(review): the extracted name is a plain string, while the loading code
# reads File.Path from its argument - confirm this method is still in use.
#
def GetIncludeFile(self, Item, Dir = None):
    Keyword = u'!include '
    NameStart = Item.find(Keyword) + len(Keyword)
    NameEnd = Item.find(u' ', len(Keyword))
    QuotedName = Item[NameStart:NameEnd]
    self.LoadUniFile(QuotedName[1:-1])
420
#
# Pre-process before parse .uni file
#
# Validates and normalizes one .uni file in three passes:
#   1. collect the leading comment header into self.UniFileHeader (only when
#      no header has been collected yet);
#   2. drop comments, translate escape sequences and enforce the blank-line
#      rules between '#string', '#language' and quoted string lines;
#   3. split every definition so that '#string <name>', '#language <code>'
#      and each quoted string sit on lines of their own, checking token names
#      and the Abstract/Description ordering on the way.
# Every violation is reported through EdkLogger.Error.
#
# @param File:           Object exposing .Path (and .Dir for include lookup)
# @param IsIncludeFile:  True when File is processed as an include file
#
# @retval NewLines:      The normalized lines
#
def PreProcess(self, File, IsIncludeFile=False):
    if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=File.Path)

    #
    # Check file header of the Uni file
    #
    if not CheckUTF16FileHeader(File.Path):
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)

    FileIn = []
    try:
        try:
            with codecs.open(File.Path, mode='rb', encoding='utf_16') as Stream:
                FileIn = Stream.readlines()
        except UnicodeError:
            # No usable BOM: retry as little-endian UTF-16.
            with codecs.open(File.Path, mode='rb', encoding='utf_16_le') as Stream:
                FileIn = Stream.readlines()
    except Exception:
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)

    #
    # get the file header
    #
    Lines = []
    HeaderStart = False
    HeaderEnd = False
    FirstGenHeader = not self.UniFileHeader
    for Line in FileIn:
        Line = Line.strip()
        if Line == u'':
            continue
        IsComment = Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT)
        if IsComment and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
           and not HeaderEnd and not HeaderStart:
            HeaderStart = True
        if not IsComment and HeaderStart and not HeaderEnd:
            HeaderEnd = True
        if IsComment and HeaderStart and not HeaderEnd and FirstGenHeader:
            self.UniFileHeader += Line + '\r\n'
            continue

    #
    # Use unique identifier
    #
    FindFlag = -1
    LineCount = 0
    MultiLineFeedExits = False
    #
    # 0: initial value
    # 1: single String entry exist
    # 2: line feed exist under the some single String entry
    #
    StringEntryExistsFlag = 0
    # The loop only counts iterations; the line to handle is always taken
    # from FileIn[LineCount] because a merge below rewinds LineCount by one.
    for _ in range(len(FileIn)):
        Line = FileIn[LineCount]
        LineCount += 1
        Line = Line.strip()
        #
        # Ignore comment line and empty line
        #
        if Line == u'' or Line.startswith(u'//'):
            #
            # Change the single line String entry flag status
            #
            if StringEntryExistsFlag == 1:
                StringEntryExistsFlag = 2
            #
            # If the '#string' line and the '#language' line are not in the same line,
            # there should be only one line feed character between them
            #
            if MultiLineFeedExits:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            continue

        MultiLineFeedExits = False
        #
        # Process comment embedded in string define lines
        #
        FindFlag = Line.find(u'//')
        if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
            Line = Line.replace(Line[FindFlag:], u' ')
            # Look ahead only when a following line exists; on the last line
            # this index used to run past the end of the list.
            if LineCount < len(FileIn) and FileIn[LineCount].strip().startswith('#language'):
                # Merge the '#language' line into this one, then close the
                # gap by shifting the remaining lines up and revisit the
                # merged line on the next iteration.
                Line = Line + FileIn[LineCount]
                FileIn[LineCount-1] = Line
                FileIn[LineCount] = '\r\n'
                LineCount -= 1
                for Index in range(LineCount + 1, len(FileIn) - 1):
                    FileIn[Index] = FileIn[Index + 1]
                continue
        CommIndex = GetCharIndexOutStr(u'/', Line)
        if CommIndex > -1:
            if (len(Line) - 1) > CommIndex:
                if Line[CommIndex+1] == u'/':
                    Line = Line[:CommIndex].strip()
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

        Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
        Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
        Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

        # An escaped backslash is parked in \u0006 while the other escapes
        # are translated, and restored as a single backslash afterwards.
        Line = Line.replace(u'\\\\', u'\u0006')
        Line = Line.replace(u'\\r\\n', CR + LF)
        Line = Line.replace(u'\\n', CR + LF)
        Line = Line.replace(u'\\r', CR)
        Line = Line.replace(u'\\t', u'\t')
        Line = Line.replace(u'''\"''', u'''"''')
        Line = Line.replace(u'\t', u' ')
        Line = Line.replace(u'\u0006', u'\\')

        # IncList = gINCLUDE_PATTERN.findall(Line)
        # Include handling is switched off by the empty list below.
        IncList = []
        if len(IncList) == 1:
            for Dir in [File.Dir] + self.IncludePathList:
                IncFile = PathClass(str(IncList[0]), Dir)
                self.IncFileList.append(IncFile)
                if os.path.isfile(IncFile.Path):
                    Lines.extend(self.PreProcess(IncFile, True))
                    break
            else:
                EdkLogger.Error("Unicode File Parser",
                                ToolError.FILE_NOT_FOUND,
                                Message="Cannot find include file",
                                ExtraData=str(IncList[0]))
            continue

        #
        # Between Name entry and Language entry can not contain line feed
        #
        if Line.startswith(u'#string') and Line.find(u'#language') == -1:
            MultiLineFeedExits = True

        if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
            MultiLineFeedExits = True

        #
        # Between Language entry and String entry can not contain line feed
        #
        if Line.startswith(u'#language') and len(Line.split()) == 2:
            MultiLineFeedExits = True

        #
        # Between two String entry, can not contain line feed
        #
        if Line.startswith(u'"'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)

            StringEntryExistsFlag = 1
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                % (LineCount, File.Path))
        elif Line.startswith(u'#language'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
            StringEntryExistsFlag = 0
        else:
            StringEntryExistsFlag = 0

        Lines.append(Line)

    #
    # Convert string def format as below
    #
    #     #string MY_STRING_1
    #     #language eng
    #     "My first English string line 1"
    #     "My first English string line 2"
    #     #string MY_STRING_1
    #     #language spa
    #     "Mi segunda secuencia 1"
    #     "Mi segunda secuencia 2"
    #

    if not IsIncludeFile and not Lines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
                        ExtraData=File.Path)

    NewLines = []
    StrName = u''
    ExistStrNameList = []
    # Token names that may be defined only once per file.
    UniqueStrNames = [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                      DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                      DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                      DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]

    # Remember a token name on first sight; report a repeated definition of
    # one of the names in UniqueStrNames.
    def _RecordStrName(TokenName):
        if TokenName not in ExistStrNameList:
            ExistStrNameList.append(TokenName.strip())
        elif TokenName in UniqueStrNames:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % TokenName, \
                            ExtraData=File.Path)

    # Emit the pending '#string' line; for an include file only when that
    # exact line has not been emitted yet.
    def _AppendStrName(Text, Key):
        if not IsIncludeFile or Key not in NewLines:
            NewLines.append(Text)

    for Line in Lines:
        if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
            StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
            if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
                StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
               (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
                                ExtraData=File.Path)

        if Line.count(u'#language') > 1:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
                            ExtraData=File.Path)

        Fields = Line.split()
        LangPos = Line.find(u'#language')
        QuotePos = Line.find(u'"')

        if Line.startswith(u'//'):
            continue
        elif Line.startswith(u'#langdef'):
            if len(Fields) == 2:
                NewLines.append(Line)
                continue
            elif len(Fields) > 2 and QuotePos > 0:
                NewLines.append(Line[:QuotePos].strip())
                NewLines.append(Line[QuotePos:])
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#string'):
            if len(Fields) == 2:
                # '#string <name>' alone: the language follows on a later line.
                StrName = Line
                if StrName:
                    _RecordStrName(StrName.split()[1])
                continue
            elif len(Fields) == 4 and LangPos > 0:
                # '#string <name> #language <code>' without a value.
                if Line[LangPos-1] != ' ' or \
                   Line[LangPos+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if QuotePos > 0:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Fields[0] + u' ' + Fields[1]
                if StrName:
                    _RecordStrName(StrName.split()[1])
                _AppendStrName((Line[:LangPos]).strip(), StrName)
                NewLines.append((Line[LangPos:]).strip())
            elif len(Fields) > 4 and LangPos > 0 and QuotePos > 0:
                # '#string <name> #language <code> "<value>"' on one line.
                if Line[LangPos-1] != u' ' or \
                   Line[LangPos+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line[QuotePos-1] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Fields[0] + u' ' + Fields[1]
                if StrName:
                    _RecordStrName(StrName.split()[1])
                _AppendStrName((Line[:LangPos]).strip(), StrName)
                NewLines.append((Line[LangPos:QuotePos]).strip())
                NewLines.append((Line[QuotePos:]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#language'):
            if len(Fields) == 2:
                _AppendStrName(StrName, StrName)
                NewLines.append(Line)
            elif len(Fields) > 2 and QuotePos > 0:
                _AppendStrName(StrName, StrName)
                NewLines.append((Line[:QuotePos]).strip())
                NewLines.append((Line[QuotePos:]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'"'):
            if u'#string' in Line or u'#language' in Line:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            NewLines.append(Line)
        else:
            # The offending line goes into the error report; it used to be
            # written to stdout by a leftover debug print.
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=u'Unrecognized line: %s' % Line,
                            ExtraData=File.Path)

    if StrName and not StrName.split()[1].startswith(u'STR_'):
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                        ExtraData=File.Path)

    if StrName and not NewLines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
                        ExtraData=File.Path)

    #
    # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
    # should be Abstract, Description, BinaryAbstract, BinaryDescription
    AbstractPosition = -1
    DescriptionPosition = -1
    BinaryAbstractPosition = -1
    BinaryDescriptionPosition = -1
    # Names in ExistStrNameList are unique, so the enumerate position equals
    # what list.index() would return.
    for Position, ExistName in enumerate(ExistStrNameList):
        if DT.TAB_HEADER_ABSTRACT.upper() in ExistName:
            if 'BINARY' in ExistName:
                BinaryAbstractPosition = Position
            else:
                AbstractPosition = Position
        if DT.TAB_HEADER_DESCRIPTION.upper() in ExistName:
            if 'BINARY' in ExistName:
                BinaryDescriptionPosition = Position
            else:
                DescriptionPosition = Position

    OrderList = sorted([AbstractPosition, DescriptionPosition])
    BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
    Min = OrderList[0]
    Max = OrderList[1]
    BinaryMin = BinaryOrderList[0]
    BinaryMax = BinaryOrderList[1]
    if BinaryDescriptionPosition > -1:
        if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
               BinaryMax > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)
    elif BinaryAbstractPosition > -1:
        if not(BinaryAbstractPosition > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if DescriptionPosition > -1:
        if not(DescriptionPosition == Max and AbstractPosition == Min and \
               DescriptionPosition > AbstractPosition):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if not self.UniFileHeader:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        Message = ST.ERR_NO_SOURCE_HEADER,
                        ExtraData=File.Path)

    return NewLines
801
#
# Load a .uni file
#
# Runs PreProcess on File and walks the normalized lines: a '#langdef' line is
# handed to GetLangDef together with the line after it, and every
# '#string' / '#language' pair followed by quoted lines is registered through
# AddStringToList.
#
# @param File:  Object describing the .uni file; remembered in self.File
#
def LoadUniFile(self, File = None):
    if File is None:
        # There is no path to report here; the old code read File.Path on
        # None and failed with AttributeError before the error was logged.
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given')
        return

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)
    LineTotal = len(Lines)

    #
    # Get Unicode Information
    #
    for IndexI in range(LineTotal):
        Line = Lines[IndexI]
        # Never reuse the look-ahead of an earlier iteration: a missing next
        # line is an empty string.
        if (IndexI + 1) < LineTotal:
            SecondLine = Lines[IndexI + 1]
        else:
            SecondLine = u''

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        # A string definition needs at least '#string', '#language' and one
        # more line; with fewer lines left nothing can match below.
        if (IndexI + 2) >= LineTotal:
            continue
        ThirdLine = Lines[IndexI + 2]

        Name = ''
        Language = ''
        Value = ''
        CombineToken = False
        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
           SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
           ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
            if Line.find('"') > 0 or SecondLine.find('"') > 0:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                                ExtraData=File.Path)

            Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
            Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
            for IndexJ in range(IndexI + 2, LineTotal):
                ValueLine = Lines[IndexJ]
                Stripped = ValueLine.strip()
                if ValueLine.find(u'#string ') < 0 and ValueLine.find(u'#language ') < 0 and \
                   Stripped.startswith(u'"') and Stripped.endswith(u'"'):
                    # A blank right before the closing quote joins this line
                    # with the next one instead of starting a new line.
                    # Sliced so that a lone '"' cannot raise IndexError.
                    if ValueLine[-2:-1] == ' ':
                        CombineToken = True
                    if CombineToken:
                        if Stripped[1:-1].strip():
                            Value = Value + Stripped[1:-1].rstrip() + ' '
                        else:
                            Value = Value + Stripped[1:-1]
                        CombineToken = False
                    else:
                        Value = Value + Stripped[1:-1] + '\r\n'
                else:
                    break
            if Value.endswith('\r\n'):
                Value = Value[: Value.rfind('\r\n')]
            Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
            self.AddStringToList(Name, Language, Value)
            continue
883
884 #
885 # Load multiple .uni files
886 #
def LoadUniFiles(self, FileList):
    """Load every UNI file listed in FileList.

    Each entry must expose a ``Path`` attribute.  Entries whose path, with
    surrounding whitespace stripped, ends in ``.uni``, ``.UNI`` or ``.Uni``
    are passed in order to ``self.LoadUniFile``; all others are skipped.
    """
    if len(FileList) == 0:
        return

    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for Candidate in FileList:
        if Candidate.Path.strip().endswith(UniSuffixes):
            self.LoadUniFile(Candidate)
893
894 #
895 # Add a string to list
896 #
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Add a string definition, or update the value of an existing one.

    Name            -- string identifier
    Language        -- language the value belongs to; its list and lookup
                       dictionary are created on first use
    Value           -- string value; None leaves an existing entry untouched
    Token           -- accepted for compatibility; a new entry always gets
                       the current length of the language's list as token
    Referenced      -- initial referenced flag of a new entry
    UseOtherLangDef -- stored on a new entry; when empty, the placeholder
                       entries created for other languages point at Language
    Index           -- -1 appends the entry and mirrors it into every other
                       defined language; any other value inserts the entry
                       at that position in Language only

    When Name already exists for Language, only its value is updated (and
    its UseOtherLangDef cleared); nothing new is added.
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    LangStrings = self.OrderedStringList[Language]
    LangIndexByName = self.OrderedStringDict[Language]

    if Name in LangIndexByName:
        # Known string: refresh the value only when one was supplied.
        if Value is not None:
            Item = LangStrings[LangIndexByName[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(LangStrings)
    if Index != -1:
        # NOTE(review): inserting shifts the entries after Index, but their
        # positions recorded in OrderedStringDict are not adjusted here.
        LangStrings.insert(Index, StringDefClassObject(Name,
                                                       Value,
                                                       Referenced,
                                                       Token,
                                                       UseOtherLangDef))
        LangIndexByName[Name] = Index
        return

    LangStrings.append(StringDefClassObject(Name,
                                            Value,
                                            Referenced,
                                            Token,
                                            UseOtherLangDef))
    LangIndexByName[Name] = Token

    #
    # New STRING token will be added into all language string lists.
    # so that the unique STRING identifier is reserved for all languages in the package list.
    #
    if UseOtherLangDef != '':
        OtherLangDef = UseOtherLangDef
    else:
        OtherLangDef = Language
    for LangName in self.LanguageDef:
        if LangName[0] == Language:
            continue
        OtherStrings = self.OrderedStringList[LangName[0]]
        OtherStrings.append(StringDefClassObject(Name,
                                                 '',
                                                 Referenced,
                                                 Token,
                                                 OtherLangDef))
        self.OrderedStringDict[LangName[0]][Name] = len(OtherStrings) - 1
947
948 #
949 # Set the string as referenced
950 #
def SetStringReferenced(self, Name):
    """Mark the string called Name as referenced.

    Only the entry in the first defined language is flagged: string tokens
    are added in the same order to every language's list, so the first
    language's flag stands for all of them.  Nothing happens when no
    language has been defined or when Name is unknown in the first language.
    """
    if not self.LanguageDef:
        # No language defined yet, hence no string list to update.
        return

    Lang = self.LanguageDef[0][0]
    if Name in self.OrderedStringDict[Lang]:
        ItemIndexInList = self.OrderedStringDict[Lang][Name]
        Item = self.OrderedStringList[Lang][ItemIndexInList]
        Item.Referenced = True
961
962 #
963 # Search the string in language definition by Name
964 #
def FindStringValue(self, Name, Lang):
    """Return the string entry called Name in language Lang, or None.

    The position is looked up in ``self.OrderedStringDict[Lang]`` and the
    entry is fetched from ``self.OrderedStringList[Lang]``.
    """
    IndexByName = self.OrderedStringDict[Lang]
    if Name not in IndexByName:
        return None

    return self.OrderedStringList[Lang][IndexByName[Name]]
971
972 #
973 # Search the string in language definition by Token
974 #
def FindByToken(self, Token, Lang):
    """Return the first string entry of language Lang whose Token equals
    the given Token, or None when no entry matches.
    """
    Matches = (Entry for Entry in self.OrderedStringList[Lang] if Entry.Token == Token)
    return next(Matches, None)
981
982 #
983 # Re-order strings and re-generate tokens
984 #
def ReToken(self):
    """Renumber the string tokens of every defined language.

    The referenced flag of each entry in the first defined language decides
    the numbering: referenced strings receive tokens 0..N-1 in list order
    (and are flagged as referenced in every language), unreferenced strings
    receive the tokens that follow.  The entry at the same list position in
    each language gets the same token, and ``self.OrderedStringListByToken``
    is rebuilt as a per-language token -> entry mapping.

    Returns None; does nothing when no language is defined.
    """
    if len(self.LanguageDef) == 0:
        return None

    #
    # All languages are retokened according to the first language's flags.
    #
    LangNames = [LangDef[0] for LangDef in self.LanguageDef]

    # Start from an empty token -> entry table per language to facilitate later lookups by token.
    for LangName in LangNames:
        self.OrderedStringListByToken[LangName] = {}

    FirstLangStrings = self.OrderedStringList[LangNames[0]]

    #
    # Referenced strings get the small tokens.
    #
    RefToken = 0
    for Position, FirstLangItem in enumerate(FirstLangStrings):
        if FirstLangItem.Referenced != True:
            continue
        for LangName in LangNames:
            Peer = self.OrderedStringList[LangName][Position]
            Peer.Referenced = True
            Peer.Token = RefToken
            self.OrderedStringListByToken[LangName][Peer.Token] = Peer
        RefToken += 1

    #
    # Unreferenced strings get the tokens after the referenced ones.
    #
    UnRefToken = 0
    for Position, FirstLangItem in enumerate(FirstLangStrings):
        if FirstLangItem.Referenced != False:
            continue
        for LangName in LangNames:
            Peer = self.OrderedStringList[LangName][Position]
            Peer.Token = RefToken + UnRefToken
            self.OrderedStringListByToken[LangName][Peer.Token] = Peer
        UnRefToken += 1
1025
1026 #
1027 # Show the instance itself
1028 #
def ShowMe(self):
    """Print the language definitions, then every language key of
    ``self.OrderedStringList`` followed by the ``str()`` of each of its
    entries, one item per line.
    """
    # Single-argument print(...) produces the same output whether print is
    # a statement (Python 2) or a function (Python 3).
    print(self.LanguageDef)
    for Item in self.OrderedStringList:
        print(Item)
        for Member in self.OrderedStringList[Item]:
            print(str(Member))
1036
1037 #
1038 # Read content from '!include' UNI file
1039 #
def ReadIncludeUNIfile(self, FilaPath):
    """Read the UNI file at FilaPath and return its lines.

    The file is decoded as UTF-16; if that raises UnicodeError the file is
    read again as UTF-16 little endian.  Lines are returned as produced by
    ``readlines()``.

    A path that is not an existing regular file is reported through
    ``EdkLogger.Error`` with FILE_NOT_FOUND; any other failure of the first
    read attempt is reported with FILE_OPEN_FAILURE.
    """
    # isfile() is already False for a path that does not exist.
    if not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)

    # Bound up front so the return below is valid on every path.
    FileIn = []
    try:
        # 'with' closes the stream even when decoding fails part-way.
        with codecs.open(FilaPath, mode='rb', encoding='utf_16') as Stream:
            FileIn = Stream.readlines()
    except UnicodeError:
        with codecs.open(FilaPath, mode='rb', encoding='utf_16_le') as Stream:
            FileIn = Stream.readlines()
    except Exception:
        # 'Exception' rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a file-open failure.
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
    return FileIn
1055