1fbbf2e498877864cb1a91f3883638b8d8ae33b0
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / UniClassObject.py
1 ## @file
2 # Collect all defined strings in multiple uni files.
3 #
4 # Copyright (c) 2014 - 2017, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14 """
15 Collect all defined strings in multiple uni files
16 """
17
18 ##
19 # Import Modules
20 #
21 import os, codecs, re
22 import distutils.util
23 from Logger import ToolError
24 from Logger import Log as EdkLogger
25 from Logger import StringTable as ST
26 from Library.String import GetLineNo
27 from Library.Misc import PathClass
28 from Library.Misc import GetCharIndexOutStr
29 from Library import DataType as DT
30 from Library.ParserValidate import CheckUTF16FileHeader
31
32 ##
33 # Static definitions
34 #
# Escape sequences as they are written in .uni source text. PreProcess()
# replaces the first three with the single-character markers defined below.
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Single-character values used by the parser: the markers substituted for
# the escape sequences above, plus common control/punctuation characters.
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
# The name is historical; the value is a single backslash.
BACK_SPLASH = u'\\'
49
# Matches a line consisting of an "!include <path>" directive; group 1 is the
# path token. Written as a raw string so "\s" / "\S" reach the regex engine
# without relying on Python passing unknown string escapes through.
gINCLUDE_PATTERN = re.compile(r"^!include[\s]+([\S]+)[\s]*$", re.MULTILINE | re.UNICODE)
51
# Maps three-letter language codes to their two-letter equivalents.
# GetLanguageCode() looks codes up here after lower-casing them.
gLANG_CONV_TABLE = {
    'eng': 'en', 'fra': 'fr',
    'aar': 'aa', 'abk': 'ab', 'ave': 'ae', 'afr': 'af', 'aka': 'ak', 'amh': 'am',
    'arg': 'an', 'ara': 'ar', 'asm': 'as', 'ava': 'av', 'aym': 'ay', 'aze': 'az',
    'bak': 'ba', 'bel': 'be', 'bul': 'bg', 'bih': 'bh', 'bis': 'bi', 'bam': 'bm',
    'ben': 'bn', 'bod': 'bo', 'bre': 'br', 'bos': 'bs', 'cat': 'ca', 'che': 'ce',
    'cha': 'ch', 'cos': 'co', 'cre': 'cr', 'ces': 'cs', 'chu': 'cu', 'chv': 'cv',
    'cym': 'cy', 'dan': 'da', 'deu': 'de', 'div': 'dv', 'dzo': 'dz', 'ewe': 'ee',
    'ell': 'el', 'epo': 'eo', 'spa': 'es', 'est': 'et', 'eus': 'eu', 'fas': 'fa',
    'ful': 'ff', 'fin': 'fi', 'fij': 'fj', 'fao': 'fo', 'fry': 'fy', 'gle': 'ga',
    'gla': 'gd', 'glg': 'gl', 'grn': 'gn', 'guj': 'gu', 'glv': 'gv', 'hau': 'ha',
    'heb': 'he', 'hin': 'hi', 'hmo': 'ho', 'hrv': 'hr', 'hat': 'ht', 'hun': 'hu',
    'hye': 'hy', 'her': 'hz', 'ina': 'ia', 'ind': 'id', 'ile': 'ie', 'ibo': 'ig',
    'iii': 'ii', 'ipk': 'ik', 'ido': 'io', 'isl': 'is', 'ita': 'it', 'iku': 'iu',
    'jpn': 'ja', 'jav': 'jv', 'kat': 'ka', 'kon': 'kg', 'kik': 'ki', 'kua': 'kj',
    'kaz': 'kk', 'kal': 'kl', 'khm': 'km', 'kan': 'kn', 'kor': 'ko', 'kau': 'kr',
    'kas': 'ks', 'kur': 'ku', 'kom': 'kv', 'cor': 'kw', 'kir': 'ky', 'lat': 'la',
    'ltz': 'lb', 'lug': 'lg', 'lim': 'li', 'lin': 'ln', 'lao': 'lo', 'lit': 'lt',
    'lub': 'lu', 'lav': 'lv', 'mlg': 'mg', 'mah': 'mh', 'mri': 'mi', 'mkd': 'mk',
    'mal': 'ml', 'mon': 'mn', 'mar': 'mr', 'msa': 'ms', 'mlt': 'mt', 'mya': 'my',
    'nau': 'na', 'nob': 'nb', 'nde': 'nd', 'nep': 'ne', 'ndo': 'ng', 'nld': 'nl',
    'nno': 'nn', 'nor': 'no', 'nbl': 'nr', 'nav': 'nv', 'nya': 'ny', 'oci': 'oc',
    'oji': 'oj', 'orm': 'om', 'ori': 'or', 'oss': 'os', 'pan': 'pa', 'pli': 'pi',
    'pol': 'pl', 'pus': 'ps', 'por': 'pt', 'que': 'qu', 'roh': 'rm', 'run': 'rn',
    'ron': 'ro', 'rus': 'ru', 'kin': 'rw', 'san': 'sa', 'srd': 'sc', 'snd': 'sd',
    'sme': 'se', 'sag': 'sg', 'sin': 'si', 'slk': 'sk', 'slv': 'sl', 'smo': 'sm',
    'sna': 'sn', 'som': 'so', 'sqi': 'sq', 'srp': 'sr', 'ssw': 'ss', 'sot': 'st',
    'sun': 'su', 'swe': 'sv', 'swa': 'sw', 'tam': 'ta', 'tel': 'te', 'tgk': 'tg',
    'tha': 'th', 'tir': 'ti', 'tuk': 'tk', 'tgl': 'tl', 'tsn': 'tn', 'ton': 'to',
    'tur': 'tr', 'tso': 'ts', 'tat': 'tt', 'twi': 'tw', 'tah': 'ty', 'uig': 'ug',
    'ukr': 'uk', 'urd': 'ur', 'uzb': 'uz', 'ven': 've', 'vie': 'vi', 'vol': 'vo',
    'wln': 'wa', 'wol': 'wo', 'xho': 'xh', 'yid': 'yi', 'yor': 'yo', 'zha': 'za',
    'zho': 'zh', 'zul': 'zu',
}
84
## Convert a python unicode string to a normal string
#
# Returns repr() of the string with the surrounding quotes removed, and with
# the leading 'u' prefix removed when repr() produced one.
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni: The python unicode string
#
# @retval: The formatted normal string
#
def UniToStr(Uni):
    Text = repr(Uni)
    # repr() only carries a 'u' prefix for Python 2 unicode objects. Dropping
    # two characters unconditionally would eat the first real character
    # whenever the prefix is absent.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    # Strip the opening and closing quote characters.
    return Text[1:-1]
96
## Convert a unicode string to a Hex list
#
# Each character is formatted as four upper-case hex digits and emitted as
# two '0xNN' items, low byte first.
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
#
# @param Uni: The python unicode string
#
# @retval List: The formatted hex list
#
def UniToHexList(Uni):
    HexBytes = []
    for Char in Uni:
        Digits = '%04X' % ord(Char)
        # Low-order pair first, then the high-order pair.
        HexBytes.extend(['0x' + Digits[2:4], '0x' + Digits[0:2]])
    return HexBytes
113
## Convert special unicode characters
#
# Replace the copyright, registered and trademark signs with the ASCII
# spellings (c), (r) and (tm).
#
# @param Uni: The python unicode string
#
# @retval NewUni: The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    Replacements = (
        (u'\u00A9', '(c)'),
        (u'\u00AE', '(r)'),
        (u'\u2122', '(tm)'),
    )
    NewUni = Uni
    for Symbol, AsciiForm in Replacements:
        NewUni = NewUni.replace(Symbol, AsciiForm)
    return NewUni
128
## GetLanguageCode1766
#
# Return the language code read from the .UNI file unchanged.
#
# NOTE: this function has always returned LangName on its first statement, so
# the RFC 4646 -> RFC 1766 conversion and validation that used to follow was
# never executed. That unreachable code has been removed; the observable
# behaviour is unchanged. Restore it from version control if the conversion
# is ever meant to be enabled.
#
# @param LangName: Language code read from .UNI file
# @param File:     Unused; kept so existing callers keep working
#
# @retval LangName: The language code exactly as passed in
#
def GetLanguageCode1766(LangName, File=None):
    return LangName
175
## GetLanguageCode
#
# Check the language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# @param LangName:         Language code read from .UNI file
# @param IsCompatibleMode: True to treat LangName as a three-letter RFC 1766 code
# @param File:             File reported to EdkLogger.Error when the code is rejected
#
# @retval LangName: Valid language code in RFC 4646 format; invalid codes are
#                   reported through EdkLogger.Error
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            # Known three-letter codes map to their two-letter form; unknown
            # ones are passed through untouched.
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)
    # Codes starting with 'x-' / 'X-' are accepted as-is. The length guard
    # keeps empty or one-character input from raising IndexError so that it
    # reaches the FORMAT_INVALID report at the bottom instead.
    if length >= 2 and LangName[0] in ('X', 'x') and LangName[1] == '-':
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        # A three-letter code is only valid here when it has no two-letter equivalent.
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table is queried with the whole name rather than
        # its three-letter prefix, so for names this long the lookup cannot
        # match a three-character key -- confirm whether LangName[0:3] was intended.
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)
220
## FormatUniEntry
#
# Format one string token and its per-language values as .uni file text.
#
# @param StrTokenName      Name of the string token.
# @param TokenValueList    Sequence of (Lang, Value) pairs to be processed.
# @param ContainerFile     File handed to GetLanguageCode for error reporting.
#
# @return the formatted entry, or '' when no pair produced any output
def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile):
    # The token name occupies a column of at least 40 characters; longer
    # names widen the column by one beyond their own length.
    NameColumnWidth = 40
    if len(StrTokenName) > NameColumnWidth:
        NameColumnWidth = len(StrTokenName) + 1
    ValueIndentWidth = NameColumnWidth + len('#language en-US ')

    Entries = ''
    for (Lang, Value) in TokenValueList:
        if not Value or Lang == DT.TAB_LANGUAGE_EN_X:
            continue
        if Lang == '':
            Lang = DT.TAB_LANGUAGE_EN_US
        if Lang == 'eng':
            Lang = DT.TAB_LANGUAGE_EN_US
        else:
            PrimaryTag = Lang.split('-')[0]
            if len(PrimaryTag) == 3:
                Lang = GetLanguageCode(PrimaryTag, True, ContainerFile)
            else:
                Lang = GetLanguageCode(Lang, False, ContainerFile)

        # One indented, quoted line per non-blank source line, each ending
        # in a literal \n escape inside the quotes.
        QuotedLines = ''.join(
            ' ' * ValueIndentWidth + '\"%s\\n\"' % Piece.strip() + '\r\n'
            for Piece in Value.split('\n') if Piece.strip()
        )
        # Drop the first line's indent (the '#language' column takes its
        # place) and cut everything from the last \n escape onwards, then
        # re-close the quote.
        QuotedLines = QuotedLines[ValueIndentWidth:QuotedLines.rfind('\\n')] + '\"' + '\r\n'
        Entries += ' ' * NameColumnWidth + '#language %-5s ' % Lang + QuotedLines

    if not Entries:
        return Entries
    # The token name replaces the padding in front of the first '#language'.
    return StrTokenName + ' ' * (NameColumnWidth - len(StrTokenName)) + Entries[NameColumnWidth:]
258
259
## StringDefClassObject
#
# One string definition: its name, value, token number and the hex-byte
# renderings of name and value produced by UniToHexList.
#
class StringDefClassObject(object):
    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef

        if Name is None:
            self.StringName = ''
            self.StringNameByteList = []
        else:
            self.StringName = Name
            self.StringNameByteList = UniToHexList(Name)

        if Value is None:
            self.StringValue = ''
            # Kept as an empty string (not a list) to match existing behaviour.
            self.StringValueByteList = ''
            self.Length = 0
        else:
            self.StringValue = Value
            self.StringValueByteList = UniToHexList(Value)
            self.Length = len(self.StringValueByteList)

        self.Token = 0 if Token is None else Token

    def __str__(self):
        # Space-separated repr() of the identifying fields.
        Fields = (self.StringName, self.Token, self.Referenced, self.StringValue, self.UseOtherLangDef)
        return ' '.join(repr(Field) for Field in Fields)

    def UpdateValue(self, Value = None):
        # Append Value to the current value (CR LF separated) or set it when
        # there is no value yet, then refresh the byte list and length.
        if Value is None:
            return
        if self.StringValue:
            Value = self.StringValue + '\r\n' + Value
        self.StringValue = Value
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
300
301 ## UniFileClassObject
302 #
303 # A structure for .uni file definition
304 #
305 class UniFileClassObject(object):
306 def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
307 self.FileList = FileList
308 self.File = None
309 self.IncFileList = FileList
310 self.UniFileHeader = ''
311 self.Token = 2
312 self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
313 self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
314 self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
315 self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
316 self.IsCompatibleMode = IsCompatibleMode
317 if not IncludePathList:
318 self.IncludePathList = []
319 else:
320 self.IncludePathList = IncludePathList
321 if len(self.FileList) > 0:
322 self.LoadUniFiles(FileList)
323
324 #
325 # Get Language definition
326 #
def GetLangDef(self, File, Line):
    #
    # Parse one '#langdef <code> "<printable name>"' line, register the
    # language in self.LanguageDef and add its two built-in strings.
    #
    # File: the .uni file object being parsed (File.Path is read for diagnostics)
    # Line: the langdef text; anything after '//' is ignored
    #
    # Returns True. A malformed line is reported through EdkLogger.Error.
    #
    Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
    if len(Lang) != 3:
        #
        # Re-read the file only to find the line number for the error report.
        # Each encoding is tried in turn: the former chain of sibling
        # 'except UnicodeError' clauses could never reach the utf_16_le
        # attempt, and its bare 'except' referenced a name that might not
        # have been bound.
        #
        FileIn = None
        ReadError = None
        for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
            try:
                with codecs.open(File.Path, mode='rb', encoding=Encoding) as UniFile:
                    FileIn = UniFile.readlines()
                break
            except UnicodeError as Xstr:
                # Wrong encoding: remember why and try the next one.
                ReadError = Xstr
            except Exception as Xstr:
                # Not a decoding problem; another encoding will not help.
                ReadError = Xstr
                break
        if FileIn is None:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FILE_OPEN_FAILURE,
                            "File read failure: %s" % str(ReadError),
                            ExtraData=File)
        LineNo = GetLineNo(FileIn, Line, False)
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        "Wrong language definition",
                        ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line,
                        File = File, Line = LineNo)
    else:
        LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
        LangPrintName = Lang[2]

    # NOTE(review): the code below needs LangName / LangPrintName, so it
    # presumes EdkLogger.Error above does not return -- confirm.
    IsLangInDef = False
    for Item in self.LanguageDef:
        if Item[0] == LangName:
            IsLangInDef = True
            break

    if not IsLangInDef:
        self.LanguageDef.append([LangName, LangPrintName])

    #
    # Add language string
    #
    self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
    self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

    if not IsLangInDef:
        #
        # The found STRING tokens will be added into new language string list
        # so that the unique STRING identifier is reserved for all languages in the package list.
        #
        FirstLangName = self.LanguageDef[0][0]
        if LangName != FirstLangName:
            # Entries 0 and 1 are the two built-in strings added above.
            for Index in range (2, len (self.OrderedStringList[FirstLangName])):
                Item = self.OrderedStringList[FirstLangName][Index]
                if Item.UseOtherLangDef != '':
                    OtherLang = Item.UseOtherLangDef
                else:
                    OtherLang = FirstLangName
                self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName,
                                                                              '',
                                                                              Item.Referenced,
                                                                              Item.Token,
                                                                              OtherLang))
                self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
    return True
386
387 #
388 # Get String name and value
389 #
390 def GetStringObject(self, Item):
391 Language = ''
392 Value = ''
393
394 Name = Item.split()[1]
395 # Check the string name is the upper character
396 if Name != '':
397 MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE)
398 if MatchString == None or MatchString.end(0) != len(Name):
399 EdkLogger.Error("Unicode File Parser",
400 ToolError.FORMAT_INVALID,
401 'The string token name %s in UNI file %s must be upper case character.' %(Name, self.File))
402 LanguageList = Item.split(u'#language ')
403 for IndexI in range(len(LanguageList)):
404 if IndexI == 0:
405 continue
406 else:
407 Language = LanguageList[IndexI].split()[0]
408 #.replace(u'\r\n', u'')
409 Value = \
410 LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : LanguageList[IndexI].rfind(u'\"')]
411 Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
412 self.AddStringToList(Name, Language, Value)
413
414 #
415 # Get include file list and load them
416 #
417 def GetIncludeFile(self, Item, Dir = None):
418 if Dir:
419 pass
420 FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1]
421 self.LoadUniFile(FileName)
422
423 #
424 # Pre-process before parse .uni file
425 #
def PreProcess(self, File, IsIncludeFile=False):
    #
    # Read one .uni file, validate its layout and return its content as a
    # normalized list of lines in which every '#langdef ...', '#string NAME',
    # '#language CODE' and quoted value sits on its own element.
    #
    # File:           file object; File.Path is read here (File.Dir only on the include path)
    # IsIncludeFile:  True when called for an included file; relaxes the
    #                 "file must not be empty" check and avoids re-adding a
    #                 '#string' line already present in the result
    #
    # The first header comment block found is stored in self.UniFileHeader.
    # All format problems are reported through EdkLogger.Error.
    #
    if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=File.Path)

    #
    # Check file header of the Uni file
    #
    # if not CheckUTF16FileHeader(File.Path):
    #     EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
    #                     ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' % File.Path)

    #
    # Try each supported encoding in turn. The former chain of sibling
    # 'except UnicodeError' clauses never reached the utf_16_le attempt,
    # because a failure inside one handler is not caught by the next, and
    # the opened files were never closed.
    #
    FileIn = None
    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            with codecs.open(File.Path, mode='rb', encoding=Encoding) as UniFile:
                FileIn = UniFile.readlines()
            break
        except UnicodeError:
            # Wrong encoding; try the next one.
            continue
        except Exception:
            # Not a decoding problem; another encoding will not help.
            break
    if FileIn is None:
        EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path)


    #
    # get the file header
    #
    Lines = []
    HeaderStart = False
    HeaderEnd = False
    # Only the first file processed contributes to self.UniFileHeader.
    if not self.UniFileHeader:
        FirstGenHeader = True
    else:
        FirstGenHeader = False
    for Line in FileIn:
        Line = Line.strip()
        if Line == u'':
            continue
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \
            and not HeaderEnd and not HeaderStart:
            HeaderStart = True
        if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd:
            HeaderEnd = True
        if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader:
            self.UniFileHeader += Line + '\r\n'
            continue

    #
    # Use unique identifier
    #
    FindFlag = -1
    LineCount = 0
    MultiLineFeedExits = False
    #
    # 0: initial value
    # 1: single String entry exist
    # 2: line feed exist under the same single String entry
    #
    StringEntryExistsFlag = 0
    # The loop variable only fixes the number of iterations; the line that is
    # actually processed is FileIn[LineCount], because the merge step below
    # rewrites FileIn and steps LineCount back.
    for Line in FileIn:
        Line = FileIn[LineCount]
        LineCount += 1
        Line = Line.strip()
        #
        # Ignore comment line and empty line
        #
        if Line == u'' or Line.startswith(u'//'):
            #
            # Change the single line String entry flag status
            #
            if StringEntryExistsFlag == 1:
                StringEntryExistsFlag = 2
            #
            # If the '#string' line and the '#language' line are not in the same line,
            # there should be only one line feed character between them
            #
            if MultiLineFeedExits:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            continue

        MultiLineFeedExits = False
        #
        # Process comment embedded in string define lines
        #
        FindFlag = Line.find(u'//')
        if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'):
            Line = Line.replace(Line[FindFlag:], u' ')
            # LineCount already points at the next line; the bound check keeps
            # a commented line at the very end of the file from raising IndexError.
            if LineCount < len(FileIn) and FileIn[LineCount].strip().startswith('#language'):
                # Merge the following '#language' line into this one, blank it,
                # shift the remaining lines up and process the merged line again.
                Line = Line + FileIn[LineCount]
                FileIn[LineCount-1] = Line
                FileIn[LineCount] = '\r\n'
                LineCount -= 1
                for Index in xrange (LineCount + 1, len (FileIn) - 1):
                    if (Index == len(FileIn) -1):
                        FileIn[Index] = '\r\n'
                    else:
                        FileIn[Index] = FileIn[Index + 1]
                continue
        CommIndex = GetCharIndexOutStr(u'/', Line)
        if CommIndex > -1:
            if (len(Line) - 1) > CommIndex:
                if Line[CommIndex+1] == u'/':
                    Line = Line[:CommIndex].strip()
                else:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

        Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
        Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
        Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

        # Escaped backslashes are parked as U+0006 so the escape handling
        # below does not see them, and restored afterwards.
        Line = Line.replace(u'\\\\', u'\u0006')
        Line = Line.replace(u'\\r\\n', CR + LF)
        Line = Line.replace(u'\\n', CR + LF)
        Line = Line.replace(u'\\r', CR)
        Line = Line.replace(u'\\t', u'\t')
        # NOTE(review): both literals below are the single character '"', so
        # this replace changes nothing -- confirm whether an escaped quote was meant.
        Line = Line.replace(u'''\"''', u'''"''')
        Line = Line.replace(u'\t', u' ')
        Line = Line.replace(u'\u0006', u'\\')

        # IncList = gINCLUDE_PATTERN.findall(Line)
        # With IncList fixed to [], the include branch below never runs.
        IncList = []
        if len(IncList) == 1:
            for Dir in [File.Dir] + self.IncludePathList:
                IncFile = PathClass(str(IncList[0]), Dir)
                self.IncFileList.append(IncFile)
                if os.path.isfile(IncFile.Path):
                    Lines.extend(self.PreProcess(IncFile, True))
                    break
            else:
                EdkLogger.Error("Unicode File Parser",
                                ToolError.FILE_NOT_FOUND,
                                Message="Cannot find include file",
                                ExtraData=str(IncList[0]))
            continue

        #
        # Check if single line has correct '"'
        #
        if Line.startswith(u'#string') and Line.find(u'#language') > -1 and Line.find('"') > Line.find(u'#language'):
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                          % (LineCount, File.Path))

        #
        # Between Name entry and Language entry can not contain line feed
        #
        if Line.startswith(u'#string') and Line.find(u'#language') == -1:
            MultiLineFeedExits = True

        if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0:
            MultiLineFeedExits = True

        #
        # Between Language entry and String entry can not contain line feed
        #
        if Line.startswith(u'#language') and len(Line.split()) == 2:
            MultiLineFeedExits = True

        #
        # Between two String entry, can not contain line feed
        #
        if Line.startswith(u'"'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path)

            StringEntryExistsFlag = 1
            if not Line.endswith('"'):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                ExtraData='''The line %s misses '"' at the end of it in file %s'''
                                          % (LineCount, File.Path))
        elif Line.startswith(u'#language'):
            if StringEntryExistsFlag == 2:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                                Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path)
            StringEntryExistsFlag = 0
        else:
            StringEntryExistsFlag = 0

        Lines.append(Line)

    #
    # Convert string def format as below
    #
    #     #string MY_STRING_1
    #     #language eng
    #     "My first English string line 1"
    #     "My first English string line 2"
    #     #string MY_STRING_1
    #     #language spa
    #     "Mi segunda secuencia 1"
    #     "Mi segunda secuencia 2"
    #

    if not IsIncludeFile and not Lines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
            Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \
            ExtraData=File.Path)

    NewLines = []
    StrName = u''
    ExistStrNameList = []
    for Line in Lines:
        # StrName is the '#string NAME' line most recently seen, so these two
        # checks validate the previous entry's token name.
        if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4:
            StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)
            if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \
                StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \
                (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR):
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \
                            ExtraData=File.Path)

        if Line.count(u'#language') > 1:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \
                            ExtraData=File.Path)

        if Line.startswith(u'//'):
            continue
        elif Line.startswith(u'#langdef'):
            if len(Line.split()) == 2:
                NewLines.append(Line)
                continue
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                NewLines.append(Line[:Line.find(u'"')].strip())
                NewLines.append(Line[Line.find(u'"'):])
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#string'):
            if len(Line.split()) == 2:
                # '#string NAME' alone: remember it until its '#language' line arrives.
                StrName = Line
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                continue
            elif len(Line.split()) == 4 and Line.find(u'#language') > 0:
                # '#string NAME #language CODE' with the value on a later line.
                if Line[Line.find(u'#language')-1] != ' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line.find(u'"') > 0:
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):]).strip())
            elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0:
                # '#string NAME #language CODE "value"' all on one line.
                if Line[Line.find(u'#language')-1] != u' ' or \
                   Line[Line.find(u'#language')+len(u'#language')] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                if Line[Line.find(u'"')-1] != u' ':
                    EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

                StrName = Line.split()[0] + u' ' + Line.split()[1]
                if StrName:
                    if StrName.split()[1] not in ExistStrNameList:
                        ExistStrNameList.append(StrName.split()[1].strip())
                    elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \
                                                DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \
                                                DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \
                                                DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]:
                        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                                        Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \
                                        ExtraData=File.Path)
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append((Line[:Line.find(u'#language')]).strip())
                else:
                    NewLines.append((Line[:Line.find(u'#language')]).strip())
                NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'#language'):
            # Every '#language' line is preceded by its '#string NAME' line in the output.
            if len(Line.split()) == 2:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append(Line)
            elif len(Line.split()) > 2 and Line.find(u'"') > 0:
                if IsIncludeFile:
                    if StrName not in NewLines:
                        NewLines.append(StrName)
                else:
                    NewLines.append(StrName)
                NewLines.append((Line[:Line.find(u'"')]).strip())
                NewLines.append((Line[Line.find(u'"'):]).strip())
            else:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
        elif Line.startswith(u'"'):
            if u'#string' in Line  or u'#language' in Line:
                EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)
            NewLines.append(Line)
        else:
            # Unrecognized line (a leftover debug print of the line was removed here).
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path)

    # The in-loop name check only covers earlier entries; this covers the last one.
    if StrName and not StrName.split()[1].startswith(u'STR_'):
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \
                        ExtraData=File.Path)

    if StrName and not NewLines:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                        Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \
                        ExtraData=File.Path)

    #
    # Check Abstract, Description, BinaryAbstract and BinaryDescription order,
    # should be Abstract, Description, BinaryAbstract, BinaryDescription
    AbstractPosition = -1
    DescriptionPosition = -1
    BinaryAbstractPosition = -1
    BinaryDescriptionPosition = -1
    for StrName in ExistStrNameList:
        if DT.TAB_HEADER_ABSTRACT.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryAbstractPosition = ExistStrNameList.index(StrName)
            else:
                AbstractPosition = ExistStrNameList.index(StrName)
        if DT.TAB_HEADER_DESCRIPTION.upper() in StrName:
            if 'BINARY' in StrName:
                BinaryDescriptionPosition = ExistStrNameList.index(StrName)
            else:
                DescriptionPosition = ExistStrNameList.index(StrName)

    OrderList = sorted([AbstractPosition, DescriptionPosition])
    BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition])
    Min = OrderList[0]
    Max = OrderList[1]
    BinaryMin = BinaryOrderList[0]
    BinaryMax = BinaryOrderList[1]
    if BinaryDescriptionPosition > -1:
        if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \
               BinaryMax > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)
    elif BinaryAbstractPosition > -1:
        if not(BinaryAbstractPosition > Max):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if DescriptionPosition > -1:
        if not(DescriptionPosition == Max and AbstractPosition == Min and \
               DescriptionPosition > AbstractPosition):
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \
                            Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \
                            ExtraData=File.Path)

    if not self.UniFileHeader:
        EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                        Message = ST.ERR_NO_SOURCE_HEADER,
                        ExtraData=File.Path)

    return NewLines
814
815 #
816 # Load a .uni file
817 #
def LoadUniFile(self, File = None):
    """Parse one .uni file and register the strings it defines.

    The file is run through self.PreProcess() and the returned lines are
    scanned for two kinds of entries:

      * a line containing '#langdef ' is joined with the following line
        and handed to self.GetLangDef();
      * a '#string ' line followed by a '#language ' line and then quoted
        value lines is handed to self.AddStringToList().

    @param File: object describing the .uni file. It is stored in
                 self.File and its Path attribute is used in error reports.
    """
    if File is None:
        # There is no file object here, hence no File.Path to report;
        # dereferencing it would raise AttributeError instead of the
        # intended parser error.
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given',
                        ExtraData='')

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)

    #
    # Look-ahead lines. They are bound before the loop so that inputs with
    # fewer than three lines cannot hit an unbound name. Near the end of
    # the list they keep the value of the previous iteration.
    #
    SecondLine = u''
    ThirdLine = u''

    #
    # Get Unicode Information
    #
    for IndexI in range(len(Lines)):
        Line = Lines[IndexI]
        if (IndexI + 1) < len(Lines):
            SecondLine = Lines[IndexI + 1]
        if (IndexI + 2) < len(Lines):
            ThirdLine = Lines[IndexI + 2]

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        IsStringDef = Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
                      SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
                      ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0
        if not IsStringDef:
            continue

        # The '#string' and '#language' lines themselves must not carry
        # quoted text.
        if Line.find('"') > 0 or SecondLine.find('"') > 0:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                            ExtraData=File.Path)

        Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
        Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
        Value = ''

        #
        # Collect the consecutive quoted lines that make up the value.
        #
        for IndexJ in range(IndexI + 2, len(Lines)):
            Current = Lines[IndexJ]
            Stripped = Current.strip()
            IsValueLine = Current.find(u'#string ') < 0 and Current.find(u'#language ') < 0 and \
                          Stripped.startswith(u'"') and Stripped.endswith(u'"')
            if not IsValueLine:
                # First non-value line ends this string definition. The
                # outer loop variable is not advanced: rebinding it would
                # have no effect on the range() iteration anyway.
                break

            Text = Stripped[1:-1]
            if Current[-2] == ' ':
                # A blank right before the last character of the line means
                # the text continues on the next line: join with a single
                # space instead of a line break (blank text is added as is).
                if Text.strip():
                    Value = Value + Text.rstrip() + ' '
                else:
                    Value = Value + Text
            else:
                Value = Value + Text + '\r\n'

        # Drop the line break appended after the last value line.
        if Value.endswith('\r\n'):
            Value = Value[: Value.rfind('\r\n')]
        Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)
896
897 #
898 # Load multiple .uni files
899 #
def LoadUniFiles(self, FileList):
    """Load every entry of FileList that names a .uni file.

    An entry qualifies when its Path attribute, stripped of surrounding
    whitespace, ends with '.uni', '.UNI' or '.Uni'. Each qualifying entry
    is passed to self.LoadUniFile().

    @param FileList: sized collection of objects exposing a Path attribute.
    """
    if len(FileList) == 0:
        return

    UniSuffixes = ('.uni', '.UNI', '.Uni')
    for Candidate in FileList:
        CandidatePath = Candidate.Path.strip()
        if CandidatePath.endswith(UniSuffixes):
            self.LoadUniFile(Candidate)
906
907 #
908 # Add a string to list
909 #
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Add a string definition for one language, or update an existing one.

    If Name is already known for Language, its value is updated (unless
    Value is None) and its UseOtherLangDef is cleared. Otherwise a new
    StringDefClassObject is created:

      * Index == -1: the entry is appended, and a placeholder entry with an
        empty value is appended to the list of every other language in
        self.LanguageDef so that the string exists in all of them;
      * any other Index: the entry is inserted at that position of this
        language's list only.

    @param Name:            string identifier
    @param Language:        language code the value belongs to
    @param Value:           string value; None leaves an existing value as is
    @param Token:           ignored, the token is always the current length
                            of the language's string list
    @param Referenced:      initial referenced flag of a new entry
    @param UseOtherLangDef: language recorded on placeholder entries;
                            defaults to Language when empty
    @param Index:           insert position, -1 to append
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    Items = self.OrderedStringList[Language]
    Positions = self.OrderedStringDict[Language]

    #
    # Known string: only refresh its value.
    #
    if Name in Positions:
        if Value is not None:
            Item = Items[Positions[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(Items)
    if Index == -1:
        Items.append(StringDefClassObject(Name,
                                          Value,
                                          Referenced,
                                          Token,
                                          UseOtherLangDef))
        Positions[Name] = Token
        #
        # New STRING token will be added into all language string lists,
        # so that the unique STRING identifier is reserved for all
        # languages in the package list.
        #
        for LangName in self.LanguageDef:
            if LangName[0] == Language:
                continue
            if UseOtherLangDef != '':
                OtherLangDef = UseOtherLangDef
            else:
                OtherLangDef = Language
            OtherItems = self.OrderedStringList[LangName[0]]
            OtherItems.append(StringDefClassObject(Name,
                                                   '',
                                                   Referenced,
                                                   Token,
                                                   OtherLangDef))
            self.OrderedStringDict[LangName[0]][Name] = len(OtherItems) - 1
    else:
        #
        # list.insert() clamps out-of-range positions and counts negative
        # ones from the end, so work out where the entry really lands.
        #
        if Index < 0:
            Position = max(Token + Index, 0)
        else:
            Position = min(Index, Token)
        Items.insert(Index, StringDefClassObject(Name,
                                                 Value,
                                                 Referenced,
                                                 Token,
                                                 UseOtherLangDef))
        #
        # Every entry at or after the insert position moved one slot to the
        # right; keep the name -> position table in step with the list.
        #
        for OtherName in Positions:
            if Positions[OtherName] >= Position:
                Positions[OtherName] += 1
        Positions[Name] = Position
960
961 #
962 # Set the string as referenced
963 #
def SetStringReferenced(self, Name):
    """Mark the string called Name as referenced.

    String tokens are added in the same order to every language's string
    list, so only the entry in the first language of self.LanguageDef is
    updated. Unknown names are ignored.

    @param Name: string identifier to mark
    """
    FirstLang = self.LanguageDef[0][0]
    Positions = self.OrderedStringDict[FirstLang]
    if Name not in Positions:
        return

    Entry = self.OrderedStringList[FirstLang][Positions[Name]]
    Entry.Referenced = True
974
975 #
976 # Search the string in language definition by Name
977 #
def FindStringValue(self, Name, Lang):
    """Look up a string entry by name within one language.

    @param Name: string identifier
    @param Lang: language key into self.OrderedStringDict / OrderedStringList

    @return: the entry stored for Name in that language, or None when the
             name is not defined there
    """
    Positions = self.OrderedStringDict[Lang]
    if Name not in Positions:
        return None

    return self.OrderedStringList[Lang][Positions[Name]]
984
985 #
986 # Search the string in language definition by Token
987 #
def FindByToken(self, Token, Lang):
    """Look up a string entry by token within one language.

    @param Token: token value to search for
    @param Lang:  language key into self.OrderedStringList

    @return: the first entry of that language whose Token equals the given
             one, or None when there is no such entry
    """
    Matches = (Entry for Entry in self.OrderedStringList[Lang] if Entry.Token == Token)
    return next(Matches, None)
994
995 #
996 # Re-order strings and re-generate tokens
997 #
def ReToken(self):
    """Renumber the tokens of all strings and rebuild the by-token tables.

    The referenced state of each string is taken from the first language in
    self.LanguageDef. Strings referenced there get the small tokens
    (0, 1, ...) in list order; the unreferenced ones get the tokens that
    follow. The entry at the same list position in every language receives
    the same token and is recorded in self.OrderedStringListByToken.

    @return: None
    """
    if len(self.LanguageDef) == 0:
        return None

    LangNames = [LangDef[0] for LangDef in self.LanguageDef]
    FirstLang = LangNames[0]

    #
    # Start from empty token -> entry tables to facilitate later search by
    # token.
    #
    for LangName in LangNames:
        self.OrderedStringListByToken[LangName] = {}

    FirstLangItems = self.OrderedStringList[FirstLang]
    NextToken = 0

    #
    # Use small tokens for all referenced strings; the referenced flag is
    # propagated to the matching entry of every language.
    #
    for Position, FirstLangItem in enumerate(FirstLangItems):
        if FirstLangItem.Referenced == True:
            for LangName in LangNames:
                Entry = self.OrderedStringList[LangName][Position]
                Entry.Referenced = True
                Entry.Token = NextToken
                self.OrderedStringListByToken[LangName][Entry.Token] = Entry
            NextToken = NextToken + 1

    #
    # Use big tokens for all unreferenced strings: numbering continues
    # right after the last referenced token.
    #
    for Position, FirstLangItem in enumerate(FirstLangItems):
        if FirstLangItem.Referenced == False:
            for LangName in LangNames:
                Entry = self.OrderedStringList[LangName][Position]
                Entry.Token = NextToken
                self.OrderedStringListByToken[LangName][Entry.Token] = Entry
            NextToken = NextToken + 1
1038
1039 #
1040 # Show the instance itself
1041 #
def ShowMe(self):
    """Print the language definitions and every stored string (debug aid).

    Output order: self.LanguageDef, then for each language key of
    self.OrderedStringList the key itself followed by str() of each of its
    entries, one per line.
    """
    # Single-argument print(...) produces the same output whether print is
    # a statement or a function.
    print(self.LanguageDef)
    for LangName in self.OrderedStringList:
        print(LangName)
        for Member in self.OrderedStringList[LangName]:
            print(str(Member))
1049
1050 #
1051 # Read content from '!include' UNI file
1052 #
def ReadIncludeUNIfile(self, FilaPath):
    """Read the lines of a UNI file pulled in through '!include'.

    The file is decoded as UTF-8 first, then as UTF-16, then as UTF-16
    little-endian; the first encoding that decodes the whole file wins.

    @param FilaPath: path of the file to read

    @return: list of decoded lines (line endings kept); an empty list if
             the file could not be read and the error report returned

    @raise UnicodeError: when none of the three encodings can decode the
                         file
    """
    if not os.path.exists(FilaPath) or not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)

    FileIn = []
    Encodings = ('utf_8', 'utf_16', 'utf_16_le')
    for Encoding in Encodings:
        try:
            # The with-block closes the handle even when decoding fails.
            with codecs.open(FilaPath, mode='rb', encoding=Encoding) as Stream:
                FileIn = Stream.readlines()
            break
        except UnicodeError:
            # Wrong guess: fall through to the next encoding. After the
            # last candidate there is nothing left to try, so let the
            # decoding error reach the caller.
            if Encoding == Encodings[-1]:
                raise
        except (IOError, OSError):
            EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
            break
    return FileIn
1070