]>
Commit | Line | Data |
---|---|---|
97fa0ee9 YL |
1 | ## @file\r |
2 | # This file is used to collect all defined strings in multiple uni files\r | |
3 | #\r | |
71f02911 CS |
4 | #\r |
5 | # Copyright (c) 2014 Hewlett-Packard Development Company, L.P.<BR>\r | |
6 | #\r | |
018f7b82 | 7 | # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>\r |
40d841f6 | 8 | # This program and the accompanying materials\r |
30fdf114 LG |
9 | # are licensed and made available under the terms and conditions of the BSD License\r |
10 | # which accompanies this distribution. The full text of the license may be found at\r | |
11 | # http://opensource.org/licenses/bsd-license.php\r | |
12 | #\r | |
13 | # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r | |
14 | # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r | |
15 | \r | |
30fdf114 LG |
16 | ##\r |
17 | # Import Modules\r | |
18 | #\r | |
1ccc4d89 | 19 | from __future__ import print_function\r |
1be2ed90 | 20 | import Common.LongFilePathOs as os, codecs, re\r |
40d841f6 | 21 | import distutils.util\r |
30fdf114 | 22 | import Common.EdkLogger as EdkLogger\r |
86379ac4 | 23 | from io import BytesIO\r |
30fdf114 | 24 | from Common.BuildToolError import *\r |
5a57246e | 25 | from Common.StringUtils import GetLineNo\r |
30fdf114 | 26 | from Common.Misc import PathClass\r |
1be2ed90 | 27 | from Common.LongFilePathSupport import LongFilePath\r |
018f7b82 | 28 | from Common.GlobalData import *\r |
30fdf114 LG |
##
# Static definitions
#

# Escape sequences as they are spelled inside a .uni source file.
UNICODE_WIDE_CHAR         = u'\\wide'
UNICODE_NARROW_CHAR       = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR        = '\r'
UNICODE_UNICODE_LF        = '\n'

# Single characters used in place of the escape sequences above.
NARROW_CHAR       = u'\uFFF0'
WIDE_CHAR         = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR                = u'\u000D'
LF                = u'\u000A'
NULL              = u'\u0000'
TAB               = u'\t'
# Temporary stand-in for an escaped backslash while other escapes are expanded.
BACK_SLASH_PLACEHOLDER = u'\u0006'

# Matches a whole '#include "file"' or '#include <file>' line and captures the file name.
gIncludePattern = re.compile("^#include +[\"<]+([^\"< >]+)[>\"]+$", re.MULTILINE | re.UNICODE)
48 | \r | |
1ccc4d89 LG |
## Convert a python unicode string to a normal string
#
# Convert a python unicode string to its escaped, printable form
# UniToStr(u'I am a string') is 'I am a string'
#
# @param Uni:     The python unicode string
#
# @retval:        The repr() text of Uni without the surrounding quotes
#                 (and without the 'u' prefix that Python 2 adds)
#
def UniToStr(Uni):
    Text = repr(Uni)
    # Python 2 renders unicode objects as u'...' while Python 3 renders
    # them as '...'. Drop the prefix only when it is really there, so that
    # the first character of the string is never cut off.
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    # Remove the opening and closing quote characters.
    return Text[1:-1]
60 | \r | |
30fdf114 LG |
## Convert a unicode string to a Hex list
#
# Every character becomes two '0xNN' entries, low byte first
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
#
# @param Uni:     The python unicode string
#
# @retval List:   The formatted hex list
#
def UniToHexList(Uni):
    HexList = []
    for Char in Uni:
        # Four upper-case hex digits: [0:2] is the high byte, [2:4] the low byte.
        Digits = '%04X' % ord(Char)
        HexList.extend(('0x' + Digits[2:4], '0x' + Digits[0:2]))
    return HexList
77 | \r | |
# Maps three-letter language codes (lower case) to their two-letter
# equivalents; used by GetLanguageCode().
LangConvTable = {
    'eng':'en', 'fra':'fr',
    'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am',
    'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az',
    'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm',
    'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce',
    'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv',
    'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee',
    'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa',
    'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga',
    'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha',
    'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu',
    'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig',
    'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu',
    'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj',
    'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr',
    'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la',
    'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt',
    'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk',
    'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my',
    'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl',
    'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc',
    'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi',
    'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn',
    'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd',
    'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm',
    'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st',
    'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg',
    'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to',
    'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug',
    'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo',
    'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za',
    'zho':'zh', 'zul':'zu',
}
110 | \r | |
## GetLanguageCode
#
# Check the language code read from .UNI file and convert ISO 639-2 codes to RFC 4646 codes if appropriate
# ISO 639-2 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# @param LangName:          Language code read from .UNI file
# @param IsCompatibleMode:  True to accept (and convert) three-letter ISO 639-2 codes
# @param File:              The .UNI file being parsed; only passed on to the error report
#
# @retval LangName:         Valid language code in RFC 4646 format. When the code is
#                           invalid the problem is reported through EdkLogger.error()
#                           and nothing is returned explicitly.
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            # Prefer the two-letter equivalent when one exists.
            TempLangName = LangConvTable.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.error("Unicode File Parser", FORMAT_INVALID, "Invalid ISO 639-2 language code : %s" % LangName, File)

    # Private-use tags ("x-..." / "X-...") are accepted as they are. Slicing
    # instead of indexing keeps empty and one-character names from raising
    # IndexError, so they reach the error report at the bottom.
    if LangName[:2] in ('x-', 'X-'):
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        # A three-letter code is only valid when no two-letter code exists for it.
        if LangName.isalpha() and LangConvTable.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        # NOTE(review): the table lookup below uses the whole tag, which can
        # never be a key of LangConvTable, so the test is always satisfied.
        # It presumably meant LangName[0:3]; kept as is to avoid rejecting
        # tags that are accepted today - confirm before tightening.
        if LangName[0:3].isalpha() and LangConvTable.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.error("Unicode File Parser", FORMAT_INVALID, "Invalid RFC 4646 language code : %s" % LangName, File)
150 | \r | |
d80e451b JJ |
## Ucs2Codec
#
# This is only a partial codec implementation. It only supports
# encoding, and is primarily used to check that all the characters are
# valid for UCS-2.
#
class Ucs2Codec(codecs.Codec):
    def __init__(self):
        self.__utf16 = codecs.lookup('utf-16')

    #
    # Encode input as UTF-16 after checking that every character fits in UCS-2
    #
    # @param input:   The unicode string to encode
    # @param errors:  Error handling scheme handed to the UTF-16 encoder
    #
    # @retval:        Whatever the UTF-16 codec's encode() returns
    #
    # Raises ValueError for surrogate code points and for code points above 0xFFFF
    #
    def encode(self, input, errors='strict'):
        for Char in input:
            CodePoint = ord(Char)
            if 0xd800 <= CodePoint <= 0xdfff:
                raise ValueError("Code Point is in range reserved for " +
                                 "UTF-16 surrogate pairs")
            elif CodePoint > 0xffff:
                raise ValueError("Code Point too large to encode in UCS-2")
        return self.__utf16.encode(input, errors)

TheUcs2Codec = Ucs2Codec()

## Ucs2Search
#
# Codec search function that makes codecs.lookup('ucs-2') resolve to TheUcs2Codec
#
# @param name:    The codec name handed over by the codecs module
#
# @retval:        A CodecInfo for the UCS-2 codec, or None for any other name
#
def Ucs2Search(name):
    # Python 3.9 and later normalize hyphens to underscores before calling
    # search functions, so the name arrives as 'ucs_2' there.
    if name in ('ucs-2', 'ucs_2'):
        return codecs.CodecInfo(
                   name=name,
                   encode=TheUcs2Codec.encode,
                   decode=TheUcs2Codec.decode)
    return None
codecs.register(Ucs2Search)
181 | \r | |
30fdf114 LG |
## StringDefClassObject
#
# A structure for one string definition: its name, its value and the
# hex byte lists derived from both
#
class StringDefClassObject(object):
    def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''):
        # Name and its byte list stay empty when no name is given.
        self.StringName = '' if Name is None else Name
        self.StringNameByteList = [] if Name is None else UniToHexList(Name)
        # Defaults for a string without a value; UpdateValue() fills them in.
        self.StringValue = ''
        self.StringValueByteList = ''
        self.Length = 0
        self.Token = 0 if Token is None else Token
        self.Referenced = Referenced
        self.UseOtherLangDef = UseOtherLangDef
        self.UpdateValue(Value)

    def __str__(self):
        Fields = (self.StringName, self.Token, self.Referenced, self.StringValue, self.UseOtherLangDef)
        return ' '.join(repr(Field) for Field in Fields)

    #
    # Replace the string value and refresh the byte list and length
    # A Value of None leaves the object untouched
    #
    def UpdateValue(self, Value = None):
        if Value is None:
            return
        self.StringValue = Value + u'\x00' # Add a NULL at string tail
        self.StringValueByteList = UniToHexList(self.StringValue)
        self.Length = len(self.StringValueByteList)
219 | \r | |
c23ef28c CJ |
## StripComments
#
# Remove a trailing '//' comment from a line and strip surrounding whitespace
# A '//' that sits inside a double-quoted string is not treated as a comment
#
# @param Line:    The line to clean up
#
# @retval:        The stripped line without its comment
#
def StripComments(Line):
    Marker = u'//'
    Pos = Line.find(Marker)
    while Pos != -1:
        # Count the quotes in front of the marker, ignoring escaped ones.
        Quotes = Line.count(u'"', 0, Pos)
        EscapedQuotes = Line.count(u'\\"', 0, Pos)
        if (Quotes - EscapedQuotes) % 2 == 0:
            # All quotes are balanced, so the marker really starts a comment.
            return Line[:Pos].strip()
        # Odd number of quotes: the marker is inside a string, try the next one.
        Pos = Line.find(Marker, Pos + 1)
    return Line.strip()
232 | \r | |
30fdf114 LG |
## UniFileClassObject
#
# A structure for .uni file definition
#
# Holds the language definitions and, per language, the ordered list of
# string definitions collected from one or more .uni files.
#
class UniFileClassObject(object):
    #
    # Create the object and load every file of FileList
    #
    # @param FileList:          File objects to load; each one must offer the Path and
    #                           Dir attributes read by PreProcess()
    # @param IsCompatibleMode:  Handed to GetLanguageCode() for every language code
    # @param IncludePathList:   Extra directories searched for '#include' files
    #
    def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None):
        # None stands for "empty list"; a literal [] default would be one
        # list object shared by every instance created without the argument.
        if FileList is None:
            FileList = []
        if IncludePathList is None:
            IncludePathList = []
        self.FileList = FileList
        # The file currently being parsed; set by LoadUniFile().
        self.File = None
        self.Token = 2
        self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ]
        self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] }
        self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} }
        self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} }
        self.IsCompatibleMode = IsCompatibleMode
        self.IncludePathList = IncludePathList
        if len(self.FileList) > 0:
            self.LoadUniFiles(FileList)

    #
    # Get Language definition
    #
    # Parses a '#langdef <code> "<printable name>"' line, records the language
    # and adds the $LANGUAGE_NAME / $PRINTABLE_LANGUAGE_NAME strings for it.
    #
    # @param File:  The file the line comes from (used for error reports)
    # @param Line:  The pre-processed '#langdef' line
    #
    # @retval True
    #
    def GetLangDef(self, File, Line):
        Lang = distutils.util.split_quoted((Line.split(u"//")[0]))
        if len(Lang) != 3:
            # Re-open the file only to find the line number for the report.
            try:
                FileIn = UniFileClassObject.OpenUniFile(LongFilePath(File.Path))
            except UnicodeError as X:
                EdkLogger.error("build", FILE_READ_FAILURE, "File read failure: %s" % str(X), ExtraData=File)
            except Exception:
                EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=File)
            LineNo = GetLineNo(FileIn, Line, False)
            EdkLogger.error("Unicode File Parser", PARSER_ERROR, "Wrong language definition",
                            ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line, File=File, Line=LineNo)
        else:
            LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File)
            LangPrintName = Lang[2]

        IsLangInDef = any(Item[0] == LangName for Item in self.LanguageDef)
        if not IsLangInDef:
            self.LanguageDef.append([LangName, LangPrintName])

        #
        # Add language string
        #
        self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0)
        self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1)

        if not IsLangInDef:
            #
            # The found STRING tokens will be added into new language string list
            # so that the unique STRING identifier is reserved for all languages in the package list.
            #
            FirstLangName = self.LanguageDef[0][0]
            if LangName != FirstLangName:
                # Entries 0 and 1 are the two language-name strings added above.
                for Item in self.OrderedStringList[FirstLangName][2:]:
                    if Item.UseOtherLangDef != '':
                        OtherLang = Item.UseOtherLangDef
                    else:
                        OtherLang = FirstLangName
                    self.OrderedStringList[LangName].append(StringDefClassObject(Item.StringName, '', Item.Referenced, Item.Token, OtherLang))
                    self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1
        return True

    #
    # Open a .uni file and return a reader for its decoded content
    #
    # The whole file is read into memory, its encoding is chosen from the
    # byte order mark (UTF-16 when one is present, UTF-8 otherwise) and the
    # content is checked by VerifyUcs2Data().
    #
    # @param FileName:  Path of the file to read
    #
    # @retval:          A codecs.StreamReaderWriter over the file content
    #
    @staticmethod
    def OpenUniFile(FileName):
        #
        # Read file
        #
        try:
            with open(FileName, mode='rb') as UniFile:
                FileIn = UniFile.read()
        except Exception:
            # Use error() like every other report in this file, and report
            # the name that was actually passed in.
            EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=FileName)

        #
        # Detect Byte Order Mark at beginning of file.  Default to UTF-8
        #
        Encoding = 'utf-8'
        if (FileIn.startswith(codecs.BOM_UTF16_BE) or
            FileIn.startswith(codecs.BOM_UTF16_LE)):
            Encoding = 'utf-16'

        UniFileClassObject.VerifyUcs2Data(FileIn, FileName, Encoding)

        UniFile = BytesIO(FileIn)
        Info = codecs.lookup(Encoding)
        (Reader, Writer) = (Info.streamreader, Info.streamwriter)
        return codecs.StreamReaderWriter(UniFile, Reader, Writer)

    #
    # Check that the file content decodes with Encoding and fits in UCS-2
    #
    # When the check of the whole content fails, the content is read again
    # line by line to report the number of the offending line.
    #
    # @param FileIn:    The raw bytes of the file
    # @param FileName:  The file name, used in the error message only
    # @param Encoding:  Name of the codec used to decode FileIn
    #
    @staticmethod
    def VerifyUcs2Data(FileIn, FileName, Encoding):
        Ucs2Info = codecs.lookup('ucs-2')
        #
        # Convert to unicode
        #
        try:
            FileDecoded = codecs.decode(FileIn, Encoding)
            Ucs2Info.encode(FileDecoded)
        except Exception:
            UniFile = BytesIO(FileIn)
            Info = codecs.lookup(Encoding)
            (Reader, Writer) = (Info.streamreader, Info.streamwriter)
            File = codecs.StreamReaderWriter(UniFile, Reader, Writer)
            LineNumber = 0
            ErrMsg = lambda Encoding, LineNumber: \
                     '%s contains invalid %s characters on line %d.' % \
                     (FileName, Encoding, LineNumber)
            while True:
                LineNumber = LineNumber + 1
                try:
                    Line = File.readline()
                    AtEndOfFile = (Line == '')
                    if not AtEndOfFile:
                        Ucs2Info.encode(Line)
                except Exception:
                    EdkLogger.error('Unicode File Parser', PARSER_ERROR,
                                    ErrMsg('UCS-2', LineNumber))
                    # Stop after the first report so the scan ends even if
                    # EdkLogger.error() returns instead of raising.
                    return
                if AtEndOfFile:
                    # Reported outside the try block above so that this
                    # message is not replaced by the UCS-2 one.
                    EdkLogger.error('Unicode File Parser', PARSER_ERROR,
                                    ErrMsg(Encoding, LineNumber))
                    return

    #
    # Get String name and value
    #
    # Item is one joined '#string NAME #language LANG "value" ...' definition;
    # every '#language' part of it is added through AddStringToList().
    #
    def GetStringObject(self, Item):
        Name = Item.split()[1]
        # Check the string name
        if Name != '':
            MatchString = gIdentifierPattern.match(Name)
            if MatchString is None:
                EdkLogger.error('Unicode File Parser', FORMAT_INVALID, 'The string token name %s defined in UNI file %s contains the invalid character.' % (Name, self.File))
        # The first piece is the '#string NAME' part, the rest are languages.
        for LangItem in Item.split(u'#language ')[1:]:
            Language = LangItem.split()[0]
            # The value is everything between the first and the last quote.
            Value = LangItem[LangItem.find(u'\"') + len(u'\"') : LangItem.rfind(u'\"')] #.replace(u'\r\n', u'')
            Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
            self.AddStringToList(Name, Language, Value)

    #
    # Get include file list and load them
    #
    # NOTE(review): Dir is not used and the extracted name is a plain string,
    # while LoadUniFile() reads .Path/.Dir from its argument - this looks like
    # a leftover; PreProcess() handles '#include' itself. Confirm before use.
    #
    def GetIncludeFile(self, Item, Dir):
        FileName = Item[Item.find(u'#include ') + len(u'#include ') :Item.find(u' ', len(u'#include '))][1:-1]
        self.LoadUniFile(FileName)

    #
    # Pre-process before parse .uni file
    #
    # Reads File line by line, drops comments and empty lines, expands the
    # escape sequences and replaces '#include' lines by the pre-processed
    # lines of the included file.
    #
    # @param File:  File object offering Path and Dir
    #
    # @retval:      The list of pre-processed lines
    #
    def PreProcess(self, File):
        if not os.path.exists(File.Path) or not os.path.isfile(File.Path):
            EdkLogger.error("Unicode File Parser", FILE_NOT_FOUND, ExtraData=File.Path)

        try:
            FileIn = UniFileClassObject.OpenUniFile(LongFilePath(File.Path))
        except UnicodeError as X:
            EdkLogger.error("build", FILE_READ_FAILURE, "File read failure: %s" % str(X), ExtraData=File.Path)
        except Exception:
            EdkLogger.error("build", FILE_OPEN_FAILURE, ExtraData=File.Path)

        Lines = []
        #
        # Use unique identifier
        #
        for Line in FileIn:
            Line = Line.strip()
            # Hide escaped backslashes so they cannot be taken for the start
            # of another escape sequence below.
            Line = Line.replace(u'\\\\', BACK_SLASH_PLACEHOLDER)
            Line = StripComments(Line)

            #
            # Ignore empty line
            #
            if len(Line) == 0:
                continue

            Line = Line.replace(u'/langdef', u'#langdef')
            Line = Line.replace(u'/string', u'#string')
            Line = Line.replace(u'/language', u'#language')
            Line = Line.replace(u'/include', u'#include')

            Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR)
            Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR)
            Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR)

            Line = Line.replace(u'\\r\\n', CR + LF)
            Line = Line.replace(u'\\n', CR + LF)
            Line = Line.replace(u'\\r', CR)
            Line = Line.replace(u'\\t', u' ')
            Line = Line.replace(u'\t', u' ')
            Line = Line.replace(u'\\"', u'"')
            Line = Line.replace(u"\\'", u"'")
            Line = Line.replace(BACK_SLASH_PLACEHOLDER, u'\\')

            #
            # Convert '\xNNNN\' sequences into the character they name
            #
            StartPos = Line.find(u'\\x')
            while (StartPos != -1):
                EndPos = Line.find(u'\\', StartPos + 1, StartPos + 7)
                if EndPos != -1 and EndPos - StartPos == 6:
                    HexDigits = Line[StartPos + 2 : EndPos]
                    # The second positional argument of a compiled pattern's
                    # match() is a start position, not a flag; passing
                    # re.UNICODE there made the four digits never match.
                    if g4HexChar.match(HexDigits):
                        EndStr = Line[EndPos: ]
                        # Build the character through the 'unicode_escape'
                        # codec; this form works on Python 2 and Python 3.
                        UniStr = ('\\u' + HexDigits).encode('ascii').decode('unicode_escape')
                        if EndStr.startswith(u'\\x') and len(EndStr) >= 7:
                            # The closing backslash also opens the next
                            # sequence, so it has to be kept.
                            if EndStr[6] == u'\\' and g4HexChar.match(EndStr[2 : 6]):
                                Line = Line[0 : StartPos] + UniStr + EndStr
                        else:
                            Line = Line[0 : StartPos] + UniStr + EndStr[1:]
                StartPos = Line.find(u'\\x', StartPos + 1)

            IncList = gIncludePattern.findall(Line)
            if len(IncList) == 1:
                # Look next to the including file first, then in the include paths.
                for Dir in [File.Dir] + self.IncludePathList:
                    IncFile = PathClass(str(IncList[0]), Dir)
                    if os.path.isfile(IncFile.Path):
                        Lines.extend(self.PreProcess(IncFile))
                        break
                else:
                    EdkLogger.error("Unicode File Parser", FILE_NOT_FOUND, Message="Cannot find include file", ExtraData=str(IncList[0]))
                continue

            Lines.append(Line)

        return Lines

    #
    # Load a .uni file
    #
    # Pre-processes the file and feeds every language and string definition
    # found in it to GetLangDef() / AddStringToList() / GetStringObject().
    #
    def LoadUniFile(self, File = None):
        if File is None:
            EdkLogger.error("Unicode File Parser", PARSER_ERROR, 'No unicode file is given')
        self.File = File
        #
        # Process special char in file
        #
        Lines = self.PreProcess(File)

        #
        # Get Unicode Information
        #
        # Start the look-ahead lines as empty text so that files with fewer
        # than three lines cannot hit an unassigned variable. Near the end
        # of the file they keep their previous value, as before.
        SecondLine = u''
        ThirdLine = u''
        for IndexI, Line in enumerate(Lines):
            if (IndexI + 1) < len(Lines):
                SecondLine = Lines[IndexI + 1]
            if (IndexI + 2) < len(Lines):
                ThirdLine = Lines[IndexI + 2]

            #
            # Get Language def information
            #
            if Line.find(u'#langdef ') >= 0:
                self.GetLangDef(File, Line)
                continue

            Name = ''
            Language = ''
            Value = ''
            #
            # Get string def information format 1 as below
            #
            #     #string MY_STRING_1
            #     #language eng
            #     My first English string line 1
            #     My first English string line 2
            #     #string MY_STRING_1
            #     #language spa
            #     Mi segunda secuencia 1
            #     Mi segunda secuencia 2
            #
            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
                SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
                ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
                Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
                Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
                # The value runs until the next '#string' or '#language' line.
                for IndexJ in range(IndexI + 2, len(Lines)):
                    if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0:
                        Value = Value + Lines[IndexJ]
                    else:
                        break
                # Value = Value.replace(u'\r\n', u'')
                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
                # Check the string name
                if not self.IsCompatibleMode and Name != '':
                    MatchString = gIdentifierPattern.match(Name)
                    if MatchString is None:
                        EdkLogger.error('Unicode File Parser', FORMAT_INVALID, 'The string token name %s defined in UNI file %s contains the invalid character.' % (Name, self.File))
                self.AddStringToList(Name, Language, Value)
                continue

            #
            # Get string def information format 2 as below
            #
            #     #string MY_STRING_1     #language eng     "My first English string line 1"
            #                                               "My first English string line 2"
            #                             #language spa     "Mi segunda secuencia 1"
            #                                               "Mi segunda secuencia 2"
            #     #string MY_STRING_2     #language eng     "My first English string line 1"
            #                                               "My first English string line 2"
            #     #string MY_STRING_2     #language spa     "Mi segunda secuencia 1"
            #                                               "Mi segunda secuencia 2"
            #
            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') >= 0:
                StringItem = Line
                for IndexJ in range(IndexI + 1, len(Lines)):
                    NextLine = Lines[IndexJ]
                    HasString = NextLine.find(u'#string ') >= 0
                    HasLanguage = NextLine.find(u'#language ') >= 0
                    if HasString and HasLanguage:
                        # The next definition starts here.
                        break
                    elif not HasString and HasLanguage:
                        # Another language of the same string.
                        StringItem = StringItem + NextLine
                    elif NextLine.count(u'\"') >= 2:
                        # A continuation line: splice it in front of the
                        # closing quote of what has been collected so far.
                        StringItem = StringItem[ : StringItem.rfind(u'\"')] + NextLine[NextLine.find(u'\"') + len(u'\"') : ]
                self.GetStringObject(StringItem)
                continue

    #
    # Load multiple .uni files
    #
    def LoadUniFiles(self, FileList):
        if len(FileList) > 0:
            for File in FileList:
                self.LoadUniFile(File)

    #
    # Add a string to list
    #
    # An existing string of the same name and language gets its value
    # updated; a new one is stored and, when appended (Index == -1), is also
    # reserved with an empty value in every other defined language.
    #
    # @param Name:             The string name
    # @param Language:         The language code; must already be defined
    # @param Value:            The string value; None keeps an existing value
    # @param Token:            Accepted for compatibility; the stored token is
    #                          always the current length of the language's list
    # @param Referenced:       Initial referenced flag of a new string
    # @param UseOtherLangDef:  Language to fall back to, recorded on new strings
    # @param Index:            Position to insert a new string at, -1 to append
    #
    def AddStringToList(self, Name, Language, Value, Token = None, Referenced = False, UseOtherLangDef = '', Index = -1):
        for LangNameItem in self.LanguageDef:
            if Language == LangNameItem[0]:
                break
        else:
            EdkLogger.error('Unicode File Parser', FORMAT_NOT_SUPPORTED, "The language '%s' for %s is not defined in Unicode file %s." \
                            % (Language, Name, self.File))

        if Language not in self.OrderedStringList:
            self.OrderedStringList[Language] = []
            self.OrderedStringDict[Language] = {}

        if Name in self.OrderedStringDict[Language]:
            # Known string: only refresh its value.
            if Value is not None:
                ItemIndexInList = self.OrderedStringDict[Language][Name]
                Item = self.OrderedStringList[Language][ItemIndexInList]
                Item.UpdateValue(Value)
                Item.UseOtherLangDef = ''
            return

        Token = len(self.OrderedStringList[Language])
        if Index == -1:
            self.OrderedStringList[Language].append(StringDefClassObject(Name, Value, Referenced, Token, UseOtherLangDef))
            self.OrderedStringDict[Language][Name] = Token
            for LangName in self.LanguageDef:
                #
                # New STRING token will be added into all language string lists.
                # so that the unique STRING identifier is reserved for all languages in the package list.
                #
                if LangName[0] != Language:
                    if UseOtherLangDef != '':
                        OtherLangDef = UseOtherLangDef
                    else:
                        OtherLangDef = Language
                    self.OrderedStringList[LangName[0]].append(StringDefClassObject(Name, '', Referenced, Token, OtherLangDef))
                    self.OrderedStringDict[LangName[0]][Name] = len(self.OrderedStringList[LangName[0]]) - 1
        else:
            self.OrderedStringList[Language].insert(Index, StringDefClassObject(Name, Value, Referenced, Token, UseOtherLangDef))
            self.OrderedStringDict[Language][Name] = Index

    #
    # Set the string as referenced
    #
    def SetStringReferenced(self, Name):
        #
        # String tokens are added in the same order in all language string lists.
        # So, only update the status of string token in first language string list.
        #
        Lang = self.LanguageDef[0][0]
        if Name in self.OrderedStringDict[Lang]:
            ItemIndexInList = self.OrderedStringDict[Lang][Name]
            Item = self.OrderedStringList[Lang][ItemIndexInList]
            Item.Referenced = True

    #
    # Search the string in language definition by Name
    #
    # @retval:  The string definition object, or None when Name is unknown
    #
    def FindStringValue(self, Name, Lang):
        if Name in self.OrderedStringDict[Lang]:
            ItemIndexInList = self.OrderedStringDict[Lang][Name]
            return self.OrderedStringList[Lang][ItemIndexInList]

        return None

    #
    # Search the string in language definition by Token
    #
    # @retval:  The first string definition object with that token, or None
    #
    def FindByToken(self, Token, Lang):
        for Item in self.OrderedStringList[Lang]:
            if Item.Token == Token:
                return Item

        return None

    #
    # Re-order strings and re-generate tokens
    #
    def ReToken(self):
        #
        # Retoken all language strings according to the status of string token in the first language string.
        #
        FirstLangName = self.LanguageDef[0][0]
        FirstLangList = self.OrderedStringList[FirstLangName]

        # Convert the OrderedStringList to be OrderedStringListByToken in order to facilitate future search by token
        for LangNameItem in self.LanguageDef:
            self.OrderedStringListByToken[LangNameItem[0]] = {}

        #
        # Use small token for all referred string token.
        #
        RefToken = 0
        for Index, FirstLangItem in enumerate(FirstLangList):
            if FirstLangItem.Referenced:
                for LangNameItem in self.LanguageDef:
                    LangName = LangNameItem[0]
                    OtherLangItem = self.OrderedStringList[LangName][Index]
                    OtherLangItem.Referenced = True
                    OtherLangItem.Token = RefToken
                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
                RefToken = RefToken + 1

        #
        # Use big token for all unreferred string token.
        #
        UnRefToken = 0
        for Index, FirstLangItem in enumerate(FirstLangList):
            if not FirstLangItem.Referenced:
                for LangNameItem in self.LanguageDef:
                    LangName = LangNameItem[0]
                    OtherLangItem = self.OrderedStringList[LangName][Index]
                    OtherLangItem.Token = RefToken + UnRefToken
                    self.OrderedStringListByToken[LangName][OtherLangItem.Token] = OtherLangItem
                UnRefToken = UnRefToken + 1

    #
    # Show the instance itself
    #
    def ShowMe(self):
        print(self.LanguageDef)
        for Item in self.OrderedStringList:
            print(Item)
            for Member in self.OrderedStringList[Item]:
                print(str(Member))
30fdf114 LG |
694 | \r |
# Demo entry point: runs only when this file is executed as a script,
# not when it is imported by another module.
if __name__ == '__main__':
    EdkLogger.Initialize()
    EdkLogger.SetLevel(EdkLogger.DEBUG_0)
    # Load the two sample files, renumber the tokens and dump the result.
    DemoFiles = [
        PathClass("C:\\Edk\\Strings.uni"),
        PathClass("C:\\Edk\\Strings2.uni"),
    ]
    a = UniFileClassObject(DemoFiles)
    a.ReToken()
    a.ShowMe()