]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/Trim/Trim.py
BaseTools: Trim raises an exception when the input ASL file is UTF-8 encoded
[mirror_edk2.git] / BaseTools / Source / Python / Trim / Trim.py
1 ## @file
2 # Trim files preprocessed by compiler
3 #
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
5 # This program and the accompanying materials
6 # are licensed and made available under the terms and conditions of the BSD License
7 # which accompanies this distribution. The full text of the license may be found at
8 # http://opensource.org/licenses/bsd-license.php
9 #
10 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12 #
13
14 ##
15 # Import Modules
16 #
17 import Common.LongFilePathOs as os
18 import sys
19 import re
20 from io import BytesIO
21 import codecs
22 from optparse import OptionParser
23 from optparse import make_option
24 from Common.BuildToolError import *
25 from Common.Misc import *
26 from Common.DataType import *
27 from Common.BuildVersion import gBUILD_VERSION
28 import Common.EdkLogger as EdkLogger
29 from Common.LongFilePathSupport import OpenLongFilePath as open
30
# Version and Copyright
# "%prog" in __version__ is left as-is here; the string is handed to
# OptionParser(version=...) in Options().
__version_number__ = " ".join(("0.10", gBUILD_VERSION))
__version__ = "%prog Version {0}".format(__version_number__)
__copyright__ = "Copyright (c) 2007-2018, Intel Corporation. All rights reserved."
35
## Regular expression for matching Line Control directive like "#line xxx"
#  Group 1 is the line number, group 2 the file name.
gLineControlDirective = re.compile(r'^\s*#(?:line)?\s+([0-9]+)\s+"*([^"]*)"')
## Regular expression for matching "typedef struct"
gTypedefPattern = re.compile(r"^\s*typedef\s+struct(\s+\w+)?\s*[{]*$", re.MULTILINE)
## Regular expression for matching "#pragma pack"
gPragmaPattern = re.compile(r"^\s*#pragma\s+pack", re.MULTILINE)
## Regular expression for matching "typedef"
gTypedef_SinglePattern = re.compile(r"^\s*typedef", re.MULTILINE)
## Regular expression for matching "typedef struct, typedef union, struct, union"
gTypedef_MulPattern = re.compile(r"^\s*(typedef)?\s+(struct|union)(\s+\w+)?\s*[{]*$", re.MULTILINE)

#
# The following number patterns only match when these criteria are met:
#   the number is preceded by a character that is not alphanumeric or '_',
#   and it is not followed by an alphanumeric character or '_'.
# The patterns match greedily, so it is fine for gDecNumberPattern and
# gHexNumberPattern to grab the longest possible match.
#
## Regular expression for matching HEX number (optional 'U' postfix is group 3)
gHexNumberPattern = re.compile(r"(?<=[^a-zA-Z0-9_])(0[xX])([0-9a-fA-F]+)(U(?=$|[^a-zA-Z0-9_]))?")
## Regular expression for matching decimal number with 'U' postfix
gDecNumberPattern = re.compile(r"(?<=[^a-zA-Z0-9_])([0-9]+)U(?=$|[^a-zA-Z0-9_])")
## Regular expression for matching constant with 'ULL' 'LL' postfix
gLongNumberPattern = re.compile(r"(?<=[^a-zA-Z0-9_])(0[xX][0-9a-fA-F]+|[0-9]+)U?LL(?=$|[^a-zA-Z0-9_])")

## Regular expression for matching "Include ()" in asl file
#  Group 1 is the leading indent, group 2 the included file name.
gAslIncludePattern = re.compile(r'^(\s*)[iI]nclude\s*\("?([^"\(\)]+)"\)', re.MULTILINE)
## Regular expression for matching C style #include "XXX.asl" in asl file
#  Group 1 is the leading indent, group 2 the file name, group 3 the closing
#  delimiter ('"' or '>').
gAslCIncludePattern = re.compile(r'^(\s*)#include\s*[<"]\s*([-\\/\w.]+)\s*([>"])', re.MULTILINE)

## file cache to avoid circular include in ASL file
gIncludedAslFile = []
67
## Trim preprocessed source code
#
# Remove extra content made by preprocessor. The preprocessor must enable the
# line number generation option when preprocessing.
#
# Lines are kept only while the most recent line control directive names the
# preprocessed file itself (the file named by the first directive found). If
# the input has no line control directive at all, a fallback pass copies the
# lines while dropping typedef/struct/union definitions and "#pragma pack".
#
# @param  Source      File to be trimmed
# @param  Target      File to store the trimmed content
# @param  ConvertHex  If True, convert standard HEX format to MASM format
# @param  TrimLong    If True, remove the 'LL'/'ULL' postfix of long constants
#
def TrimPreprocessedFile(Source, Target, ConvertHex, TrimLong):
    CreateDirectory(os.path.dirname(Target))
    try:
        with open(Source, "r") as File:
            Lines = File.readlines()
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=Source)

    PreprocessedFile = ""
    InjectedFile = ""
    LineIndexOfOriginalFile = None
    # Line number announced by the latest directive and not yet consumed
    LineNumber = None
    NewLines = []
    LineControlDirectiveFound = False
    for Index, Line in enumerate(Lines):
        #
        # Find out the name of files injected by preprocessor from the lines
        # with Line Control directive
        #
        MatchList = gLineControlDirective.findall(Line)
        if MatchList:
            # The pattern has exactly two groups: (line number, file name).
            # The captured number is a plain digit sequence, so parse it as
            # decimal; this also accepts zero-padded values.
            LineNumber = int(MatchList[0][0])
            InjectedFile = os.path.normcase(os.path.normpath(MatchList[0][1]))
            # The first injected file must be the preprocessed file itself
            if PreprocessedFile == "":
                PreprocessedFile = InjectedFile
            LineControlDirectiveFound = True
            continue
        if PreprocessedFile == "" or InjectedFile != PreprocessedFile:
            # Content before the first directive or from another file
            continue

        if LineIndexOfOriginalFile is None:
            #
            # Any non-empty lines must be from original preprocessed file.
            # And this must be the first one.
            #
            LineIndexOfOriginalFile = Index
            EdkLogger.verbose("Found original file content starting from line %d"
                              % (LineIndexOfOriginalFile + 1))

        if TrimLong:
            Line = gLongNumberPattern.sub(r"\1", Line)
        # convert HEX number format if indicated; the 'U' postfix is dropped
        # in both cases
        if ConvertHex:
            Line = gHexNumberPattern.sub(r"0\2h", Line)
        else:
            Line = gHexNumberPattern.sub(r"\1\2", Line)

        # convert Decimal number format
        Line = gDecNumberPattern.sub(r"\1", Line)

        if LineNumber is None:
            NewLines.append(Line)
            continue

        EdkLogger.verbose("Got line directive: line=%d" % LineNumber)
        if 0 < LineNumber <= len(NewLines):
            # The directive points back at a line already emitted: overwrite it.
            # The lower bound keeps a "line 0" directive from indexing
            # NewLines[-1].
            NewLines[LineNumber - 1] = Line
        else:
            # in case preprocessor removed some lines, like blank or comment
            # lines: pad up to the announced line (no-op when nothing is missing)
            NewLines.extend([TAB_LINE_BREAK] * (LineNumber - 1 - len(NewLines)))
            NewLines.append(Line)
        LineNumber = None
        EdkLogger.verbose("Now we have lines: %d" % len(NewLines))

    # in case there's no line directive or linemarker found
    if (not LineControlDirectiveFound) and NewLines == []:
        MulPatternFlag = False
        SinglePatternFlag = False
        Brace = 0
        for Line in Lines:
            if not MulPatternFlag and gTypedef_MulPattern.search(Line) is None:
                if not SinglePatternFlag and gTypedef_SinglePattern.search(Line) is None:
                    # remove "#pragma pack" directive, keep everything else
                    if gPragmaPattern.search(Line) is None:
                        NewLines.append(Line)
                    continue
                elif not SinglePatternFlag:
                    # start of a single "typedef ...;" statement
                    SinglePatternFlag = True
                if ";" in Line:
                    SinglePatternFlag = False
            elif not MulPatternFlag:
                # found "typedef struct, typedef union, union, struct", set a flag
                MulPatternFlag = True

            # match { and } to find the end of typedef definition
            if "{" in Line:
                Brace += 1
            elif "}" in Line:
                Brace -= 1

            # "typedef struct, typedef union, union, struct" must end with a ";"
            if Brace == 0 and ";" in Line:
                MulPatternFlag = False

    # save to file
    try:
        with open(Target, 'w') as File:
            File.writelines(NewLines)
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=Target)
185
## Trim preprocessed VFR file
#
# Remove extra content made by preprocessor. The preprocessor doesn't need to
# enable line number generation option when preprocessing.
#
# Every line before the line consisting only of "formset" is blanked out,
# except "typedef struct" definitions and "#pragma pack" directives. A
# "typedef struct" whose closing line names GUID, EFI_PLABEL or
# PAL_CALL_RETURN is blanked as well. Blanking (instead of deleting) keeps
# the number of lines unchanged.
#
# @param  Source  File to be trimmed
# @param  Target  File to store the trimmed content
#
def TrimPreprocessedVfr(Source, Target):
    CreateDirectory(os.path.dirname(Target))

    # read whole file
    try:
        with open(Source, "r") as File:
            Lines = File.readlines()
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=Source)

    FoundTypedef = False
    Brace = 0
    TypedefStart = 0
    # Entries of Lines are replaced in place while iterating; the list length
    # never changes, so the iteration stays valid.
    for Index, Line in enumerate(Lines):
        # don't trim the lines from "formset" definition to the end of file
        if Line.strip() == 'formset':
            break

        if not FoundTypedef:
            if Line.startswith(('#line', '# ')):
                # empty the line number directive if it's not among "typedef struct"
                Lines[Index] = "\n"
                continue

            if gTypedefPattern.search(Line) is None:
                # keep "#pragma pack" directive, blank everything else
                if gPragmaPattern.search(Line) is None:
                    Lines[Index] = "\n"
                continue

            # found "typedef struct", keep its position and set a flag
            FoundTypedef = True
            TypedefStart = Index

        # match { and } to find the end of typedef definition
        if "{" in Line:
            Brace += 1
        elif "}" in Line:
            Brace -= 1

        # "typedef struct" must end with a ";"
        if Brace == 0 and ";" in Line:
            FoundTypedef = False
            # keep all "typedef struct" except to GUID, EFI_PLABEL and PAL_CALL_RETURN
            if Line.strip("} ;\r\n") in [TAB_GUID, "EFI_PLABEL", "PAL_CALL_RETURN"]:
                for i in range(TypedefStart, Index + 1):
                    Lines[i] = "\n"

    # save all lines trimmed
    try:
        with open(Target, 'w') as File:
            File.writelines(Lines)
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=Target)
250
## Read the content ASL file, including ASL included, recursively
#
# Include statements (ASL "Include()" or C style "#include") that reference a
# ".asl" or ".asi" file are replaced by the content of that file, each
# included line prefixed with the indent of the include statement. Other
# lines are copied with the current indent.
#
# @param  Source            File to be read
# @param  Indent            Spaces before the Include() statement
# @param  IncludePathList   The list of external include file
# @param  LocalSearchPath   If LocalSearchPath is specified, this path will be searched
#                           first for the included file; otherwise, only the path specified
#                           in the IncludePathList will be searched.
#
# @retval  List of lines of the expanded file; an empty list when Source is
#          already being included (circular include).
#
def DoInclude(Source, Indent='', IncludePathList=[], LocalSearchPath=None):
    # NOTE: the shared default list of IncludePathList is only read, never mutated.
    NewFileContent = []

    #
    # Search LocalSearchPath first if it is specified.
    #
    if LocalSearchPath:
        SearchPathList = [LocalSearchPath] + IncludePathList
    else:
        SearchPathList = IncludePathList

    IncludeFile = None
    for IncludePath in SearchPathList:
        Candidate = os.path.join(IncludePath, Source)
        if os.path.isfile(Candidate):
            IncludeFile = Candidate
            break
    if IncludeFile is None:
        # The error code comes second and the message third, as in the other
        # EdkLogger.error calls of this file.
        EdkLogger.error("Trim", FILE_OPEN_FAILURE,
                        "Failed to find include file %s" % Source, ExtraData=Source)

    try:
        try:
            with open(IncludeFile, "r") as File:
                F = File.readlines()
        except Exception:
            # Best effort: retry as UTF-8 when the default read fails
            with codecs.open(IncludeFile, "r", encoding='utf-8') as File:
                F = File.readlines()
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=Source)

    # avoid A "include" B and B "include" A
    IncludeFile = os.path.abspath(os.path.normpath(IncludeFile))
    if IncludeFile in gIncludedAslFile:
        EdkLogger.warn("Trim", "Circular include",
                       ExtraData="%s -> %s" % (" -> ".join(gIncludedAslFile), IncludeFile))
        return []
    gIncludedAslFile.append(IncludeFile)

    try:
        for Line in F:
            NestedSearchPath = None
            Result = gAslIncludePattern.findall(Line)
            if not Result:
                Result = gAslCIncludePattern.findall(Line)
                if not Result or os.path.splitext(Result[0][1])[1].lower() not in [".asl", ".asi"]:
                    # Not an include of an ASL file: copy the line as is
                    NewFileContent.append("%s%s" % (Indent, Line))
                    continue
                #
                # We should first search the local directory if current file are using pattern #include "XXX"
                #
                if Result[0][2] == '"':
                    NestedSearchPath = os.path.dirname(IncludeFile)
            CurrentIndent = Indent + Result[0][0]
            IncludedFile = Result[0][1]
            NewFileContent.extend(DoInclude(IncludedFile, CurrentIndent, IncludePathList, NestedSearchPath))
            NewFileContent.append("\n")
    finally:
        # Always leave the include stack as it was found, even on failure
        gIncludedAslFile.pop()

    return NewFileContent
317
318
## Trim ASL file
#
# Replace ASL include statement with the content the included file
#
# The directory of Source is always the first search directory. Each line of
# IncludePathFile starting with "/I" or "-I" adds one more search directory;
# any other line is reported with a warning. "#undef MIN" and "#undef MAX"
# are put in front of the result.
#
# @param  Source          File to be trimmed
# @param  Target          File to store the trimmed content
# @param  IncludePathFile The file to log the external include path
#
def TrimAslFile(Source, Target, IncludePathFile):
    CreateDirectory(os.path.dirname(Target))

    #
    # Add source directory as the first search directory
    #
    SourceDir = os.path.dirname(Source) or '.'
    IncludePathList = [SourceDir]

    #
    # If additional include path file is specified, append them all
    # to the search directory list.
    #
    if IncludePathFile:
        # Only the read is guarded, so problems while handling the lines are
        # not reported as a file-open failure.
        try:
            with open(IncludePathFile, 'r') as File:
                FileLines = File.readlines()
        except Exception:
            EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=IncludePathFile)

        for LineNum, Line in enumerate(FileLines, 1):
            if Line.startswith(("/I", "-I")):
                IncludePathList.append(Line[2:].strip())
            else:
                EdkLogger.warn("Trim", "Invalid include line in include list file.", IncludePathFile, LineNum)

    Lines = DoInclude(Source, '', IncludePathList)

    #
    # Undef MIN and MAX to avoid collision in ASL source code
    #
    Lines.insert(0, "#undef MIN\n#undef MAX\n")

    # save all lines trimmed
    try:
        with open(Target, 'w') as File:
            File.writelines(Lines)
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, ExtraData=Target)
370
## Generate the file holding the VFR binary / UNI string offsets of a module
#
# Every ".c" file found under DebugDir (except AutoGen.c) contributes the
# name "<file>Bin"; "<ModuleName>Strings" is added as well. The names are
# looked up with GetVariableOffset() using <DebugDir>/<ModuleName>.map and
# <DebugDir>/<ModuleName>.efi. For each returned item a 16-byte GUID followed
# by the offset packed with format 'Q' is written to OutputFile. Nothing is
# written when no offset is returned.
#
# @param  ModuleName  The module's BASE_NAME
# @param  DebugDir    Debug output directory of the module
# @param  OutputFile  File to store the GUID + offset records
#
def GenerateVfrBinSec(ModuleName, DebugDir, OutputFile):
    #
    # UNI offset in image.
    # { 0x8913c5e0, 0x33f6, 0x4d86, { 0x9b, 0xf1, 0x43, 0xef, 0x89, 0xfc, 0x6, 0x66 } }
    #
    UniGuid = b'\xe0\xc5\x13\x89\xf63\x86M\x9b\xf1C\xef\x89\xfc\x06f'
    #
    # VFR binary offset in image.
    # { 0xd0bc7cb4, 0x6a47, 0x495f, { 0xaa, 0x11, 0x71, 0x7, 0x46, 0xda, 0x6, 0xa2 } };
    #
    VfrGuid = b'\xb4|\xbc\xd0Gj_I\xaa\x11q\x07F\xda\x06\xa2'

    VfrNameList = []
    if os.path.isdir(DebugDir):
        for _CurrentDir, _Dirs, Files in os.walk(DebugDir):
            for FileName in Files:
                Name, Ext = os.path.splitext(FileName)
                if Ext == '.c' and Name != 'AutoGen':
                    VfrNameList.append(Name + 'Bin')

    VfrNameList.append(ModuleName + 'Strings')

    EfiFileName = os.path.join(DebugDir, ModuleName + '.efi')
    MapFileName = os.path.join(DebugDir, ModuleName + '.map')
    VfrUniOffsetList = GetVariableOffset(MapFileName, EfiFileName, VfrNameList)

    if not VfrUniOffsetList:
        return

    try:
        fInputfile = open(OutputFile, "wb+")
    except Exception:
        EdkLogger.error("Trim", FILE_OPEN_FAILURE, "File open failed for %s" %OutputFile, None)

    # The with statement closes the file on the failure path too
    with fInputfile:
        # Collect GUID + Offset records; Item is (name, offset as hex string)
        Records = []
        for Item in VfrUniOffsetList:
            Records.append(UniGuid if "Strings" in Item[0] else VfrGuid)
            # NOTE(review): pack is presumably struct.pack re-exported by
            # Common.Misc - confirm
            Records.append(pack('Q', int(Item[1], 16)))

        #
        # write data into file.
        #
        try:
            fInputfile.write(b"".join(Records))
        except Exception:
            EdkLogger.error("Trim", FILE_WRITE_FAILURE, "Write data to file %s failed, please check whether the file been locked or using by other applications." %OutputFile, None)
430
431
## Parse command line options
#
# Using standard Python module optparse to parse command line option of this tool.
# Exactly one input file is required, except for --Vfr-Uni-Offset where it
# may be omitted (an empty string is returned in its place).
#
# @retval Options   A optparse.Values object containing the parsed options
# @retval InputFile Path of file to be trimmed
#
def Options():
    OptionList = [
        # --- type of the input file (default: Vfr) ---
        make_option("-s", "--source-code", action="store_const", dest="FileType", const="SourceCode",
                    help="The input file is preprocessed source code, including C or assembly code"),
        make_option("-r", "--vfr-file", action="store_const", dest="FileType", const="Vfr",
                    help="The input file is preprocessed VFR file"),
        make_option("--Vfr-Uni-Offset", action="store_const", dest="FileType", const="VfrOffsetBin",
                    help="The input file is EFI image"),
        make_option("-a", "--asl-file", action="store_const", dest="FileType", const="Asl",
                    help="The input file is ASL file"),
        # --- conversions applied to preprocessed source code ---
        make_option("-c", "--convert-hex", action="store_true", dest="ConvertHex",
                    help="Convert standard hex format (0xabcd) to MASM format (abcdh)"),
        make_option("-l", "--trim-long", action="store_true", dest="TrimLong",
                    help="Remove postfix of long number"),
        # --- input / output locations ---
        make_option("-i", "--include-path-file", dest="IncludePathFile",
                    help="The input file is include path list to search for ASL include file"),
        make_option("-o", "--output", dest="OutputFile",
                    help="File to store the trimmed content"),
        make_option("--ModuleName", dest="ModuleName", help="The module's BASE_NAME"),
        make_option("--DebugDir", dest="DebugDir",
                    help="Debug Output directory to store the output files"),
        # --- logging ---
        make_option("-v", "--verbose", action="store_const", dest="LogLevel", const=EdkLogger.VERBOSE,
                    help="Run verbosely"),
        make_option("-d", "--debug", type="int", dest="LogLevel",
                    help="Run with debug information"),
        make_option("-q", "--quiet", action="store_const", dest="LogLevel", const=EdkLogger.QUIET,
                    help="Run quietly"),
        make_option("-?", action="help", help="show this help message and exit"),
    ]

    # use clearer usage to override default usage message
    UsageString = (
        "%prog [-s|-r|-a|--Vfr-Uni-Offset] [-c] [-v|-d <debug_level>|-q] "
        "[-i <include_path_file>] [-o <output_file>] "
        "[--ModuleName <ModuleName>] [--DebugDir <DebugDir>] [<input_file>]"
    )

    Parser = OptionParser(usage=UsageString, option_list=OptionList,
                          version=__version__, description=__copyright__)
    Parser.set_defaults(FileType="Vfr", ConvertHex=False, LogLevel=EdkLogger.INFO)

    ParsedOptions, Positional = Parser.parse_args()
    PositionalCount = len(Positional)

    # error check: the input file is optional only for the VFR/UNI offset mode
    if ParsedOptions.FileType == 'VfrOffsetBin':
        if PositionalCount == 0:
            return ParsedOptions, ''
        if PositionalCount > 1:
            EdkLogger.error("Trim", OPTION_NOT_SUPPORTED, ExtraData=Parser.get_usage())
    if PositionalCount == 0:
        EdkLogger.error("Trim", OPTION_MISSING, ExtraData=Parser.get_usage())
    if PositionalCount > 1:
        EdkLogger.error("Trim", OPTION_NOT_SUPPORTED, ExtraData=Parser.get_usage())

    return ParsedOptions, Positional[0]
493
## Entrance method
#
# This method mainly dispatch specific methods per the command line options.
# If no error found, return zero value so the caller of this tool can know
# if it's executed successfully or not.
#
# @retval 0 Tool was successful
# @retval 1 Tool failed
#
def Main():
    import platform
    import traceback

    # Interpreter description followed by the stack trace of the exception
    # currently being handled
    def _CallStackText():
        return "(Python %s on %s) " % (platform.python_version(), sys.platform) + traceback.format_exc()

    try:
        EdkLogger.Initialize()
        CommandOptions, InputFile = Options()
        Level = CommandOptions.LogLevel
        if Level < EdkLogger.DEBUG_9:
            Level = Level + 1
        EdkLogger.SetLevel(Level)
    except FatalError:
        return 1

    try:
        FileType = CommandOptions.FileType
        if FileType == "VfrOffsetBin":
            GenerateVfrBinSec(CommandOptions.ModuleName, CommandOptions.DebugDir, CommandOptions.OutputFile)
        else:
            # Every trimming mode defaults its output to "<input base name>.iii"
            if CommandOptions.OutputFile is None:
                CommandOptions.OutputFile = os.path.splitext(InputFile)[0] + '.iii'
            if FileType == "Vfr":
                TrimPreprocessedVfr(InputFile, CommandOptions.OutputFile)
            elif FileType == "Asl":
                TrimAslFile(InputFile, CommandOptions.OutputFile, CommandOptions.IncludePathFile)
            else:
                TrimPreprocessedFile(InputFile, CommandOptions.OutputFile,
                                     CommandOptions.ConvertHex, CommandOptions.TrimLong)
    except FatalError:
        # Already reported by EdkLogger.error; add the call stack only when
        # the log level asks for it
        if CommandOptions.LogLevel <= EdkLogger.DEBUG_9:
            EdkLogger.quiet(_CallStackText())
        return 1
    except:
        # Last resort at the tool boundary: report anything unexpected
        EdkLogger.error(
            "\nTrim",
            CODE_ERROR,
            "Unknown fatal error when trimming [%s]" % InputFile,
            ExtraData="\n(Please send email to edk2-devel@lists.01.org for help, attaching following call stack trace!)\n",
            RaiseError=False
        )
        EdkLogger.quiet(_CallStackText())
        return 1

    return 0
549
if __name__ == '__main__':
    ExitCode = Main()
    ## 0-127 is a safe return range, and 1 is a standard default error
    if not 0 <= ExitCode <= 127:
        ExitCode = 1
    sys.exit(ExitCode)
555