2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
# Prefix prepended to the 'addr2line' tool name by
# Addr2LineSymbolizer.open_addr2line(); the script entry point assigns it
# from the -c (CROSS_COMPILE) option.
binutils_prefix = None
# Callable handed to SymbolizationLoop to rewrite a binary path before it is
# symbolized; the script entry point may assign sysroot_path_filter here.
binary_name_filter = None
# Regex fragments that fix_filename() strips from reported file paths; the
# script entry point assigns the positional path_to_cut arguments here.
fix_filename_patterns = None
# When false, SymbolizationLoop.symbolize_address() raises instead of
# appending the system symbolizer to the chain.
allow_system_symbolizer = True
# When true, SymbolizationLoop.symbolize_address() skips the llvm-symbolizer
# setup that is guarded by "if not force_system_symbolizer".
force_system_symbolizer = False
29 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source-location string reported by a symbolizer.

    Every entry of the module-level ``fix_filename_patterns`` is used as a
    regular-expression fragment: everything up to and including its match is
    removed from ``file_name``. Afterwards locations inside the ASan runtime
    (``asan_*.cc:<line>``) are collapsed to ``_asan_rtl_`` and
    ``crtstuff.c:0`` locations are replaced with ``???:0``.

    Args:
        file_name: location text, e.g. ``/path/to/file.cc:12``.

    Returns:
        The rewritten location string.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    # Callers treat the result as a string (startswith / %-formatting), so
    # the rewritten name has to be handed back instead of falling off the end.
    return file_name
def sysroot_path_filter(binary_name):
    """Return ``binary_name`` with the module-level ``sysroot_path`` prepended.

    This is plain concatenation; no path separator is inserted between the
    two parts.
    """
    relocated = sysroot_path + binary_name
    return relocated
42 return s
in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s",
43 "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390"]
46 # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
52 class Symbolizer(object):
56 def symbolize(self
, addr
, binary
, offset
):
57 """Symbolize the given address (pair of binary and offset).
59 Overridden in subclasses.
61 addr: virtual address of an instruction.
62 binary: path to executable/shared object containing this instruction.
63 offset: instruction offset in the @binary.
65 list of strings (one string for each inlined frame) describing
66 the code locations for this instruction (that is, function name, file
67 name, line and column numbers).
72 class LLVMSymbolizer(Symbolizer
):
73 def __init__(self
, symbolizer_path
, default_arch
, system
, dsym_hints
=[]):
74 super(LLVMSymbolizer
, self
).__init
__()
75 self
.symbolizer_path
= symbolizer_path
76 self
.default_arch
= default_arch
78 self
.dsym_hints
= dsym_hints
79 self
.pipe
= self
.open_llvm_symbolizer()
81 def open_llvm_symbolizer(self
):
82 cmd
= [self
.symbolizer_path
,
83 '--use-symbol-table=true',
84 '--demangle=%s' % demangle
,
85 '--functions=linkage',
87 '--default-arch=%s' % self
.default_arch
]
88 if self
.system
== 'Darwin':
89 for hint
in self
.dsym_hints
:
90 cmd
.append('--dsym-hint=%s' % hint
)
94 result
= subprocess
.Popen(cmd
, stdin
=subprocess
.PIPE
,
95 stdout
=subprocess
.PIPE
,
97 universal_newlines
=True)
102 def symbolize(self
, addr
, binary
, offset
):
103 """Overrides Symbolizer.symbolize."""
108 symbolizer_input
= '"%s" %s' % (binary
, offset
)
110 print(symbolizer_input
)
111 self
.pipe
.stdin
.write("%s\n" % symbolizer_input
)
113 function_name
= self
.pipe
.stdout
.readline().rstrip()
114 if not function_name
:
116 file_name
= self
.pipe
.stdout
.readline().rstrip()
117 file_name
= fix_filename(file_name
)
118 if (not function_name
.startswith('??') or
119 not file_name
.startswith('??')):
120 # Append only non-trivial frames.
121 result
.append('%s in %s %s' % (addr
, function_name
,
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=[]):
    """Build an LLVMSymbolizer after locating the symbolizer executable.

    The executable is taken from the first of these that yields a non-empty
    value: the LLVM_SYMBOLIZER_PATH environment variable, the
    ASAN_SYMBOLIZER_PATH environment variable, and finally the bare name
    'llvm-symbolizer'.

    Args:
        system: system name forwarded to LLVMSymbolizer.
        default_arch: architecture forwarded to LLVMSymbolizer.
        dsym_hints: hints forwarded unchanged to LLVMSymbolizer; this
            function itself never mutates the default list.

    Returns:
        A new LLVMSymbolizer instance.
    """
    tool = (os.getenv('LLVM_SYMBOLIZER_PATH')
            or os.getenv('ASAN_SYMBOLIZER_PATH')
            # Neither variable is usable: assume llvm-symbolizer is in PATH.
            or 'llvm-symbolizer')
    return LLVMSymbolizer(tool, default_arch, system, dsym_hints)
140 class Addr2LineSymbolizer(Symbolizer
):
141 def __init__(self
, binary
):
142 super(Addr2LineSymbolizer
, self
).__init
__()
144 self
.pipe
= self
.open_addr2line()
145 self
.output_terminator
= -1
147 def open_addr2line(self
):
148 addr2line_tool
= 'addr2line'
150 addr2line_tool
= binutils_prefix
+ addr2line_tool
151 cmd
= [addr2line_tool
, '-fi']
153 cmd
+= ['--demangle']
154 cmd
+= ['-e', self
.binary
]
157 return subprocess
.Popen(cmd
,
158 stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
,
160 universal_newlines
=True)
162 def symbolize(self
, addr
, binary
, offset
):
163 """Overrides Symbolizer.symbolize."""
164 if self
.binary
!= binary
:
168 self
.pipe
.stdin
.write("%s\n" % offset
)
169 self
.pipe
.stdin
.write("%s\n" % self
.output_terminator
)
170 is_first_frame
= True
172 function_name
= self
.pipe
.stdout
.readline().rstrip()
173 file_name
= self
.pipe
.stdout
.readline().rstrip()
175 is_first_frame
= False
176 elif function_name
in ['', '??']:
177 assert file_name
== function_name
179 lines
.append((function_name
, file_name
));
181 lines
.append(('??', '??:0'))
182 return ['%s in %s %s' % (addr
, function
, fix_filename(file)) for (function
, file) in lines
]
184 class UnbufferedLineConverter(object):
186 Wrap a child process that responds to each line of input with one line of
187 output. Uses pty to trick the child into providing unbuffered output.
189 def __init__(self
, args
, close_stderr
=False):
190 # Local imports so that the script can start on Windows.
195 # We're the child. Transfer control to command.
197 dev_null
= os
.open('/dev/null', 0)
199 os
.execvp(args
[0], args
)
202 attr
= termios
.tcgetattr(fd
)
203 attr
[3] = attr
[3] & ~termios
.ECHO
204 termios
.tcsetattr(fd
, termios
.TCSANOW
, attr
)
205 # Set up a file()-like interface to the child process
206 self
.r
= os
.fdopen(fd
, "r", 1)
207 self
.w
= os
.fdopen(os
.dup(fd
), "w", 1)
def convert(self, line):
    """Write ``line`` plus a newline to ``self.w`` and return the reply.

    The reply is whatever ``self.readline()`` yields after the write.
    """
    request = line + "\n"
    self.w.write(request)
    reply = self.readline()
    return reply
214 return self
.r
.readline().rstrip()
217 class DarwinSymbolizer(Symbolizer
):
218 def __init__(self
, addr
, binary
, arch
):
219 super(DarwinSymbolizer
, self
).__init
__()
226 print('atos -o %s -arch %s' % (self
.binary
, self
.arch
))
227 cmdline
= ['atos', '-o', self
.binary
, '-arch', self
.arch
]
228 self
.atos
= UnbufferedLineConverter(cmdline
, close_stderr
=True)
230 def symbolize(self
, addr
, binary
, offset
):
231 """Overrides Symbolizer.symbolize."""
232 if self
.binary
!= binary
:
234 atos_line
= self
.atos
.convert('0x%x' % int(offset
, 16))
235 while "got symbolicator for" in atos_line
:
236 atos_line
= self
.atos
.readline()
237 # A well-formed atos response looks like this:
238 # foo(type1, type2) (in object.name) (filename.cc:80)
239 match
= re
.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line
)
241 print('atos_line: ', atos_line
)
243 function_name
= match
.group(1)
244 function_name
= re
.sub('\(.*?\)', '', function_name
)
245 file_name
= fix_filename(match
.group(3))
246 return ['%s in %s %s' % (addr
, function_name
, file_name
)]
248 return ['%s in %s' % (addr
, atos_line
)]
251 # Chain several symbolizers so that if one symbolizer fails, we fall back
252 # to the next symbolizer in chain.
253 class ChainSymbolizer(Symbolizer
):
254 def __init__(self
, symbolizer_list
):
255 super(ChainSymbolizer
, self
).__init
__()
256 self
.symbolizer_list
= symbolizer_list
258 def symbolize(self
, addr
, binary
, offset
):
259 """Overrides Symbolizer.symbolize."""
260 for symbolizer
in self
.symbolizer_list
:
262 result
= symbolizer
.symbolize(addr
, binary
, offset
)
def append_symbolizer(self, symbolizer):
    """Add ``symbolizer`` to the end of ``self.symbolizer_list``."""
    chain = self.symbolizer_list
    chain.append(symbolizer)
271 def BreakpadSymbolizerFactory(binary
):
272 suffix
= os
.getenv('BREAKPAD_SUFFIX')
274 filename
= binary
+ suffix
275 if os
.access(filename
, os
.F_OK
):
276 return BreakpadSymbolizer(filename
)
def SystemSymbolizerFactory(system, addr, binary, arch):
    """Create the platform's native symbolizer for ``binary``.

    Args:
        system: system name; 'Darwin' selects DarwinSymbolizer, while
            'Linux', 'FreeBSD', 'NetBSD' and 'SunOS' select
            Addr2LineSymbolizer.
        addr: address forwarded to DarwinSymbolizer.
        binary: path of the binary to symbolize.
        arch: architecture forwarded to DarwinSymbolizer.

    Returns:
        The new symbolizer, or None when ``system`` is not one of the names
        listed above.
    """
    addr2line_systems = ('Linux', 'FreeBSD', 'NetBSD', 'SunOS')
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary, arch)
    if system in addr2line_systems:
        return Addr2LineSymbolizer(binary)
    # Unrecognized system: the caller receives None.
    return None
287 class BreakpadSymbolizer(Symbolizer
):
288 def __init__(self
, filename
):
289 super(BreakpadSymbolizer
, self
).__init
__()
290 self
.filename
= filename
291 lines
= file(filename
).readlines()
294 self
.address_list
= []
296 # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
297 fragments
= lines
[0].rstrip().split()
298 self
.arch
= fragments
[2]
299 self
.debug_id
= fragments
[3]
300 self
.binary
= ' '.join(fragments
[4:])
301 self
.parse_lines(lines
[1:])
303 def parse_lines(self
, lines
):
304 cur_function_addr
= ''
306 fragments
= line
.split()
307 if fragments
[0] == 'FILE':
308 assert int(fragments
[1]) == len(self
.files
)
309 self
.files
.append(' '.join(fragments
[2:]))
310 elif fragments
[0] == 'PUBLIC':
311 self
.symbols
[int(fragments
[1], 16)] = ' '.join(fragments
[3:])
312 elif fragments
[0] in ['CFI', 'STACK']:
314 elif fragments
[0] == 'FUNC':
315 cur_function_addr
= int(fragments
[1], 16)
316 if not cur_function_addr
in self
.symbols
.keys():
317 self
.symbols
[cur_function_addr
] = ' '.join(fragments
[4:])
319 # Line starting with an address.
320 addr
= int(fragments
[0], 16)
321 self
.address_list
.append(addr
)
322 # Tuple of symbol address, size, line, file number.
323 self
.addresses
[addr
] = (cur_function_addr
,
324 int(fragments
[1], 16),
327 self
.address_list
.sort()
329 def get_sym_file_line(self
, addr
):
331 if addr
in self
.addresses
.keys():
334 index
= bisect
.bisect_left(self
.address_list
, addr
)
338 key
= self
.address_list
[index
- 1]
339 sym_id
, size
, line_no
, file_no
= self
.addresses
[key
]
340 symbol
= self
.symbols
[sym_id
]
341 filename
= self
.files
[file_no
]
342 if addr
< key
+ size
:
343 return symbol
, filename
, line_no
347 def symbolize(self
, addr
, binary
, offset
):
348 if self
.binary
!= binary
:
350 res
= self
.get_sym_file_line(int(offset
, 16))
352 function_name
, file_name
, line_no
= res
353 result
= ['%s in %s %s:%d' % (
354 addr
, function_name
, file_name
, line_no
)]
361 class SymbolizationLoop(object):
362 def __init__(self
, binary_name_filter
=None, dsym_hint_producer
=None):
363 if sys
.platform
== 'win32':
364 # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
365 # even in sandboxed processes. Nothing needs to be done here.
366 self
.process_line
= self
.process_line_echo
368 # Used by clients who may want to supply a different binary name.
369 # E.g. in Chrome several binaries may share a single .dSYM.
370 self
.binary_name_filter
= binary_name_filter
371 self
.dsym_hint_producer
= dsym_hint_producer
372 self
.system
= os
.uname()[0]
373 if self
.system
not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD','SunOS']:
374 raise Exception('Unknown system')
375 self
.llvm_symbolizers
= {}
376 self
.last_llvm_symbolizer
= None
377 self
.dsym_hints
= set([])
379 self
.process_line
= self
.process_line_posix
381 def symbolize_address(self
, addr
, binary
, offset
, arch
):
382 # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
383 # a single symbolizer binary.
384 # On Darwin, if the dsym hint producer is present:
385 # 1. check whether we've seen this binary already; if so,
386 # use |llvm_symbolizers[binary]|, which has already loaded the debug
387 # info for this binary (might not be the case for
388 # |last_llvm_symbolizer|);
389 # 2. otherwise check if we've seen all the hints for this binary already;
390 # if so, reuse |last_llvm_symbolizer| which has the full set of hints;
391 # 3. otherwise create a new symbolizer and pass all currently known
394 if not force_system_symbolizer
:
395 if not binary
in self
.llvm_symbolizers
:
396 use_new_symbolizer
= True
397 if self
.system
== 'Darwin' and self
.dsym_hint_producer
:
398 dsym_hints_for_binary
= set(self
.dsym_hint_producer(binary
))
399 use_new_symbolizer
= bool(dsym_hints_for_binary
- self
.dsym_hints
)
400 self
.dsym_hints |
= dsym_hints_for_binary
401 if self
.last_llvm_symbolizer
and not use_new_symbolizer
:
402 self
.llvm_symbolizers
[binary
] = self
.last_llvm_symbolizer
404 self
.last_llvm_symbolizer
= LLVMSymbolizerFactory(
405 self
.system
, arch
, self
.dsym_hints
)
406 self
.llvm_symbolizers
[binary
] = self
.last_llvm_symbolizer
407 # Use the chain of symbolizers:
408 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
409 # (fall back to next symbolizer if the previous one fails).
410 if not binary
in symbolizers
:
411 symbolizers
[binary
] = ChainSymbolizer(
412 [BreakpadSymbolizerFactory(binary
), self
.llvm_symbolizers
[binary
]])
413 result
= symbolizers
[binary
].symbolize(addr
, binary
, offset
)
415 symbolizers
[binary
] = ChainSymbolizer([])
417 if not allow_system_symbolizer
:
418 raise Exception('Failed to launch or use llvm-symbolizer.')
419 # Initialize system symbolizer only if other symbolizers failed.
420 symbolizers
[binary
].append_symbolizer(
421 SystemSymbolizerFactory(self
.system
, addr
, binary
, arch
))
422 result
= symbolizers
[binary
].symbolize(addr
, binary
, offset
)
423 # The system symbolizer must produce some result.
427 def get_symbolized_lines(self
, symbolized_lines
):
428 if not symbolized_lines
:
429 return [self
.current_line
]
432 for symbolized_frame
in symbolized_lines
:
433 result
.append(' #%s %s' % (str(self
.frame_no
), symbolized_frame
.rstrip()))
437 def process_logfile(self
):
440 processed
= self
.process_line(line
)
441 print('\n'.join(processed
))
def process_line_echo(self, line):
    """Return ``line`` unsymbolized, as a one-element list.

    Trailing whitespace (including the newline) is stripped from the line.
    """
    stripped = line.rstrip()
    return [stripped]
446 def process_line_posix(self
, line
):
447 self
.current_line
= line
.rstrip()
448 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
449 stack_trace_line_format
= (
450 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
451 match
= re
.match(stack_trace_line_format
, line
)
453 return [self
.current_line
]
456 _
, frameno_str
, addr
, binary
, offset
= match
.groups()
458 # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
459 colon_pos
= binary
.rfind(":")
461 maybe_arch
= binary
[colon_pos
+1:]
462 if is_valid_arch(maybe_arch
):
464 binary
= binary
[0:colon_pos
]
466 arch
= guess_arch(addr
)
467 if frameno_str
== '0':
468 # Assume that frame #0 is the first frame of new stack trace.
470 original_binary
= binary
471 if self
.binary_name_filter
:
472 binary
= self
.binary_name_filter(binary
)
473 symbolized_line
= self
.symbolize_address(addr
, binary
, offset
, arch
)
474 if not symbolized_line
:
475 if original_binary
!= binary
:
476 symbolized_line
= self
.symbolize_address(addr
, binary
, offset
, arch
)
477 return self
.get_symbolized_lines(symbolized_line
)
480 if __name__
== '__main__':
481 parser
= argparse
.ArgumentParser(
482 formatter_class
=argparse
.RawDescriptionHelpFormatter
,
483 description
='ASan symbolization script',
484 epilog
='Example of use:\n'
485 'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
486 '-s "$HOME/SymbolFiles" < asan.log')
487 parser
.add_argument('path_to_cut', nargs
='*',
488 help='pattern to be cut from the result file path ')
489 parser
.add_argument('-d','--demangle', action
='store_true',
490 help='demangle function names')
491 parser
.add_argument('-s', metavar
='SYSROOT',
492 help='set path to sysroot for sanitized binaries')
493 parser
.add_argument('-c', metavar
='CROSS_COMPILE',
494 help='set prefix for binutils')
495 parser
.add_argument('-l','--logfile', default
=sys
.stdin
,
496 type=argparse
.FileType('r'),
497 help='set log file name to parse, default is stdin')
498 parser
.add_argument('--force-system-symbolizer', action
='store_true',
499 help='don\'t use llvm-symbolizer')
500 args
= parser
.parse_args()
502 fix_filename_patterns
= args
.path_to_cut
506 binary_name_filter
= sysroot_path_filter
507 sysroot_path
= args
.s
509 binutils_prefix
= args
.c
511 logfile
= args
.logfile
514 if args
.force_system_symbolizer
:
515 force_system_symbolizer
= True
516 if force_system_symbolizer
:
517 assert(allow_system_symbolizer
)
518 loop
= SymbolizationLoop(binary_name_filter
)
519 loop
.process_logfile()