git.proxmox.com Git - rustc.git/blob - src/libcompiler_builtins/compiler-rt/lib/asan/scripts/asan_symbolize.py
New upstream version 1.25.0+dfsg1
[rustc.git] / src / libcompiler_builtins / compiler-rt / lib / asan / scripts / asan_symbolize.py
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3 #
4 # The LLVM Compiler Infrastructure
5 #
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
8 #
9 #===------------------------------------------------------------------------===#
10 import argparse
11 import bisect
12 import getopt
13 import os
14 import re
15 import subprocess
16 import sys
17
# Cache of symbolizer chains, keyed by the binary path being symbolized.
symbolizers = dict()

# Set to True to print the symbolizer command lines and their input.
DEBUG = False

# Settings below are overridden from the command line in the __main__ section.
demangle = False               # -d / --demangle
binutils_prefix = None         # -c: prefix prepended to 'addr2line'
sysroot_path = None            # -s: prefix prepended to binary names
binary_name_filter = None      # callable applied to each binary name, or None
fix_filename_patterns = None   # positional args: patterns cut from file names
logfile = sys.stdin            # -l / --logfile: where the log is read from

# Whether falling back to addr2line/atos is permitted, and whether
# llvm-symbolizer should be bypassed entirely.
allow_system_symbolizer = True
force_system_symbolizer = False
28
29 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source location string reported by a symbolizer.

    Every prefix matching one of the module-level ``fix_filename_patterns``
    is removed (each pattern is used as a regular expression), locations
    inside ``asan_*.cc`` files are collapsed to ``_asan_rtl_`` and
    ``crtstuff.c:0`` locations are rewritten to ``???:0``.

    Args:
      file_name: location string such as ``/path/to/file.cc:12``.
    Returns:
      the rewritten location string.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    # The dot before each extension is escaped so that it only matches a
    # literal '.', not an arbitrary character (e.g. 'asan_fooXcc:1').
    file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
    return file_name
37
def sysroot_path_filter(binary_name):
    """Return binary_name with the module-level sysroot_path prepended."""
    prefixed_name = sysroot_path + binary_name
    return prefixed_name
40
def is_valid_arch(s):
    """Tell whether s is one of the architecture names this script accepts."""
    known_arches = (
        "i386", "x86_64", "x86_64h",
        "arm", "armv6", "armv7", "armv7s", "armv7k", "arm64",
        "powerpc64", "powerpc64le",
        "s390x", "s390",
    )
    return s in known_arches
44
def guess_arch(addr):
    """Guess the architecture from the textual length of an address.

    An address no longer than len('0x') + 8 hex digits (10 characters) is
    taken to be 'i386'; anything longer is taken to be 'x86_64'.
    """
    return 'i386' if len(addr) <= 10 else 'x86_64'
51
class Symbolizer(object):
    """Base class of all symbolizers; its symbolize() resolves nothing."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.

        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
              instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers). The base implementation
          always returns None.
        """
        return None
70
71
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Launch llvm-symbolizer.

        Args:
          symbolizer_path: executable to launch.
          default_arch: value passed as --default-arch.
          system: OS name; --dsym-hint options are only added for 'Darwin'.
          dsym_hints: optional iterable of hint values, one --dsym-hint
              option is emitted per element.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # Use a fresh list per instance rather than a shared mutable default.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the child process; returns the Popen object, or None if the
        executable could not be launched (OSError)."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      bufsize=0,
                                      universal_newlines=True)
        except OSError:
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a list of '<addr> in <function> <file>' strings, or None when
        the child process is unavailable, fails, or reports only '??' frames.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write("%s\n" % symbolizer_input)
            # The reply is a sequence of (function, file) line pairs that
            # ends with an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure talking to the child is
            # reported as "no result" so the caller can fall back.
            result = []
        if not result:
            result = None
        return result
128
129
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable from the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    if neither is set (or both are empty) 'llvm-symbolizer' is looked up in
    PATH.

    Args:
      system: OS name forwarded to LLVMSymbolizer.
      default_arch: architecture forwarded to LLVMSymbolizer.
      dsym_hints: optional iterable of hints forwarded to LLVMSymbolizer.
    """
    if dsym_hints is None:
        # Avoid a shared mutable default; always forward a real list.
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       # Assume llvm-symbolizer is in PATH.
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
138
139
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line child process for one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Bogus address written after every query; its reply marks the end of
        # the (possibly multi-frame) answer to the real query.
        self.output_terminator = -1

    def open_addr2line(self):
        """Start addr2line (optionally prefixed by binutils_prefix) on
        self.binary and return the Popen object."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when asked about a different binary, otherwise a list
        of '<addr> in <function> <file>' strings (one per inlined frame).
        """
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write("%s\n" % offset)
            self.pipe.stdin.write("%s\n" % self.output_terminator)
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if is_first_frame:
                    # The first pair always belongs to the real query, even
                    # if it is unresolved.
                    is_first_frame = False
                elif function_name in ('', '??'):
                    # Reply to the terminator (or end of output). The file
                    # name of that reply is not the same text as the function
                    # name (e.g. '??:0' vs '??'), so it must not be compared
                    # against it: doing so raised an error that was swallowed
                    # below and added a bogus frame to every result.
                    break
                lines.append((function_name, file_name))
        except Exception:
            # Best-effort: report an unknown frame rather than crashing.
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))
                for (function_name, file_name) in lines]
183
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        child_pid, master_fd = pty.fork()
        if child_pid == 0:
            # Child side: optionally point stderr at /dev/null, then replace
            # this process with the command. execvp() does not return.
            if close_stderr:
                null_fd = os.open('/dev/null', 0)
                os.dup2(null_fd, 2)
            os.execvp(args[0], args)
        else:
            # Parent side: turn off terminal echo.
            tty_attrs = termios.tcgetattr(master_fd)
            tty_attrs[3] &= ~termios.ECHO
            termios.tcsetattr(master_fd, termios.TCSANOW, tty_attrs)
            # Line-buffered file objects for reading from / writing to the
            # child; the writer gets its own duplicate of the descriptor.
            self.r = os.fdopen(master_fd, "r", 1)
            self.w = os.fdopen(os.dup(master_fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its one-line reply."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, without trailing whitespace."""
        reply = self.r.readline()
        return reply.rstrip()
215
216
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that drives an 'atos' child process for one binary."""

    def __init__(self, addr, binary, arch):
        # addr is accepted for interface compatibility; it is not used here.
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = arch
        self.open_atos()

    def open_atos(self):
        """Start atos for self.binary / self.arch with stderr discarded."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when asked about a different binary, otherwise a
        one-element list describing the frame.
        """
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines until the actual answer arrives.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        # Raw strings keep the regex escapes from being treated as (invalid)
        # string escape sequences.
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line: ', atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]
249
250
251 # Chain several symbolizers so that if one symbolizer fails, we fall back
252 # to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for candidate in self.symbolizer_list:
            # Entries may be None (e.g. a factory that produced nothing).
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add symbolizer as the last fallback in the chain."""
        self.symbolizer_list.append(symbolizer)
269
270
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary + $BREAKPAD_SUFFIX, or None.

    None is returned when BREAKPAD_SUFFIX is unset/empty or when the
    resulting file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    symbol_file = binary + suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)
278
279
def SystemSymbolizerFactory(system, addr, binary, arch):
    """Return the platform's native symbolizer for binary.

    'Darwin' gets a DarwinSymbolizer; 'Linux', 'FreeBSD', 'NetBSD' and
    'SunOS' get an Addr2LineSymbolizer; any other system yields None.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary, arch)
    if system in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']:
        return Addr2LineSymbolizer(binary)
    return None
285
286
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer backed by a Breakpad text symbol file loaded into memory."""

    def __init__(self, filename):
        """Read and index the symbol file at filename."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # open() instead of the Python-2-only file() builtin, and close the
        # handle as soon as the contents are read.
        with open(filename) as sym_file:
            lines = sym_file.readlines()
        self.files = []          # file names, indexed by FILE record number
        self.symbols = {}        # function/public address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # line record address -> record tuple
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records found in lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK', 'INFO'):
                # Records that carry no symbol or line information.
                continue
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line_no) for addr, or None if no line
        record covers it."""
        # Rightmost record starting at or before addr.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a different binary or an uncovered offset, otherwise
        a one-element list '<addr> in <function> <file>:<line>'.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        # The result is only returned, not printed: the caller is the one
        # that writes symbolized frames to the output.
        return ['%s in %s %s:%d' % (addr, function_name, file_name, line_no)]
359
360
class SymbolizationLoop(object):
    """Reads a sanitizer log line by line and symbolizes stack frames."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """
        Args:
          binary_name_filter: optional callable mapping the binary name found
              in the log to the name that should be symbolized.
          dsym_hint_producer: optional callable returning .dSYM hints for a
              binary (only consulted on Darwin).
        Raises:
          Exception: on a non-Windows system that is not supported.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD',
                                   'SunOS']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset, arch):
        """Symbolize one frame; returns a list of frame description strings.

        Raises:
          Exception: if llvm-symbolizer produced nothing and the system
              symbolizer is not allowed.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        result = None
        if not force_system_symbolizer:
            if binary not in self.llvm_symbolizers:
                use_new_symbolizer = True
                if self.system == 'Darwin' and self.dsym_hint_producer:
                    dsym_hints_for_binary = set(
                        self.dsym_hint_producer(binary))
                    use_new_symbolizer = bool(
                        dsym_hints_for_binary - self.dsym_hints)
                    self.dsym_hints |= dsym_hints_for_binary
                if self.last_llvm_symbolizer and not use_new_symbolizer:
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
                else:
                    self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                        self.system, arch, self.dsym_hints)
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            # Use the chain of symbolizers:
            # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
            # (fall back to next symbolizer if the previous one fails).
            if binary not in symbolizers:
                symbolizers[binary] = ChainSymbolizer(
                    [BreakpadSymbolizerFactory(binary),
                     self.llvm_symbolizers[binary]])
            result = symbolizers[binary].symbolize(addr, binary, offset)
        else:
            # NOTE(review): this replaces the cached chain on every call, so
            # with force_system_symbolizer each address creates a new system
            # symbolizer below -- consider reusing the existing chain.
            symbolizers[binary] = ChainSymbolizer([])
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary, arch))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames as numbered lines.

        Returns the current log line unchanged (as a one-element list) when
        there is nothing symbolized; otherwise one output line per frame,
        advancing self.frame_no for each.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level logfile and print it."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame, otherwise echo it.

        Returns a list of output lines.
        """
        self.current_line = line.rstrip()
        # Example of a frame line:
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        arch = ""
        # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
        colon_pos = binary.rfind(":")
        if colon_pos != -1:
            maybe_arch = binary[colon_pos + 1:]
            if is_valid_arch(maybe_arch):
                arch = maybe_arch
                binary = binary[0:colon_pos]
        if arch == "":
            arch = guess_arch(addr)
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: retry with the name exactly as
            # it appeared in the log (not the filtered name again).
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset, arch)
        return self.get_symbolized_lines(symbolized_line)
478
479
if __name__ == '__main__':
    # Command-line entry point: parse the options, store them in the
    # module-level settings, then symbolize the log.
    usage_example = ('Example of use:\n'
                     'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
                     '-s "$HOME/SymbolFiles" < asan.log')
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog=usage_example)
    parser.add_argument(
        'path_to_cut', nargs='*',
        help='pattern to be cut from the result file path ')
    parser.add_argument(
        '-d', '--demangle', action='store_true',
        help='demangle function names')
    parser.add_argument(
        '-s', metavar='SYSROOT',
        help='set path to sysroot for sanitized binaries')
    parser.add_argument(
        '-c', metavar='CROSS_COMPILE',
        help='set prefix for binutils')
    parser.add_argument(
        '-l', '--logfile', default=sys.stdin,
        type=argparse.FileType('r'),
        help='set log file name to parse, default is stdin')
    parser.add_argument(
        '--force-system-symbolizer', action='store_true',
        help="don't use llvm-symbolizer")
    args = parser.parse_args()

    # Only options that were actually given override the defaults.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        sysroot_path = args.s
        binary_name_filter = sysroot_path_filter
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile or sys.stdin
    if args.force_system_symbolizer:
        force_system_symbolizer = True
    if force_system_symbolizer:
        # Forcing the system symbolizer requires that it is allowed.
        assert(allow_system_symbolizer)

    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()