]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/scripts/addr2line.py
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / seastar / scripts / addr2line.py
1 #!/usr/bin/env python3
2 #
3 # This file is open source software, licensed to you under the terms
4 # of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 # distributed with this work for additional information regarding copyright
6 # ownership. You may not use this file except in compliance with the License.
7 #
8 # You may obtain a copy of the License at
9 #
10 # http://www.apache.org/licenses/LICENSE-2.0
11 #
12 # Unless required by applicable law or agreed to in writing,
13 # software distributed under the License is distributed on an
14 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 # KIND, either express or implied. See the License for the
16 # specific language governing permissions and limitations
17 # under the License.
18 #
19 # Copyright (C) 2017 ScyllaDB
20
21 import bisect
22 import collections
23 import re
24 import sys
25 import subprocess
26 from enum import Enum
27 from typing import Any
28
# Special binary path/module name indicating that the address is from the
# kernel.
KERNEL_MODULE = '<kernel>'
31
class Addr2Line:
    """Resolve addresses of one binary through a long-running addr2line process.

    A single ``addr2line`` (or ``llvm-addr2line``) child is spawned per binary
    and fed addresses on its stdin; when ``concise`` is set its output is
    additionally piped through ``c++filt -p``.

    Calling the instance with an address string returns the decoded text
    (one or more newline-terminated lines). If the child process is not
    usable -- it exited at startup or dies later on -- the address is
    returned unresolved as ``"<binary> <address> \\n"``.
    """

    # Matcher for the line that appears at the end of a single decoded
    # address, which we force by adding a dummy 0x0 address. The
    # pattern varies between binutils addr2line and llvm-addr2line
    # so we match both.
    dummy_pattern = re.compile(
        r"(.*0x0000000000000000: \?\? \?\?:0\n)"  # addr2line pattern
        r"|"
        r"(.*0x0: \?\? at \?\?:0\n)"  # llvm-addr2line pattern
    )

    def __init__(self, binary, concise=False, cmd_path="addr2line"):
        """Start the resolver process for ``binary``.

        :param binary: path of the executable or library to decode against
        :param concise: drop addr2line's own demangling (``-C``) and pipe the
            output through ``c++filt -p`` instead
        :param cmd_path: addr2line executable to run
        """
        self._binary = binary

        # Print warning if binary has no debug info according to `file`.
        # Note: no message is printed for system errors as they will be
        # printed also by addr2line later on.
        try:
            output = subprocess.check_output(["file", self._binary])
        except (OSError, subprocess.CalledProcessError):
            # `file` only feeds a diagnostic hint; a missing or failing
            # `file` utility must not prevent address resolution.
            output = b''
        s = output.decode("utf-8", errors="replace")
        if s.find('ELF') >= 0 and s.find('debug_info', len(self._binary)) < 0:
            print('{}'.format(s))

        options = f"-{'C' if not concise else ''}fpia"
        self._input = subprocess.Popen(
            [cmd_path, options, "-e", self._binary],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        if concise:
            self._output = subprocess.Popen(
                ["c++filt", "-p"],
                stdin=self._input.stdout,
                stdout=subprocess.PIPE,
                universal_newlines=True,
            )
        else:
            self._output = self._input

        # If a library doesn't exist in a particular path, addr2line
        # will just exit. We need to be robust against that. We
        # can't just wait on the process since there is no
        # guarantee on what timeout is sufficient. Instead we send an
        # empty probe line and check whether anything comes back.
        try:
            self._input.stdin.write('\n')
            self._input.stdin.flush()
        except BrokenPipeError:
            # The process exited before it read the probe.
            self._missing = True
            return
        res = self._output.stdout.readline()
        self._missing = res == ''

    def _unresolved(self, address):
        """Return the fallback text used when ``address`` cannot be decoded."""
        return " ".join([self._binary, address, '\n'])

    def _read_resolved_address(self):
        """Read one decoded address from the output stream.

        Consumes lines up to and including the dummy ``0x0`` terminator and
        returns everything before it, with the leading ``"<address>: "`` of
        the first line removed. Returns ``None`` if the stream hits EOF
        before the terminator is seen (the resolver process went away).
        """
        first = self._output.stdout.readline()
        if first == '':
            return None
        # remove the address
        _, sep, rest = first.partition(': ')
        parts = [rest if sep else first]
        while True:
            line = self._output.stdout.readline()
            if line == '':
                # EOF: without this check we would spin here forever.
                return None
            if Addr2Line.dummy_pattern.fullmatch(line) is not None:
                return "".join(parts)
            parts.append(line)

    def __call__(self, address):
        """Decode ``address`` (a string) and return the resulting text."""
        if self._missing:
            return self._unresolved(address)
        # We print a dummy 0x0 address after the address we are interested in
        # which we can look for in _read_resolved_address
        try:
            self._input.stdin.write(address + '\n0x0\n')
            self._input.stdin.flush()
        except BrokenPipeError:
            resolved = None
        else:
            resolved = self._read_resolved_address()
        if resolved is None:
            # The resolver process is gone; stop talking to it.
            self._missing = True
            return self._unresolved(address)
        return resolved
89
class KernelResolver:
    """A resolver for kernel addresses which tries to read from /proc/kallsyms.

    The symbol table is loaded once at construction time. If it cannot be
    used, ``self.error`` holds a short description (it is also printed) and
    calling the resolver simply echoes the address back.
    """

    LAST_SYMBOL_MAX_SIZE = 1024

    # One kallsyms entry: "<hex address> <type> <name>".
    _KSYM_RE = re.compile(r'(?P<addr>[0-9a-f]+) (?P<type>.+) (?P<name>\S+)')

    def __init__(self, kallsyms_path='/proc/kallsyms'):
        """Load and index the kernel symbol table.

        :param kallsyms_path: file to read symbols from; defaults to the
            live kernel's ``/proc/kallsyms``
        """
        syms: list[tuple[int, str]] = []
        warnings_left = 10

        self.error = None

        try:
            f = open(kallsyms_path, 'r')
        except OSError as e:
            self.error = f'Cannot open {kallsyms_path}: {e}'
            print(self.error)
            return

        with f:
            for line in f:
                m = self._KSYM_RE.match(line)
                if not m:
                    if warnings_left > 0:  # don't spam too much
                        print(f'WARNING: {kallsyms_path} regex match failure: {line.strip()}', file=sys.stdout)
                        warnings_left -= 1
                else:
                    syms.append((int(m.group('addr'), 16), m.group('name')))

        if not syms:
            # make empty kallsyms (?) an error so we can assume len >= 1 below
            self.error = 'kallsyms was empty'
            print(self.error)
            return

        syms.sort()

        if syms[-1][0] == 0:
            # zero values for all symbols means that kptr_restrict blocked you
            # from seeing the kernel symbol addresses
            print('kallsyms is restricted, set /proc/sys/kernel/kptr_restrict to 0 to decode')
            self.error = 'kallsyms is restricted'
            return

        # split because bisect can't take a key func before 3.10
        self.sym_addrs: tuple[int, ...]
        self.sym_names: tuple[str, ...]
        self.sym_addrs, self.sym_names = zip(*syms)  # type: ignore

    def __call__(self, addrstr):
        """Resolve a hexadecimal address string.

        Returns ``"<symbol>+0x<offset>\\n"`` for an address covered by the
        table, a ``"<addr> (N bytes before first symbol)\\n"`` /
        ``"<addr> (N bytes after last symbol)\\n"`` note for an address
        outside it, or ``"<addr>\\n"`` when the table could not be loaded.
        """
        if self.error:
            return addrstr + '\n'

        sa = self.sym_addrs
        sn = self.sym_names
        slen = len(sa)
        address = int(addrstr, 16)
        # Index of the last symbol whose address is <= the queried address.
        idx = bisect.bisect_right(sa, address) - 1
        assert -1 <= idx < slen
        if idx == -1:
            return f'{addrstr} ({sa[0] - address} bytes before first symbol)\n'
        if idx == slen - 1:
            # We can easily detect symbol addresses which are too small: they fall before
            # the first symbol in kallsyms, but for too large it is harder: we can't really
            # distinguish between an address that is in the *very last* function in the symbol map
            # and one which is beyond that, since kallsyms doesn't include symbol size. Instead
            # we use a bit of a quick and dirty heuristic: if the symbol is *far enough* beyond
            # the last symbol we assume it is not valid. Most likely, the overwhelming majority
            # of cases are invalid (e.g., due to KASLR) as the final symbol in the map is usually
            # something obscure.
            lastsym = sa[-1]
            if address - lastsym > self.LAST_SYMBOL_MAX_SIZE:
                return f'{addrstr} ({address - lastsym} bytes after last symbol)\n'
        saddr = sa[idx]
        assert saddr <= address
        return f'{sn[idx]}+0x{address - saddr:x}\n'
169
170
class BacktraceResolver(object):
    """Find backtraces in a stream of log lines and print them resolved.

    Feed the instance one line at a time by calling it. Consecutive address
    lines are collected into a backtrace; once the backtrace ends it is
    printed to stdout with every address passed through the resolver of the
    module it belongs to. Use as a context manager so that a backtrace still
    pending at the end of the input is flushed on exit.
    """

    class BacktraceParser(object):
        """Classify a single input line as address line, separator or neither."""

        class Type(Enum):
            ADDRESS = 1
            SEPARATOR = 2

        def __init__(self):
            addr = "0x[0-9a-f]+"
            path = r"\S+"
            token = rf"(?:{path}\+)?{addr}"
            full_addr_match = rf"(?:(?P<path>{path})\s*\+\s*)?(?P<addr>{addr})"
            ignore_addr_match = rf"(?:(?P<path>{path})\s*\+\s*)?(?:{addr})"
            self.oneline_re = re.compile(rf"^((?:.*(?:(?:at|backtrace):?|:))?(?:\s+))?({token}(?:\s+{token})*)(?:\).*|\s*)$", flags=re.IGNORECASE)
            self.address_re = re.compile(full_addr_match, flags=re.IGNORECASE)
            self.syslog_re = re.compile(rf"^(?:#\d+\s+)(?P<addr>{addr})(?:.*\s+)\({ignore_addr_match}\)\s*$", flags=re.IGNORECASE)
            self.kernel_re = re.compile(rf'^kernel callstack: (?P<addrs>(?:{addr}\s*)+)$')
            self.asan_re = re.compile(rf"^(?:.*\s+)\({full_addr_match}\)(\s+\(BuildId: [0-9a-fA-F]+\))?$", flags=re.IGNORECASE)
            self.asan_ignore_re = re.compile(r"^=.*$", flags=re.IGNORECASE)
            self.generic_re = re.compile(rf"^(?:.*\s+){full_addr_match}\s*$", flags=re.IGNORECASE)
            self.separator_re = re.compile(r'^\W*-+\W*$')

        def split_addresses(self, addrstring: str, default_path=None):
            """Split a whitespace-separated run of ``[path+]addr`` tokens.

            Returns a list of ``{'path': ..., 'addr': ...}`` dicts, where
            ``path`` falls back to ``default_path`` for tokens without one.
            """
            addresses: list[dict[str, Any]] = []
            for obj in addrstring.split():
                m = self.address_re.match(obj)
                # Callers only pass text that already matched an address
                # regex, so a miss here is an internal inconsistency.
                assert m, f'addr did not match address regex: {obj}'
                addresses.append({'path': m.group('path') or default_path, 'addr': m.group('addr')})
            return addresses

        def __call__(self, line):
            """Parse ``line``.

            Returns ``None`` for a line that is not part of a backtrace,
            ``{'type': Type.SEPARATOR}`` for a separator line, or
            ``{'type': Type.ADDRESS, 'prefix': ..., 'addresses': [...]}``
            for a line carrying one or more addresses.
            """
            def get_prefix(s):
                if s is not None:
                    s = s.strip()
                return s or None

            # order here is important: the kernel callstack regex
            # needs to come first since it is more specific and would
            # otherwise be matched by the oneline regex which comes next
            m = self.kernel_re.match(line)
            if m:
                return {
                    'type': self.Type.ADDRESS,
                    'prefix': 'kernel callstack: ',
                    'addresses': self.split_addresses(m.group('addrs'), KERNEL_MODULE)
                }

            m = self.oneline_re.match(line)
            if m:
                return {
                    'type': self.Type.ADDRESS,
                    'prefix': get_prefix(m.group(1)),
                    'addresses': self.split_addresses(m.group(2))
                }

            # The remaining formats carry exactly one address per line and
            # are tried in order. A line matching the ASAN "ignore" pattern
            # is dropped before the ASAN and generic patterns get a chance
            # to match it.
            single_address_formats = (
                (self.syslog_re, True),
                (self.asan_ignore_re, False),
                (self.asan_re, True),
                (self.generic_re, True),
            )
            for pattern, is_address in single_address_formats:
                m = pattern.match(line)
                if not m:
                    continue
                if not is_address:
                    return None
                return {
                    'type': self.Type.ADDRESS,
                    'prefix': None,
                    'addresses': [{'path': m.group('path'), 'addr': m.group('addr')}]
                }

            if self.separator_re.match(line):
                return {'type': self.Type.SEPARATOR}

            return None

    def __init__(self, executable, before_lines=1, context_re='', verbose=False, concise=False, cmd_path='addr2line'):
        """Set up a resolver.

        :param executable: binary used for addresses that carry no path
        :param before_lines: number of preceding non-backtrace lines to keep
            and print before each backtrace; a negative value prints every
            non-backtrace line as it arrives, 0 prints none
        :param context_re: only print backtraces whose kept preceding lines
            or prefix match this regex; ``None`` disables the filter
        :param verbose: prefix each resolved address with module and address
        :param concise: passed on to ``Addr2Line``
        :param cmd_path: addr2line executable, passed on to ``Addr2Line``
        """
        self._executable = executable
        self._current_backtrace = []
        self._prefix = None
        self._before_lines = before_lines
        self._before_lines_queue = collections.deque(maxlen=before_lines)
        self._i = 0
        self._known_backtraces = {}
        if context_re is not None:
            self._context_re = re.compile(context_re)
        else:
            self._context_re = None
        self._verbose = verbose
        self._concise = concise
        self._cmd_path = cmd_path
        self._known_modules = {}
        self._get_resolver_for_module(self._executable)  # fail fast if there is something wrong with the exe resolver
        self.parser = self.BacktraceParser()

    def _get_resolver_for_module(self, module):
        """Return the resolver for ``module``, creating and caching it on first use."""
        if module not in self._known_modules:
            if module == KERNEL_MODULE:
                resolver = KernelResolver()
            else:
                resolver = Addr2Line(module, self._concise, self._cmd_path)
            self._known_modules[module] = resolver
        return self._known_modules[module]

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Flush a backtrace that was still being collected at end of input.
        self._print_current_backtrace()

    def resolve_address(self, address, module=None, verbose=None):
        """Resolve ``address`` within ``module`` and return the text.

        ``module`` defaults to the main executable and ``verbose`` to the
        value given at construction time.
        """
        if module is None:
            module = self._executable
        if verbose is None:
            verbose = self._verbose
        resolved_address = self._get_resolver_for_module(module)(address)
        if verbose:
            resolved_address = '{{{}}} {}: {}'.format(module, address, resolved_address)
        return resolved_address

    def _print_resolved_address(self, module, address):
        sys.stdout.write(self.resolve_address(address, module))

    def _backtrace_context_matches(self):
        """Tell whether the pending backtrace passes the context filter."""
        if self._context_re is None:
            return True

        if any(self._context_re.search(x) is not None for x in self._before_lines_queue):
            return True

        if self._prefix is not None and self._context_re.search(self._prefix):
            return True

        return False

    def _print_current_backtrace(self):
        """Print the pending backtrace (if any) and reset the collection state."""
        if not self._current_backtrace:
            return

        if not self._backtrace_context_matches():
            self._current_backtrace = []
            return

        for line in self._before_lines_queue:
            sys.stdout.write(line)

        if self._prefix is not None:
            print(self._prefix)
            self._prefix = None

        # The string form of the (module, address) list identifies a
        # backtrace so that repeated ones are resolved only once.
        backtrace = "".join(map(str, self._current_backtrace))
        if backtrace in self._known_backtraces:
            print("[Backtrace #{}] Already seen, not resolving again.".format(self._known_backtraces[backtrace]))
            print("")  # To separate traces with an empty line
            self._current_backtrace = []
            return

        self._known_backtraces[backtrace] = self._i

        print("[Backtrace #{}]".format(self._i))

        for module, addr in self._current_backtrace:
            self._print_resolved_address(module, addr)

        print("")  # To separate traces with an empty line

        self._current_backtrace = []
        self._i += 1

    def __call__(self, line):
        """Consume one input line (including its trailing newline)."""
        res = self.parser(line)

        if not res:
            self._print_current_backtrace()
            if self._before_lines > 0:
                self._before_lines_queue.append(line)
            elif self._before_lines < 0:
                sys.stdout.write(line)  # line already has a trailing newline
            else:
                pass  # when == 0 no non-backtrace lines are printed
        elif res['type'] == self.BacktraceParser.Type.SEPARATOR:
            pass
        elif res['type'] == self.BacktraceParser.Type.ADDRESS:
            addresses = res['addresses']
            # A line with several addresses is a complete backtrace of its
            # own: flush whatever was pending before and after it.
            if len(addresses) > 1:
                self._print_current_backtrace()
            if len(self._current_backtrace) == 0:
                self._prefix = res['prefix']
            for r in addresses:
                if r['path']:
                    self._current_backtrace.append((r['path'], r['addr']))
                else:
                    self._current_backtrace.append((self._executable, r['addr']))
            if len(addresses) > 1:
                self._print_current_backtrace()
        else:
            print(f"Unknown '{line}': {res}")
            raise RuntimeError(f"Unknown result type {res}")
387