]>
git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/scripts/addr2line.py
3 # This file is open source software, licensed to you under the terms
4 # of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 # distributed with this work for additional information regarding copyright
6 # ownership. You may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing,
13 # software distributed under the License is distributed on an
14 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 # KIND, either express or implied. See the License for the
16 # specific language governing permissions and limitations
19 # Copyright (C) 2017 ScyllaDB
27 from typing
import Any
29 # special binary path/module indicating that the address is from the kernel
30 KERNEL_MODULE
= '<kernel>'
34 # Matcher for a line that appears at the end of a single decoded
35 # address, which we force by adding a dummy 0x0 address. The
36 # pattern varies between binutils addr2line and llvm-addr2line
38 dummy_pattern
= re
.compile(
39 r
"(.*0x0000000000000000: \?\? \?\?:0\n)" # addr2line pattern
41 r
"(.*0x0: \?\? at \?\?:0\n)" # llvm-addr2line pattern
44 def __init__(self
, binary
, concise
=False, cmd_path
="addr2line"):
47 # Print warning if binary has no debug info according to `file`.
48 # Note: no message is printed for system errors as they will be
49 # printed also by addr2line later on.
50 output
= subprocess
.check_output(["file", self
._binary
])
51 s
= output
.decode("utf-8")
52 if s
.find('ELF') >= 0 and s
.find('debug_info', len(self
._binary
)) < 0:
55 options
= f
"-{'C' if not concise else ''}fpia"
56 self
._input
= subprocess
.Popen([cmd_path
, options
, "-e", self
._binary
], stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
, universal_newlines
=True)
58 self
._output
= subprocess
.Popen(["c++filt", "-p"], stdin
=self
._input
.stdout
, stdout
=subprocess
.PIPE
, universal_newlines
=True)
60 self
._output
= self
._input
62 # If a library doesn't exist in a particular path, addr2line
63 # will just exit. We need to be robust against that. We
64 # can't just wait on self._addr2line since there is no
65 # guarantee on what timeout is sufficient.
66 self
._input
.stdin
.write('\n')
67 self
._input
.stdin
.flush()
68 res
= self
._output
.stdout
.readline()
69 self
._missing
= res
== ''
71 def _read_resolved_address(self
):
72 res
= self
._output
.stdout
.readline()
74 res
= res
.split(': ', 1)[1]
76 while Addr2Line
.dummy_pattern
.fullmatch(line
) is None:
78 line
= self
._output
.stdout
.readline()
81 def __call__(self
, address
):
83 return " ".join([self
._binary
, address
, '\n'])
84 # We print a dummy 0x0 address after the address we are interested in
85 # which we can look for in _read_resolved_address
86 self
._input
.stdin
.write(address
+ '\n0x0\n')
87 self
._input
.stdin
.flush()
88 return self
._read
_resolved
_address
()
91 """A resolver for kernel addresses which tries to read from /proc/kallsyms."""
93 LAST_SYMBOL_MAX_SIZE
= 1024
96 syms
: list[tuple[int, str]] = []
97 ksym_re
= re
.compile(r
'(?P<addr>[0-9a-f]+) (?P<type>.+) (?P<name>\S+)')
103 f
= open('/proc/kallsyms', 'r')
105 self
.error
= f
'Cannot open /proc/kallsyms: {e}'
111 m
= ksym_re
.match(line
)
113 if warnings_left
> 0: # don't spam too much
114 print(f
'WARNING: /proc/kallsyms regex match failure: {line.strip()}', file=sys
.stdout
)
117 syms
.append((int(m
.group('addr'), 16), m
.group('name')))
122 # make empty kallsyms (?) an error so we can assume len >= 1 below
123 self
.error
= 'kallsyms was empty'
130 # zero values for all symbols means that kptr_restrict blocked you
131 # from seeing the kernel symbol addresses
132 print('kallsyms is restricted, set /proc/sys/kernel/kptr_restrict to 0 to decode')
133 self
.error
= 'kallsyms is restricted'
136 # split because bisect can't take a key func before 3.10
137 self
.sym_addrs
: tuple[int]
138 self
.sym_names
: tuple[str]
139 self
.sym_addrs
, self
.sym_names
= zip(*syms
) # type: ignore
142 def __call__(self
, addrstr
):
144 return addrstr
+ '\n'
149 address
= int(addrstr
, 16)
150 idx
= bisect
.bisect_right(sa
, address
) - 1
151 assert -1 <= idx
< slen
153 return f
'{addrstr} ({sa[0] - address} bytes before first symbol)\n'
155 # We can easily detect symbol addresses which are too small: they fall before
156 # the first symbol in kallsyms, but for too large it is harder: we can't really
157 # distinguish between an address that is in the *very last* function in the symbol map
158 # and one which is beyond that, since kallsyms doesn't include symbol size. Instead
159 # we use a bit of a quick and dirty heuristic: if the symbol is *far enough* beyond
160 # the last symbol we assume it is not valid. Most likely, the overwhelming majority
161 # of cases are invalid (e.g., due to KASLR) as the final symbol in the map is usually
164 if address
- lastsym
> self
.LAST_SYMBOL_MAX_SIZE
:
165 return f
'{addrstr} ({address - lastsym} bytes after last symbol)\n'
167 assert saddr
<= address
168 return f
'{sn[idx]}+0x{address - saddr:x}\n'
171 class BacktraceResolver(object):
173 class BacktraceParser(object):
181 token
= f
"(?:{path}\+)?{addr}"
182 full_addr_match
= f
"(?:(?P<path>{path})\s*\+\s*)?(?P<addr>{addr})"
183 ignore_addr_match
= f
"(?:(?P<path>{path})\s*\+\s*)?(?:{addr})"
184 self
.oneline_re
= re
.compile(f
"^((?:.*(?:(?:at|backtrace):?|:))?(?:\s+))?({token}(?:\s+{token})*)(?:\).*|\s*)$", flags
=re
.IGNORECASE
)
185 self
.address_re
= re
.compile(full_addr_match
, flags
=re
.IGNORECASE
)
186 self
.syslog_re
= re
.compile(f
"^(?:#\d+\s+)(?P<addr>{addr})(?:.*\s+)\({ignore_addr_match}\)\s*$", flags
=re
.IGNORECASE
)
187 self
.kernel_re
= re
.compile(fr
'^kernel callstack: (?P<addrs>(?:{addr}\s*)+)$')
188 self
.asan_re
= re
.compile(f
"^(?:.*\s+)\({full_addr_match}\)(\s+\(BuildId: [0-9a-fA-F]+\))?$", flags
=re
.IGNORECASE
)
189 self
.asan_ignore_re
= re
.compile(f
"^=.*$", flags
=re
.IGNORECASE
)
190 self
.generic_re
= re
.compile(f
"^(?:.*\s+){full_addr_match}\s*$", flags
=re
.IGNORECASE
)
191 self
.separator_re
= re
.compile('^\W*-+\W*$')
def split_addresses(self, addrstring: str, default_path=None):
    """Turn a whitespace-separated run of address tokens into records.

    Every token is matched against ``self.address_re``. Capture group 1
    supplies the path (falling back to ``default_path`` when it is empty
    or absent) and capture group 2 supplies the address.

    Returns a list of ``{'path': ..., 'addr': ...}`` dicts, one per token,
    in input order. A token that does not match trips the assertion.
    """
    addresses: list[dict[str, Any]] = []
    for obj in addrstring.split():
        matched = self.address_re.match(obj)
        assert matched, f'addr did not match address regex: {obj}'
        path, addr = matched.group(1, 2)
        addresses.append({'path': path or default_path, 'addr': addr})
    return addresses
203 def __call__(self
, line
):
209 # order here is important: the kernel callstack regex
210 # needs to come first since it is more specific and would
211 # otherwise be matched by the oneline regex which comes next
212 m
= self
.kernel_re
.match(line
)
215 'type': self
.Type
.ADDRESS
,
216 'prefix': 'kernel callstack: ',
217 'addresses' : self
.split_addresses(m
.group('addrs'), KERNEL_MODULE
)
220 m
= re
.match(self
.oneline_re
, line
)
222 #print(f">>> '{line}': oneline {m.groups()}")
224 'type': self
.Type
.ADDRESS
,
225 'prefix': get_prefix(m
.group(1)),
226 'addresses': self
.split_addresses(m
.group(2))
229 m
= re
.match(self
.syslog_re
, line
)
231 #print(f">>> '{line}': syslog {m.groups()}")
232 ret
= {'type': self
.Type
.ADDRESS
}
234 ret
['addresses'] = [{'path': m
.group('path'), 'addr': m
.group('addr')}]
237 m
= re
.match(self
.asan_ignore_re
, line
)
239 #print(f">>> '{line}': asan ignore")
242 m
= re
.match(self
.asan_re
, line
)
244 #print(f">>> '{line}': asan {m.groups()}")
245 ret
= {'type': self
.Type
.ADDRESS
}
247 ret
['addresses'] = [{'path': m
.group('path'), 'addr': m
.group('addr')}]
250 m
= re
.match(self
.generic_re
, line
)
252 #print(f">>> '{line}': generic {m.groups()}")
253 ret
= {'type': self
.Type
.ADDRESS
}
255 ret
['addresses'] = [{'path': m
.group('path'), 'addr': m
.group('addr')}]
258 match
= re
.match(self
.separator_re
, line
)
260 return {'type': self
.Type
.SEPARATOR
}
262 #print(f">>> '{line}': None")
265 def __init__(self
, executable
, before_lines
=1, context_re
='', verbose
=False, concise
=False, cmd_path
='addr2line'):
266 self
._executable
= executable
267 self
._current
_backtrace
= []
269 self
._before
_lines
= before_lines
270 self
._before
_lines
_queue
= collections
.deque(maxlen
=before_lines
)
272 self
._known
_backtraces
= {}
273 if context_re
is not None:
274 self
._context
_re
= re
.compile(context_re
)
276 self
._context
_re
= None
277 self
._verbose
= verbose
278 self
._concise
= concise
279 self
._cmd
_path
= cmd_path
280 self
._known
_modules
= {}
281 self
._get
_resolver
_for
_module
(self
._executable
) # fail fast if there is something wrong with the exe resolver
282 self
.parser
= self
.BacktraceParser()
284 def _get_resolver_for_module(self
, module
):
285 if not module
in self
._known
_modules
:
286 if module
== KERNEL_MODULE
:
287 resolver
= KernelResolver()
289 resolver
= Addr2Line(module
, self
._concise
, self
._cmd
_path
)
290 self
._known
_modules
[module
] = resolver
291 return self
._known
_modules
[module
]
296 def __exit__(self
, type, value
, tb
):
297 self
._print
_current
_backtrace
()
299 def resolve_address(self
, address
, module
=None, verbose
=None):
301 module
= self
._executable
303 verbose
= self
._verbose
304 resolved_address
= self
._get
_resolver
_for
_module
(module
)(address
)
306 resolved_address
= '{{{}}} {}: {}'.format(module
, address
, resolved_address
)
307 return resolved_address
309 def _print_resolved_address(self
, module
, address
):
310 sys
.stdout
.write(self
.resolve_address(address
, module
))
312 def _backtrace_context_matches(self
):
313 if self
._context
_re
is None:
316 if any(map(lambda x
: self
._context
_re
.search(x
) is not None, self
._before
_lines
_queue
)):
319 if (not self
._prefix
is None) and self
._context
_re
.search(self
._prefix
):
324 def _print_current_backtrace(self
):
325 if len(self
._current
_backtrace
) == 0:
328 if not self
._backtrace
_context
_matches
():
329 self
._current
_backtrace
= []
332 for line
in self
._before
_lines
_queue
:
333 sys
.stdout
.write(line
)
335 if not self
._prefix
is None:
339 backtrace
= "".join(map(str, self
._current
_backtrace
))
340 if backtrace
in self
._known
_backtraces
:
341 print("[Backtrace #{}] Already seen, not resolving again.".format(self
._known
_backtraces
[backtrace
]))
342 print("") # To separate traces with an empty line
343 self
._current
_backtrace
= []
346 self
._known
_backtraces
[backtrace
] = self
._i
348 print("[Backtrace #{}]".format(self
._i
))
350 for module
, addr
in self
._current
_backtrace
:
351 self
._print
_resolved
_address
(module
, addr
)
353 print("") # To separate traces with an empty line
355 self
._current
_backtrace
= []
358 def __call__(self
, line
):
359 res
= self
.parser(line
)
362 self
._print
_current
_backtrace
()
363 if self
._before
_lines
> 0:
364 self
._before
_lines
_queue
.append(line
)
365 elif self
._before
_lines
< 0:
366 sys
.stdout
.write(line
) # line already has a trailing newline
368 pass # when == 0 no non-backtrace lines are printed
369 elif res
['type'] == self
.BacktraceParser
.Type
.SEPARATOR
:
371 elif res
['type'] == self
.BacktraceParser
.Type
.ADDRESS
:
372 addresses
= res
['addresses']
373 if len(addresses
) > 1:
374 self
._print
_current
_backtrace
()
375 if len(self
._current
_backtrace
) == 0:
376 self
._prefix
= res
['prefix']
379 self
._current
_backtrace
.append((r
['path'], r
['addr']))
381 self
._current
_backtrace
.append((self
._executable
, r
['addr']))
382 if len(addresses
) > 1:
383 self
._print
_current
_backtrace
()
385 print(f
"Unknown '{line}': {res}")
386 raise RuntimeError("Unknown result type {res}")