]> git.proxmox.com Git - mirror_frr.git/blame - python/clippy/elf.py
python: apply black formatting
[mirror_frr.git] / python / clippy / elf.py
CommitLineData
36a8fdfd
DL
1# FRR libelf wrapper
2#
3# Copyright (C) 2020 David Lamparter for NetDEF, Inc.
4#
5# This program is free software; you can redistribute it and/or modify it
6# under the terms of the GNU General Public License as published by the Free
7# Software Foundation; either version 2 of the License, or (at your option)
8# any later version.
9#
10# This program is distributed in the hope that it will be useful, but WITHOUT
11# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13# more details.
14#
15# You should have received a copy of the GNU General Public License along
16# with this program; see the file COPYING; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
00f0c399 19"""
36a8fdfd
DL
20Wrapping layer and additional utility around _clippy.ELFFile.
21
22Essentially, the C bits have the low-level ELF access bits that should be
23fast while this has the bits that string everything together (and would've
24been a PITA to do in C.)
25
26Surprisingly - or maybe through proper engineering - this actually works
27across architecture, word size and even endianness boundaries. Both the C
28module (through GElf_*) and this code (cf. struct.unpack format mangling
29in ELFDissectStruct) will take appropriate measures to flip and resize
30fields as needed.
00f0c399 31"""
36a8fdfd
DL
32
33import struct
34from collections import OrderedDict
35from weakref import WeakValueDictionary
36
37from _clippy import ELFFile, ELFAccessError
38
39#
40# data access
41#
42
00f0c399 43
class ELFNull(object):
    """
    Stand-in for a NULL pointer; handed out in places where an ELFData
    would otherwise be returned.
    """

    def __init__(self):
        # same attribute names that ELFData carries, all empty here
        self._dstsect = None
        self.symname = None

    def __repr__(self):
        return "<ptr: NULL>"

    def __hash__(self):
        # all NULL pointers share the hash of None
        return hash(None)

    def get_string(self):
        """
        There is no text behind a NULL pointer, so this is always None.
        """
        return None
61
00f0c399 62
class ELFUnresolved(object):
    """
    Placeholder for a pointer whose target is an external symbol that could
    not be resolved; handed out in places where an ELFData would otherwise
    be returned.

    :param symname: name of the referenced symbol
    :param addend: offset added to the symbol, normally zero
    """

    def __init__(self, symname, addend):
        self.symname = symname
        self.addend = addend
        # no destination section is known for an unresolved symbol
        self._dstsect = None

    def __repr__(self):
        return "<unresolved: %s+%d>" % (self.symname, self.addend)

    def __hash__(self):
        key = (self.symname, self.addend)
        return hash(key)
81
00f0c399 82
class ELFData(object):
    """
    Actual data somewhere in the ELF file.

    :type dstsect: ELFSubset
    :param dstsect: container data area (section or entire file)
    :param dstoffs: byte offset into dstsect
    :param dstlen: byte size of object, or None if unknown, open-ended or string
    """

    def __init__(self, dstsect, dstoffs, dstlen):
        self._dstsect = dstsect
        self._dstoffs = dstoffs
        self._dstlen = dstlen
        self.symname = None

    def __repr__(self):
        # an unknown (None) length is printed as -1
        return "<ptr: %s+0x%05x/%d>" % (
            self._dstsect.name,
            self._dstoffs,
            self._dstlen or -1,
        )

    def __hash__(self):
        return hash((self._dstsect, self._dstoffs))

    def get_string(self):
        """
        Interpret as C string / null terminated UTF-8 and get the actual text.

        The container is sliced with ``str`` as stop value, which asks it to
        read up to the terminating null byte.

        :returns: the decoded text
        :raises UnicodeDecodeError: if the bytes are not valid UTF-8.  Errors
            raised by the container's slice access propagate unchanged.
        """
        # Errors are deliberately not caught here: swallowing them (or
        # dropping into a debugger) would hide broken input from the caller.
        return self._dstsect[self._dstoffs : str].decode("UTF-8")

    def get_data(self, reflen):
        """
        Interpret as some structure (and check vs. expected length)

        :param reflen: expected size of the object, compared against actual
            size (which is only known in rare cases, mostly when directly
            accessing a symbol since symbols have their destination object
            size recorded)
        :returns: ``reflen`` bytes sliced from the container at this offset
        :raises ValueError: if the size is known and differs from ``reflen``
        """
        if self._dstlen is not None and self._dstlen != reflen:
            raise ValueError(
                "symbol size mismatch (got %d, expected %d)" % (self._dstlen, reflen)
            )
        return self._dstsect[self._dstoffs : self._dstoffs + reflen]

    def offset(self, offs, within_symbol=False):
        """
        Get another ELFData at an offset

        :param offs: byte offset, can be negative (e.g. in container_of)
        :param within_symbol: retain length information
        :returns: new ELFData in the same container; its length is None
            unless ``within_symbol`` is set and this object's length is known
        """
        if self._dstlen is None or not within_symbol:
            return ELFData(self._dstsect, self._dstoffs + offs, None)
        # staying inside the same symbol: what is left shrinks by the offset
        return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
146
00f0c399 147
36a8fdfd
DL
148#
149# dissection data items
150#
151
00f0c399 152
class ELFDissectData(object):
    """
    Behaviour shared between ELFDissectStruct and ELFDissectUnion
    """

    def __len__(self):
        """
        Truthiness hook, e.g. for "if struct: ..."

        False when the underlying pointer is NULL or an unresolved symbol.
        """
        return not isinstance(self._data, (ELFNull, ELFUnresolved))

    def container_of(self, parent, fieldname):
        """
        Treat this struct as a member of a larger struct and get the larger one

        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``

        :param parent: class (not instance) of the larger struct
        :param fieldname: fieldname that refers back to this
        :returns: instance of parent, with fieldname set to this object
        :raises AttributeError: if parent has no field called fieldname
        """
        if not hasattr(parent, "_efields"):
            parent._setup_efields()

        # "P" (pointer) is sized according to the ELF class of the file
        ptr_spec = "I" if self.elfclass == 32 else "Q"

        # add up the sizes of all members that precede the requested field
        skipped = 0
        for entry in parent._efields[self.elfclass]:
            if entry[0] == fieldname:
                break
            fmt = entry[1]
            skipped += struct.calcsize(ptr_spec if fmt == "P" else fmt)
        else:
            raise AttributeError("%r not found in %r.fields" % (fieldname, parent))

        return parent(self._data.offset(-skipped), replace={fieldname: self})
36a8fdfd 191
36a8fdfd
DL
192
class ELFDissectStruct(ELFDissectData):
    """
    Decode and provide access to a struct somewhere in the ELF file

    Handles pointers and strings somewhat nicely.  Create a subclass for each
    struct that is to be accessed, and give a field list in a "fields"
    class-member.

    :param dataptr: ELFData referring to the data bits to decode.
    :param parent: where this was instantiated from; only for reference, has
        no functional impact.
    :param replace: substitute data values for specific fields.  Used by
        `container_of` to replace the inner struct when creating the outer
        one.

    .. attribute:: fields

       List of tuples describing the struct members.  Items can be:
       - ``('name', ELFDissectData)`` - directly embed another struct
       - ``('name', 'I')`` - simple data types; second item for struct.unpack
       - ``('name', 'I', None)`` - no special decoding, raw value is kept
       - ``('name', 'P', str)`` - pointer to string
       - ``('name', 'P', ELFDissectData)`` - pointer to another struct

       A name of ``None`` makes the decoder skip the member entirely.

       ``P`` is added as unpack format for pointers (sized appropriately for
       the ELF file.)

       Refer to tiabwarfo.py for extracting this from ``pahole``.

       TBD: replace tuples with a class.

    .. attribute:: fieldrename

       Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
    """

    class Pointer(object):
        """
        Quick wrapper for pointers to further structs

        This is just here to avoid going into infinite loops when loading
        structs that have pointers to each other (e.g. struct xref <-->
        struct xrefdata.)  The pointer destination is only instantiated when
        actually accessed.
        """

        def __init__(self, cls, ptr):
            self.cls = cls
            self.ptr = ptr

        def __repr__(self):
            return "<Pointer:%s %r>" % (self.cls.__name__, self.ptr)

        def __call__(self):
            """
            Dereference: None for a NULL pointer, else an instance of cls.
            """
            if isinstance(self.ptr, ELFNull):
                return None
            return self.cls(self.ptr)

    def __new__(cls, dataptr, parent=None, replace=None):
        """
        Look the object up in the destination container's ``_pointers`` cache
        (keyed by class and data pointer) before creating a new one.

        Pointers without a destination container (``_dstsect`` is None) are
        never cached.
        """
        if dataptr._dstsect is None:
            return super().__new__(cls)

        cache = dataptr._dstsect._pointers
        # compare against None explicitly; truthiness would go through
        # __len__, which needs a fully initialised object
        obj = cache.get((cls, dataptr))
        if obj is not None:
            return obj
        obj = super().__new__(cls)
        cache[(cls, dataptr)] = obj
        return obj

    # format characters whose size follows the ELF class (32 vs. 64 bit)
    replacements = "lLnN"

    @classmethod
    def _preproc_structspec(cls, elfclass, spec):
        """
        Turn one field spec into a struct format usable for the ELF class.

        :param elfclass: 32 or 64
        :param spec: struct format string, or an object with a
            ``calcsize(elfclass)`` method (an embedded struct), which is
            turned into an opaque "<size>s" byte string
        :returns: format string with the characters from ``replacements``
            swapped for fixed-size equivalents
        """
        if hasattr(spec, "calcsize"):
            spec = "%ds" % (spec.calcsize(elfclass),)

        # index 0: lowercase (signed), index 1: uppercase (unsigned)
        repl = ["i", "I"] if elfclass == 32 else ["q", "Q"]
        for c in cls.replacements:
            spec = spec.replace(c, repl[int(c.isupper())])
        return spec

    @classmethod
    def _setup_efields(cls):
        """
        Precompute, for both ELF classes, the preprocessed field list
        (``_efields``) and the summed field size (``_esize``).
        """
        cls._efields = {}
        cls._esize = {}
        for elfclass in [32, 64]:
            efields = cls._efields[elfclass] = []
            size = 0
            for f in cls.fields:
                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
                efields.append(newf)
                size += struct.calcsize(newf[1])
            cls._esize[elfclass] = size

    def __init__(self, dataptr, parent=None, replace=None):
        if not hasattr(self.__class__, "_efields"):
            self._setup_efields()

        self._fdata = None
        self._data = dataptr
        self._parent = parent
        self.symname = dataptr.symname
        if isinstance(dataptr, (ELFNull, ELFUnresolved)):
            # nothing to decode; the object stays empty and evaluates False
            self._fdata = {}
            return

        self._elfsect = dataptr._dstsect
        self.elfclass = self._elfsect._elffile.elfclass
        self.offset = dataptr._dstoffs

        pspecl = [f[1] for f in self._efields[self.elfclass]]

        # need to correlate output from struct.unpack with extra metadata
        # about the particular fields, so note down byte offsets (in locs)
        # and tuple indices of pointers (in ptrs)
        pspec = ""
        locs = {}
        ptrs = set()

        for idx, spec in enumerate(pspecl):
            if spec == "P":
                ptrs.add(idx)
                spec = self._elfsect.ptrtype

            locs[idx] = struct.calcsize(pspec)
            pspec = pspec + spec

        self._total_size = struct.calcsize(pspec)

        data = dataptr.get_data(self._total_size)
        raw = struct.unpack(self._elfsect.endian + pspec, data)
        # pointer members are not used as raw integers; they are looked up
        # again through the section so relocations are taken into account
        unpacked = [
            self._elfsect.pointer(self.offset + locs[idx]) if idx in ptrs else val
            for idx, val in enumerate(raw)
        ]
        self._fraw = unpacked
        self._fdata = OrderedDict()
        replace = replace or {}

        for i, item in enumerate(unpacked):
            field = self.fields[i]
            name = field[0]
            if name is None:
                continue

            if name in replace:
                self._fdata[name] = replace[name]
                continue

            if isinstance(field[1], type) and issubclass(field[1], ELFDissectData):
                # embedded struct: decode it in place at the member's offset
                self._fdata[name] = field[1](dataptr.offset(locs[i]), self)
                continue

            if len(field) == 3:
                if field[2] == str:
                    # NOTE(review): ELFUnresolved has no get_string(); a
                    # string pointer to an unresolved symbol would raise
                    # AttributeError here - confirm whether that can occur
                    self._fdata[name] = item.get_string()
                    continue
                elif field[2] is None:
                    pass
                elif issubclass(field[2], ELFDissectData):
                    # defer decoding until the attribute is accessed
                    self._fdata[name] = self.Pointer(field[2], item)
                    continue

            self._fdata[name] = item

    def __getattr__(self, attrname):
        """
        Expose decoded struct members as attributes.

        Pointer members are dereferenced on first access and the result is
        stored in place of the Pointer wrapper.

        :raises AttributeError: if there is no decoded member of that name
        """
        # __getattr__ only runs after normal lookup failed.  Go through the
        # instance dict so that an object without _fdata (not initialised
        # yet, or being copied/unpickled) gives AttributeError instead of
        # recursing into __getattr__ forever.
        fdata = self.__dict__.get("_fdata")
        if not fdata or attrname not in fdata:
            raise AttributeError(attrname)
        value = fdata[attrname]
        if isinstance(value, self.Pointer):
            value = fdata[attrname] = value()
        return value

    def __repr__(self):
        if not isinstance(self._data, ELFData):
            return "<%s: %r>" % (self.__class__.__name__, self._data)
        return "<%s: %s>" % (
            self.__class__.__name__,
            ", ".join(["%s=%r" % t for t in self._fdata.items()]),
        )

    @classmethod
    def calcsize(cls, elfclass):
        """
        Sum up byte size of this struct

        Wraps struct.calcsize with some extra features.

        :param elfclass: 32 or 64; selects the field list and pointer size
        :returns: size in bytes as computed by struct.calcsize
        """
        if not hasattr(cls, "_efields"):
            cls._setup_efields()

        pspec = "".join([f[1] for f in cls._efields[elfclass]])

        ptrtype = "I" if elfclass == 32 else "Q"
        pspec = pspec.replace("P", ptrtype)

        return struct.calcsize(pspec)
400
00f0c399 401
class ELFDissectUnion(ELFDissectData):
    """
    Decode multiple structs in the same place.

    Not currently used (and hence not tested.)  Worked at some point but not
    needed anymore and may be borked now.  Remove this comment when using.

    Subclasses list the alternatives in a ``members`` class attribute, as
    ``(name, class)`` pairs.

    :param dataptr: data pointer handed to every member class
    :param parent: passed through to every member class
    """

    def __init__(self, dataptr, parent=None):
        self._dataptr = dataptr
        # the methods inherited from ELFDissectData (__len__, container_of)
        # read self._data, so keep the pointer under that name as well
        self._data = dataptr
        self._parent = parent
        # instance-level list of decoded members; this shadows the class
        # attribute of the same name, hence the explicit self.__class__ below
        self.members = []
        for name, membercls in self.__class__.members:
            item = membercls(dataptr, parent)
            self.members.append(item)
            setattr(self, name, item)

    def __repr__(self):
        return "<%s: %s>" % (
            self.__class__.__name__,
            ", ".join([repr(i) for i in self.members]),
        )

    @classmethod
    def calcsize(cls, elfclass):
        """
        Byte size of the union, i.e. the size of its largest member.
        """
        return max(member.calcsize(elfclass) for name, member in cls.members)
428
00f0c399 429
36a8fdfd
DL
430#
431# wrappers for spans of ELF data
432#
433
00f0c399 434
class ELFSubset(object):
    """
    Common abstract base for section-level and file-level access.

    The methods here read ``name``, ``_obj``, ``_elffile``, ``ptrtype`` and
    ``endian`` and call ``_wrap_data()``; none of these are set up in this
    class, subclasses have to provide them.
    """

    def __init__(self):
        super().__init__()

        # lookup table used by ELFDissectStruct.__new__ to find objects that
        # were already created for a (class, data pointer) pair
        self._pointers = WeakValueDictionary()

    def __hash__(self):
        return hash(self.name)

    def __getitem__(self, k):
        """
        Read data from slice

        Subscript **must** be a slice; a simple index will not return a byte
        but rather throw an exception.  Valid slice syntaxes are defined by
        the C module:

        - `this[123:456]` - extract specific range
        - `this[123:str]` - extract until null byte.  The slice stop value is
            the `str` type (or, technically, `unicode`.)
        """
        return self._obj[k]

    def getreloc(self, offset):
        """
        Check for a relocation record at the specified offset.
        """
        return self._obj.getreloc(offset)

    def iter_data(self, scls, slice_=slice(None)):
        """
        Assume an array of structs present at a particular slice and decode

        :param scls: ELFDissectData subclass for the struct
        :param slice_: optional range specification; a missing (or zero)
            start means the beginning and a missing (or zero) stop means the
            end of the data.  Negative values count from the end.  The step
            of the slice is not used.
        :returns: generator yielding one ``scls`` instance per array element
        """
        size = scls.calcsize(self._elffile.elfclass)

        offset = slice_.start or 0
        stop = slice_.stop or self._obj.len
        # negative bounds are relative to the end, as in normal slicing
        if offset < 0:
            offset += self._obj.len
        if stop < 0:
            stop += self._obj.len

        while offset < stop:
            yield scls(ELFData(self, offset, size))
            offset += size

    def pointer(self, offset):
        """
        Try to dereference a pointer value

        This checks whether there's a relocation at the given offset and
        uses that; otherwise (e.g. in a non-PIE executable where the pointer
        is already resolved by the linker) the data at the location is used.

        :param offset: byte offset from beginning of section,
            or virtual address in file
        :returns: ELFData wrapping pointed-to object; ELFNull for a NULL
            pointer, ELFUnresolved for a reference to an unresolved symbol
        """

        ptrsize = struct.calcsize(self.ptrtype)
        data = struct.unpack(
            self.endian + self.ptrtype, self[offset : offset + ptrsize]
        )[0]

        reloc = self.getreloc(offset)
        dstsect = None
        if reloc:
            # section won't be available in whole-file operation
            dstsect = reloc.getsection(data)
            addend = reloc.r_addend

            if reloc.relative:
                # old-style ELF REL instead of RELA, not well-tested
                addend += data

            if reloc.unresolved and reloc.symvalid:
                return ELFUnresolved(reloc.symname, addend)
            elif reloc.symvalid:
                data = addend + reloc.st_value
            else:
                data = addend

        # 0 could technically be a valid pointer for a shared library,
        # since libraries may use 0 as default virtual start address (it'll
        # be adjusted on loading)
        # That said, if the library starts at 0, that's where the ELF header
        # would be so it's still an invalid pointer.
        if data == 0 and dstsect is None:
            return ELFNull()

        # wrap_data is different between file & section
        return self._wrap_data(data, dstsect)
532
00f0c399 533
class ELFDissectSection(ELFSubset):
    """
    Access to what is inside one ELF section, e.g. ``.text`` or ``.data``

    :param elfwrap: ELFDissectFile wrapper for the file
    :param idx: section index in section header table
    :param section: section object from C module
    """

    def __init__(self, elfwrap, idx, section):
        super().__init__()

        self._elfwrap = elfwrap
        self._elffile = elfwrap._elffile
        self._idx = idx
        # the section object is reachable under both names
        self._obj = section
        self._section = section
        self.name = section.name
        # pointer format and byte order are taken over from the file wrapper
        self.ptrtype = elfwrap.ptrtype
        self.endian = elfwrap.endian

    def _wrap_data(self, data, dstsect):
        """
        Wrap an address into an ELFData relative to its containing section.

        :param data: address value
        :param dstsect: section object containing the address, or None to
            have it looked up by address in the ELF file
        """
        target = dstsect
        if target is None:
            target = self._elfwrap._elffile.get_section_addr(data)
        # translate the address into an offset from the section start
        rel = data - target.sh_addr
        wrapped = self._elfwrap.get_section(target.idx)
        return ELFData(wrapped, rel, None)
560
00f0c399 561
class ELFDissectFile(ELFSubset):
    """
    Access the contents of an ELF file.

    Note that offsets for array subscript and relocation/pointer access are
    based on the file's virtual address space and are NOT offsets to the
    start of the file on disk!

    (Shared libraries frequently have a virtual address space starting at 0,
    but non-PIE executables have an architecture specific default loading
    address like 0x400000 on x86.)

    :param filename: ELF file to open
    """

    def __init__(self, filename):
        super().__init__()

        self.name = filename
        elffile = ELFFile(filename)
        self._elffile = elffile
        self._obj = elffile
        # section wrappers created so far, keyed by section index
        self._sections = {}

        # pointer unpack format and byte order marker for struct.unpack
        self.ptrtype = "I" if elffile.elfclass == 32 else "Q"
        self.endian = ">" if elffile.bigendian else "<"

    @property
    def _elfwrap(self):
        # the file is its own wrapper
        return self

    def _wrap_data(self, data, dstsect):
        # in whole-file operation the address is used as offset directly
        return ELFData(self, data, None)

    def get_section(self, secname):
        """
        Find a section, given either its name or its index

        :param secname: section name, or (if an int) section index
        :returns: ELFDissectSection wrapper (the same one on repeated
            calls), or None if there is no section of that name
        """
        if isinstance(secname, int):
            section = self._elffile.get_section_idx(secname)
            sh_idx = secname
        else:
            section = self._elffile.get_section(secname)
            if section is None:
                return None
            sh_idx = section.idx

        wrapper = self._sections.get(sh_idx)
        if wrapper is None:
            wrapper = self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section)
        return wrapper