]>
git.proxmox.com Git - mirror_frr.git/blob - python/clippy/elf.py
1 # SPDX-License-Identifier: GPL-2.0-or-later
4 # Copyright (C) 2020 David Lamparter for NetDEF, Inc.
7 Wrapping layer and additional utility around _clippy.ELFFile.
9 Essentially, the C bits have the low-level ELF access bits that should be
10 fast while this has the bits that string everything together (and would've
11 been a PITA to do in C.)
13 Surprisingly - or maybe through proper engineering - this actually works
14 across architecture, word size and even endianness boundaries. Both the C
15 module (through GElf_*) and this code (cf. struct.unpack format mangling
16 in ELFDissectStruct) will take appropriate measures to flip and resize
21 from collections
import OrderedDict
22 from weakref
import WeakValueDictionary
24 from _clippy
import ELFFile
, ELFAccessError
31 class ELFNull(object):
33 NULL pointer, returned instead of ELFData
50 class ELFUnresolved(object):
52 Reference to an unresolved external symbol, returned instead of ELFData
54 :param symname: name of the referenced symbol
55 :param addend: offset added to the symbol, normally zero
58 def __init__(self
, symname
, addend
):
60 self
.symname
= symname
64 return "<unresolved: %s+%d>" % (self
.symname
, self
.addend
)
67 return hash((self
.symname
, self
.addend
))
70 class ELFData(object):
72 Actual data somewhere in the ELF file.
74 :type dstsect: ELFSubset
75 :param dstsect: container data area (section or entire file)
76 :param dstoffs: byte offset into dstsect
77 :param dstlen: byte size of object, or None if unknown, open-ended or string
80 def __init__(self
, dstsect
, dstoffs
, dstlen
):
81 self
._dstsect
= dstsect
82 self
._dstoffs
= dstoffs
87 return "<ptr: %s+0x%05x/%d>" % (
94 return hash((self
._dstsect
, self
._dstoffs
))
98 Interpret as C string / null terminated UTF-8 and get the actual text.
101 return self
._dstsect
[self
._dstoffs
: str].decode("UTF-8")
107 def get_data(self
, reflen
):
109 Interpret as some structure (and check vs. expected length)
111 :param reflen: expected size of the object, compared against actual
112 size (which is only known in rare cases, mostly when directly
113 accessing a symbol since symbols have their destination object
116 if self
._dstlen
is not None and self
._dstlen
!= reflen
:
118 "symbol size mismatch (got %d, expected %d)" % (self
._dstlen
, reflen
)
120 return self
._dstsect
[self
._dstoffs
: self
._dstoffs
+ reflen
]
122 def offset(self
, offs
, within_symbol
=False):
124 Get another ELFData at an offset
126 :param offs: byte offset, can be negative (e.g. in container_of)
127 :param within_symbol: retain length information
129 if self
._dstlen
is None or not within_symbol
:
130 return ELFData(self
._dstsect
, self
._dstoffs
+ offs
, None)
132 return ELFData(self
._dstsect
, self
._dstoffs
+ offs
, self
._dstlen
- offs
)
136 # dissection data items
140 class ELFDissectData(object):
142 Common bits for ELFDissectStruct and ELFDissectUnion
151 Used for boolean evaluation, e.g. "if struct: ..."
154 isinstance(self
._data
, ELFNull
) or isinstance(self
._data
, ELFUnresolved
)
157 def container_of(self
, parent
, fieldname
):
159 Assume this struct is embedded in a larger struct and get at the larger
161 Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
163 :param parent: class (not instance) of the larger struct
164 :param fieldname: fieldname that refers back to this
165 :returns: instance of parent, with fieldname set to this object
168 if not hasattr(parent
, "_efields"):
169 parent
._setup
_efields
()
171 for field
in parent
._efields
[self
.elfclass
]:
172 if field
[0] == fieldname
:
176 spec
= "I" if self
.elfclass
== 32 else "Q"
177 offset
+= struct
.calcsize(spec
)
179 raise AttributeError("%r not found in %r.fields" % (fieldname
, parent
))
181 return parent(self
._data
.offset(-offset
), replace
={fieldname
: self
})
184 class ELFDissectStruct(ELFDissectData
):
186 Decode and provide access to a struct somewhere in the ELF file
188 Handles pointers and strings somewhat nicely. Create a subclass for each
189 struct that is to be accessed, and give a field list in a "fields"
192 :param dataptr: ELFData referring to the data bits to decode.
193 :param parent: where this was instantiated from; only for reference, has
194 no functional impact.
195 :param replace: substitute data values for specific fields. Used by
196 `container_of` to replace the inner struct when creating the outer
199 .. attribute:: fields
201 List of tuples describing the struct members. Items can be:
202 - ``('name', ELFDissectData)`` - directly embed another struct
203 - ``('name', 'I')`` - simple data types; second item for struct.unpack
204 - ``('name', 'I', None)`` - field to ignore
205 - ``('name', 'P', str)`` - pointer to string
206 - ``('name', 'P', ELFDissectData)`` - pointer to another struct
208 ``P`` is added as unpack format for pointers (sized appropriately for
211 Refer to tiabwarfo.py for extracting this from ``pahole``.
213 TBD: replace tuples with a class.
215 .. attribute:: fieldrename
217 Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
220 class Pointer(object):
222 Quick wrapper for pointers to further structs
224 This is just here to avoid going into infinite loops when loading
225 structs that have pointers to each other (e.g. struct xref <-->
226 struct xrefdata.) The pointer destination is only instantiated when
230 def __init__(self
, cls
, ptr
):
235 return "<Pointer:%s %r>" % (self
.cls
.__name
__, self
.ptr
)
238 if isinstance(self
.ptr
, ELFNull
):
240 return self
.cls(self
.ptr
)
242 def __new__(cls
, dataptr
, parent
=None, replace
=None):
243 if dataptr
._dstsect
is None:
244 return super().__new
__(cls
)
246 obj
= dataptr
._dstsect
._pointers
.get((cls
, dataptr
))
249 obj
= super().__new
__(cls
)
250 dataptr
._dstsect
._pointers
[(cls
, dataptr
)] = obj
253 replacements
= "lLnN"
256 def _preproc_structspec(cls
, elfclass
, spec
):
259 if hasattr(spec
, "calcsize"):
260 spec
= "%ds" % (spec
.calcsize(elfclass
),)
266 for c
in cls
.replacements
:
267 spec
= spec
.replace(c
, repl
[int(c
.isupper())])
271 def _setup_efields(cls
):
274 for elfclass
in [32, 64]:
275 cls
._efields
[elfclass
] = []
278 newf
= (f
[0], cls
._preproc
_structspec
(elfclass
, f
[1])) + f
[2:]
279 cls
._efields
[elfclass
].append(newf
)
280 size
+= struct
.calcsize(newf
[1])
281 cls
._esize
[elfclass
] = size
283 def __init__(self
, dataptr
, parent
=None, replace
=None):
284 if not hasattr(self
.__class
__, "_efields"):
285 self
._setup
_efields
()
289 self
._parent
= parent
290 self
.symname
= dataptr
.symname
291 if isinstance(dataptr
, ELFNull
) or isinstance(dataptr
, ELFUnresolved
):
295 self
._elfsect
= dataptr
._dstsect
296 self
.elfclass
= self
._elfsect
._elffile
.elfclass
297 self
.offset
= dataptr
._dstoffs
299 pspecl
= [f
[1] for f
in self
._efields
[self
.elfclass
]]
301 # need to correlate output from struct.unpack with extra metadata
302 # about the particular fields, so note down byte offsets (in locs)
303 # and tuple indices of pointers (in ptrs)
308 for idx
, spec
in enumerate(pspecl
):
311 spec
= self
._elfsect
.ptrtype
313 locs
[idx
] = struct
.calcsize(pspec
)
316 self
._total
_size
= struct
.calcsize(pspec
)
319 idx
, val
= v
[0], v
[1]
322 return self
._elfsect
.pointer(self
.offset
+ locs
[idx
])
324 data
= dataptr
.get_data(struct
.calcsize(pspec
))
325 unpacked
= struct
.unpack(self
._elfsect
.endian
+ pspec
, data
)
326 unpacked
= list(map(replace_ptrs
, enumerate(unpacked
)))
327 self
._fraw
= unpacked
328 self
._fdata
= OrderedDict()
329 replace
= replace
or {}
331 for i
, item
in enumerate(unpacked
):
332 name
= self
.fields
[i
][0]
337 self
._fdata
[name
] = replace
[name
]
340 if isinstance(self
.fields
[i
][1], type) and issubclass(
341 self
.fields
[i
][1], ELFDissectData
343 dataobj
= self
.fields
[i
][1](dataptr
.offset(locs
[i
]), self
)
344 self
._fdata
[name
] = dataobj
346 if len(self
.fields
[i
]) == 3:
347 if self
.fields
[i
][2] == str:
348 self
._fdata
[name
] = item
.get_string()
350 elif self
.fields
[i
][2] is None:
352 elif issubclass(self
.fields
[i
][2], ELFDissectData
):
353 cls
= self
.fields
[i
][2]
354 dataobj
= self
.Pointer(cls
, item
)
355 self
._fdata
[name
] = dataobj
358 self
._fdata
[name
] = item
360 def __getattr__(self
, attrname
):
361 if attrname
not in self
._fdata
:
362 raise AttributeError(attrname
)
363 if isinstance(self
._fdata
[attrname
], self
.Pointer
):
364 self
._fdata
[attrname
] = self
._fdata
[attrname
]()
365 return self
._fdata
[attrname
]
368 if not isinstance(self
._data
, ELFData
):
369 return "<%s: %r>" % (self
.__class
__.__name
__, self
._data
)
370 return "<%s: %s>" % (
371 self
.__class
__.__name
__,
372 ", ".join(["%s=%r" % t
for t
in self
._fdata
.items()]),
376 def calcsize(cls
, elfclass
):
378 Sum up byte size of this struct
380 Wraps struct.calcsize with some extra features.
382 if not hasattr(cls
, "_efields"):
385 pspec
= "".join([f
[1] for f
in cls
._efields
[elfclass
]])
387 ptrtype
= "I" if elfclass
== 32 else "Q"
388 pspec
= pspec
.replace("P", ptrtype
)
390 return struct
.calcsize(pspec
)
393 class ELFDissectUnion(ELFDissectData
):
395 Decode multiple structs in the same place.
397 Not currently used (and hence not tested.) Worked at some point but not
398 needed anymore and may be borked now. Remove this comment when using.
403 def __init__(self
, dataptr
, parent
=None):
404 self
._dataptr
= dataptr
405 self
._parent
= parent
407 for name
, membercls
in self
.__class
__.members
:
408 item
= membercls(dataptr
, parent
)
409 self
.members
.append(item
)
410 setattr(self
, name
, item
)
413 return "<%s: %s>" % (
414 self
.__class
__.__name
__,
415 ", ".join([repr(i
) for i
in self
.members
]),
419 def calcsize(cls
, elfclass
):
420 return max([member
.calcsize(elfclass
) for name
, member
in cls
.members
])
424 # wrappers for spans of ELF data
428 class ELFSubset(object):
430 Common abstract base for section-level and file-level access.
441 self
._pointers
= WeakValueDictionary()
443 def _wrap_data(self
, data
, dstsect
):
444 raise NotImplementedError()
447 return hash(self
.name
)
449 def __getitem__(self
, k
):
453 Subscript **must** be a slice; a simple index will not return a byte
454 but rather throw an exception. Valid slice syntaxes are defined by
457 - `this[123:456]` - extract specific range
458 - `this[123:str]` - extract until null byte. The slice stop value is
459 the `str` type (or, technically, `unicode`.)
463 def getreloc(self
, offset
):
465 Check for a relocation record at the specified offset.
467 return self
._obj
.getreloc(offset
)
469 def iter_data(self
, scls
, slice_
=slice(None)):
471 Assume an array of structs present at a particular slice and decode
473 :param scls: ELFDissectData subclass for the struct
474 :param slice_: optional range specification
476 size
= scls
.calcsize(self
._elffile
.elfclass
)
478 offset
= slice_
.start
or 0
479 stop
= slice_
.stop
or self
._obj
.len
481 stop
= self
._obj
.len - stop
484 yield scls(ELFData(self
, offset
, size
))
487 def pointer(self
, offset
):
489 Try to dereference a pointer value
491 This checks whether there's a relocation at the given offset and
492 uses that; otherwise (e.g. in a non-PIE executable where the pointer
493 is already resolved by the linker) the data at the location is used.
495 :param offset: byte offset from beginning of section,
496 or virtual address in file
497 :returns: ELFData wrapping pointed-to object
500 ptrsize
= struct
.calcsize(self
.ptrtype
)
501 data
= struct
.unpack(
502 self
.endian
+ self
.ptrtype
, self
[offset
: offset
+ ptrsize
]
505 reloc
= self
.getreloc(offset
)
508 # section won't be available in whole-file operation
509 dstsect
= reloc
.getsection(data
)
510 addend
= reloc
.r_addend
513 # old-style ELF REL instead of RELA, not well-tested
516 if reloc
.unresolved
and reloc
.symvalid
:
517 return ELFUnresolved(reloc
.symname
, addend
)
519 data
= addend
+ reloc
.st_value
523 # 0 could technically be a valid pointer for a shared library,
524 # since libraries may use 0 as default virtual start address (it'll
525 # be adjusted on loading)
526 # That said, if the library starts at 0, that's where the ELF header
527 # would be so it's still an invalid pointer.
528 if data
== 0 and dstsect
== None:
531 # wrap_data is different between file & section
532 return self
._wrap
_data
(data
, dstsect
)
535 class ELFDissectSection(ELFSubset
):
537 Access the contents of an ELF section like ``.text`` or ``.data``
539 :param elfwrap: ELFDissectFile wrapper for the file
540 :param idx: section index in section header table
541 :param section: section object from C module
544 def __init__(self
, elfwrap
, idx
, section
):
547 self
._elfwrap
= elfwrap
548 self
._elffile
= elfwrap
._elffile
550 self
._section
= self
._obj
= section
551 self
.name
= section
.name
552 self
.ptrtype
= elfwrap
.ptrtype
553 self
.endian
= elfwrap
.endian
555 def _wrap_data(self
, data
, dstsect
):
557 dstsect
= self
._elfwrap
._elffile
.get_section_addr(data
)
558 offs
= data
- dstsect
.sh_addr
559 dstsect
= self
._elfwrap
.get_section(dstsect
.idx
)
560 return ELFData(dstsect
, offs
, None)
563 class ELFDissectFile(ELFSubset
):
565 Access the contents of an ELF file.
567 Note that offsets for array subscript and relocation/pointer access are
568 based on the file's virtual address space and are NOT offsets to the
569 start of the file on disk!
571 (Shared libraries frequently have a virtual address space starting at 0,
572 but non-PIE executables have an architecture specific default loading
573 address like 0x400000 on x86.
575 :param filename: ELF file to open
578 def __init__(self
, filename
):
582 self
._elffile
= self
._obj
= ELFFile(filename
)
585 self
.ptrtype
= "I" if self
._elffile
.elfclass
== 32 else "Q"
586 self
.endian
= ">" if self
._elffile
.bigendian
else "<"
def _wrap_data(self, data, dstsect):
    """
    Wrap a pointer value for whole-file access.

    ``dstsect`` is not used here: the returned ELFData has this file
    object as its container, ``data`` as its offset, and no length.
    """
    return ELFData(dstsect=self, dstoffs=data, dstlen=None)
595 def get_section(self
, secname
):
597 Look up section by name or index
599 if isinstance(secname
, int):
601 section
= self
._elffile
.get_section_idx(secname
)
603 section
= self
._elffile
.get_section(secname
)
610 if sh_idx
not in self
._sections
:
611 self
._sections
[sh_idx
] = ELFDissectSection(self
, sh_idx
, section
)
613 return self
._sections
[sh_idx
]