]>
git.proxmox.com Git - mirror_frr.git/blob - python/clippy/elf.py
3 # Copyright (C) 2020 David Lamparter for NetDEF, Inc.
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the Free
7 # Software Foundation; either version 2 of the License, or (at your option)
10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 # You should have received a copy of the GNU General Public License along
16 # with this program; see the file COPYING; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 Wrapping layer and additional utility around _clippy.ELFFile.
22 Essentially, the C bits have the low-level ELF access bits that should be
23 fast while this has the bits that string everything together (and would've
24 been a PITA to do in C.)
26 Surprisingly - or maybe through proper engineering - this actually works
27 across architecture, word size and even endianness boundaries. Both the C
28 module (through GElf_*) and this code (cf. struct.unpack format mangling
29 in ELFDissectStruct) will take appropriate measures to flip and resize
34 from collections
import OrderedDict
35 from weakref
import WeakValueDictionary
37 from _clippy
import ELFFile
, ELFAccessError
43 class ELFNull(object):
45 NULL pointer, returned instead of ELFData
60 class ELFUnresolved(object):
62 Reference to an unresolved external symbol, returned instead of ELFData
64 :param symname: name of the referenced symbol
65 :param addend: offset added to the symbol, normally zero
67 def __init__(self
, symname
, addend
):
69 self
.symname
= symname
73 return '<unresolved: %s+%d>' % (self
.symname
, self
.addend
)
76 return hash((self
.symname
, self
.addend
))
78 class ELFData(object):
80 Actual data somewhere in the ELF file.
82 :type dstsect: ELFSubset
83 :param dstsect: container data area (section or entire file)
84 :param dstoffs: byte offset into dstsect
85 :param dstlen: byte size of object, or None if unknown, open-ended or string
87 def __init__(self
, dstsect
, dstoffs
, dstlen
):
88 self
._dstsect
= dstsect
89 self
._dstoffs
= dstoffs
94 return '<ptr: %s+0x%05x/%d>' % (self
._dstsect
.name
, self
._dstoffs
, self
._dstlen
or -1)
97 return hash((self
._dstsect
, self
._dstoffs
))
101 Interpret as C string / null terminated UTF-8 and get the actual text.
104 return self
._dstsect
[self
._dstoffs
:str].decode('UTF-8')
106 import pdb
; pdb
.set_trace()
def get_data(self, reflen):
    """
    Return the raw bytes of the referenced object as a structure of
    ``reflen`` bytes.

    :param reflen: number of bytes the caller expects the object to have.
        If this reference carries a known length, the two must agree.
    :returns: the slice ``[offset : offset + reflen]`` of the containing
        data area
    :raises ValueError: if a length is known for this reference and it
        differs from ``reflen``
    """
    known_len = self._dstlen
    if known_len is not None and known_len != reflen:
        raise ValueError(
            'symbol size mismatch (got %d, expected %d)' % (known_len, reflen)
        )

    start = self._dstoffs
    return self._dstsect[start:start + reflen]
121 def offset(self
, offs
, within_symbol
=False):
123 Get another ELFData at an offset
125 :param offs: byte offset, can be negative (e.g. in container_of)
126 :param within_symbol: retain length information
128 if self
._dstlen
is None or not within_symbol
:
129 return ELFData(self
._dstsect
, self
._dstoffs
+ offs
, None)
131 return ELFData(self
._dstsect
, self
._dstoffs
+ offs
, self
._dstlen
- offs
)
134 # dissection data items
137 class ELFDissectData(object):
139 Common bits for ELFDissectStruct and ELFDissectUnion
144 Used for boolean evaluation, e.g. "if struct: ..."
146 return not (isinstance(self
._data
, ELFNull
) or isinstance(self
._data
, ELFUnresolved
))
148 def container_of(self
, parent
, fieldname
):
150 Assume this struct is embedded in a larger struct and get at the larger
152 Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
154 :param parent: class (not instance) of the larger struct
155 :param fieldname: fieldname that refers back to this
156 :returns: instance of parent, with fieldname set to this object
159 if not hasattr(parent
, '_efields'):
160 parent
._setup
_efields
()
162 for field
in parent
._efields
[self
.elfclass
]:
163 if field
[0] == fieldname
:
167 spec
= 'I' if self
.elfclass
== 32 else 'Q'
168 offset
+= struct
.calcsize(spec
)
170 raise AttributeError('%r not found in %r.fields' % (fieldname
, parent
))
172 return parent(self
._data
.offset(-offset
), replace
= {fieldname
: self
})
174 class ELFDissectStruct(ELFDissectData
):
176 Decode and provide access to a struct somewhere in the ELF file
178 Handles pointers and strings somewhat nicely. Create a subclass for each
179 struct that is to be accessed, and give a field list in a "fields"
182 :param dataptr: ELFData referring to the data bits to decode.
183 :param parent: where this was instantiated from; only for reference, has
184 no functional impact.
185 :param replace: substitute data values for specific fields. Used by
186 `container_of` to replace the inner struct when creating the outer
189 .. attribute:: fields
191 List of tuples describing the struct members. Items can be:
192 - ``('name', ELFDissectData)`` - directly embed another struct
193 - ``('name', 'I')`` - simple data types; second item for struct.unpack
194 - ``('name', 'I', None)`` - field to ignore
195 - ``('name', 'P', str)`` - pointer to string
196 - ``('name', 'P', ELFDissectData)`` - pointer to another struct
198 ``P`` is added as unpack format for pointers (sized appropriately for
201 Refer to tiabwarfo.py for extracting this from ``pahole``.
203 TBD: replace tuples with a class.
205 .. attribute:: fieldrename
207 Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
210 class Pointer(object):
212 Quick wrapper for pointers to further structs
214 This is just here to avoid going into infinite loops when loading
215 structs that have pointers to each other (e.g. struct xref <-->
216 struct xrefdata.) The pointer destination is only instantiated when
219 def __init__(self
, cls
, ptr
):
224 return '<Pointer:%s %r>' % (self
.cls
.__name
__, self
.ptr
)
227 if isinstance(self
.ptr
, ELFNull
):
229 return self
.cls(self
.ptr
)
231 def __new__(cls
, dataptr
, parent
= None, replace
= None):
232 if dataptr
._dstsect
is None:
233 return super().__new
__(cls
)
235 obj
= dataptr
._dstsect
._pointers
.get((cls
, dataptr
))
238 obj
= super().__new
__(cls
)
239 dataptr
._dstsect
._pointers
[(cls
, dataptr
)] = obj
242 replacements
= 'lLnN'
245 def _preproc_structspec(cls
, elfclass
, spec
):
248 if hasattr(spec
, 'calcsize'):
249 spec
= '%ds' % (spec
.calcsize(elfclass
),)
255 for c
in cls
.replacements
:
256 spec
= spec
.replace(c
, repl
[int(c
.isupper())])
260 def _setup_efields(cls
):
263 for elfclass
in [32, 64]:
264 cls
._efields
[elfclass
] = []
267 newf
= (f
[0], cls
._preproc
_structspec
(elfclass
, f
[1])) + f
[2:]
268 cls
._efields
[elfclass
].append(newf
)
269 size
+= struct
.calcsize(newf
[1])
270 cls
._esize
[elfclass
] = size
272 def __init__(self
, dataptr
, parent
= None, replace
= None):
273 if not hasattr(self
.__class
__, '_efields'):
274 self
._setup
_efields
()
278 self
._parent
= parent
279 self
.symname
= dataptr
.symname
280 if isinstance(dataptr
, ELFNull
) or isinstance(dataptr
, ELFUnresolved
):
284 self
._elfsect
= dataptr
._dstsect
285 self
.elfclass
= self
._elfsect
._elffile
.elfclass
286 self
.offset
= dataptr
._dstoffs
288 pspecl
= [f
[1] for f
in self
._efields
[self
.elfclass
]]
290 # need to correlate output from struct.unpack with extra metadata
291 # about the particular fields, so note down byte offsets (in locs)
292 # and tuple indices of pointers (in ptrs)
297 for idx
, spec
in enumerate(pspecl
):
300 spec
= self
._elfsect
.ptrtype
302 locs
[idx
] = struct
.calcsize(pspec
)
305 self
._total
_size
= struct
.calcsize(pspec
)
308 idx
, val
= v
[0], v
[1]
311 return self
._elfsect
.pointer(self
.offset
+ locs
[idx
])
313 data
= dataptr
.get_data(struct
.calcsize(pspec
))
314 unpacked
= struct
.unpack(self
._elfsect
.endian
+ pspec
, data
)
315 unpacked
= list(map(replace_ptrs
, enumerate(unpacked
)))
316 self
._fraw
= unpacked
317 self
._fdata
= OrderedDict()
318 replace
= replace
or {}
320 for i
, item
in enumerate(unpacked
):
321 name
= self
.fields
[i
][0]
326 self
._fdata
[name
] = replace
[name
]
329 if isinstance(self
.fields
[i
][1], type) and issubclass(self
.fields
[i
][1], ELFDissectData
):
330 dataobj
= self
.fields
[i
][1](dataptr
.offset(locs
[i
]), self
)
331 self
._fdata
[name
] = dataobj
333 if len(self
.fields
[i
]) == 3:
334 if self
.fields
[i
][2] == str:
335 self
._fdata
[name
] = item
.get_string()
337 elif self
.fields
[i
][2] is None:
339 elif issubclass(self
.fields
[i
][2], ELFDissectData
):
340 cls
= self
.fields
[i
][2]
341 dataobj
= self
.Pointer(cls
, item
)
342 self
._fdata
[name
] = dataobj
345 self
._fdata
[name
] = item
def __getattr__(self, attrname):
    """
    Resolve decoded struct fields as attributes.

    Python only calls this when normal attribute lookup has failed.  The
    name is looked up in the decoded field table ``_fdata``.  A field held
    as a lazy ``Pointer`` wrapper is called once, and the result is stored
    back into ``_fdata`` so later accesses return it directly.

    :param attrname: name of the attribute / struct field being accessed
    :returns: the stored field value, or the object produced by calling
        the ``Pointer`` wrapper
    :raises AttributeError: if ``_fdata`` has not been set on this object
        yet, or ``attrname`` is not one of its keys
    """
    # Fetch _fdata without re-entering __getattr__: a plain ``self._fdata``
    # on an object where it is not set would call this method again and
    # recurse until RecursionError instead of reporting the missing
    # attribute.
    try:
        fdata = object.__getattribute__(self, '_fdata')
    except AttributeError:
        raise AttributeError(attrname) from None

    if attrname not in fdata:
        raise AttributeError(attrname)

    value = fdata[attrname]
    if isinstance(value, self.Pointer):
        # replace the wrapper by what it produces so it is only called once
        value = value()
        fdata[attrname] = value
    return value
355 if not isinstance(self
._data
, ELFData
):
356 return '<%s: %r>' % (self
.__class
__.__name
__, self
._data
)
357 return '<%s: %s>' % (self
.__class
__.__name
__,
358 ', '.join(['%s=%r' % t
for t
in self
._fdata
.items()]))
361 def calcsize(cls
, elfclass
):
363 Sum up byte size of this struct
365 Wraps struct.calcsize with some extra features.
367 if not hasattr(cls
, '_efields'):
370 pspec
= ''.join([f
[1] for f
in cls
._efields
[elfclass
]])
372 ptrtype
= 'I' if elfclass
== 32 else 'Q'
373 pspec
= pspec
.replace('P', ptrtype
)
375 return struct
.calcsize(pspec
)
377 class ELFDissectUnion(ELFDissectData
):
379 Decode multiple structs in the same place.
381 Not currently used (and hence not tested.) Worked at some point but not
382 needed anymore and may be borked now. Remove this comment when using.
384 def __init__(self
, dataptr
, parent
= None):
385 self
._dataptr
= dataptr
386 self
._parent
= parent
388 for name
, membercls
in self
.__class
__.members
:
389 item
= membercls(dataptr
, parent
)
390 self
.members
.append(item
)
391 setattr(self
, name
, item
)
394 return '<%s: %s>' % (self
.__class
__.__name
__, ', '.join([repr(i
) for i
in self
.members
]))
def calcsize(cls, elfclass):
    """
    Byte size of the union: the size of its largest member.

    :param elfclass: value passed through to each member's ``calcsize``
    :returns: maximum of the members' ``calcsize(elfclass)`` results
    """
    sizes = [membercls.calcsize(elfclass) for _name, membercls in cls.members]
    return max(sizes)
401 # wrappers for spans of ELF data
404 class ELFSubset(object):
406 Common abstract base for section-level and file-level access.
412 self
._pointers
= WeakValueDictionary()
415 return hash(self
.name
)
417 def __getitem__(self
, k
):
421 Subscript **must** be a slice; a simple index will not return a byte
422 but rather throw an exception. Valid slice syntaxes are defined by
425 - `this[123:456]` - extract specific range
426 - `this[123:str]` - extract until null byte. The slice stop value is
427 the `str` type (or, technically, `unicode`.)
def getreloc(self, offset):
    """
    Look up the relocation record at ``offset``.

    Delegates to the ``getreloc`` method of the wrapped ``_obj``.

    :param offset: offset to check for a relocation record
    :returns: whatever the wrapped object's ``getreloc`` returns
    """
    backend = self._obj
    return backend.getreloc(offset)
437 def iter_data(self
, scls
, slice_
= slice(None)):
439 Assume an array of structs present at a particular slice and decode
441 :param scls: ELFDissectData subclass for the struct
442 :param slice_: optional range specification
444 size
= scls
.calcsize(self
._elffile
.elfclass
)
446 offset
= slice_
.start
or 0
447 stop
= slice_
.stop
or self
._obj
.len
449 stop
= self
._obj
.len - stop
452 yield scls(ELFData(self
, offset
, size
))
455 def pointer(self
, offset
):
457 Try to dereference a pointer value
459 This checks whether there's a relocation at the given offset and
460 uses that; otherwise (e.g. in a non-PIE executable where the pointer
461 is already resolved by the linker) the data at the location is used.
463 :param offset: byte offset from beginning of section,
464 or virtual address in file
465 :returns: ELFData wrapping pointed-to object
468 ptrsize
= struct
.calcsize(self
.ptrtype
)
469 data
= struct
.unpack(self
.endian
+ self
.ptrtype
, self
[offset
:offset
+ ptrsize
])[0]
471 reloc
= self
.getreloc(offset
)
474 # section won't be available in whole-file operation
475 dstsect
= reloc
.getsection(data
)
476 addend
= reloc
.r_addend
479 # old-style ELF REL instead of RELA, not well-tested
482 if reloc
.unresolved
and reloc
.symvalid
:
483 return ELFUnresolved(reloc
.symname
, addend
)
485 data
= addend
+ reloc
.st_value
489 # 0 could technically be a valid pointer for a shared library,
490 # since libraries may use 0 as default virtual start address (it'll
491 # be adjusted on loading)
492 # That said, if the library starts at 0, that's where the ELF header
493 # would be so it's still an invalid pointer.
494 if data
== 0 and dstsect
== None:
497 # wrap_data is different between file & section
498 return self
._wrap
_data
(data
, dstsect
)
500 class ELFDissectSection(ELFSubset
):
502 Access the contents of an ELF section like ``.text`` or ``.data``
504 :param elfwrap: ELFDissectFile wrapper for the file
505 :param idx: section index in section header table
506 :param section: section object from C module
509 def __init__(self
, elfwrap
, idx
, section
):
512 self
._elfwrap
= elfwrap
513 self
._elffile
= elfwrap
._elffile
515 self
._section
= self
._obj
= section
516 self
.name
= section
.name
517 self
.ptrtype
= elfwrap
.ptrtype
518 self
.endian
= elfwrap
.endian
520 def _wrap_data(self
, data
, dstsect
):
522 dstsect
= self
._elfwrap
._elffile
.get_section_addr(data
)
523 offs
= data
- dstsect
.sh_addr
524 dstsect
= self
._elfwrap
.get_section(dstsect
.idx
)
525 return ELFData(dstsect
, offs
, None)
527 class ELFDissectFile(ELFSubset
):
529 Access the contents of an ELF file.
531 Note that offsets for array subscript and relocation/pointer access are
532 based on the file's virtual address space and are NOT offsets to the
533 start of the file on disk!
535 (Shared libraries frequently have a virtual address space starting at 0,
536 but non-PIE executables have an architecture specific default loading
537 address like 0x400000 on x86.
539 :param filename: ELF file to open
542 def __init__(self
, filename
):
546 self
._elffile
= self
._obj
= ELFFile(filename
)
549 self
.ptrtype
= 'I' if self
._elffile
.elfclass
== 32 else 'Q'
550 self
.endian
= '>' if self
._elffile
.bigendian
else '<'
def _wrap_data(self, data, dstsect):
    """
    Wrap a pointer value as ELFData relative to this whole-file object.

    :param data: value used directly as the offset into this object
    :param dstsect: accepted for signature compatibility; not used here
    :returns: ELFData on this object at ``data``, with unknown length
    """
    return ELFData(dstsect=self, dstoffs=data, dstlen=None)
559 def get_section(self
, secname
):
561 Look up section by name or index
563 if isinstance(secname
, int):
565 section
= self
._elffile
.get_section_idx(secname
)
567 section
= self
._elffile
.get_section(secname
)
574 if sh_idx
not in self
._sections
:
575 self
._sections
[sh_idx
] = ELFDissectSection(self
, sh_idx
, section
)
577 return self
._sections
[sh_idx
]