]>
git.proxmox.com Git - mirror_frr.git/blob - python/clippy/elf.py
3 # Copyright (C) 2020 David Lamparter for NetDEF, Inc.
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the Free
7 # Software Foundation; either version 2 of the License, or (at your option)
10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 # You should have received a copy of the GNU General Public License along
16 # with this program; see the file COPYING; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 Wrapping layer and additional utility around _clippy.ELFFile.
22 Essentially, the C bits have the low-level ELF access bits that should be
23 fast while this has the bits that string everything together (and would've
24 been a PITA to do in C.)
26 Surprisingly - or maybe through proper engineering - this actually works
27 across architecture, word size and even endianness boundaries. Both the C
28 module (through GElf_*) and this code (cf. struct.unpack format mangling
29 in ELFDissectStruct) will take appropriate measures to flip and resize
34 from collections
import OrderedDict
35 from weakref
import WeakValueDictionary
37 from _clippy
import ELFFile
, ELFAccessError
43 class ELFNull(object):
45 NULL pointer, returned instead of ELFData
60 class ELFUnresolved(object):
62 Reference to an unresolved external symbol, returned instead of ELFData
64 :param symname: name of the referenced symbol
65 :param addend: offset added to the symbol, normally zero
67 def __init__(self
, symname
, addend
):
69 self
.symname
= symname
73 return '<unresolved: %s+%d>' % (self
.symname
, self
.addend
)
76 return hash((self
.symname
, self
.addend
))
78 class ELFData(object):
80 Actual data somewhere in the ELF file.
82 :type dstsect: ELFSubset
83 :param dstsect: container data area (section or entire file)
84 :param dstoffs: byte offset into dstsect
85 :param dstlen: byte size of object, or None if unknown, open-ended or string
87 def __init__(self
, dstsect
, dstoffs
, dstlen
):
88 self
._dstsect
= dstsect
89 self
._dstoffs
= dstoffs
94 return '<ptr: %s+0x%05x/%d>' % (self
._dstsect
.name
, self
._dstoffs
, self
._dstlen
or -1)
97 return hash((self
._dstsect
, self
._dstoffs
))
101 Interpret as C string / null terminated UTF-8 and get the actual text.
104 return self
._dstsect
[self
._dstoffs
:str].decode('UTF-8')
106 import pdb
; pdb
.set_trace()
def get_data(self, reflen):
    '''
    Fetch the raw bytes of this object, treating it as a fixed-size struct.

    :param reflen: number of bytes the caller expects the object to occupy.
        If this object's own size is known (rare; mostly when a symbol was
        accessed directly), it has to be equal to ``reflen``.
    :returns: the slice ``[offset, offset + reflen)`` of the containing
        data area
    :raises ValueError: the recorded size is known and differs from
        ``reflen``
    '''
    known_len = self._dstlen
    size_is_consistent = known_len is None or known_len == reflen
    if not size_is_consistent:
        raise ValueError('symbol size mismatch (got %d, expected %d)' % (known_len, reflen))

    begin = self._dstoffs
    return self._dstsect[begin:begin + reflen]
121 def offset(self
, offs
, within_symbol
=False):
123 Get another ELFData at an offset
125 :param offs: byte offset, can be negative (e.g. in container_of)
126 :param within_symbol: retain length information
128 if self
._dstlen
is None or not within_symbol
:
129 return ELFData(self
._dstsect
, self
._dstoffs
+ offs
, None)
131 return ELFData(self
._dstsect
, self
._dstoffs
+ offs
, self
._dstlen
- offs
)
134 # dissection data items
137 class ELFDissectData(object):
139 Common bits for ELFDissectStruct and ELFDissectUnion
144 Used for boolean evaluation, e.g. "if struct: ..."
146 return not (isinstance(self
._data
, ELFNull
) or isinstance(self
._data
, ELFUnresolved
))
148 def container_of(self
, parent
, fieldname
):
150 Assume this struct is embedded in a larger struct and get at the larger
152 Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
154 :param parent: class (not instance) of the larger struct
155 :param fieldname: fieldname that refers back to this
156 :returns: instance of parent, with fieldname set to this object
159 if not hasattr(parent
, '_efields'):
160 parent
._setup
_efields
()
162 for field
in parent
._efields
[self
.elfclass
]:
163 if field
[0] == fieldname
:
165 offset
+= struct
.calcsize(field
[1])
167 raise AttributeError('%r not found in %r.fields' % (fieldname
, parent
))
169 return parent(self
._data
.offset(-offset
), replace
= {fieldname
: self
})
171 class ELFDissectStruct(ELFDissectData
):
173 Decode and provide access to a struct somewhere in the ELF file
175 Handles pointers and strings somewhat nicely. Create a subclass for each
176 struct that is to be accessed, and give a field list in a "fields"
179 :param dataptr: ELFData referring to the data bits to decode.
180 :param parent: where this was instantiated from; only for reference, has
181 no functional impact.
182 :param replace: substitute data values for specific fields. Used by
183 `container_of` to replace the inner struct when creating the outer
186 .. attribute:: fields
188 List of tuples describing the struct members. Items can be:
189 - ``('name', ELFDissectData)`` - directly embed another struct
190 - ``('name', 'I')`` - simple data types; second item for struct.unpack
191 - ``('name', 'I', None)`` - field to ignore
192 - ``('name', 'P', str)`` - pointer to string
193 - ``('name', 'P', ELFDissectData)`` - pointer to another struct
195 ``P`` is added as unpack format for pointers (sized appropriately for
198 Refer to tiabwarfo.py for extracting this from ``pahole``.
200 TBD: replace tuples with a class.
202 .. attribute:: fieldrename
204 Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
207 class Pointer(object):
209 Quick wrapper for pointers to further structs
211 This is just here to avoid going into infinite loops when loading
212 structs that have pointers to each other (e.g. struct xref <-->
213 struct xrefdata.) The pointer destination is only instantiated when
216 def __init__(self
, cls
, ptr
):
221 return '<Pointer:%s %r>' % (self
.cls
.__name
__, self
.ptr
)
224 if isinstance(self
.ptr
, ELFNull
):
226 return self
.cls(self
.ptr
)
228 def __new__(cls
, dataptr
, parent
= None, replace
= None):
229 if dataptr
._dstsect
is None:
230 return super().__new
__(cls
)
232 obj
= dataptr
._dstsect
._pointers
.get((cls
, dataptr
))
235 obj
= super().__new
__(cls
)
236 dataptr
._dstsect
._pointers
[(cls
, dataptr
)] = obj
239 replacements
= 'lLnN'
242 def _preproc_structspec(cls
, elfclass
, spec
):
245 if hasattr(spec
, 'calcsize'):
246 spec
= '%ds' % (spec
.calcsize(elfclass
),)
252 for c
in cls
.replacements
:
253 spec
= spec
.replace(c
, repl
[int(c
.isupper())])
257 def _setup_efields(cls
):
260 for elfclass
in [32, 64]:
261 cls
._efields
[elfclass
] = []
264 newf
= (f
[0], cls
._preproc
_structspec
(elfclass
, f
[1])) + f
[2:]
265 cls
._efields
[elfclass
].append(newf
)
266 size
+= struct
.calcsize(newf
[1])
267 cls
._esize
[elfclass
] = size
269 def __init__(self
, dataptr
, parent
= None, replace
= None):
270 if not hasattr(self
.__class
__, '_efields'):
271 self
._setup
_efields
()
275 self
._parent
= parent
276 self
.symname
= dataptr
.symname
277 if isinstance(dataptr
, ELFNull
) or isinstance(dataptr
, ELFUnresolved
):
281 self
._elfsect
= dataptr
._dstsect
282 self
.elfclass
= self
._elfsect
._elffile
.elfclass
283 self
.offset
= dataptr
._dstoffs
285 pspecl
= [f
[1] for f
in self
._efields
[self
.elfclass
]]
287 # need to correlate output from struct.unpack with extra metadata
288 # about the particular fields, so note down byte offsets (in locs)
289 # and tuple indices of pointers (in ptrs)
294 for idx
, spec
in enumerate(pspecl
):
297 spec
= self
._elfsect
.ptrtype
299 locs
[idx
] = struct
.calcsize(pspec
)
302 self
._total
_size
= struct
.calcsize(pspec
)
305 idx
, val
= v
[0], v
[1]
308 return self
._elfsect
.pointer(self
.offset
+ locs
[idx
])
310 data
= dataptr
.get_data(struct
.calcsize(pspec
))
311 unpacked
= struct
.unpack(self
._elfsect
.endian
+ pspec
, data
)
312 unpacked
= list(map(replace_ptrs
, enumerate(unpacked
)))
313 self
._fraw
= unpacked
314 self
._fdata
= OrderedDict()
315 replace
= replace
or {}
317 for i
, item
in enumerate(unpacked
):
318 name
= self
.fields
[i
][0]
323 self
._fdata
[name
] = replace
[name
]
326 if isinstance(self
.fields
[i
][1], type) and issubclass(self
.fields
[i
][1], ELFDissectData
):
327 dataobj
= self
.fields
[i
][1](dataptr
.offset(locs
[i
]), self
)
328 self
._fdata
[name
] = dataobj
330 if len(self
.fields
[i
]) == 3:
331 if self
.fields
[i
][2] == str:
332 self
._fdata
[name
] = item
.get_string()
334 elif self
.fields
[i
][2] is None:
336 elif issubclass(self
.fields
[i
][2], ELFDissectData
):
337 cls
= self
.fields
[i
][2]
338 dataobj
= self
.Pointer(cls
, item
)
339 self
._fdata
[name
] = dataobj
342 self
._fdata
[name
] = item
344 def __getattr__(self
, attrname
):
345 if attrname
not in self
._fdata
:
346 raise AttributeError(attrname
)
347 if isinstance(self
._fdata
[attrname
], self
.Pointer
):
348 self
._fdata
[attrname
] = self
._fdata
[attrname
]()
349 return self
._fdata
[attrname
]
352 if not isinstance(self
._data
, ELFData
):
353 return '<%s: %r>' % (self
.__class
__.__name
__, self
._data
)
354 return '<%s: %s>' % (self
.__class
__.__name
__,
355 ', '.join(['%s=%r' % t
for t
in self
._fdata
.items()]))
358 def calcsize(cls
, elfclass
):
360 Sum up byte size of this struct
362 Wraps struct.calcsize with some extra features.
364 if not hasattr(cls
, '_efields'):
367 pspec
= ''.join([f
[1] for f
in cls
._efields
[elfclass
]])
369 ptrtype
= 'I' if elfclass
== 32 else 'Q'
370 pspec
= pspec
.replace('P', ptrtype
)
372 return struct
.calcsize(pspec
)
374 class ELFDissectUnion(ELFDissectData
):
376 Decode multiple structs in the same place.
378 Not currently used (and hence not tested.) Worked at some point but not
379 needed anymore and may be borked now. Remove this comment when using.
381 def __init__(self
, dataptr
, parent
= None):
382 self
._dataptr
= dataptr
383 self
._parent
= parent
385 for name
, membercls
in self
.__class
__.members
:
386 item
= membercls(dataptr
, parent
)
387 self
.members
.append(item
)
388 setattr(self
, name
, item
)
391 return '<%s: %s>' % (self
.__class
__.__name
__, ', '.join([repr(i
) for i
in self
.members
]))
def calcsize(cls, elfclass):
    '''
    Byte size of the union, i.e. the size of its largest member.

    :param elfclass: ELF class, handed through unchanged to each member's
        own ``calcsize``
    :returns: largest size reported by any entry of ``cls.members``
    '''
    member_sizes = [membercls.calcsize(elfclass) for _name, membercls in cls.members]
    return max(member_sizes)
398 # wrappers for spans of ELF data
401 class ELFSubset(object):
403 Common abstract base for section-level and file-level access.
409 self
._pointers
= WeakValueDictionary()
412 return hash(self
.name
)
414 def __getitem__(self
, k
):
418 Subscript **must** be a slice; a simple index will not return a byte
419 but rather throw an exception. Valid slice syntaxes are defined by
422 - `this[123:456]` - extract specific range
423 - `this[123:str]` - extract until null byte. The slice stop value is
424 the `str` type (or, technically, `unicode`.)
def getreloc(self, offset):
    '''
    Look up the relocation record, if any, at the given offset.

    Pure delegation to the wrapped low-level object's ``getreloc``.

    :param offset: location to check for a relocation
    :returns: whatever the wrapped object reports for that offset
    '''
    backend = self._obj
    return backend.getreloc(offset)
434 def iter_data(self
, scls
, slice_
= slice(None)):
436 Assume an array of structs present at a particular slice and decode
438 :param scls: ELFDissectData subclass for the struct
439 :param slice_: optional range specification
441 size
= scls
.calcsize(self
._elffile
.elfclass
)
443 offset
= slice_
.start
or 0
444 stop
= slice_
.stop
or self
._obj
.len
446 stop
= self
._obj
.len - stop
449 yield scls(ELFData(self
, offset
, size
))
452 def pointer(self
, offset
):
454 Try to dereference a pointer value
456 This checks whether there's a relocation at the given offset and
457 uses that; otherwise (e.g. in a non-PIE executable where the pointer
458 is already resolved by the linker) the data at the location is used.
460 :param offset: byte offset from beginning of section,
461 or virtual address in file
462 :returns: ELFData wrapping pointed-to object
465 ptrsize
= struct
.calcsize(self
.ptrtype
)
466 data
= struct
.unpack(self
.endian
+ self
.ptrtype
, self
[offset
:offset
+ ptrsize
])[0]
468 reloc
= self
.getreloc(offset
)
471 # section won't be available in whole-file operation
472 dstsect
= reloc
.getsection(data
)
473 addend
= reloc
.r_addend
476 # old-style ELF REL instead of RELA, not well-tested
479 if reloc
.unresolved
and reloc
.symvalid
:
480 return ELFUnresolved(reloc
.symname
, addend
)
482 data
= addend
+ reloc
.st_value
486 # 0 could technically be a valid pointer for a shared library,
487 # since libraries may use 0 as default virtual start address (it'll
488 # be adjusted on loading)
489 # That said, if the library starts at 0, that's where the ELF header
490 # would be so it's still an invalid pointer.
491 if data
== 0 and dstsect
== None:
494 # wrap_data is different between file & section
495 return self
._wrap
_data
(data
, dstsect
)
497 class ELFDissectSection(ELFSubset
):
499 Access the contents of an ELF section like ``.text`` or ``.data``
501 :param elfwrap: ELFDissectFile wrapper for the file
502 :param idx: section index in section header table
503 :param section: section object from C module
506 def __init__(self
, elfwrap
, idx
, section
):
509 self
._elfwrap
= elfwrap
510 self
._elffile
= elfwrap
._elffile
512 self
._section
= self
._obj
= section
513 self
.name
= section
.name
514 self
.ptrtype
= elfwrap
.ptrtype
515 self
.endian
= elfwrap
.endian
517 def _wrap_data(self
, data
, dstsect
):
519 dstsect
= self
._elfwrap
._elffile
.get_section_addr(data
)
520 offs
= data
- dstsect
.sh_addr
521 dstsect
= self
._elfwrap
.get_section(dstsect
.idx
)
522 return ELFData(dstsect
, offs
, None)
524 class ELFDissectFile(ELFSubset
):
526 Access the contents of an ELF file.
528 Note that offsets for array subscript and relocation/pointer access are
529 based on the file's virtual address space and are NOT offsets to the
530 start of the file on disk!
532 (Shared libraries frequently have a virtual address space starting at 0,
533 but non-PIE executables have an architecture specific default loading
534 address like 0x400000 on x86.
536 :param filename: ELF file to open
539 def __init__(self
, filename
):
543 self
._elffile
= self
._obj
= ELFFile(filename
)
546 self
.ptrtype
= 'I' if self
._elffile
.elfclass
== 32 else 'Q'
547 self
.endian
= '>' if self
._elffile
.bigendian
else '<'
def _wrap_data(self, data, dstsect):
    '''
    Wrap a pointer value as ELFData relative to the whole file.

    :param data: pointer value; used as-is as the offset into this file
        object
    :param dstsect: not used by the whole-file variant
    :returns: ELFData contained in this file object, without length
        information
    '''
    return ELFData(dstsect=self, dstoffs=data, dstlen=None)
556 def get_section(self
, secname
):
558 Look up section by name or index
560 if isinstance(secname
, int):
562 section
= self
._elffile
.get_section_idx(secname
)
564 section
= self
._elffile
.get_section(secname
)
571 if sh_idx
not in self
._sections
:
572 self
._sections
[sh_idx
] = ELFDissectSection(self
, sh_idx
, section
)
574 return self
._sections
[sh_idx
]