]> git.proxmox.com Git - mirror_frr.git/blame - python/clippy/elf.py
Merge pull request #8389 from idryzhov/route-map-optimization-nb
[mirror_frr.git] / python / clippy / elf.py
CommitLineData
36a8fdfd
DL
# FRR libelf wrapper
#
# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; see the file COPYING; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
'''
Wrapping layer and additional utility around _clippy.ELFFile.

Essentially, the C bits have the low-level ELF access bits that should be
fast while this has the bits that string everything together (and would've
been a PITA to do in C.)

Surprisingly - or maybe through proper engineering - this actually works
across architecture, word size and even endianness boundaries.  Both the C
module (through GElf_*) and this code (cf. struct.unpack format mangling
in ELFDissectStruct) will take appropriate measures to flip and resize
fields as needed.
'''
32
33import struct
34from collections import OrderedDict
35from weakref import WeakValueDictionary
36
37from _clippy import ELFFile, ELFAccessError
38
#
# data access
#
42
class ELFNull(object):
    '''
    NULL pointer, returned instead of ELFData

    Exposes ``symname`` and ``_dstsect`` (both always ``None``) so that code
    handling pointer results can read these attributes without checking
    which kind of pointer object it was given.
    '''
    def __init__(self):
        self.symname = None
        self._dstsect = None

    def __repr__(self):
        return '<ptr: NULL>'

    def __hash__(self):
        # all NULL pointers share one hash value
        return hash(None)

    def get_string(self):
        '''
        There is no string behind a NULL pointer; always returns ``None``.
        '''
        return None
59
class ELFUnresolved(object):
    '''
    Reference to an unresolved external symbol, returned instead of ELFData

    :param symname: name of the referenced symbol
    :param addend: offset added to the symbol, normally zero
    '''
    def __init__(self, symname, addend):
        self.addend = addend
        self.symname = symname
        # no section to point into; kept so the attribute can be read
        # uniformly across ELFNull / ELFUnresolved / ELFData
        self._dstsect = None

    def __repr__(self):
        return '<unresolved: %s+%d>' % (self.symname, self.addend)

    def __hash__(self):
        return hash((self.symname, self.addend))
77
class ELFData(object):
    '''
    Actual data somewhere in the ELF file.

    :type dstsect: ELFSubset
    :param dstsect: container data area (section or entire file)
    :param dstoffs: byte offset into dstsect
    :param dstlen: byte size of object, or None if unknown, open-ended or string
    '''
    def __init__(self, dstsect, dstoffs, dstlen):
        self._dstsect = dstsect
        self._dstoffs = dstoffs
        self._dstlen = dstlen
        self.symname = None

    def __repr__(self):
        # an unknown (None) length is printed as -1
        return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, self._dstlen or -1)

    def __hash__(self):
        return hash((self._dstsect, self._dstoffs))

    def get_string(self):
        '''
        Interpret as C string / null terminated UTF-8 and get the actual text.

        Errors from the section read or from UTF-8 decoding are passed on to
        the caller.

        :returns: decoded text
        '''
        # a slice whose stop is the ``str`` type asks the container for
        # everything up to the next null byte
        return self._dstsect[self._dstoffs:str].decode('UTF-8')

    def get_data(self, reflen):
        '''
        Interpret as some structure (and check vs. expected length)

        :param reflen: expected size of the object, compared against actual
            size (which is only known in rare cases, mostly when directly
            accessing a symbol since symbols have their destination object
            size recorded)
        :returns: ``reflen`` bytes read from the container at this offset
        :raises ValueError: if the size is known and differs from ``reflen``
        '''
        if self._dstlen is not None and self._dstlen != reflen:
            raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen))
        return self._dstsect[self._dstoffs:self._dstoffs+reflen]

    def offset(self, offs, within_symbol=False):
        '''
        Get another ELFData at an offset

        :param offs: byte offset, can be negative (e.g. in container_of)
        :param within_symbol: retain length information
        :returns: new ELFData in the same container; its length is the
            remaining length if known and ``within_symbol`` is set, otherwise
            None
        '''
        if self._dstlen is None or not within_symbol:
            return ELFData(self._dstsect, self._dstoffs + offs, None)
        return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
132
#
# dissection data items
#
136
class ELFDissectData(object):
    '''
    Common bits for ELFDissectStruct and ELFDissectUnion

    The methods here read ``self._data`` (the pointer object the item was
    created from) and, for `container_of`, ``self.elfclass``; both are
    expected to be set by the subclass.
    '''

    def __len__(self):
        '''
        Used for boolean evaluation, e.g. "if struct: ..."

        False if the underlying pointer is NULL or an unresolved symbol.
        '''
        return not isinstance(self._data, (ELFNull, ELFUnresolved))

    def container_of(self, parent, fieldname):
        '''
        Assume this struct is embedded in a larger struct and get at the larger

        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``

        :param parent: class (not instance) of the larger struct
        :param fieldname: fieldname that refers back to this
        :returns: instance of parent, with fieldname set to this object
        :raises AttributeError: if parent has no field called fieldname
        '''
        if not hasattr(parent, '_efields'):
            parent._setup_efields()

        # pointers are sized for the ELF file, not for the host
        ptrspec = 'I' if self.elfclass == 32 else 'Q'

        # add up the sizes of all fields in front of the requested one
        offset = 0
        for field in parent._efields[self.elfclass]:
            if field[0] == fieldname:
                break
            spec = field[1]
            if spec == 'P':
                spec = ptrspec
            offset += struct.calcsize(spec)
        else:
            # loop ran to the end without finding the field
            raise AttributeError('%r not found in %r.fields' % (fieldname, parent))

        return parent(self._data.offset(-offset), replace = {fieldname: self})
173
class ELFDissectStruct(ELFDissectData):
    '''
    Decode and provide access to a struct somewhere in the ELF file

    Handles pointers and strings somewhat nicely.  Create a subclass for each
    struct that is to be accessed, and give a field list in a "fields"
    class-member.

    :param dataptr: ELFData referring to the data bits to decode.
    :param parent: where this was instantiated from; only for reference, has
        no functional impact.
    :param replace: substitute data values for specific fields.  Used by
        `container_of` to replace the inner struct when creating the outer
        one.

    .. attribute:: fields

        List of tuples describing the struct members.  Items can be:
        - ``('name', ELFDissectData)`` - directly embed another struct
        - ``('name', 'I')`` - simple data types; second item for struct.unpack
        - ``('name', 'I', None)`` - field to ignore
        - ``('name', 'P', str)`` - pointer to string
        - ``('name', 'P', ELFDissectData)`` - pointer to another struct

        ``P`` is added as unpack format for pointers (sized appropriately for
        the ELF file.)

        Refer to tiabwarfo.py for extracting this from ``pahole``.

        TBD: replace tuples with a class.

    .. attribute:: fieldrename

        Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
    '''

    class Pointer(object):
        '''
        Quick wrapper for pointers to further structs

        This is just here to avoid going into infinite loops when loading
        structs that have pointers to each other (e.g. struct xref <-->
        struct xrefdata.)  The pointer destination is only instantiated when
        actually accessed.
        '''
        def __init__(self, cls, ptr):
            self.cls = cls
            self.ptr = ptr

        def __repr__(self):
            return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr)

        def __call__(self):
            '''
            Instantiate the pointed-to struct, or return None for NULL.
            '''
            if isinstance(self.ptr, ELFNull):
                return None
            return self.cls(self.ptr)

    def __new__(cls, dataptr, parent = None, replace = None):
        # NULL / unresolved pointers have no section to keep a cache on
        if dataptr._dstsect is None:
            return super().__new__(cls)

        # look for an existing object registered under (cls, dataptr) on the
        # destination section.  Must compare against None explicitly; these
        # objects define __len__ and can therefore be falsy.
        obj = dataptr._dstsect._pointers.get((cls, dataptr))
        if obj is not None:
            return obj
        obj = super().__new__(cls)
        dataptr._dstsect._pointers[(cls, dataptr)] = obj
        return obj

    # format characters whose width follows the ELF class (32 vs. 64 bit)
    replacements = 'lLnN'

    @classmethod
    def _preproc_structspec(cls, elfclass, spec):
        '''
        Turn one entry of ``fields`` into a fixed-size struct format.

        :param elfclass: 32 or 64
        :param spec: struct format string, or something with a
            ``calcsize(elfclass)`` method (an embedded struct/union), which
            is turned into an opaque ``<size>s`` byte blob
        :returns: format string with the characters in ``replacements``
            mapped to ``i``/``I`` (32 bit) or ``q``/``Q`` (64 bit); ``P`` is
            left in place
        '''
        if hasattr(spec, 'calcsize'):
            spec = '%ds' % (spec.calcsize(elfclass),)

        # index 0 = signed (lowercase), index 1 = unsigned (uppercase)
        repl = ['i', 'I'] if elfclass == 32 else ['q', 'Q']
        for c in cls.replacements:
            spec = spec.replace(c, repl[int(c.isupper())])
        return spec

    @classmethod
    def _setup_efields(cls):
        '''
        Precompute per-ELF-class field lists (``_efields``) and summed field
        sizes (``_esize``) from ``fields``.
        '''
        cls._efields = {}
        cls._esize = {}
        for elfclass in [32, 64]:
            # struct's native 'P' has the host's pointer size; the ELF file
            # may differ, so size pointers by ELF class as calcsize() does
            ptrtype = 'I' if elfclass == 32 else 'Q'

            cls._efields[elfclass] = []
            size = 0
            for f in cls.fields:
                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
                cls._efields[elfclass].append(newf)
                size += struct.calcsize(newf[1].replace('P', ptrtype))
            cls._esize[elfclass] = size

    def __init__(self, dataptr, parent = None, replace = None):
        if not hasattr(self.__class__, '_efields'):
            self._setup_efields()

        self._fdata = None
        self._data = dataptr
        self._parent = parent
        self.symname = dataptr.symname
        if isinstance(dataptr, (ELFNull, ELFUnresolved)):
            # nothing to decode; leave the field dictionary empty
            self._fdata = {}
            return

        self._elfsect = dataptr._dstsect
        self.elfclass = self._elfsect._elffile.elfclass
        self.offset = dataptr._dstoffs

        pspecl = [f[1] for f in self._efields[self.elfclass]]

        # need to correlate output from struct.unpack with extra metadata
        # about the particular fields, so note down byte offsets (in locs)
        # and tuple indices of pointers (in ptrs)
        pspec = ''
        locs = {}
        ptrs = set()

        for idx, spec in enumerate(pspecl):
            if spec == 'P':
                ptrs.add(idx)
                spec = self._elfsect.ptrtype

            locs[idx] = struct.calcsize(pspec)
            pspec = pspec + spec

        self._total_size = struct.calcsize(pspec)

        data = dataptr.get_data(self._total_size)
        raw = struct.unpack(self._elfsect.endian + pspec, data)
        # pointer fields are looked up through the section (which checks for
        # relocations) instead of using the raw unpacked integer
        unpacked = [
            self._elfsect.pointer(self.offset + locs[idx]) if idx in ptrs else val
            for idx, val in enumerate(raw)
        ]
        self._fraw = unpacked
        self._fdata = OrderedDict()
        replace = replace or {}

        for i, item in enumerate(unpacked):
            field = self.fields[i]
            name = field[0]
            if name is None:
                continue

            if name in replace:
                self._fdata[name] = replace[name]
                continue

            if isinstance(field[1], type) and issubclass(field[1], ELFDissectData):
                # directly embedded struct: decode in place
                self._fdata[name] = field[1](dataptr.offset(locs[i]), self)
                continue

            if len(field) == 3:
                if field[2] is str:
                    self._fdata[name] = item.get_string()
                    continue
                elif field[2] is None:
                    # falls through and stores the plain value
                    pass
                elif issubclass(field[2], ELFDissectData):
                    # pointer to struct: defer decoding until accessed
                    self._fdata[name] = self.Pointer(field[2], item)
                    continue

            self._fdata[name] = item

    def __getattr__(self, attrname):
        '''
        Look up a decoded field by name, resolving struct pointers on first
        access.

        :raises AttributeError: if there is no such decoded field
        '''
        # read _fdata via __dict__: asking for self._fdata before it exists
        # would re-enter __getattr__ and recurse without end
        fdata = self.__dict__.get('_fdata')
        if not fdata or attrname not in fdata:
            raise AttributeError(attrname)
        if isinstance(fdata[attrname], self.Pointer):
            # replace the lazy wrapper with the decoded struct (or None)
            fdata[attrname] = fdata[attrname]()
        return fdata[attrname]

    def __repr__(self):
        if not isinstance(self._data, ELFData):
            return '<%s: %r>' % (self.__class__.__name__, self._data)
        return '<%s: %s>' % (self.__class__.__name__,
                ', '.join(['%s=%r' % t for t in self._fdata.items()]))

    @classmethod
    def calcsize(cls, elfclass):
        '''
        Sum up byte size of this struct

        Wraps struct.calcsize with some extra features.

        :param elfclass: 32 or 64
        :returns: size in bytes, with pointers sized for ``elfclass``
        '''
        if not hasattr(cls, '_efields'):
            cls._setup_efields()

        pspec = ''.join([f[1] for f in cls._efields[elfclass]])

        ptrtype = 'I' if elfclass == 32 else 'Q'
        pspec = pspec.replace('P', ptrtype)

        return struct.calcsize(pspec)
376
class ELFDissectUnion(ELFDissectData):
    '''
    Decode multiple structs in the same place.

    Not currently used (and hence not tested.)  Worked at some point but not
    needed anymore and may be borked now.  Remove this comment when using.

    Subclasses give a ``members`` class attribute, a list of
    ``(name, class)`` pairs; each class is instantiated on the same data
    and made available as attribute ``name``.

    :param dataptr: pointer object handed to every member class
    :param parent: passed through to every member class
    '''
    def __init__(self, dataptr, parent = None):
        self._dataptr = dataptr
        # the inherited __len__ / container_of read self._data
        self._data = dataptr
        self._parent = parent
        # instance list of decoded members; shadows the class-level
        # (name, class) list, which is therefore read via __class__
        self.members = []
        for name, membercls in self.__class__.members:
            item = membercls(dataptr, parent)
            self.members.append(item)
            setattr(self, name, item)

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members]))

    @classmethod
    def calcsize(cls, elfclass):
        '''
        Size of the union: that of its largest member.
        '''
        return max(member.calcsize(elfclass) for name, member in cls.members)
399
#
# wrappers for spans of ELF data
#
403
class ELFSubset(object):
    '''
    Common abstract base for section-level and file-level access.

    Subclasses are expected to provide ``name``, ``_obj`` (object from the C
    module), ``_elffile``, ``ptrtype``, ``endian`` and a
    ``_wrap_data(data, dstsect)`` method.
    '''

    def __init__(self):
        super().__init__()

        # dissected objects registered by ELFDissectStruct.__new__, held
        # weakly so this cache does not keep them alive
        self._pointers = WeakValueDictionary()

    def __hash__(self):
        return hash(self.name)

    def __getitem__(self, k):
        '''
        Read data from slice

        Subscript **must** be a slice; a simple index will not return a byte
        but rather throw an exception.  Valid slice syntaxes are defined by
        the C module:

        - `this[123:456]` - extract specific range
        - `this[123:str]` - extract until null byte.  The slice stop value is
            the `str` type (or, technically, `unicode`.)
        '''
        return self._obj[k]

    def getreloc(self, offset):
        '''
        Check for a relocation record at the specified offset.
        '''
        return self._obj.getreloc(offset)

    def iter_data(self, scls, slice_ = slice(None)):
        '''
        Assume an array of structs present at a particular slice and decode

        :param scls: ELFDissectData subclass for the struct
        :param slice_: optional range specification.  Negative start/stop
            count from the end of the data; a missing (or zero) stop means
            "until the end".  The slice step is not used.
        '''
        size = scls.calcsize(self._elffile.elfclass)
        length = self._obj.len

        offset = slice_.start or 0
        if offset < 0:
            offset += length
        stop = slice_.stop or length
        if stop < 0:
            # negative index counts back from the end
            stop += length

        while offset < stop:
            yield scls(ELFData(self, offset, size))
            offset += size

    def pointer(self, offset):
        '''
        Try to dereference a pointer value

        This checks whether there's a relocation at the given offset and
        uses that; otherwise (e.g. in a non-PIE executable where the pointer
        is already resolved by the linker) the data at the location is used.

        :param offset: byte offset from beginning of section,
            or virtual address in file
        :returns: ELFData wrapping pointed-to object, ELFUnresolved for a
            relocation against an unresolved symbol, or ELFNull
        '''

        ptrsize = struct.calcsize(self.ptrtype)
        data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0]

        reloc = self.getreloc(offset)
        dstsect = None
        if reloc:
            # section won't be available in whole-file operation
            dstsect = reloc.getsection(data)
            addend = reloc.r_addend

            if reloc.relative:
                # old-style ELF REL instead of RELA, not well-tested
                addend += data

            if reloc.unresolved and reloc.symvalid:
                return ELFUnresolved(reloc.symname, addend)
            elif reloc.symvalid:
                data = addend + reloc.st_value
            else:
                data = addend

        # 0 could technically be a valid pointer for a shared library,
        # since libraries may use 0 as default virtual start address (it'll
        # be adjusted on loading)
        # That said, if the library starts at 0, that's where the ELF header
        # would be so it's still an invalid pointer.
        if data == 0 and dstsect is None:
            return ELFNull()

        # wrap_data is different between file & section
        return self._wrap_data(data, dstsect)
499
class ELFDissectSection(ELFSubset):
    '''
    Access the contents of an ELF section like ``.text`` or ``.data``

    :param elfwrap: ELFDissectFile wrapper for the file
    :param idx: section index in section header table
    :param section: section object from C module
    '''

    def __init__(self, elfwrap, idx, section):
        super().__init__()

        self._elfwrap = elfwrap
        self._elffile = elfwrap._elffile
        self._idx = idx
        self._section = self._obj = section
        self.name = section.name
        # pointer format and byte order are per-file; copy from the wrapper
        self.ptrtype = elfwrap.ptrtype
        self.endian = elfwrap.endian

    def _wrap_data(self, data, dstsect):
        '''
        Turn a pointer value into an ELFData relative to its section.

        :param data: pointer value (an address)
        :param dstsect: C-module section the address lies in, or None to
            look it up by address
        :returns: ELFData with the section wrapper and the offset of
            ``data`` from that section's ``sh_addr``
        '''
        if dstsect is None:
            dstsect = self._elfwrap._elffile.get_section_addr(data)
        offs = data - dstsect.sh_addr
        # swap the C-module section for its (cached) Python wrapper
        dstsect = self._elfwrap.get_section(dstsect.idx)
        return ELFData(dstsect, offs, None)
526
class ELFDissectFile(ELFSubset):
    '''
    Access the contents of an ELF file.

    Note that offsets for array subscript and relocation/pointer access are
    based on the file's virtual address space and are NOT offsets to the
    start of the file on disk!

    (Shared libraries frequently have a virtual address space starting at 0,
    but non-PIE executables have an architecture specific default loading
    address like 0x400000 on x86.)

    :param filename: ELF file to open
    '''

    def __init__(self, filename):
        super().__init__()

        self.name = filename
        self._elffile = self._obj = ELFFile(filename)
        # ELFDissectSection wrappers created so far, keyed by section index
        self._sections = {}

        self.ptrtype = 'I' if self._elffile.elfclass == 32 else 'Q'
        self.endian = '>' if self._elffile.bigendian else '<'

    @property
    def _elfwrap(self):
        # the file is its own wrapper; mirrors ELFDissectSection._elfwrap
        return self

    def _wrap_data(self, data, dstsect):
        # whole-file access: the pointer value is used directly as the
        # offset, and dstsect is not used
        return ELFData(self, data, None)

    def get_section(self, secname):
        '''
        Look up section by name or index

        :param secname: section name, or (if an int) section index
        :returns: ELFDissectSection for the section (the same object on
            repeated lookups), or None if the C module returned None
        '''
        if isinstance(secname, int):
            section = self._elffile.get_section_idx(secname)
        else:
            section = self._elffile.get_section(secname)

        if section is None:
            return None

        sh_idx = section.idx

        if sh_idx not in self._sections:
            self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section)

        return self._sections[sh_idx]