]> git.proxmox.com Git - mirror_frr.git/blob - python/clippy/elf.py
Merge pull request #8123 from gromit1811/fix_topotest_ipv6_linklocal
[mirror_frr.git] / python / clippy / elf.py
1 # FRR libelf wrapper
2 #
3 # Copyright (C) 2020 David Lamparter for NetDEF, Inc.
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the Free
7 # Software Foundation; either version 2 of the License, or (at your option)
8 # any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 # more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; see the file COPYING; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
19 '''
20 Wrapping layer and additional utility around _clippy.ELFFile.
21
22 Essentially, the C bits have the low-level ELF access bits that should be
23 fast while this has the bits that string everything together (and would've
24 been a PITA to do in C.)
25
26 Surprisingly - or maybe through proper engineering - this actually works
27 across architecture, word size and even endianness boundaries. Both the C
28 module (through GElf_*) and this code (cf. struct.unpack format mangling
29 in ELFDissectStruct) will take appropriate measures to flip and resize
30 fields as needed.
31 '''
32
33 import struct
34 from collections import OrderedDict
35 from weakref import WeakValueDictionary
36
37 from _clippy import ELFFile, ELFAccessError
38
39 #
40 # data access
41 #
42
class ELFNull(object):
    '''
    Stand-in for a NULL pointer, handed out where an ELFData would
    otherwise be returned.
    '''

    def __init__(self):
        # A NULL pointer has neither a destination section nor a symbol name.
        self._dstsect = None
        self.symname = None

    def __repr__(self):
        return '<ptr: NULL>'

    def __hash__(self):
        # Every NULL pointer hashes like None.
        return hash(None)

    def get_string(self):
        '''
        There is no text behind a NULL pointer.

        :returns: always None
        '''
        return None
59
class ELFUnresolved(object):
    '''
    Reference to an unresolved external symbol, returned instead of ELFData

    :param symname: name of the referenced symbol
    :param addend: offset added to the symbol, normally zero
    '''
    def __init__(self, symname, addend):
        self.addend = addend
        self.symname = symname
        # Unresolved references carry no destination section.
        self._dstsect = None

    def __repr__(self):
        return '<unresolved: %s+%d>' % (self.symname, self.addend)

    def __hash__(self):
        return hash((self.symname, self.addend))

    def get_string(self):
        '''
        Counterpart of ELFData.get_string() / ELFNull.get_string().

        Struct decoding calls ``get_string()`` on whatever a string pointer
        field resolved to, which may be an unresolved reference.  There is
        no data to read for those, so no text can be produced.

        :returns: always None
        '''
        return None
77
class ELFData(object):
    '''
    Actual data somewhere in the ELF file.

    :type dstsect: ELFSubset
    :param dstsect: container data area (section or entire file)
    :param dstoffs: byte offset into dstsect
    :param dstlen: byte size of object, or None if unknown, open-ended or string
    '''
    def __init__(self, dstsect, dstoffs, dstlen):
        self._dstsect = dstsect
        self._dstoffs = dstoffs
        self._dstlen = dstlen
        self.symname = None

    def __repr__(self):
        # An unknown length (None) is printed as -1.
        return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, self._dstlen or -1)

    def __hash__(self):
        return hash((self._dstsect, self._dstoffs))

    def get_string(self):
        '''
        Interpret as C string / null terminated UTF-8 and get the actual text.

        Errors are propagated to the caller rather than dropping into an
        interactive debugger, so failures are visible (and do not block) in
        non-interactive runs.

        :returns: decoded text
        :raises UnicodeDecodeError: if the bytes are not valid UTF-8
        '''
        # A slice whose stop is the ``str`` type asks the container for
        # everything up to the terminating null byte.
        return self._dstsect[self._dstoffs:str].decode('UTF-8')

    def get_data(self, reflen):
        '''
        Interpret as some structure (and check vs. expected length)

        :param reflen: expected size of the object, compared against actual
           size (which is only known in rare cases, mostly when directly
           accessing a symbol since symbols have their destination object
           size recorded)
        :returns: ``reflen`` bytes starting at this object's offset
        :raises ValueError: if the size is known and differs from reflen
        '''
        if self._dstlen is not None and self._dstlen != reflen:
            raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen))
        return self._dstsect[self._dstoffs:self._dstoffs + reflen]

    def offset(self, offs, within_symbol=False):
        '''
        Get another ELFData at an offset

        :param offs: byte offset, can be negative (e.g. in container_of)
        :param within_symbol: retain length information
        :returns: new ELFData in the same container
        '''
        if self._dstlen is None or not within_symbol:
            return ELFData(self._dstsect, self._dstoffs + offs, None)
        # Still inside the same symbol: the remaining length shrinks by offs.
        return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
132
133 #
134 # dissection data items
135 #
136
class ELFDissectData(object):
    '''
    Common bits for ELFDissectStruct and ELFDissectUnion
    '''

    def __len__(self):
        '''
        Used for boolean evaluation, e.g. "if struct: ..."

        False when the underlying pointer is NULL or an unresolved symbol.
        '''
        return not isinstance(self._data, (ELFNull, ELFUnresolved))

    def container_of(self, parent, fieldname):
        '''
        Assume this struct is embedded in a larger struct and get at the larger

        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``

        :param parent: class (not instance) of the larger struct
        :param fieldname: fieldname that refers back to this
        :returns: instance of parent, with fieldname set to this object
        :raises AttributeError: if parent has no field called fieldname
        '''
        if not hasattr(parent, '_efields'):
            parent._setup_efields()

        # 'P' in struct format strings means the *host's* pointer size; the
        # offset has to be computed with the pointer size of the ELF file
        # being dissected, so substitute the matching fixed-size type first.
        ptrtype = 'I' if self.elfclass == 32 else 'Q'

        offset = 0
        for field in parent._efields[self.elfclass]:
            if field[0] == fieldname:
                break
            offset += struct.calcsize(field[1].replace('P', ptrtype))
        else:
            raise AttributeError('%r not found in %r.fields' % (fieldname, parent))

        # Step backwards from this member to the start of the container.
        return parent(self._data.offset(-offset), replace={fieldname: self})
170
class ELFDissectStruct(ELFDissectData):
    '''
    Decode and provide access to a struct somewhere in the ELF file

    Handles pointers and strings somewhat nicely.  Create a subclass for each
    struct that is to be accessed, and give a field list in a "fields"
    class-member.

    :param dataptr: ELFData referring to the data bits to decode.
    :param parent: where this was instantiated from; only for reference, has
       no functional impact.
    :param replace: substitute data values for specific fields.  Used by
       `container_of` to replace the inner struct when creating the outer
       one.

    .. attribute:: fields

       List of tuples describing the struct members.  Items can be:
       - ``('name', ELFDissectData)`` - directly embed another struct
       - ``('name', 'I')`` - simple data types; second item for struct.unpack
       - ``('name', 'I', None)`` - field to ignore
       - ``('name', 'P', str)`` - pointer to string
       - ``('name', 'P', ELFDissectData)`` - pointer to another struct

       ``P`` is added as unpack format for pointers (sized appropriately for
       the ELF file.)

       Refer to tiabwarfo.py for extracting this from ``pahole``.

       TBD: replace tuples with a class.

    .. attribute:: fieldrename

       Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
       NOTE(review): nothing in this class reads ``fieldrename``; confirm
       where (or whether) it is applied.
    '''

    class Pointer(object):
        '''
        Quick wrapper for pointers to further structs

        This is just here to avoid going into infinite loops when loading
        structs that have pointers to each other (e.g. struct xref <-->
        struct xrefdata.)  The pointer destination is only instantiated when
        actually accessed.
        '''
        def __init__(self, cls, ptr):
            self.cls = cls
            self.ptr = ptr

        def __repr__(self):
            return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr)

        def __call__(self):
            # Dereference: NULL becomes None, anything else is decoded as cls.
            if isinstance(self.ptr, ELFNull):
                return None
            return self.cls(self.ptr)

    def __new__(cls, dataptr, parent=None, replace=None):
        # NULL / unresolved pointers have no section to cache in.
        if dataptr._dstsect is None:
            return super().__new__(cls)

        # Per-section cache keyed on (struct class, data pointer).  Must be
        # compared with "is not None": instances define __len__ and may be
        # falsy.
        obj = dataptr._dstsect._pointers.get((cls, dataptr))
        if obj is not None:
            return obj
        obj = super().__new__(cls)
        dataptr._dstsect._pointers[(cls, dataptr)] = obj
        return obj

    # struct format characters whose size follows the ELF word size
    replacements = 'lLnN'

    @classmethod
    def _preproc_structspec(cls, elfclass, spec):
        '''
        Turn one entry of ``fields`` into a plain struct format string

        :param elfclass: 32 or 64
        :param spec: struct format string, or an object with a
           ``calcsize(elfclass)`` method (embedded struct/union), which is
           turned into an opaque ``<size>s`` byte blob
        :returns: format string with word-size dependent characters replaced
           by fixed-size ones (``P`` is left in place for the caller)
        '''
        elfbits = elfclass

        if hasattr(spec, 'calcsize'):
            spec = '%ds' % (spec.calcsize(elfclass),)

        if elfbits == 32:
            repl = ['i', 'I']
        else:
            repl = ['q', 'Q']
        for c in cls.replacements:
            # lowercase -> signed replacement, uppercase -> unsigned
            spec = spec.replace(c, repl[int(c.isupper())])
        return spec

    @classmethod
    def _setup_efields(cls):
        '''
        Precompute per-ELF-class field lists (``_efields``) and total sizes
        (``_esize``) from ``fields``, for both 32 and 64 bit.
        '''
        cls._efields = {}
        cls._esize = {}
        for elfclass in [32, 64]:
            # 'P' passed to struct.calcsize() would give the host's pointer
            # size; size pointers by the ELF class instead (same mapping as
            # calcsize() below.)
            ptrtype = 'I' if elfclass == 32 else 'Q'

            cls._efields[elfclass] = []
            size = 0
            for f in cls.fields:
                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
                cls._efields[elfclass].append(newf)
                size += struct.calcsize(newf[1].replace('P', ptrtype))
            cls._esize[elfclass] = size

    def __init__(self, dataptr, parent=None, replace=None):
        if not hasattr(self.__class__, '_efields'):
            self._setup_efields()

        self._fdata = None
        self._data = dataptr
        self._parent = parent
        self.symname = dataptr.symname
        if isinstance(dataptr, ELFNull) or isinstance(dataptr, ELFUnresolved):
            # nothing to decode; leave the struct without any fields
            self._fdata = {}
            return

        self._elfsect = dataptr._dstsect
        self.elfclass = self._elfsect._elffile.elfclass
        self.offset = dataptr._dstoffs

        pspecl = [f[1] for f in self._efields[self.elfclass]]

        # need to correlate output from struct.unpack with extra metadata
        # about the particular fields, so note down byte offsets (in locs)
        # and tuple indices of pointers (in ptrs)
        pspec = ''
        locs = {}
        ptrs = set()

        for idx, spec in enumerate(pspecl):
            if spec == 'P':
                ptrs.add(idx)
                spec = self._elfsect.ptrtype

            locs[idx] = struct.calcsize(pspec)
            pspec = pspec + spec

        self._total_size = struct.calcsize(pspec)

        def replace_ptrs(v):
            # v is an (index, raw value) pair; pointer fields are resolved
            # through the section (relocations etc.), others pass through
            idx, val = v[0], v[1]
            if idx not in ptrs:
                return val
            return self._elfsect.pointer(self.offset + locs[idx])

        data = dataptr.get_data(struct.calcsize(pspec))
        unpacked = struct.unpack(self._elfsect.endian + pspec, data)
        unpacked = list(map(replace_ptrs, enumerate(unpacked)))
        self._fraw = unpacked
        self._fdata = OrderedDict()
        replace = replace or {}

        for i, item in enumerate(unpacked):
            name = self.fields[i][0]
            if name is None:
                continue

            if name in replace:
                self._fdata[name] = replace[name]
                continue

            # directly embedded struct: decode in place at the field offset
            if isinstance(self.fields[i][1], type) and issubclass(self.fields[i][1], ELFDissectData):
                dataobj = self.fields[i][1](dataptr.offset(locs[i]), self)
                self._fdata[name] = dataobj
                continue
            if len(self.fields[i]) == 3:
                if self.fields[i][2] == str:
                    self._fdata[name] = item.get_string()
                    continue
                elif self.fields[i][2] is None:
                    # no special handling; stored as plain value below
                    pass
                elif issubclass(self.fields[i][2], ELFDissectData):
                    # pointer to struct: decode lazily on first access
                    cls = self.fields[i][2]
                    dataobj = self.Pointer(cls, item)
                    self._fdata[name] = dataobj
                    continue

            self._fdata[name] = item

    def __getattr__(self, attrname):
        '''
        Expose decoded fields as attributes, resolving lazy Pointer wrappers
        on first access.

        :raises AttributeError: if there is no such field
        '''
        # Only reached when normal lookup failed.  Fetch _fdata through
        # __dict__ so that an instance without _fdata (or with it still
        # None) gives AttributeError instead of recursing into __getattr__
        # or failing with TypeError.
        fdata = self.__dict__.get('_fdata')
        if not fdata or attrname not in fdata:
            raise AttributeError(attrname)
        if isinstance(fdata[attrname], self.Pointer):
            fdata[attrname] = fdata[attrname]()
        return fdata[attrname]

    def __repr__(self):
        if not isinstance(self._data, ELFData):
            return '<%s: %r>' % (self.__class__.__name__, self._data)
        return '<%s: %s>' % (self.__class__.__name__,
                ', '.join(['%s=%r' % t for t in self._fdata.items()]))

    @classmethod
    def calcsize(cls, elfclass):
        '''
        Sum up byte size of this struct

        Wraps struct.calcsize with some extra features.

        :param elfclass: 32 or 64
        :returns: size in bytes, with pointers sized for elfclass
        '''
        if not hasattr(cls, '_efields'):
            cls._setup_efields()

        pspec = ''.join([f[1] for f in cls._efields[elfclass]])

        ptrtype = 'I' if elfclass == 32 else 'Q'
        pspec = pspec.replace('P', ptrtype)

        return struct.calcsize(pspec)
373
class ELFDissectUnion(ELFDissectData):
    '''
    Decode multiple structs in the same place.

    Not currently used (and hence not tested.)  Worked at some point but not
    needed anymore and may be borked now.  Remove this comment when using.

    Subclasses provide a ``members`` class attribute: an iterable of
    ``(name, class)`` pairs, each class being decoded at the same location.

    :param dataptr: data pointer handed to every member class
    :param parent: passed through to every member class
    '''
    def __init__(self, dataptr, parent=None):
        self._dataptr = dataptr
        # The inherited __len__() and container_of() read self._data; keep it
        # in sync so that e.g. "if union: ..." does not raise AttributeError.
        self._data = dataptr
        self._parent = parent
        # instance-level list of decoded members; shadows the class-level
        # (name, class) list of the same name
        self.members = []
        for name, membercls in self.__class__.members:
            item = membercls(dataptr, parent)
            self.members.append(item)
            setattr(self, name, item)

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members]))

    @classmethod
    def calcsize(cls, elfclass):
        '''
        Size of the union = size of its largest member.

        :param elfclass: 32 or 64
        '''
        return max([member.calcsize(elfclass) for name, member in cls.members])
396
397 #
398 # wrappers for spans of ELF data
399 #
400
class ELFSubset(object):
    '''
    Common abstract base for section-level and file-level access.

    Subclasses are expected to provide ``name``, ``_obj``, ``_elffile``,
    ``ptrtype``, ``endian`` and a ``_wrap_data(data, dstsect)`` method.
    '''

    def __init__(self):
        super().__init__()

        # cache of decoded struct objects; weak so that entries vanish once
        # nothing else refers to them
        self._pointers = WeakValueDictionary()

    def __hash__(self):
        return hash(self.name)

    def __getitem__(self, k):
        '''
        Read data from slice

        Subscript **must** be a slice; a simple index will not return a byte
        but rather throw an exception.  Valid slice syntaxes are defined by
        the C module:

        - `this[123:456]` - extract specific range
        - `this[123:str]` - extract until null byte.  The slice stop value is
            the `str` type (or, technically, `unicode`.)
        '''
        return self._obj[k]

    def getreloc(self, offset):
        '''
        Check for a relocation record at the specified offset.
        '''
        return self._obj.getreloc(offset)

    def iter_data(self, scls, slice_=slice(None)):
        '''
        Assume an array of structs present at a particular slice and decode

        :param scls: ELFDissectData subclass for the struct
        :param slice_: optional range specification; a negative stop counts
           back from the end of the data
        '''
        size = scls.calcsize(self._elffile.elfclass)

        offset = slice_.start or 0
        stop = slice_.stop or self._obj.len
        if stop < 0:
            # negative stop is relative to the end, as with regular slices
            stop = self._obj.len + stop

        while offset < stop:
            yield scls(ELFData(self, offset, size))
            offset += size

    def pointer(self, offset):
        '''
        Try to dereference a pointer value

        This checks whether there's a relocation at the given offset and
        uses that; otherwise (e.g. in a non-PIE executable where the pointer
        is already resolved by the linker) the data at the location is used.

        :param offset: byte offset from beginning of section,
            or virtual address in file
        :returns: ELFData wrapping pointed-to object, ELFNull for a NULL
            pointer, or ELFUnresolved for a reference to an undefined symbol
        '''

        ptrsize = struct.calcsize(self.ptrtype)
        data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0]

        reloc = self.getreloc(offset)
        dstsect = None
        if reloc:
            # section won't be available in whole-file operation
            dstsect = reloc.getsection(data)
            addend = reloc.r_addend

            if reloc.relative:
                # old-style ELF REL instead of RELA, not well-tested
                addend += data

            if reloc.unresolved and reloc.symvalid:
                return ELFUnresolved(reloc.symname, addend)
            elif reloc.symvalid:
                data = addend + reloc.st_value
            else:
                data = addend

        # 0 could technically be a valid pointer for a shared library,
        # since libraries may use 0 as default virtual start address (it'll
        # be adjusted on loading)
        # That said, if the library starts at 0, that's where the ELF header
        # would be so it's still an invalid pointer.
        if data == 0 and dstsect is None:
            return ELFNull()

        # wrap_data is different between file & section
        return self._wrap_data(data, dstsect)
496
class ELFDissectSection(ELFSubset):
    '''
    View onto a single ELF section such as ``.text`` or ``.data``

    :param elfwrap: ELFDissectFile wrapper for the file
    :param idx: section index in section header table
    :param section: section object from C module
    '''

    def __init__(self, elfwrap, idx, section):
        super().__init__()

        # owning file wrapper and the low-level file object behind it
        self._elfwrap = elfwrap
        self._elffile = elfwrap._elffile

        # the section itself; _obj is what the ELFSubset accessors use
        self._idx = idx
        self._section = section
        self._obj = section
        self.name = section.name

        # pointer format and byte order are inherited from the file
        self.ptrtype = elfwrap.ptrtype
        self.endian = elfwrap.endian

    def _wrap_data(self, data, dstsect):
        '''
        Wrap an address into an ELFData relative to its containing section.

        :param data: address to wrap
        :param dstsect: low-level section containing the address, or None to
           look it up by address
        '''
        target = dstsect
        if target is None:
            target = self._elfwrap._elffile.get_section_addr(data)
        offs = data - target.sh_addr
        return ELFData(self._elfwrap.get_section(target.idx), offs, None)
523
class ELFDissectFile(ELFSubset):
    '''
    Access the contents of an ELF file.

    Note that offsets for array subscript and relocation/pointer access are
    based on the file's virtual address space and are NOT offsets to the
    start of the file on disk!

    (Shared libraries frequently have a virtual address space starting at 0,
    but non-PIE executables have an architecture specific default loading
    address like 0x400000 on x86.)

    :param filename: ELF file to open
    '''

    def __init__(self, filename):
        super().__init__()

        self.name = filename
        elffile = ELFFile(filename)
        self._elffile = elffile
        self._obj = elffile
        # section wrappers created so far, keyed by section index
        self._sections = {}

        # pointer unpack format and byte order prefix for struct.unpack
        self.ptrtype = 'I' if elffile.elfclass == 32 else 'Q'
        self.endian = '>' if elffile.bigendian else '<'

    @property
    def _elfwrap(self):
        # the file is its own wrapper; sections point back here instead
        return self

    def _wrap_data(self, data, dstsect):
        # whole-file access: addresses are used as offsets directly
        return ELFData(self, data, None)

    def get_section(self, secname):
        '''
        Look up section by name or index

        :param secname: section name, or integer index into the section
           header table
        :returns: ELFDissectSection wrapper (the same object on repeated
           lookups), or None if the lookup found no section
        '''
        if isinstance(secname, int):
            lookup = self._elffile.get_section_idx
        else:
            lookup = self._elffile.get_section

        section = lookup(secname)
        if section is None:
            return None

        sh_idx = section.idx
        try:
            return self._sections[sh_idx]
        except KeyError:
            wrapper = ELFDissectSection(self, sh_idx, section)
            self._sections[sh_idx] = wrapper
            return wrapper