]> git.proxmox.com Git - mirror_frr.git/blob - python/clippy/elf.py
Merge pull request #12366 from manojvn/ospfv2-flood-reduction
[mirror_frr.git] / python / clippy / elf.py
1 # SPDX-License-Identifier: GPL-2.0-or-later
2 # FRR libelf wrapper
3 #
4 # Copyright (C) 2020 David Lamparter for NetDEF, Inc.
5
6 """
7 Wrapping layer and additional utility around _clippy.ELFFile.
8
9 Essentially, the C bits have the low-level ELF access bits that should be
10 fast while this has the bits that string everything together (and would've
11 been a PITA to do in C.)
12
13 Surprisingly - or maybe through proper engineering - this actually works
14 across architecture, word size and even endianness boundaries. Both the C
15 module (through GElf_*) and this code (cf. struct.unpack format mangling
16 in ELFDissectStruct) will take appropriate measures to flip and resize
17 fields as needed.
18 """
19
20 import struct
21 from collections import OrderedDict
22 from weakref import WeakValueDictionary
23
24 from _clippy import ELFFile, ELFAccessError
25
26 #
27 # data access
28 #
29
30
class ELFNull(object):
    """
    Stand-in for a NULL pointer.

    Handed out in place of an ELFData when a pointer refers to nothing; it
    carries neither a symbol name nor a destination section.
    """

    def __init__(self):
        # both attributes exist (as None) so the object can be probed the
        # same way as an ELFData
        self.symname = self._dstsect = None

    def __hash__(self):
        # every NULL pointer hashes like None itself
        return hash(None)

    def __repr__(self):
        return "<ptr: NULL>"

    def get_string(self):
        """
        There is no text behind a NULL pointer; always gives None.
        """
        return None
48
49
class ELFUnresolved(object):
    """
    Placeholder for a pointer to an external symbol that is not resolved;
    handed out in place of an ELFData.

    :param symname: name of the symbol being referred to
    :param addend: offset applied on top of the symbol (usually zero)
    """

    def __init__(self, symname, addend):
        self.addend = addend
        self.symname = symname
        # there is no destination section for an unresolved reference
        self._dstsect = None

    def __hash__(self):
        # identity is the (symbol, addend) pair
        key = (self.symname, self.addend)
        return hash(key)

    def __repr__(self):
        text = "<unresolved: %s+%d>" % (self.symname, self.addend)
        return text
68
69
class ELFData(object):
    """
    Actual data somewhere in the ELF file.

    :type dstsect: ELFSubset
    :param dstsect: container data area (section or entire file)
    :param dstoffs: byte offset into dstsect
    :param dstlen: byte size of object, or None if unknown, open-ended or string
    """

    def __init__(self, dstsect, dstoffs, dstlen):
        self._dstsect = dstsect
        self._dstoffs = dstoffs
        self._dstlen = dstlen
        self.symname = None

    def __repr__(self):
        # an unknown length (None) is printed as -1
        return "<ptr: %s+0x%05x/%d>" % (
            self._dstsect.name,
            self._dstoffs,
            self._dstlen or -1,
        )

    def __hash__(self):
        return hash((self._dstsect, self._dstoffs))

    def get_string(self):
        """
        Interpret as C string / null terminated UTF-8 and get the actual text.

        The container is sliced with the ``str`` type as stop value, which
        the container interprets as "read until the terminating null byte".

        :returns: the decoded text
        :raises: any error from the container lookup or from UTF-8 decoding
            is passed on to the caller unchanged.
        """
        # errors propagate on purpose; this used to drop into pdb from a
        # bare "except:", which is unusable in non-interactive runs and
        # silently turned failures into a None return value
        return self._dstsect[self._dstoffs : str].decode("UTF-8")

    def get_data(self, reflen):
        """
        Interpret as some structure (and check vs. expected length)

        :param reflen: expected size of the object, compared against actual
            size (which is only known in rare cases, mostly when directly
            accessing a symbol since symbols have their destination object
            size recorded)
        :returns: ``reflen`` bytes read from the container at this offset
        :raises ValueError: if the size is known and differs from ``reflen``
        """
        if self._dstlen is not None and self._dstlen != reflen:
            raise ValueError(
                "symbol size mismatch (got %d, expected %d)" % (self._dstlen, reflen)
            )
        return self._dstsect[self._dstoffs : self._dstoffs + reflen]

    def offset(self, offs, within_symbol=False):
        """
        Get another ELFData at an offset

        :param offs: byte offset, can be negative (e.g. in container_of)
        :param within_symbol: retain length information
        :returns: new ELFData in the same container; its length is the
            remaining length if ``within_symbol`` is set and the length is
            known, None otherwise
        """
        if self._dstlen is None or not within_symbol:
            return ELFData(self._dstsect, self._dstoffs + offs, None)
        else:
            return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
133
134
135 #
136 # dissection data items
137 #
138
139
class ELFDissectData(object):
    """
    Common bits for ELFDissectStruct and ELFDissectUnion
    """

    def __init__(self):
        self._data = None
        self.elfclass = None

    def __len__(self):
        """
        Used for boolean evaluation, e.g. "if struct: ..."

        False if the underlying pointer is NULL or unresolved, True otherwise.
        """
        return not isinstance(self._data, (ELFNull, ELFUnresolved))

    def container_of(self, parent, fieldname):
        """
        Assume this struct is embedded in a larger struct and get at the larger

        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``

        :param parent: class (not instance) of the larger struct
        :param fieldname: fieldname that refers back to this
        :returns: instance of parent, with fieldname set to this object
        :raises AttributeError: if parent has no field called fieldname
        """
        # Only trust a field cache stored on ``parent`` itself.  hasattr()
        # would also find a cache inherited from a base class, and then the
        # offsets would be computed from the base class's field list.
        if "_efields" not in vars(parent):
            parent._setup_efields()

        # pointers are sized by the ELF class of this object
        ptrspec = "I" if self.elfclass == 32 else "Q"

        # add up the sizes of all fields in front of the requested one
        offset = 0
        for field in parent._efields[self.elfclass]:
            if field[0] == fieldname:
                break
            spec = field[1]
            if spec == "P":
                spec = ptrspec
            offset += struct.calcsize(spec)
        else:
            raise AttributeError("%r not found in %r.fields" % (fieldname, parent))

        return parent(self._data.offset(-offset), replace={fieldname: self})
182
183
class ELFDissectStruct(ELFDissectData):
    """
    Decode and provide access to a struct somewhere in the ELF file

    Handles pointers and strings somewhat nicely.  Create a subclass for each
    struct that is to be accessed, and give a field list in a "fields"
    class-member.

    :param dataptr: ELFData referring to the data bits to decode.
    :param parent: where this was instantiated from; only for reference, has
        no functional impact.
    :param replace: substitute data values for specific fields.  Used by
        `container_of` to replace the inner struct when creating the outer
        one.

    .. attribute:: fields

       List of tuples describing the struct members.  Items can be:
       - ``('name', ELFDissectData)`` - directly embed another struct
       - ``('name', 'I')`` - simple data types; second item for struct.unpack
       - ``('name', 'I', None)`` - field to ignore
       - ``('name', 'P', str)`` - pointer to string
       - ``('name', 'P', ELFDissectData)`` - pointer to another struct

       ``P`` is added as unpack format for pointers (sized appropriately for
       the ELF file.)

       Refer to tiabwarfo.py for extracting this from ``pahole``.

       TBD: replace tuples with a class.

    .. attribute:: fieldrename

       Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
    """

    class Pointer(object):
        """
        Quick wrapper for pointers to further structs

        This is just here to avoid going into infinite loops when loading
        structs that have pointers to each other (e.g. struct xref <-->
        struct xrefdata.)  The pointer destination is only instantiated when
        actually accessed.
        """

        def __init__(self, cls, ptr):
            self.cls = cls
            self.ptr = ptr

        def __repr__(self):
            return "<Pointer:%s %r>" % (self.cls.__name__, self.ptr)

        def __call__(self):
            """
            Instantiate the destination struct; None for a NULL pointer.
            """
            if isinstance(self.ptr, ELFNull):
                return None
            return self.cls(self.ptr)

    def __new__(cls, dataptr, parent=None, replace=None):
        """
        Create the instance, going through the per-section instance cache.

        Pointers without a destination section (NULL / unresolved) are never
        cached.  Otherwise the section's ``_pointers`` mapping is consulted
        with ``(cls, dataptr)`` as key and a hit is returned as-is; note that
        Python runs ``__init__`` again on an instance returned from here.
        """
        if dataptr._dstsect is None:
            return super().__new__(cls)

        obj = dataptr._dstsect._pointers.get((cls, dataptr))
        if obj is not None:
            return obj
        obj = super().__new__(cls)
        dataptr._dstsect._pointers[(cls, dataptr)] = obj
        return obj

    # struct format characters whose size follows the ELF class; lowercase
    # ones map to the signed type, uppercase ones to the unsigned type
    replacements = "lLnN"

    @classmethod
    def _preproc_structspec(cls, elfclass, spec):
        """
        Translate one field spec into a plain struct format for an ELF class

        :param elfclass: 32 or 64
        :param spec: struct format string, or an object with a
            ``calcsize(elfclass)`` method (embedded struct), which is turned
            into an opaque ``<size>s`` byte string field
        :returns: format string with ``replacements`` characters substituted
        """
        elfbits = elfclass

        if hasattr(spec, "calcsize"):
            spec = "%ds" % (spec.calcsize(elfclass),)

        if elfbits == 32:
            repl = ["i", "I"]
        else:
            repl = ["q", "Q"]
        for c in cls.replacements:
            spec = spec.replace(c, repl[int(c.isupper())])
        return spec

    @classmethod
    def _setup_efields(cls):
        """
        Build the per-ELF-class field lists (``_efields``) and sizes
        (``_esize``) from ``fields`` and store them on this class.
        """
        cls._efields = {}
        cls._esize = {}
        for elfclass in [32, 64]:
            cls._efields[elfclass] = []
            size = 0
            for f in cls.fields:
                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
                cls._efields[elfclass].append(newf)
                size += struct.calcsize(newf[1])
            cls._esize[elfclass] = size

    def __init__(self, dataptr, parent=None, replace=None):
        # Only trust a field cache stored on this very class.  hasattr()
        # would also find a cache inherited from a base class, and a
        # subclass with its own "fields" would then be decoded with the
        # base class's layout.
        if "_efields" not in vars(self.__class__):
            self._setup_efields()

        self._fdata = None
        self._data = dataptr
        self._parent = parent
        self.symname = dataptr.symname
        if isinstance(dataptr, (ELFNull, ELFUnresolved)):
            # nothing to decode behind a NULL / unresolved pointer
            self._fdata = {}
            return

        self._elfsect = dataptr._dstsect
        self.elfclass = self._elfsect._elffile.elfclass
        self.offset = dataptr._dstoffs

        pspecl = [f[1] for f in self._efields[self.elfclass]]

        # need to correlate output from struct.unpack with extra metadata
        # about the particular fields, so note down byte offsets (in locs)
        # and tuple indices of pointers (in ptrs)
        pspec = ""
        locs = {}
        ptrs = set()

        for idx, spec in enumerate(pspecl):
            if spec == "P":
                ptrs.add(idx)
                spec = self._elfsect.ptrtype

            locs[idx] = struct.calcsize(pspec)
            pspec = pspec + spec

        self._total_size = struct.calcsize(pspec)

        def replace_ptrs(v):
            # swap the raw integer of a pointer field for the dereferenced
            # pointer object; other values pass through untouched
            idx, val = v[0], v[1]
            if idx not in ptrs:
                return val
            return self._elfsect.pointer(self.offset + locs[idx])

        data = dataptr.get_data(struct.calcsize(pspec))
        unpacked = struct.unpack(self._elfsect.endian + pspec, data)
        unpacked = list(map(replace_ptrs, enumerate(unpacked)))
        self._fraw = unpacked
        self._fdata = OrderedDict()
        replace = replace or {}

        for i, item in enumerate(unpacked):
            name = self.fields[i][0]
            if name is None:
                continue

            if name in replace:
                self._fdata[name] = replace[name]
                continue

            # directly embedded struct: decode it in place
            if isinstance(self.fields[i][1], type) and issubclass(
                self.fields[i][1], ELFDissectData
            ):
                dataobj = self.fields[i][1](dataptr.offset(locs[i]), self)
                self._fdata[name] = dataobj
                continue
            if len(self.fields[i]) == 3:
                if self.fields[i][2] == str:
                    self._fdata[name] = item.get_string()
                    continue
                elif self.fields[i][2] is None:
                    pass
                elif issubclass(self.fields[i][2], ELFDissectData):
                    # pointer to another struct: defer decoding until the
                    # field is actually accessed (see Pointer)
                    cls = self.fields[i][2]
                    dataobj = self.Pointer(cls, item)
                    self._fdata[name] = dataobj
                    continue

            self._fdata[name] = item

    def __getattr__(self, attrname):
        """
        Provide decoded fields as attributes.

        Struct pointers are instantiated on first access and the result is
        stored back, so the destination is only decoded once.

        :raises AttributeError: if there is no decoded field of that name
        """
        # Look at __dict__ directly: going through self._fdata would recurse
        # into __getattr__ without end if _fdata has not been set yet, and
        # fail with TypeError while it is still None during __init__.
        fdata = self.__dict__.get("_fdata")
        if not fdata or attrname not in fdata:
            raise AttributeError(attrname)
        if isinstance(fdata[attrname], self.Pointer):
            fdata[attrname] = fdata[attrname]()
        return fdata[attrname]

    def __repr__(self):
        if not isinstance(self._data, ELFData):
            return "<%s: %r>" % (self.__class__.__name__, self._data)
        return "<%s: %s>" % (
            self.__class__.__name__,
            ", ".join(["%s=%r" % t for t in self._fdata.items()]),
        )

    @classmethod
    def calcsize(cls, elfclass):
        """
        Sum up byte size of this struct

        Wraps struct.calcsize with some extra features.

        :param elfclass: 32 or 64; selects the field list and pointer size
        :returns: size in bytes as computed by struct.calcsize
        """
        # see __init__ on why hasattr() is not used here
        if "_efields" not in vars(cls):
            cls._setup_efields()

        pspec = "".join([f[1] for f in cls._efields[elfclass]])

        ptrtype = "I" if elfclass == 32 else "Q"
        pspec = pspec.replace("P", ptrtype)

        return struct.calcsize(pspec)
391
392
class ELFDissectUnion(ELFDissectData):
    """
    Decode multiple structs in the same place.

    Not currently used (and hence not tested.)  Worked at some point but not
    needed anymore and may be borked now.  Remove this comment when using.

    :param dataptr: pointer to the data; handed to every member class
    :param parent: where this was instantiated from; handed to every member

    .. attribute:: members

       On the class: sequence of ``(name, ELFDissectData subclass)`` pairs,
       to be provided by subclasses.  On an instance this is replaced by the
       list of decoded member objects, each of which is also available as an
       attribute under its name.
    """

    # pairs are unpacked below, so this has to be a sequence and not a dict
    members = ()

    def __init__(self, dataptr, parent=None):
        # set up the common state; the inherited __len__ (boolean
        # evaluation) needs _data to exist
        super().__init__()
        self._data = dataptr
        self._dataptr = dataptr
        self._parent = parent
        if dataptr._dstsect is not None:
            # same source for the ELF class as ELFDissectStruct uses
            self.elfclass = dataptr._dstsect._elffile.elfclass

        self.members = []
        for name, membercls in self.__class__.members:
            item = membercls(dataptr, parent)
            self.members.append(item)
            setattr(self, name, item)

    def __repr__(self):
        return "<%s: %s>" % (
            self.__class__.__name__,
            ", ".join([repr(i) for i in self.members]),
        )

    @classmethod
    def calcsize(cls, elfclass):
        """
        Size of the union, i.e. of its largest member (0 without members)
        """
        return max(
            [member.calcsize(elfclass) for name, member in cls.members],
            default=0,
        )
421
422
423 #
424 # wrappers for spans of ELF data
425 #
426
427
class ELFSubset(object):
    """
    Common abstract base for section-level and file-level access.
    """

    def __init__(self):
        super().__init__()

        self.name = None
        self._obj = None
        self._elffile = None
        self.ptrtype = None
        self.endian = None
        # instance cache used by ELFDissectStruct.__new__
        self._pointers = WeakValueDictionary()

    def _wrap_data(self, data, dstsect):
        """
        Turn a pointer value into an ELFData; implemented by subclasses.
        """
        raise NotImplementedError()

    def __hash__(self):
        return hash(self.name)

    def __getitem__(self, k):
        """
        Read data from slice

        Subscript **must** be a slice; a simple index will not return a byte
        but rather throw an exception.  Valid slice syntaxes are defined by
        the C module:

        - `this[123:456]` - extract specific range
        - `this[123:str]` - extract until null byte.  The slice stop value is
            the `str` type (or, technically, `unicode`.)
        """
        return self._obj[k]

    def getreloc(self, offset):
        """
        Check for a relocation record at the specified offset.
        """
        return self._obj.getreloc(offset)

    def iter_data(self, scls, slice_=slice(None)):
        """
        Assume an array of structs present at a particular slice and decode

        :param scls: ELFDissectData subclass for the struct
        :param slice_: optional range specification; a missing (or zero)
            stop means the end of the data, a negative stop counts back from
            the end of the data.  The step is not used.
        :raises ValueError: if the struct has no size (on first iteration)
        """
        size = scls.calcsize(self._elffile.elfclass)
        if size <= 0:
            # the offset would never advance
            raise ValueError("cannot iterate over zero-sized %r" % (scls,))

        offset = slice_.start or 0
        stop = slice_.stop or self._obj.len
        if stop < 0:
            # stop is negative here, so it has to be added to the length
            stop = self._obj.len + stop

        while offset < stop:
            yield scls(ELFData(self, offset, size))
            offset += size

    def pointer(self, offset):
        """
        Try to dereference a pointer value

        This checks whether there's a relocation at the given offset and
        uses that; otherwise (e.g. in a non-PIE executable where the pointer
        is already resolved by the linker) the data at the location is used.

        :param offset: byte offset from beginning of section,
            or virtual address in file
        :returns: ELFData wrapping pointed-to object, ELFUnresolved for a
            relocation against an unresolved symbol, or ELFNull
        """

        ptrsize = struct.calcsize(self.ptrtype)
        data = struct.unpack(
            self.endian + self.ptrtype, self[offset : offset + ptrsize]
        )[0]

        reloc = self.getreloc(offset)
        dstsect = None
        if reloc:
            # section won't be available in whole-file operation
            dstsect = reloc.getsection(data)
            addend = reloc.r_addend

            if reloc.relative:
                # old-style ELF REL instead of RELA, not well-tested
                addend += data

            if reloc.unresolved and reloc.symvalid:
                return ELFUnresolved(reloc.symname, addend)
            elif reloc.symvalid:
                data = addend + reloc.st_value
            else:
                data = addend

        # 0 could technically be a valid pointer for a shared library,
        # since libraries may use 0 as default virtual start address (it'll
        # be adjusted on loading)
        # That said, if the library starts at 0, that's where the ELF header
        # would be so it's still an invalid pointer.
        if data == 0 and dstsect is None:
            return ELFNull()

        # wrap_data is different between file & section
        return self._wrap_data(data, dstsect)
533
534
class ELFDissectSection(ELFSubset):
    """
    View onto the contents of a single ELF section (``.text``, ``.data``...)

    :param elfwrap: ELFDissectFile wrapper of the file the section is in
    :param idx: index of the section in the section header table
    :param section: section object as provided by the C module
    """

    def __init__(self, elfwrap, idx, section):
        super().__init__()

        # where this section comes from
        self._elfwrap = elfwrap
        self._elffile = elfwrap._elffile
        self._idx = idx

        # the section object doubles as the raw data accessor
        self._section = section
        self._obj = section
        self.name = section.name

        # pointer format and byte order are those of the whole file
        self.ptrtype = elfwrap.ptrtype
        self.endian = elfwrap.endian

    def _wrap_data(self, data, dstsect):
        """
        Make an ELFData for address ``data``, relative to its section.

        If no destination section is given, it is looked up by address.
        """
        rawsect = dstsect
        if rawsect is None:
            rawsect = self._elfwrap._elffile.get_section_addr(data)

        relative = data - rawsect.sh_addr
        wrapped = self._elfwrap.get_section(rawsect.idx)
        return ELFData(wrapped, relative, None)
561
562
class ELFDissectFile(ELFSubset):
    """
    View onto the contents of a whole ELF file.

    Offsets used for array subscripts and for relocation/pointer access are
    in terms of the file's virtual address space; they are NOT offsets from
    the start of the file on disk!

    (Shared libraries frequently have a virtual address space starting at 0,
    but non-PIE executables have an architecture specific default loading
    address like 0x400000 on x86.)

    :param filename: ELF file to open
    """

    def __init__(self, filename):
        super().__init__()

        self.name = filename
        elffile = ELFFile(filename)
        self._elffile = elffile
        self._obj = elffile
        # section wrappers created so far, by section index
        self._sections = {}

        # pointer format and byte order follow the file's ELF header
        if self._elffile.elfclass == 32:
            self.ptrtype = "I"
        else:
            self.ptrtype = "Q"
        if self._elffile.bigendian:
            self.endian = ">"
        else:
            self.endian = "<"

    @property
    def _elfwrap(self):
        # the file is its own wrapper
        return self

    def _wrap_data(self, data, dstsect):
        """
        Make an ELFData for address ``data`` within the file itself.
        """
        return ELFData(self, data, None)

    def get_section(self, secname):
        """
        Find a section, given either its name or its index.

        Returns None if there is no such section; otherwise the same wrapper
        object is handed out for repeated lookups of one section.
        """
        if isinstance(secname, int):
            section = self._elffile.get_section_idx(secname)
        else:
            section = self._elffile.get_section(secname)
        if section is None:
            return None

        key = section.idx
        if key in self._sections:
            return self._sections[key]

        wrapper = ELFDissectSection(self, key, section)
        self._sections[key] = wrapper
        return wrapper
611 self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section)
612
613 return self._sections[sh_idx]