1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * fast ELF file accessor
4 * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
7 /* Note: this wrapper is intended to be used as build-time helper. While
8 * it should be generally correct and proper, there may be the occasional
9 * memory leak or SEGV for things that haven't been well-tested.
 *        / \    This code is NOT SUITABLE FOR UNTRUSTED ELF FILES.  It's used
 *       / ! \   in FRR to read files created by its own build.  Don't take it out
 *      /_____\  of FRR and use it to parse random ELF files you found somewhere.
15 * If you're working with this code (or even reading it), you really need to
16 * read a bunch of the ELF specs. There's no way around it, things in here
17 * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
21 * https://refspecs.linuxfoundation.org/elf/elf.pdf
22 * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
23 * Recommended reading:
24 * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
26 * The core ELF spec is *not* enough, you should read at least one of the
27 * processor specific (psABI) docs. They define what & how relocations work.
28 * Luckily we don't need to care about the processor specifics since this only
29 * does data relocations, but without looking at the psABI, some things aren't
33 /* the API of this module roughly follows a very small subset of the one
34 * provided by the python elfutils package, which unfortunately is painfully
38 #define PY_SSIZE_T_CLEAN
44 #include "structmember.h"
48 #include <sys/types.h>
53 #if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
54 /* Solaris libelf bails otherwise ... */
55 #undef _FILE_OFFSET_BITS
56 #define _FILE_OFFSET_BITS 32
72 fprintf(stderr, __VA_ARGS__); \
76 static PyObject
*ELFFormatError
;
77 static PyObject
*ELFAccessError
;
79 /* most objects can only be created as return values from one of the methods */
80 static PyObject
*refuse_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
82 PyErr_SetString(PyExc_ValueError
,
83 "cannot create instances of this type");
90 PREDECL_HASH(elfrelocs
);
92 /* ELFFile and ELFSection intentionally share some behaviour, particularly
93 * subscript[123:456] access to file data. This is because relocatables
94 * (.o files) do things section-based, but linked executables/libraries do
95 * things file-based. Having the two behave similar allows simplifying the
101 * overall entry point, instantiated by reading in an ELF file
111 /* note from here on there are several instances of
113 * GElf_Something *x, _x;
115 * this is a pattern used by libelf's generic ELF routines; the _x
116 * field is used to create a copy of the ELF structure from the file
117 * with 32/64bit and endianness adjusted.
120 GElf_Ehdr
*ehdr
, _ehdr
;
122 size_t nsym
, symstridx
;
128 struct elfrelocs_head dynrelocs
;
137 * note that executables and shared libraries can have their section headers
138 * removed, though in practice this is only used as an obfuscation technique.
146 GElf_Shdr _shdr
, *shdr
;
148 unsigned long idx
, len
;
150 struct elfrelocs_head relocs
;
155 * note: relocations in object files (.o) are section-based while relocations
156 * in executables and shared libraries are file-based.
158 * Whenever accessing something that is a pointer in the ELF file, the Python
159 * code needs to check for a relocation; if the pointer is pointing to some
160 * unresolved symbol the file will generally contain 0 bytes. The relocation
161 * will tell what the pointer is actually pointing to.
163 * This represents both static (.o file) and dynamic (.so/exec) relocations.
168 struct elfrelocs_item elfrelocs_item
;
173 /* there's also old-fashioned GElf_Rel; we're converting that to
174 * GElf_Rela in elfsect_add_relocations()
176 GElf_Rela _rela
, *rela
;
181 /* documented below in python docstrings */
182 bool symvalid
, unresolved
, relative
;
183 unsigned long long st_value
;
186 static int elfreloc_cmp(const struct elfreloc
*a
, const struct elfreloc
*b
);
187 static uint32_t elfreloc_hash(const struct elfreloc
*reloc
);
189 DECLARE_HASH(elfrelocs
, struct elfreloc
, elfrelocs_item
,
190 elfreloc_cmp
, elfreloc_hash
);
192 static Elf_Scn
*elf_find_addr(struct elffile
*ef
, uint64_t addr
, size_t *idx
);
193 static PyObject
*elffile_secbyidx(struct elffile
*w
, Elf_Scn
*scn
, size_t idx
);
194 static PyObject
*elfreloc_getsection(PyObject
*self
, PyObject
*args
);
195 static PyObject
*elfreloc_getaddend(PyObject
*obj
, void *closure
);
197 /* --- end of declarations -------------------------------------------------- */
203 static const char elfreloc_doc
[] =
204 "Represents an ELF relocation record\n"
206 "(struct elfreloc * in elf_py.c)";
208 #define member(name, type, doc) \
210 (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
211 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
213 static PyMemberDef members_elfreloc
[] = {
214 member(symname
, T_STRING
,
215 "Name of symbol this relocation refers to.\n"
217 "Will frequently be `None` in executables and shared libraries."
219 member(symvalid
, T_BOOL
,
220 "Target symbol has a valid type, i.e. not STT_NOTYPE"),
221 member(unresolved
, T_BOOL
,
222 "Target symbol refers to an existing section"),
223 member(relative
, T_BOOL
,
224 "Relocation is a REL (not RELA) record and thus relative."),
225 member(st_value
, T_ULONGLONG
,
226 "Target symbol's value, if known\n\n"
227 "Will be zero for unresolved/external symbols."),
232 static PyGetSetDef getset_elfreloc
[] = {
233 { .name
= (char *)"r_addend", .get
= elfreloc_getaddend
, .doc
=
234 (char *)"Relocation addend value"},
238 static PyMethodDef methods_elfreloc
[] = {
239 {"getsection", elfreloc_getsection
, METH_VARARGS
,
240 "Find relocation target's ELF section\n\n"
241 "Args: address of relocatee (TODO: fix/remove?)\n"
242 "Returns: ELFSection or None\n\n"
243 "Not possible if section headers have been stripped."},
247 static int elfreloc_cmp(const struct elfreloc
*a
, const struct elfreloc
*b
)
249 if (a
->rela
->r_offset
< b
->rela
->r_offset
)
251 if (a
->rela
->r_offset
> b
->rela
->r_offset
)
256 static uint32_t elfreloc_hash(const struct elfreloc
*reloc
)
258 return jhash(&reloc
->rela
->r_offset
, sizeof(reloc
->rela
->r_offset
),
262 static struct elfreloc
*elfrelocs_get(struct elfrelocs_head
*head
,
265 struct elfreloc dummy
;
267 dummy
.rela
= &dummy
._rela
;
268 dummy
.rela
->r_offset
= offset
;
269 return elfrelocs_find(head
, &dummy
);
272 static PyObject
*elfreloc_getsection(PyObject
*self
, PyObject
*args
)
274 struct elfreloc
*w
= (struct elfreloc
*)self
;
277 if (!PyArg_ParseTuple(args
, "k", &data
))
283 if (!w
->symvalid
|| w
->symidx
== 0) {
287 data
= (w
->relative
? data
: 0) + w
->rela
->r_addend
;
288 scn
= elf_find_addr(w
->es
->ef
, data
, &idx
);
291 return elffile_secbyidx(w
->es
->ef
, scn
, idx
);
293 return elffile_secbyidx(w
->es
->ef
, NULL
, w
->sym
->st_shndx
);
296 static PyObject
*elfreloc_getaddend(PyObject
*obj
, void *closure
)
298 struct elfreloc
*w
= (struct elfreloc
*)obj
;
300 return Py_BuildValue("K", (unsigned long long)w
->rela
->r_addend
);
303 static PyObject
*elfreloc_repr(PyObject
*arg
)
305 struct elfreloc
*w
= (struct elfreloc
*)arg
;
307 return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>",
308 (unsigned long)w
->rela
->r_offset
,
309 (w
->symname
&& w
->symname
[0]) ? w
->symname
311 (unsigned long)w
->rela
->r_addend
);
314 static void elfreloc_free(void *arg
)
316 struct elfreloc
*w
= arg
;
321 static PyTypeObject typeobj_elfreloc
= {
322 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFReloc",
323 .tp_basicsize
= sizeof(struct elfreloc
),
324 .tp_flags
= Py_TPFLAGS_DEFAULT
,
325 .tp_doc
= elfreloc_doc
,
326 .tp_new
= refuse_new
,
327 .tp_free
= elfreloc_free
,
328 .tp_repr
= elfreloc_repr
,
329 .tp_members
= members_elfreloc
,
330 .tp_methods
= methods_elfreloc
,
331 .tp_getset
= getset_elfreloc
,
338 static const char elfsect_doc
[] =
339 "Represents an ELF section\n"
341 "To access section contents, use subscript notation, e.g.\n"
342 " section[123:456]\n"
343 "To read null terminated C strings, replace the end with str:\n"
344 " section[123:str]\n\n"
345 "(struct elfsect * in elf_py.c)";
347 static PyObject
*elfsect_getaddr(PyObject
*self
, void *closure
);
349 #define member(name, type, doc) \
351 (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
352 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
354 static PyMemberDef members_elfsect
[] = {
355 member(name
, T_STRING
,
356 "Section name, e.g. \".text\""),
358 "Section index in file"),
360 "Section length in bytes"),
365 static PyGetSetDef getset_elfsect
[] = {
366 { .name
= (char *)"sh_addr", .get
= elfsect_getaddr
, .doc
=
367 (char *)"Section virtual address (mapped program view)"},
371 static PyObject
*elfsect_getaddr(PyObject
*self
, void *closure
)
373 struct elfsect
*w
= (struct elfsect
*)self
;
375 return Py_BuildValue("K", (unsigned long long)w
->shdr
->sh_addr
);
379 static PyObject
*elfsect_getreloc(PyObject
*self
, PyObject
*args
)
381 struct elfsect
*w
= (struct elfsect
*)self
;
382 struct elfreloc
*relw
;
386 if (!PyArg_ParseTuple(args
, "k", &offs
))
389 relw
= elfrelocs_get(&w
->relocs
, offs
+ w
->shdr
->sh_addr
);
393 ret
= (PyObject
*)relw
;
398 static PyMethodDef methods_elfsect
[] = {
399 {"getreloc", elfsect_getreloc
, METH_VARARGS
,
400 "Check for / get relocation at offset into section\n\n"
401 "Args: byte offset into section to check\n"
402 "Returns: ELFReloc or None"},
406 static PyObject
*elfsect_subscript(PyObject
*self
, PyObject
*key
)
408 Py_ssize_t start
, stop
, step
, sllen
;
409 struct elfsect
*w
= (struct elfsect
*)self
;
410 PySliceObject
*slice
;
411 unsigned long offs
, len
= ~0UL;
413 if (!PySlice_Check(key
)) {
414 PyErr_SetString(PyExc_IndexError
,
415 "ELFSection subscript must be slice");
418 slice
= (PySliceObject
*)key
;
419 if (PyLong_Check(slice
->stop
)) {
420 if (PySlice_GetIndicesEx(key
, w
->shdr
->sh_size
,
421 &start
, &stop
, &step
, &sllen
))
425 PyErr_SetString(PyExc_IndexError
,
426 "ELFSection subscript slice step must be 1");
429 if ((GElf_Xword
)stop
> w
->shdr
->sh_size
) {
430 PyErr_Format(ELFAccessError
,
431 "access (%lu) beyond end of section %lu/%s (%lu)",
432 stop
, w
->idx
, w
->name
, w
->shdr
->sh_size
);
439 if (slice
->stop
!= (void *)&PyUnicode_Type
440 || !PyLong_Check(slice
->start
)) {
441 PyErr_SetString(PyExc_IndexError
, "invalid slice");
445 offs
= PyLong_AsUnsignedLongLong(slice
->start
);
449 offs
+= w
->shdr
->sh_offset
;
450 if (offs
> w
->ef
->len
) {
451 PyErr_Format(ELFAccessError
,
452 "access (%lu) beyond end of file (%lu)",
457 len
= strnlen(w
->ef
->mmap
+ offs
, w
->ef
->len
- offs
);
459 Py_ssize_t pylen
= len
;
461 #if PY_MAJOR_VERSION >= 3
462 return Py_BuildValue("y#", w
->ef
->mmap
+ offs
, pylen
);
464 return Py_BuildValue("s#", w
->ef
->mmap
+ offs
, pylen
);
468 static PyMappingMethods mp_elfsect
= {
469 .mp_subscript
= elfsect_subscript
,
472 static void elfsect_free(void *arg
)
474 struct elfsect
*w
= arg
;
479 static PyObject
*elfsect_repr(PyObject
*arg
)
481 struct elfsect
*w
= (struct elfsect
*)arg
;
483 return PyUnicode_FromFormat("<ELFSection %s>", w
->name
);
486 static PyTypeObject typeobj_elfsect
= {
487 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFSection",
488 .tp_basicsize
= sizeof(struct elfsect
),
489 .tp_flags
= Py_TPFLAGS_DEFAULT
,
490 .tp_doc
= elfsect_doc
,
491 .tp_new
= refuse_new
,
492 .tp_free
= elfsect_free
,
493 .tp_repr
= elfsect_repr
,
494 .tp_as_mapping
= &mp_elfsect
,
495 .tp_members
= members_elfsect
,
496 .tp_methods
= methods_elfsect
,
497 .tp_getset
= getset_elfsect
,
500 static void elfsect_add_relocations(struct elfsect
*w
, Elf_Scn
*rel
,
504 Elf_Scn
*symtab
= elf_getscn(w
->ef
->elf
, relhdr
->sh_link
);
505 GElf_Shdr _symhdr
, *symhdr
= gelf_getshdr(symtab
, &_symhdr
);
506 Elf_Data
*symdata
= elf_getdata(symtab
, NULL
);
507 Elf_Data
*reldata
= elf_getdata(rel
, NULL
);
509 entries
= relhdr
->sh_size
/ relhdr
->sh_entsize
;
510 for (i
= 0; i
< entries
; i
++) {
511 struct elfreloc
*relw
;
516 relw
= (struct elfreloc
*)typeobj_elfreloc
.tp_alloc(
517 &typeobj_elfreloc
, 0);
520 if (relhdr
->sh_type
== SHT_REL
) {
523 rel
= gelf_getrel(reldata
, i
, &_rel
);
524 relw
->rela
= &relw
->_rela
;
525 relw
->rela
->r_offset
= rel
->r_offset
;
526 relw
->rela
->r_info
= rel
->r_info
;
527 relw
->rela
->r_addend
= 0;
528 relw
->relative
= true;
530 relw
->rela
= gelf_getrela(reldata
, i
, &relw
->_rela
);
533 if (rela
->r_offset
< w
->shdr
->sh_addr
534 || rela
->r_offset
>= w
->shdr
->sh_addr
+ w
->shdr
->sh_size
)
537 symidx
= relw
->symidx
= GELF_R_SYM(rela
->r_info
);
538 sym
= relw
->sym
= gelf_getsym(symdata
, symidx
, &relw
->_sym
);
540 relw
->symname
= elf_strptr(w
->ef
->elf
, symhdr
->sh_link
,
542 relw
->symvalid
= GELF_ST_TYPE(sym
->st_info
)
544 relw
->unresolved
= sym
->st_shndx
== SHN_UNDEF
;
545 relw
->st_value
= sym
->st_value
;
547 relw
->symname
= NULL
;
548 relw
->symvalid
= false;
549 relw
->unresolved
= false;
553 debugf("reloc @ %016llx sym %5llu %016llx %s\n",
554 (long long)rela
->r_offset
, (unsigned long long)symidx
,
555 (long long)rela
->r_addend
, relw
->symname
);
557 elfrelocs_add(&w
->relocs
, relw
);
562 * bindings & loading code between ELFFile and ELFSection
565 static PyObject
*elfsect_wrap(struct elffile
*ef
, Elf_Scn
*scn
, size_t idx
,
571 w
= (struct elfsect
*)typeobj_elfsect
.tp_alloc(&typeobj_elfsect
, 0);
578 w
->shdr
= gelf_getshdr(scn
, &w
->_shdr
);
579 w
->len
= w
->shdr
->sh_size
;
581 elfrelocs_init(&w
->relocs
);
583 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
584 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
585 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
587 if (shdr
->sh_type
!= SHT_RELA
&& shdr
->sh_type
!= SHT_REL
)
589 if (shdr
->sh_info
&& shdr
->sh_info
!= idx
)
591 elfsect_add_relocations(w
, scn
, shdr
);
594 return (PyObject
*)w
;
597 static Elf_Scn
*elf_find_section(struct elffile
*ef
, const char *name
,
603 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
604 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
605 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
607 secname
= elf_strptr(ef
->elf
, ef
->ehdr
->e_shstrndx
,
609 if (strcmp(secname
, name
))
618 static Elf_Scn
*elf_find_addr(struct elffile
*ef
, uint64_t addr
, size_t *idx
)
622 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
623 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
624 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
626 /* virtual address is kinda meaningless for TLS sections */
627 if (shdr
->sh_flags
& SHF_TLS
)
629 if (addr
< shdr
->sh_addr
||
630 addr
>= shdr
->sh_addr
+ shdr
->sh_size
)
644 static const char elffile_doc
[] =
645 "Represents an ELF file\n"
647 "Args: filename to load\n"
649 "To access raw file contents, use subscript notation, e.g.\n"
651 "To read null terminated C strings, replace the end with str:\n"
653 "(struct elffile * in elf_py.c)";
656 #define member(name, type, doc) \
658 (char *)#name, type, offsetof(struct elffile, name), READONLY, \
659 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
661 static PyMemberDef members_elffile
[] = {
662 member(filename
, T_STRING
,
663 "Original file name as given when opening"),
664 member(elfclass
, T_INT
,
665 "ELF class (architecture bit size)\n\n"
666 "Either 32 or 64, straight integer."),
667 member(bigendian
, T_BOOL
,
668 "ELF file is big-endian\n\n"
669 "All internal ELF structures are automatically converted."),
670 member(has_symbols
, T_BOOL
,
671 "A symbol section is present\n\n"
672 "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
678 static PyObject
*elffile_secbyidx(struct elffile
*w
, Elf_Scn
*scn
, size_t idx
)
684 scn
= elf_getscn(w
->elf
, idx
);
685 if (!scn
|| idx
>= w
->n_sect
)
688 if (!w
->sects
[idx
]) {
689 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
691 name
= elf_strptr(w
->elf
, w
->ehdr
->e_shstrndx
, shdr
->sh_name
);
692 w
->sects
[idx
] = elfsect_wrap(w
, scn
, idx
, name
);
700 static PyObject
*elffile_get_section(PyObject
*self
, PyObject
*args
)
703 struct elffile
*w
= (struct elffile
*)self
;
707 if (!PyArg_ParseTuple(args
, "s", &name
))
710 scn
= elf_find_section(w
, name
, &idx
);
711 return elffile_secbyidx(w
, scn
, idx
);
714 static PyObject
*elffile_get_section_addr(PyObject
*self
, PyObject
*args
)
716 unsigned long long addr
;
717 struct elffile
*w
= (struct elffile
*)self
;
721 if (!PyArg_ParseTuple(args
, "K", &addr
))
724 scn
= elf_find_addr(w
, addr
, &idx
);
725 return elffile_secbyidx(w
, scn
, idx
);
728 static PyObject
*elffile_get_section_idx(PyObject
*self
, PyObject
*args
)
730 unsigned long long idx
;
731 struct elffile
*w
= (struct elffile
*)self
;
733 if (!PyArg_ParseTuple(args
, "K", &idx
))
736 return elffile_secbyidx(w
, NULL
, idx
);
739 static PyObject
*elffile_get_symbol(PyObject
*self
, PyObject
*args
)
741 const char *name
, *symname
;
742 struct elffile
*w
= (struct elffile
*)self
;
746 if (!PyArg_ParseTuple(args
, "s", &name
))
749 for (i
= 0; i
< w
->nsym
; i
++) {
750 sym
= gelf_getsym(w
->symdata
, i
, &_sym
);
751 if (sym
->st_name
== 0)
753 symname
= elf_strptr(w
->elf
, w
->symstridx
, sym
->st_name
);
754 if (strcmp(symname
, name
))
758 Elf_Scn
*scn
= elf_getscn(w
->elf
, sym
->st_shndx
);
761 pysect
= elffile_secbyidx(w
, scn
, sym
->st_shndx
);
766 return Py_BuildValue("sKN", symname
,
767 (unsigned long long)sym
->st_value
, pysect
);
772 static PyObject
*elffile_getreloc(PyObject
*self
, PyObject
*args
)
774 struct elffile
*w
= (struct elffile
*)self
;
775 struct elfreloc
*relw
;
779 if (!PyArg_ParseTuple(args
, "k", &offs
))
782 relw
= elfrelocs_get(&w
->dynrelocs
, offs
);
786 ret
= (PyObject
*)relw
;
791 static PyObject
*elffile_find_note(PyObject
*self
, PyObject
*args
)
793 #if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
797 struct elffile
*w
= (struct elffile
*)self
;
800 if (!PyArg_ParseTuple(args
, "ss", &owner
, &ids
))
803 if (strlen((char *)ids
) != 4) {
804 PyErr_SetString(PyExc_ValueError
,
805 "ELF note ID must be exactly 4-byte string");
809 id
= (ids
[0] << 24) | (ids
[1] << 16) | (ids
[2] << 8) | ids
[3];
811 id
= (ids
[3] << 24) | (ids
[2] << 16) | (ids
[1] << 8) | ids
[0];
813 for (i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
814 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
818 if (phdr
->p_type
!= PT_NOTE
)
821 notedata
= elf_getdata_rawchunk(w
->elf
, phdr
->p_offset
,
822 phdr
->p_filesz
, ELF_T_NHDR
);
825 size_t nameoffs
, dataoffs
;
828 while ((offset
= gelf_getnote(notedata
, offset
, nhdr
,
829 &nameoffs
, &dataoffs
))) {
830 if (phdr
->p_offset
+ nameoffs
>= w
->len
)
833 const char *name
= w
->mmap
+ phdr
->p_offset
+ nameoffs
;
835 if (strcmp(name
, owner
))
837 if (id
!= nhdr
->n_type
)
842 s
= PyLong_FromUnsignedLongLong(
843 phdr
->p_vaddr
+ dataoffs
);
844 e
= PyLong_FromUnsignedLongLong(
845 phdr
->p_vaddr
+ dataoffs
+ nhdr
->n_descsz
);
846 return PySlice_New(s
, e
, NULL
);
853 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
854 static bool elffile_virt2file(struct elffile
*w
, GElf_Addr virt
,
859 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
860 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
862 if (phdr
->p_type
!= PT_LOAD
)
865 if (virt
< phdr
->p_vaddr
866 || virt
>= phdr
->p_vaddr
+ phdr
->p_memsz
)
869 if (virt
>= phdr
->p_vaddr
+ phdr
->p_filesz
)
872 *offs
= virt
- phdr
->p_vaddr
+ phdr
->p_offset
;
878 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
880 static PyObject
*elffile_subscript(PyObject
*self
, PyObject
*key
)
882 Py_ssize_t start
, stop
, step
;
883 PySliceObject
*slice
;
884 struct elffile
*w
= (struct elffile
*)self
;
887 if (!PySlice_Check(key
)) {
888 PyErr_SetString(PyExc_IndexError
,
889 "ELFFile subscript must be slice");
892 slice
= (PySliceObject
*)key
;
895 if (PyLong_Check(slice
->stop
)) {
896 start
= PyLong_AsSsize_t(slice
->start
);
897 if (PyErr_Occurred())
899 if (slice
->stop
!= Py_None
) {
900 stop
= PyLong_AsSsize_t(slice
->stop
);
901 if (PyErr_Occurred())
904 if (slice
->step
!= Py_None
) {
905 step
= PyLong_AsSsize_t(slice
->step
);
906 if (PyErr_Occurred())
910 if (slice
->stop
!= (void *)&PyUnicode_Type
911 || !PyLong_Check(slice
->start
)) {
912 PyErr_SetString(PyExc_IndexError
, "invalid slice");
917 start
= PyLong_AsUnsignedLongLong(slice
->start
);
920 PyErr_SetString(PyExc_IndexError
,
921 "ELFFile subscript slice step must be 1");
925 GElf_Addr xstart
= start
, xstop
= stop
;
927 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
928 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
930 if (phdr
->p_type
!= PT_LOAD
)
933 if (xstart
< phdr
->p_vaddr
934 || xstart
>= phdr
->p_vaddr
+ phdr
->p_memsz
)
936 if (!str
&& (xstop
< phdr
->p_vaddr
937 || xstop
> phdr
->p_vaddr
+ phdr
->p_memsz
)) {
938 PyErr_Format(ELFAccessError
,
939 "access (%llu) beyond end of program header (%llu)",
941 (long long)(phdr
->p_vaddr
+
946 xstart
= xstart
- phdr
->p_vaddr
+ phdr
->p_offset
;
949 xstop
= strlen(w
->mmap
+ xstart
);
951 xstop
= xstop
- phdr
->p_vaddr
+ phdr
->p_offset
;
953 Py_ssize_t pylen
= xstop
- xstart
;
955 #if PY_MAJOR_VERSION >= 3
956 return Py_BuildValue("y#", w
->mmap
+ xstart
, pylen
);
958 return Py_BuildValue("s#", w
->mmap
+ xstart
, pylen
);
962 return PyErr_Format(ELFAccessError
,
963 "virtual address (%llu) not found in program headers",
967 static PyMethodDef methods_elffile
[] = {
968 {"find_note", elffile_find_note
, METH_VARARGS
,
969 "find specific note entry"},
970 {"getreloc", elffile_getreloc
, METH_VARARGS
,
972 {"get_symbol", elffile_get_symbol
, METH_VARARGS
,
973 "find symbol by name"},
974 {"get_section", elffile_get_section
, METH_VARARGS
,
975 "find section by name"},
976 {"get_section_addr", elffile_get_section_addr
, METH_VARARGS
,
977 "find section by address"},
978 {"get_section_idx", elffile_get_section_idx
, METH_VARARGS
,
979 "find section by index"},
983 static PyObject
*elffile_load(PyTypeObject
*type
, PyObject
*args
,
986 static void elffile_free(void *arg
)
988 struct elffile
*w
= arg
;
991 munmap(w
->mmap
, w
->len
);
995 static PyMappingMethods mp_elffile
= {
996 .mp_subscript
= elffile_subscript
,
999 static PyTypeObject typeobj_elffile
= {
1000 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFFile",
1001 .tp_basicsize
= sizeof(struct elffile
),
1002 .tp_flags
= Py_TPFLAGS_DEFAULT
,
1003 .tp_doc
= elffile_doc
,
1004 .tp_new
= elffile_load
,
1005 .tp_free
= elffile_free
,
1006 .tp_as_mapping
= &mp_elffile
,
1007 .tp_members
= members_elffile
,
1008 .tp_methods
= methods_elffile
,
1011 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1012 static char *elfdata_strptr(Elf_Data
*data
, size_t offset
)
1016 if (offset
>= data
->d_size
)
1019 p
= (char *)data
->d_buf
+ offset
;
1020 if (strnlen(p
, data
->d_size
- offset
) >= data
->d_size
- offset
)
1026 static void elffile_add_dynreloc(struct elffile
*w
, Elf_Data
*reldata
,
1027 size_t entries
, Elf_Data
*symdata
,
1028 Elf_Data
*strdata
, Elf_Type typ
)
1032 for (i
= 0; i
< entries
; i
++) {
1033 struct elfreloc
*relw
;
1037 GElf_Addr rel_offs
= 0;
1039 relw
= (struct elfreloc
*)typeobj_elfreloc
.tp_alloc(
1040 &typeobj_elfreloc
, 0);
1043 if (typ
== ELF_T_REL
) {
1044 GElf_Rel _rel
, *rel
;
1047 rel
= gelf_getrel(reldata
, i
, &_rel
);
1048 relw
->rela
= &relw
->_rela
;
1049 relw
->rela
->r_offset
= rel
->r_offset
;
1050 relw
->rela
->r_info
= rel
->r_info
;
1051 relw
->rela
->r_addend
= 0;
1052 relw
->relative
= true;
1054 /* REL uses the pointer contents itself instead of the
1055 * RELA addend field :( ... theoretically this could
1056 * be some weird platform specific encoding, but since
1057 * we only care about data relocations it should
1058 * always be a pointer...
1060 if (elffile_virt2file(w
, rel
->r_offset
, &offs
)) {
1063 /* NB: this endian-converts! */
1064 ptr
= elf_getdata_rawchunk(w
->elf
, offs
,
1069 char *dst
= (char *)&rel_offs
;
1071 /* sigh. it endian-converts. but
1072 * doesn't size-convert.
1074 if (BYTE_ORDER
== BIG_ENDIAN
&&
1075 ptr
->d_size
< sizeof(rel_offs
))
1076 dst
+= sizeof(rel_offs
) -
1079 memcpy(dst
, ptr
->d_buf
, ptr
->d_size
);
1081 relw
->relative
= false;
1082 relw
->rela
->r_addend
= rel_offs
;
1086 relw
->rela
= gelf_getrela(reldata
, i
, &relw
->_rela
);
1089 symidx
= relw
->symidx
= GELF_R_SYM(rela
->r_info
);
1090 sym
= relw
->sym
= gelf_getsym(symdata
, symidx
, &relw
->_sym
);
1092 relw
->symname
= elfdata_strptr(strdata
, sym
->st_name
);
1093 relw
->symvalid
= GELF_ST_TYPE(sym
->st_info
)
1095 relw
->unresolved
= sym
->st_shndx
== SHN_UNDEF
;
1096 relw
->st_value
= sym
->st_value
;
1098 relw
->symname
= NULL
;
1099 relw
->symvalid
= false;
1100 relw
->unresolved
= false;
1104 if (typ
== ELF_T_RELA
)
1105 debugf("dynrela @ %016llx sym %5llu %016llx %s\n",
1106 (long long)rela
->r_offset
,
1107 (unsigned long long)symidx
,
1108 (long long)rela
->r_addend
, relw
->symname
);
1110 debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n",
1111 (long long)rela
->r_offset
,
1112 (unsigned long long)symidx
,
1113 (unsigned long long)rel_offs
, relw
->symname
);
1115 elfrelocs_add(&w
->dynrelocs
, relw
);
1119 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
1121 /* primary (only, really) entry point to anything in this module */
1122 static PyObject
*elffile_load(PyTypeObject
*type
, PyObject
*args
,
1125 const char *filename
;
1126 static const char * const kwnames
[] = {"filename", NULL
};
1131 w
= (struct elffile
*)typeobj_elffile
.tp_alloc(&typeobj_elffile
, 0);
1135 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "s", (char **)kwnames
,
1139 w
->filename
= strdup(filename
);
1140 fd
= open(filename
, O_RDONLY
| O_NOCTTY
);
1141 if (fd
< 0 || fstat(fd
, &st
)) {
1142 PyErr_SetFromErrnoWithFilename(PyExc_OSError
, filename
);
1146 w
->len
= st
.st_size
;
1147 w
->mmap
= mmap(NULL
, st
.st_size
, PROT_READ
, MAP_SHARED
, fd
, 0);
1149 PyErr_SetFromErrnoWithFilename(PyExc_IOError
, filename
);
1154 w
->mmend
= w
->mmap
+ st
.st_size
;
1156 if (w
->len
< EI_NIDENT
|| memcmp(w
->mmap
, ELFMAG
, SELFMAG
)) {
1157 PyErr_SetString(ELFFormatError
, "invalid ELF signature");
1161 switch (w
->mmap
[EI_CLASS
]) {
1169 PyErr_SetString(ELFFormatError
, "invalid ELF class");
1172 switch (w
->mmap
[EI_DATA
]) {
1174 w
->bigendian
= false;
1177 w
->bigendian
= true;
1180 PyErr_SetString(ELFFormatError
, "invalid ELF byte order");
1184 w
->elf
= elf_memory(w
->mmap
, w
->len
);
1187 w
->ehdr
= gelf_getehdr(w
->elf
, &w
->_ehdr
);
1191 for (size_t i
= 0; i
< w
->ehdr
->e_shnum
; i
++) {
1192 Elf_Scn
*scn
= elf_getscn(w
->elf
, i
);
1193 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
1195 if (shdr
->sh_type
== SHT_SYMTAB
) {
1197 w
->nsym
= shdr
->sh_size
/ shdr
->sh_entsize
;
1198 w
->symdata
= elf_getdata(scn
, NULL
);
1199 w
->symstridx
= shdr
->sh_link
;
1203 w
->has_symbols
= w
->symtab
&& w
->symstridx
;
1204 elfrelocs_init(&w
->dynrelocs
);
1206 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1207 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
1208 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
1210 if (phdr
->p_type
!= PT_DYNAMIC
)
1213 Elf_Data
*dyndata
= elf_getdata_rawchunk(w
->elf
,
1214 phdr
->p_offset
, phdr
->p_filesz
, ELF_T_DYN
);
1216 GElf_Addr dynrela
= 0, dynrel
= 0, symtab
= 0, strtab
= 0;
1217 size_t dynrelasz
= 0, dynrelaent
= 0;
1218 size_t dynrelsz
= 0, dynrelent
= 0;
1220 GElf_Dyn _dyn
, *dyn
;
1222 for (size_t j
= 0;; j
++) {
1223 dyn
= gelf_getdyn(dyndata
, j
, &_dyn
);
1225 if (dyn
->d_tag
== DT_NULL
)
1228 switch (dyn
->d_tag
) {
1230 symtab
= dyn
->d_un
.d_ptr
;
1234 strtab
= dyn
->d_un
.d_ptr
;
1237 strsz
= dyn
->d_un
.d_val
;
1241 dynrela
= dyn
->d_un
.d_ptr
;
1244 dynrelasz
= dyn
->d_un
.d_val
;
1247 dynrelaent
= dyn
->d_un
.d_val
;
1251 dynrel
= dyn
->d_un
.d_ptr
;
1254 dynrelsz
= dyn
->d_un
.d_val
;
1257 dynrelent
= dyn
->d_un
.d_val
;
1263 Elf_Data
*symdata
= NULL
, *strdata
= NULL
;
1265 if (elffile_virt2file(w
, symtab
, &offset
))
1266 symdata
= elf_getdata_rawchunk(w
->elf
, offset
,
1269 if (elffile_virt2file(w
, strtab
, &offset
))
1270 strdata
= elf_getdata_rawchunk(w
->elf
, offset
,
1275 if (dynrela
&& dynrelasz
&& dynrelaent
1276 && elffile_virt2file(w
, dynrela
, &offset
)) {
1277 Elf_Data
*reladata
= NULL
;
1279 debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela
,
1280 (long long)offset
, (long long)dynrelasz
);
1282 reladata
= elf_getdata_rawchunk(w
->elf
, offset
,
1283 dynrelasz
, ELF_T_RELA
);
1285 c
= dynrelasz
/ dynrelaent
;
1286 elffile_add_dynreloc(w
, reladata
, c
, symdata
, strdata
,
1290 if (dynrel
&& dynrelsz
&& dynrelent
1291 && elffile_virt2file(w
, dynrel
, &offset
)) {
1292 Elf_Data
*reldata
= NULL
;
1294 debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel
,
1295 (long long)offset
, (long long)dynrelsz
);
1297 reldata
= elf_getdata_rawchunk(w
->elf
, offset
, dynrelsz
,
1300 c
= dynrelsz
/ dynrelent
;
1301 elffile_add_dynreloc(w
, reldata
, c
, symdata
, strdata
,
1307 w
->sects
= calloc(sizeof(PyObject
*), w
->ehdr
->e_shnum
);
1308 w
->n_sect
= w
->ehdr
->e_shnum
;
1310 return (PyObject
*)w
;
1315 PyErr_Format(ELFFormatError
, "libelf error %d: %s",
1316 err
, elf_errmsg(err
));
1324 static PyObject
*elfpy_debug(PyObject
*self
, PyObject
*args
)
1328 if (!PyArg_ParseTuple(args
, "p", &arg
))
1336 static PyMethodDef methods_elfpy
[] = {
1337 {"elfpy_debug", elfpy_debug
, METH_VARARGS
, "switch debuging on/off"},
1341 bool elf_py_init(PyObject
*pymod
)
1343 if (PyType_Ready(&typeobj_elffile
) < 0)
1345 if (PyType_Ready(&typeobj_elfsect
) < 0)
1347 if (PyType_Ready(&typeobj_elfreloc
) < 0)
1349 if (elf_version(EV_CURRENT
) == EV_NONE
)
1352 #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5
1353 PyModule_AddFunctions(pymod
, methods_elfpy
);
1355 (void)methods_elfpy
;
1358 ELFFormatError
= PyErr_NewException("_clippy.ELFFormatError",
1359 PyExc_ValueError
, NULL
);
1360 PyModule_AddObject(pymod
, "ELFFormatError", ELFFormatError
);
1361 ELFAccessError
= PyErr_NewException("_clippy.ELFAccessError",
1362 PyExc_IndexError
, NULL
);
1363 PyModule_AddObject(pymod
, "ELFAccessError", ELFAccessError
);
1365 Py_INCREF(&typeobj_elffile
);
1366 PyModule_AddObject(pymod
, "ELFFile", (PyObject
*)&typeobj_elffile
);
1367 Py_INCREF(&typeobj_elfsect
);
1368 PyModule_AddObject(pymod
, "ELFSection", (PyObject
*)&typeobj_elfsect
);
1369 Py_INCREF(&typeobj_elfreloc
);
1370 PyModule_AddObject(pymod
, "ELFReloc", (PyObject
*)&typeobj_elfreloc
);