2 * fast ELF file accessor
3 * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along
16 * with this program; see the file COPYING; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 /* Note: this wrapper is intended to be used as build-time helper. While
21 * it should be generally correct and proper, there may be the occasional
22 * memory leak or SEGV for things that haven't been well-tested.
24 * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used
25 * / ! \ in FRR to read files created by its own build. Don't take it out
26 * /_____\ of FRR and use it to parse random ELF files you found somewhere.
28 * If you're working with this code (or even reading it), you really need to
29 * read a bunch of the ELF specs. There's no way around it, things in here
30 * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
34 * https://refspecs.linuxfoundation.org/elf/elf.pdf
35 * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
36 * Recommended reading:
37 * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
39 * The core ELF spec is *not* enough, you should read at least one of the
40 * processor specific (psABI) docs. They define what & how relocations work.
41 * Luckily we don't need to care about the processor specifics since this only
42 * does data relocations, but without looking at the psABI, some things aren't
46 /* the API of this module roughly follows a very small subset of the one
47 * provided by the python elfutils package, which unfortunately is painfully
51 #define PY_SSIZE_T_CLEAN
57 #include "structmember.h"
61 #include <sys/types.h>
66 #if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
67 /* Solaris libelf bails otherwise ... */
68 #undef _FILE_OFFSET_BITS
69 #define _FILE_OFFSET_BITS 32
85 fprintf(stderr, __VA_ARGS__); \
/* Module-level exception objects.  Both are assigned in elf_py_init() through
 * PyErr_NewException() and are raised by the accessors in this file
 * (ELFFormatError while loading, ELFAccessError from the subscript handlers).
 */
89 static PyObject
*ELFFormatError
;
90 static PyObject
*ELFAccessError
;
92 /* most objects can only be created as return values from one of the methods */
/* tp_new handler installed on the ELFSection and ELFReloc type objects in this
 * file: it sets a ValueError so these types cannot be instantiated directly
 * from Python.  The arguments are not referenced in the visible statements.
 */
93 static PyObject
*refuse_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
95 PyErr_SetString(PyExc_ValueError
,
96 "cannot create instances of this type");
/* Pre-declares the elfrelocs container that DECLARE_HASH() completes further
 * down in this file.  The macro is defined outside this file view; presumably
 * it only introduces the head/item types -- confirm against its definition.
 */
103 PREDECL_HASH(elfrelocs
);
105 /* ELFFile and ELFSection intentionally share some behaviour, particularly
106 * subscript[123:456] access to file data. This is because relocatables
107 * (.o files) do things section-based, but linked executables/libraries do
108 * things file-based. Having the two behave similarly allows simplifying the
114 * overall entry point, instantiated by reading in an ELF file
124 /* note from here on there are several instances of
126 * GElf_Something *x, _x;
128 * this is a pattern used by libelf's generic ELF routines; the _x
129 * field is used to create a copy of the ELF structure from the file
130 * with 32/64bit and endianness adjusted.
133 GElf_Ehdr
*ehdr
, _ehdr
;
135 size_t nsym
, symstridx
;
141 struct elfrelocs_head dynrelocs
;
150 * note that executables and shared libraries can have their section headers
151 * removed, though in practice this is only used as an obfuscation technique.
159 GElf_Shdr _shdr
, *shdr
;
161 unsigned long idx
, len
;
163 struct elfrelocs_head relocs
;
168 * note: relocations in object files (.o) are section-based while relocations
169 * in executables and shared libraries are file-based.
171 * Whenever accessing something that is a pointer in the ELF file, the Python
172 * code needs to check for a relocation; if the pointer is pointing to some
173 * unresolved symbol the file will generally contain 0 bytes. The relocation
174 * will tell what the pointer is actually pointing to.
176 * This represents both static (.o file) and dynamic (.so/exec) relocations.
181 struct elfrelocs_item elfrelocs_item
;
186 /* there's also old-fashioned GElf_Rel; we're converting that to
187 * GElf_Rela in elfsect_add_relocations()
189 GElf_Rela _rela
, *rela
;
194 /* documented below in python docstrings */
195 bool symvalid
, unresolved
, relative
;
196 unsigned long long st_value
;
/* Comparison and hash callbacks for the elfrelocs container, followed by the
 * container declaration itself.  Entries are struct elfreloc objects linked
 * through their elfrelocs_item member; both callbacks are defined below and
 * only look at rela->r_offset.
 */
199 static int elfreloc_cmp(const struct elfreloc
*a
, const struct elfreloc
*b
);
200 static uint32_t elfreloc_hash(const struct elfreloc
*reloc
);
202 DECLARE_HASH(elfrelocs
, struct elfreloc
, elfrelocs_item
,
203 elfreloc_cmp
, elfreloc_hash
);
/* Forward declarations: these functions are referenced by the ELFReloc method
 * and getset tables and by elfreloc_getsection() before their definitions
 * appear further down.
 */
205 static Elf_Scn
*elf_find_addr(struct elffile
*ef
, uint64_t addr
, size_t *idx
);
206 static PyObject
*elffile_secbyidx(struct elffile
*w
, Elf_Scn
*scn
, size_t idx
);
207 static PyObject
*elfreloc_getsection(PyObject
*self
, PyObject
*args
);
208 static PyObject
*elfreloc_getaddend(PyObject
*obj
, void *closure
);
210 /* --- end of declarations -------------------------------------------------- */
/* Docstring text used as tp_doc of the ELFReloc type object. */
216 static const char elfreloc_doc
[] =
217 "Represents an ELF relocation record\n"
219 "(struct elfreloc * in elf_py.c)";
/* Helper for the PyMemberDef table that follows: expands to one read-only
 * entry describing a field of struct elfreloc, and appends the field name and
 * the member type name to the doc text.
 */
221 #define member(name, type, doc) \
223 (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
224 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
/* Read-only attributes of ELFReloc objects, mapped directly onto fields of
 * struct elfreloc through the member() helper.  Referenced as tp_members of
 * typeobj_elfreloc.
 */
226 static PyMemberDef members_elfreloc
[] = {
227 member(symname
, T_STRING
,
228 "Name of symbol this relocation refers to.\n"
230 "Will frequently be `None` in executables and shared libraries."
232 member(symvalid
, T_BOOL
,
233 "Target symbol has a valid type, i.e. not STT_NOTYPE"),
/* NOTE(review): both loaders in this file assign this flag from
 * (sym->st_shndx == SHN_UNDEF), which reads as the opposite of the docstring
 * text on this entry -- confirm which meaning is intended.
 */
234 member(unresolved
, T_BOOL
,
235 "Target symbol refers to an existing section"),
236 member(relative
, T_BOOL
,
237 "Relocation is a REL (not RELA) record and thus relative."),
238 member(st_value
, T_ULONGLONG
,
239 "Target symbol's value, if known\n\n"
240 "Will be zero for unresolved/external symbols."),
/* Computed attributes of ELFReloc objects: r_addend is produced by
 * elfreloc_getaddend() and has no setter in this entry.
 */
245 static PyGetSetDef getset_elfreloc
[] = {
246 { .name
= (char *)"r_addend", .get
= elfreloc_getaddend
, .doc
=
247 (char *)"Relocation addend value"},
/* Method table of ELFReloc objects; getsection takes positional arguments
 * (METH_VARARGS) and is implemented by elfreloc_getsection().
 */
251 static PyMethodDef methods_elfreloc
[] = {
252 {"getsection", elfreloc_getsection
, METH_VARARGS
,
253 "Find relocation target's ELF section\n\n"
254 "Args: address of relocatee (TODO: fix/remove?)\n"
255 "Returns: ELFSection or None\n\n"
256 "Not possible if section headers have been stripped."},
/* Ordering callback for the elfrelocs container: compares two relocations by
 * rela->r_offset only (less-than test first, then greater-than test).
 */
260 static int elfreloc_cmp(const struct elfreloc
*a
, const struct elfreloc
*b
)
262 if (a
->rela
->r_offset
< b
->rela
->r_offset
)
264 if (a
->rela
->r_offset
> b
->rela
->r_offset
)
/* Hash callback for the elfrelocs container: hashes the bytes of
 * rela->r_offset with jhash(), so it uses the same key as elfreloc_cmp().
 */
269 static uint32_t elfreloc_hash(const struct elfreloc
*reloc
)
271 return jhash(&reloc
->rela
->r_offset
, sizeof(reloc
->rela
->r_offset
),
/* Looks up the relocation stored for a given offset in the container at head.
 * A stack-allocated dummy record is used as the search key: only its
 * rela->r_offset is filled in, which is the only field the compare and hash
 * callbacks read.  Returns whatever elfrelocs_find() returns.
 */
275 static struct elfreloc
*elfrelocs_get(struct elfrelocs_head
*head
,
278 struct elfreloc dummy
;
/* point rela at the embedded storage so r_offset can be set through it */
280 dummy
.rela
= &dummy
._rela
;
281 dummy
.rela
->r_offset
= offset
;
282 return elfrelocs_find(head
, &dummy
);
/* Python method ELFReloc.getsection(value): returns the section object the
 * relocation points into.
 *
 * One unsigned long argument is parsed (format k).  When the relocation has
 * no symbol (symidx == 0), a target address is computed from the addend, plus
 * the caller-supplied value for records flagged relative, and the section is
 * located by address with elf_find_addr().  Otherwise the section index is
 * taken from the symbol (st_shndx) and resolved by elffile_secbyidx().
 */
285 static PyObject
*elfreloc_getsection(PyObject
*self
, PyObject
*args
)
287 struct elfreloc
*w
= (struct elfreloc
*)self
;
290 if (!PyArg_ParseTuple(args
, "k", &data
))
/* symbol index 0: no symbol, fall back to an address based lookup */
296 if (w
->symidx
== 0) {
300 data
= (w
->relative
? data
: 0) + w
->rela
->r_addend
;
301 scn
= elf_find_addr(w
->es
->ef
, data
, &idx
);
304 return elffile_secbyidx(w
->es
->ef
, scn
, idx
);
/* symbol present: let elffile_secbyidx() resolve the section from its index */
306 return elffile_secbyidx(w
->es
->ef
, NULL
, w
->sym
->st_shndx
);
/* Getter for the r_addend attribute: returns rela->r_addend as a Python
 * integer.  The closure argument is not referenced in the visible statements.
 *
 * NOTE(review): the value is cast to unsigned long long and built with the K
 * format, while the debug output in this file prints r_addend as (long long).
 * Negative addends would presumably surface as large positive integers --
 * confirm whether that is intended.
 */
309 static PyObject
*elfreloc_getaddend(PyObject
*obj
, void *closure
)
311 struct elfreloc
*w
= (struct elfreloc
*)obj
;
313 return Py_BuildValue("K", (unsigned long long)w
->rela
->r_addend
);
/* tp_repr for ELFReloc: formats the relocation offset, a symbol name when
 * symname is set and non-empty, and the addend.  Offset and addend are both
 * cast to unsigned long for the %lu conversions.
 */
316 static PyObject
*elfreloc_repr(PyObject
*arg
)
318 struct elfreloc
*w
= (struct elfreloc
*)arg
;
320 return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>",
321 (unsigned long)w
->rela
->r_offset
,
322 (w
->symname
&& w
->symname
[0]) ? w
->symname
324 (unsigned long)w
->rela
->r_addend
);
/* tp_free slot of the ELFReloc type object; arg is the struct elfreloc being
 * released.
 */
327 static void elfreloc_free(void *arg
)
329 struct elfreloc
*w
= arg
;
/* Python type object for _clippy.ELFReloc.  Instances are struct elfreloc,
 * cannot be created from Python code (tp_new is refuse_new) and expose the
 * member, method and getset tables defined above.
 */
334 static PyTypeObject typeobj_elfreloc
= {
335 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFReloc",
336 .tp_basicsize
= sizeof(struct elfreloc
),
337 .tp_flags
= Py_TPFLAGS_DEFAULT
,
338 .tp_doc
= elfreloc_doc
,
339 .tp_new
= refuse_new
,
340 .tp_free
= elfreloc_free
,
341 .tp_repr
= elfreloc_repr
,
342 .tp_members
= members_elfreloc
,
343 .tp_methods
= methods_elfreloc
,
344 .tp_getset
= getset_elfreloc
,
/* Docstring text used as tp_doc of the ELFSection type object; it documents
 * the two subscript forms handled by elfsect_subscript().
 */
351 static const char elfsect_doc
[] =
352 "Represents an ELF section\n"
354 "To access section contents, use subscript notation, e.g.\n"
355 " section[123:456]\n"
356 "To read null terminated C strings, replace the end with str:\n"
357 " section[123:str]\n\n"
358 "(struct elfsect * in elf_py.c)";
/* Forward declaration: the getset table below refers to this getter before
 * its definition.
 */
360 static PyObject
*elfsect_getaddr(PyObject
*self
, void *closure
);
/* Same helper as used for the ELFReloc member table, here expanding to
 * read-only PyMemberDef entries for fields of struct elfsect.
 */
362 #define member(name, type, doc) \
364 (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
365 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
/* Read-only attributes of ELFSection objects (section name, index in the file
 * and length in bytes, per the doc strings), mapped onto struct elfsect
 * fields through the member() helper.
 */
367 static PyMemberDef members_elfsect
[] = {
368 member(name
, T_STRING
,
369 "Section name, e.g. \".text\""),
371 "Section index in file"),
373 "Section length in bytes"),
/* Computed attributes of ELFSection objects: sh_addr is produced by
 * elfsect_getaddr() and has no setter in this entry.
 */
378 static PyGetSetDef getset_elfsect
[] = {
379 { .name
= (char *)"sh_addr", .get
= elfsect_getaddr
, .doc
=
380 (char *)"Section virtual address (mapped program view)"},
/* Getter for the sh_addr attribute: returns the sh_addr field of the section
 * header as an unsigned Python integer.  The closure argument is not
 * referenced in the visible statements.
 */
384 static PyObject
*elfsect_getaddr(PyObject
*self
, void *closure
)
386 struct elfsect
*w
= (struct elfsect
*)self
;
388 return Py_BuildValue("K", (unsigned long long)w
->shdr
->sh_addr
);
/* Python method ELFSection.getreloc(offset): looks up the relocation recorded
 * for a byte offset into this section.
 *
 * One unsigned long argument is parsed (format k).  The lookup key is the
 * offset plus the section sh_addr, i.e. the per-section container is keyed
 * the same way as the r_offset values stored by elfsect_add_relocations().
 * The method doc string states the result is an ELFReloc or None.
 */
392 static PyObject
*elfsect_getreloc(PyObject
*self
, PyObject
*args
)
394 struct elfsect
*w
= (struct elfsect
*)self
;
395 struct elfreloc
*relw
;
399 if (!PyArg_ParseTuple(args
, "k", &offs
))
402 relw
= elfrelocs_get(&w
->relocs
, offs
+ w
->shdr
->sh_addr
);
406 ret
= (PyObject
*)relw
;
/* Method table of ELFSection objects; getreloc takes positional arguments
 * (METH_VARARGS) and is implemented by elfsect_getreloc().
 */
411 static PyMethodDef methods_elfsect
[] = {
412 {"getreloc", elfsect_getreloc
, METH_VARARGS
,
413 "Check for / get relocation at offset into section\n\n"
414 "Args: byte offset into section to check\n"
415 "Returns: ELFReloc or None"},
/* mp_subscript handler of ELFSection: implements section[start:stop] and
 * section[start:str].
 *
 * The key must be a slice, otherwise IndexError is set.  With an integer
 * stop, the slice is normalised against sh_size by PySlice_GetIndicesEx(), a
 * step other than 1 is rejected, and a stop beyond sh_size raises
 * ELFAccessError.  Otherwise the only accepted form has the str type object
 * as stop and an integer start; the data then runs to the first NUL byte,
 * bounded by the end of the mapped file.  The offset is made file-relative by
 * adding sh_offset and the bytes are returned straight from the file mapping.
 *
 * NOTE(review): offs is declared unsigned long but is assigned from
 * PyLong_AsUnsignedLongLong(); where unsigned long is narrower this may
 * truncate -- confirm.
 * NOTE(review): the first ELFAccessError message uses %lu for stop
 * (Py_ssize_t) and for sh_size (compared against a GElf_Xword above); confirm
 * the argument types match the conversions on all targets.
 */
419 static PyObject
*elfsect_subscript(PyObject
*self
, PyObject
*key
)
421 Py_ssize_t start
, stop
, step
, sllen
;
422 struct elfsect
*w
= (struct elfsect
*)self
;
423 PySliceObject
*slice
;
424 unsigned long offs
, len
= ~0UL;
426 if (!PySlice_Check(key
)) {
427 PyErr_SetString(PyExc_IndexError
,
428 "ELFSection subscript must be slice");
431 slice
= (PySliceObject
*)key
;
/* integer stop: ordinary byte range within the section */
432 if (PyLong_Check(slice
->stop
)) {
433 if (PySlice_GetIndicesEx(key
, w
->shdr
->sh_size
,
434 &start
, &stop
, &step
, &sllen
))
438 PyErr_SetString(PyExc_IndexError
,
439 "ELFSection subscript slice step must be 1");
442 if ((GElf_Xword
)stop
> w
->shdr
->sh_size
) {
443 PyErr_Format(ELFAccessError
,
444 "access (%lu) beyond end of section %lu/%s (%lu)",
445 stop
, w
->idx
, w
->name
, w
->shdr
->sh_size
);
/* string form: stop has to be the str type object itself */
452 if (slice
->stop
!= (void *)&PyUnicode_Type
453 || !PyLong_Check(slice
->start
)) {
454 PyErr_SetString(PyExc_IndexError
, "invalid slice");
458 offs
= PyLong_AsUnsignedLongLong(slice
->start
);
/* convert the section-relative offset into a file offset */
462 offs
+= w
->shdr
->sh_offset
;
463 if (offs
> w
->ef
->len
) {
464 PyErr_Format(ELFAccessError
,
465 "access (%lu) beyond end of file (%lu)",
/* length up to the first NUL, never reading past the end of the mapping */
470 len
= strnlen(w
->ef
->mmap
+ offs
, w
->ef
->len
- offs
);
472 Py_ssize_t pylen
= len
;
474 #if PY_MAJOR_VERSION >= 3
475 return Py_BuildValue("y#", w
->ef
->mmap
+ offs
, pylen
);
477 return Py_BuildValue("s#", w
->ef
->mmap
+ offs
, pylen
);
/* Mapping protocol table of ELFSection: only subscript reads are provided. */
481 static PyMappingMethods mp_elfsect
= {
482 .mp_subscript
= elfsect_subscript
,
/* tp_free slot of the ELFSection type object; arg is the struct elfsect being
 * released.
 */
485 static void elfsect_free(void *arg
)
487 struct elfsect
*w
= arg
;
/* tp_repr for ELFSection: formats the section name into the repr string. */
492 static PyObject
*elfsect_repr(PyObject
*arg
)
494 struct elfsect
*w
= (struct elfsect
*)arg
;
496 return PyUnicode_FromFormat("<ELFSection %s>", w
->name
);
/* Python type object for _clippy.ELFSection.  Instances are struct elfsect,
 * cannot be created from Python code (tp_new is refuse_new), support slice
 * subscripts through mp_elfsect and expose the member, method and getset
 * tables defined above.
 */
499 static PyTypeObject typeobj_elfsect
= {
500 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFSection",
501 .tp_basicsize
= sizeof(struct elfsect
),
502 .tp_flags
= Py_TPFLAGS_DEFAULT
,
503 .tp_doc
= elfsect_doc
,
504 .tp_new
= refuse_new
,
505 .tp_free
= elfsect_free
,
506 .tp_repr
= elfsect_repr
,
507 .tp_as_mapping
= &mp_elfsect
,
508 .tp_members
= members_elfsect
,
509 .tp_methods
= methods_elfsect
,
510 .tp_getset
= getset_elfsect
,
/* Loads the entries of one relocation section (rel, with header relhdr) into
 * the relocation container of section wrapper w.
 *
 * The symbol table is found through relhdr->sh_link.  Each entry is wrapped
 * in a newly allocated ELFReloc object.  SHT_REL entries are converted to
 * RELA layout with a zero addend and flagged relative; other entries are read
 * with gelf_getrela().  The r_offset of each entry is tested against
 * [sh_addr, sh_addr + sh_size) of the wrapped section.  Symbol name, type
 * validity, unresolved flag and value are copied from the symbol, or cleared
 * in the alternative branch, before the record is added to w->relocs.
 *
 * NOTE(review): the entry count divides by relhdr->sh_entsize; no zero check
 * is visible here.  The file header states this code is not meant for
 * untrusted ELF input -- confirm that assumption holds for all callers.
 */
513 static void elfsect_add_relocations(struct elfsect
*w
, Elf_Scn
*rel
,
517 Elf_Scn
*symtab
= elf_getscn(w
->ef
->elf
, relhdr
->sh_link
);
518 GElf_Shdr _symhdr
, *symhdr
= gelf_getshdr(symtab
, &_symhdr
);
519 Elf_Data
*symdata
= elf_getdata(symtab
, NULL
);
520 Elf_Data
*reldata
= elf_getdata(rel
, NULL
);
522 entries
= relhdr
->sh_size
/ relhdr
->sh_entsize
;
523 for (i
= 0; i
< entries
; i
++) {
524 struct elfreloc
*relw
;
529 relw
= (struct elfreloc
*)typeobj_elfreloc
.tp_alloc(
530 &typeobj_elfreloc
, 0);
/* REL records carry no addend field: store them in RELA form, addend 0 */
533 if (relhdr
->sh_type
== SHT_REL
) {
536 rel
= gelf_getrel(reldata
, i
, &_rel
);
537 relw
->rela
= &relw
->_rela
;
538 relw
->rela
->r_offset
= rel
->r_offset
;
539 relw
->rela
->r_info
= rel
->r_info
;
540 relw
->rela
->r_addend
= 0;
541 relw
->relative
= true;
543 relw
->rela
= gelf_getrela(reldata
, i
, &relw
->_rela
);
/* range test of the relocation offset against the wrapped section */
546 if (rela
->r_offset
< w
->shdr
->sh_addr
547 || rela
->r_offset
>= w
->shdr
->sh_addr
+ w
->shdr
->sh_size
)
/* resolve the symbol this relocation refers to */
550 symidx
= relw
->symidx
= GELF_R_SYM(rela
->r_info
);
551 sym
= relw
->sym
= gelf_getsym(symdata
, symidx
, &relw
->_sym
);
553 relw
->symname
= elf_strptr(w
->ef
->elf
, symhdr
->sh_link
,
555 relw
->symvalid
= GELF_ST_TYPE(sym
->st_info
)
557 relw
->unresolved
= sym
->st_shndx
== SHN_UNDEF
;
558 relw
->st_value
= sym
->st_value
;
560 relw
->symname
= NULL
;
561 relw
->symvalid
= false;
562 relw
->unresolved
= false;
566 debugf("reloc @ %016llx sym %5llu %016llx %s\n",
567 (long long)rela
->r_offset
, (unsigned long long)symidx
,
568 (long long)rela
->r_addend
, relw
->symname
);
570 elfrelocs_add(&w
->relocs
, relw
);
575 * bindings & loading code between ELFFile and ELFSection
/* Creates the ELFSection wrapper object for section scn with index idx of
 * file ef (called from elffile_secbyidx(), which also passes the name).
 *
 * Allocates the Python object, loads the section header into the wrapper,
 * records the section size and initialises the relocation container.  It then
 * walks all e_shnum section headers of the file and feeds every SHT_RELA or
 * SHT_REL section to elfsect_add_relocations(); the sh_info test below
 * compares a non-zero sh_info against idx, presumably to skip relocation
 * sections that belong to a different section -- confirm against the full
 * source.  Returns the wrapper as a PyObject pointer.
 */
578 static PyObject
*elfsect_wrap(struct elffile
*ef
, Elf_Scn
*scn
, size_t idx
,
584 w
= (struct elfsect
*)typeobj_elfsect
.tp_alloc(&typeobj_elfsect
, 0);
591 w
->shdr
= gelf_getshdr(scn
, &w
->_shdr
);
592 w
->len
= w
->shdr
->sh_size
;
594 elfrelocs_init(&w
->relocs
);
/* scan every section header for relocation sections */
596 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
597 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
598 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
600 if (shdr
->sh_type
!= SHT_RELA
&& shdr
->sh_type
!= SHT_REL
)
602 if (shdr
->sh_info
&& shdr
->sh_info
!= idx
)
604 elfsect_add_relocations(w
, scn
, shdr
);
607 return (PyObject
*)w
;
/* Linear search over all e_shnum sections of ef for the one called name.
 * Section names are read from the section header string table
 * (e_shstrndx) and compared with strcmp().  Callers pass a pointer as third
 * argument (see elffile_get_section()); presumably it receives the index of
 * the match -- confirm against the full source.
 */
610 static Elf_Scn
*elf_find_section(struct elffile
*ef
, const char *name
,
616 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
617 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
618 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
620 secname
= elf_strptr(ef
->elf
, ef
->ehdr
->e_shstrndx
,
622 if (strcmp(secname
, name
))
/* Linear search over all e_shnum sections of ef for one whose address range
 * [sh_addr, sh_addr + sh_size) is tested against addr.  Callers pass idx to
 * obtain the section index alongside the returned section handle (see
 * elffile_get_section_addr()).
 */
631 static Elf_Scn
*elf_find_addr(struct elffile
*ef
, uint64_t addr
, size_t *idx
)
635 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
636 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
637 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
/* addr outside this section: below its start or at/after its end */
639 if (addr
< shdr
->sh_addr
||
640 addr
>= shdr
->sh_addr
+ shdr
->sh_size
)
/* Docstring text used as tp_doc of the ELFFile type object. */
654 static const char elffile_doc
[] =
655 "Represents an ELF file\n"
657 "Args: filename to load\n"
659 "To access raw file contents, use subscript notation, e.g.\n"
661 "To read null terminated C strings, replace the end with str:\n"
663 "(struct elffile * in elf_py.c)";
/* Same helper as used for the other member tables, here expanding to
 * read-only PyMemberDef entries for fields of struct elffile.
 */
666 #define member(name, type, doc) \
668 (char *)#name, type, offsetof(struct elffile, name), READONLY, \
669 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
/* Read-only attributes of ELFFile objects, mapped onto struct elffile fields
 * through the member() helper.  bigendian and has_symbols are filled in by
 * elffile_load().
 */
671 static PyMemberDef members_elffile
[] = {
672 member(filename
, T_STRING
,
673 "Original file name as given when opening"),
674 member(elfclass
, T_INT
,
675 "ELF class (architecture bit size)\n\n"
676 "Either 32 or 64, straight integer."),
677 member(bigendian
, T_BOOL
,
678 "ELF file is big-endian\n\n"
679 "All internal ELF structures are automatically converted."),
680 member(has_symbols
, T_BOOL
,
681 "A symbol section is present\n\n"
682 "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
/* Returns the ELFSection object for section index idx of file w, creating it
 * on first use and caching it in w->sects[idx].
 *
 * Some callers pass scn as NULL (see elffile_get_section_idx()); the section
 * handle is then obtained from idx with elf_getscn().  A missing section or
 * an index at or beyond n_sect is handled by the check below.  For a new
 * wrapper the section name is read from the section header string table and
 * handed to elfsect_wrap().
 */
688 static PyObject
*elffile_secbyidx(struct elffile
*w
, Elf_Scn
*scn
, size_t idx
)
694 scn
= elf_getscn(w
->elf
, idx
);
695 if (!scn
|| idx
>= w
->n_sect
)
/* not wrapped yet: build the ELFSection and remember it */
698 if (!w
->sects
[idx
]) {
699 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
701 name
= elf_strptr(w
->elf
, w
->ehdr
->e_shstrndx
, shdr
->sh_name
);
702 w
->sects
[idx
] = elfsect_wrap(w
, scn
, idx
, name
);
/* Python method ELFFile.get_section(name): parses one string argument, finds
 * the section by name with elf_find_section() and returns the result of
 * elffile_secbyidx() for it.
 */
710 static PyObject
*elffile_get_section(PyObject
*self
, PyObject
*args
)
713 struct elffile
*w
= (struct elffile
*)self
;
717 if (!PyArg_ParseTuple(args
, "s", &name
))
720 scn
= elf_find_section(w
, name
, &idx
);
721 return elffile_secbyidx(w
, scn
, idx
);
/* Python method ELFFile.get_section_addr(addr): parses one unsigned long long
 * argument (format K), finds the section by address with elf_find_addr() and
 * returns the result of elffile_secbyidx() for it.
 */
724 static PyObject
*elffile_get_section_addr(PyObject
*self
, PyObject
*args
)
726 unsigned long long addr
;
727 struct elffile
*w
= (struct elffile
*)self
;
731 if (!PyArg_ParseTuple(args
, "K", &addr
))
734 scn
= elf_find_addr(w
, addr
, &idx
);
735 return elffile_secbyidx(w
, scn
, idx
);
/* Python method ELFFile.get_section_idx(idx): parses one unsigned long long
 * argument (format K) and returns the result of elffile_secbyidx() for that
 * index, passing NULL as the section handle so it is looked up there.
 */
738 static PyObject
*elffile_get_section_idx(PyObject
*self
, PyObject
*args
)
740 unsigned long long idx
;
741 struct elffile
*w
= (struct elffile
*)self
;
743 if (!PyArg_ParseTuple(args
, "K", &idx
))
746 return elffile_secbyidx(w
, NULL
, idx
);
/* Python method ELFFile.get_symbol(name): linear search through the nsym
 * entries of the symbol table recorded by elffile_load().
 *
 * Symbols with st_name == 0 are tested first and names are compared with
 * strcmp().  For the match, the section object is obtained from the symbol
 * st_shndx and the result is built with the sKN format: symbol name, st_value
 * and the section object (N hands over the reference to the tuple).
 */
749 static PyObject
*elffile_get_symbol(PyObject
*self
, PyObject
*args
)
751 const char *name
, *symname
;
752 struct elffile
*w
= (struct elffile
*)self
;
756 if (!PyArg_ParseTuple(args
, "s", &name
))
759 for (i
= 0; i
< w
->nsym
; i
++) {
760 sym
= gelf_getsym(w
->symdata
, i
, &_sym
);
761 if (sym
->st_name
== 0)
763 symname
= elf_strptr(w
->elf
, w
->symstridx
, sym
->st_name
);
764 if (strcmp(symname
, name
))
/* name matched: resolve the section the symbol lives in */
768 Elf_Scn
*scn
= elf_getscn(w
->elf
, sym
->st_shndx
);
771 pysect
= elffile_secbyidx(w
, scn
, sym
->st_shndx
);
776 return Py_BuildValue("sKN", symname
,
777 (unsigned long long)sym
->st_value
, pysect
);
/* Python method ELFFile.getreloc(offset): parses one unsigned long argument
 * (format k) and looks it up, unmodified, in the file-level dynamic
 * relocation container filled by elffile_add_dynreloc().
 */
782 static PyObject
*elffile_getreloc(PyObject
*self
, PyObject
*args
)
784 struct elffile
*w
= (struct elffile
*)self
;
785 struct elfreloc
*relw
;
789 if (!PyArg_ParseTuple(args
, "k", &offs
))
792 relw
= elfrelocs_get(&w
->dynrelocs
, offs
);
796 ret
= (PyObject
*)relw
;
/* Python method ELFFile.find_note(owner, id): locates an ELF note by owner
 * name and type.
 *
 * Both arguments are parsed as strings; id must be exactly 4 characters long
 * and is packed into an integer in one of two byte orders (two alternative
 * assignments below; presumably selected by the file endianness -- confirm).
 * Every PT_NOTE program header is read as ELF_T_NHDR data and iterated with
 * gelf_getnote().  For the first note whose name equals owner and whose
 * n_type equals id, a slice object is returned whose bounds are the virtual
 * addresses of the start and end of the note descriptor
 * (p_vaddr + dataoffs, plus n_descsz for the end).
 *
 * The body is only compiled when both HAVE_GELF_GETNOTE and
 * HAVE_ELF_GETDATA_RAWCHUNK are defined.
 */
801 static PyObject
*elffile_find_note(PyObject
*self
, PyObject
*args
)
803 #if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
807 struct elffile
*w
= (struct elffile
*)self
;
810 if (!PyArg_ParseTuple(args
, "ss", &owner
, &ids
))
813 if (strlen((char *)ids
) != 4) {
814 PyErr_SetString(PyExc_ValueError
,
815 "ELF note ID must be exactly 4-byte string");
/* pack the 4 id characters, first character in the most significant byte */
819 id
= (ids
[0] << 24) | (ids
[1] << 16) | (ids
[2] << 8) | ids
[3];
/* alternative packing, first character in the least significant byte */
821 id
= (ids
[3] << 24) | (ids
[2] << 16) | (ids
[1] << 8) | ids
[0];
823 for (i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
824 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
828 if (phdr
->p_type
!= PT_NOTE
)
831 notedata
= elf_getdata_rawchunk(w
->elf
, phdr
->p_offset
,
832 phdr
->p_filesz
, ELF_T_NHDR
);
835 size_t nameoffs
, dataoffs
;
838 while ((offset
= gelf_getnote(notedata
, offset
, nhdr
,
839 &nameoffs
, &dataoffs
))) {
/* the note name must start inside the mapped file */
840 if (phdr
->p_offset
+ nameoffs
>= w
->len
)
843 const char *name
= w
->mmap
+ phdr
->p_offset
+ nameoffs
;
845 if (strcmp(name
, owner
))
847 if (id
!= nhdr
->n_type
)
852 s
= PyLong_FromUnsignedLongLong(
853 phdr
->p_vaddr
+ dataoffs
);
854 e
= PyLong_FromUnsignedLongLong(
855 phdr
->p_vaddr
+ dataoffs
+ nhdr
->n_descsz
);
856 return PySlice_New(s
, e
, NULL
);
/* Translates virtual address virt into an offset into the file, using the
 * PT_LOAD program headers of w.  The offset is stored through offs as
 * virt - p_vaddr + p_offset of the segment that matched.
 *
 * A segment is considered when virt lies in [p_vaddr, p_vaddr + p_memsz);
 * addresses at or beyond p_vaddr + p_filesz are tested separately, i.e. the
 * part of a segment that has no bytes in the file is told apart from the
 * file-backed part.  Callers use the bool result as a found/not-found flag.
 */
863 static bool elffile_virt2file(struct elffile
*w
, GElf_Addr virt
,
868 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
869 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
871 if (phdr
->p_type
!= PT_LOAD
)
874 if (virt
< phdr
->p_vaddr
875 || virt
>= phdr
->p_vaddr
+ phdr
->p_memsz
)
/* inside the segment in memory, but past the bytes present in the file */
878 if (virt
>= phdr
->p_vaddr
+ phdr
->p_filesz
)
881 *offs
= virt
- phdr
->p_vaddr
+ phdr
->p_offset
;
/* mp_subscript handler of ELFFile: implements file[start:stop] and
 * file[start:str], where start and stop are virtual addresses.
 *
 * The key must be a slice.  With an integer stop, start, stop and step are
 * converted with PyLong_AsSsize_t(); otherwise the only accepted form has the
 * str type object as stop and an integer start.  A step other than 1 is
 * rejected.  The PT_LOAD program header containing start is then searched;
 * for the non-string form stop must lie within the same header, else
 * ELFAccessError is raised.  Addresses are converted to file offsets and the
 * bytes are returned from the file mapping.  If no header matches,
 * ELFAccessError is raised by the final statement.
 *
 * NOTE(review): the inner test of slice->stop against Py_None appears to sit
 * inside the branch already guarded by PyLong_Check(slice->stop); if so it is
 * always true -- confirm against the full source.
 * NOTE(review): in the visible statements xstop is assigned the strlen()
 * result (a length) in one place and an end offset in the other, yet pylen is
 * computed as xstop - xstart in both cases.  For the string form this looks
 * like it should be the length itself -- confirm.
 * NOTE(review): the range checks here use p_memsz, while elffile_virt2file()
 * additionally tests p_filesz; reads beyond the file-backed part of a segment
 * would come from whatever follows in the mapping -- confirm.
 */
888 static PyObject
*elffile_subscript(PyObject
*self
, PyObject
*key
)
890 Py_ssize_t start
, stop
, step
;
891 PySliceObject
*slice
;
892 struct elffile
*w
= (struct elffile
*)self
;
895 if (!PySlice_Check(key
)) {
896 PyErr_SetString(PyExc_IndexError
,
897 "ELFFile subscript must be slice");
900 slice
= (PySliceObject
*)key
;
/* integer stop: plain address range */
903 if (PyLong_Check(slice
->stop
)) {
904 start
= PyLong_AsSsize_t(slice
->start
);
905 if (PyErr_Occurred())
907 if (slice
->stop
!= Py_None
) {
908 stop
= PyLong_AsSsize_t(slice
->stop
);
909 if (PyErr_Occurred())
912 if (slice
->step
!= Py_None
) {
913 step
= PyLong_AsSsize_t(slice
->step
);
914 if (PyErr_Occurred())
/* string form: stop has to be the str type object itself */
918 if (slice
->stop
!= (void *)&PyUnicode_Type
919 || !PyLong_Check(slice
->start
)) {
920 PyErr_SetString(PyExc_IndexError
, "invalid slice");
925 start
= PyLong_AsUnsignedLongLong(slice
->start
);
928 PyErr_SetString(PyExc_IndexError
,
929 "ELFFile subscript slice step must be 1");
933 GElf_Addr xstart
= start
, xstop
= stop
;
/* find the loadable segment that contains the start address */
935 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
936 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
938 if (phdr
->p_type
!= PT_LOAD
)
941 if (xstart
< phdr
->p_vaddr
942 || xstart
>= phdr
->p_vaddr
+ phdr
->p_memsz
)
944 if (!str
&& (xstop
< phdr
->p_vaddr
945 || xstop
> phdr
->p_vaddr
+ phdr
->p_memsz
)) {
946 PyErr_Format(ELFAccessError
,
947 "access (%llu) beyond end of program header (%llu)",
949 (long long)(phdr
->p_vaddr
+
/* virtual address to file offset */
954 xstart
= xstart
- phdr
->p_vaddr
+ phdr
->p_offset
;
957 xstop
= strlen(w
->mmap
+ xstart
);
959 xstop
= xstop
- phdr
->p_vaddr
+ phdr
->p_offset
;
961 Py_ssize_t pylen
= xstop
- xstart
;
963 #if PY_MAJOR_VERSION >= 3
964 return Py_BuildValue("y#", w
->mmap
+ xstart
, pylen
);
966 return Py_BuildValue("s#", w
->mmap
+ xstart
, pylen
);
970 return PyErr_Format(ELFAccessError
,
971 "virtual address (%llu) not found in program headers",
/* Method table of ELFFile objects.  All entries take positional arguments
 * (METH_VARARGS) and map one-to-one onto the elffile_* functions above.
 */
975 static PyMethodDef methods_elffile
[] = {
976 {"find_note", elffile_find_note
, METH_VARARGS
,
977 "find specific note entry"},
978 {"getreloc", elffile_getreloc
, METH_VARARGS
,
980 {"get_symbol", elffile_get_symbol
, METH_VARARGS
,
981 "find symbol by name"},
982 {"get_section", elffile_get_section
, METH_VARARGS
,
983 "find section by name"},
984 {"get_section_addr", elffile_get_section_addr
, METH_VARARGS
,
985 "find section by address"},
986 {"get_section_idx", elffile_get_section_idx
, METH_VARARGS
,
987 "find section by index"},
/* Forward declaration: typeobj_elffile names this function as tp_new before
 * its definition further down.
 */
991 static PyObject
*elffile_load(PyTypeObject
*type
, PyObject
*args
,
/* tp_free slot of the ELFFile type object; among its work it unmaps the file
 * mapping created in elffile_load() (w->mmap, w->len bytes).
 */
994 static void elffile_free(void *arg
)
996 struct elffile
*w
= arg
;
999 munmap(w
->mmap
, w
->len
);
/* Mapping protocol table of ELFFile: only subscript reads are provided. */
1003 static PyMappingMethods mp_elffile
= {
1004 .mp_subscript
= elffile_subscript
,
/* Python type object for _clippy.ELFFile.  Unlike the section and relocation
 * types, it can be instantiated from Python: tp_new is elffile_load(), which
 * reads the file named by its argument.
 */
1007 static PyTypeObject typeobj_elffile
= {
1008 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFFile",
1009 .tp_basicsize
= sizeof(struct elffile
),
1010 .tp_flags
= Py_TPFLAGS_DEFAULT
,
1011 .tp_doc
= elffile_doc
,
1012 .tp_new
= elffile_load
,
1013 .tp_free
= elffile_free
,
1014 .tp_as_mapping
= &mp_elffile
,
1015 .tp_members
= members_elffile
,
1016 .tp_methods
= methods_elffile
,
/* Bounds-checked string lookup inside an Elf_Data buffer: yields a pointer to
 * the string starting offset bytes into data->d_buf.
 *
 * Two conditions are tested: the offset lying at or beyond d_size, and the
 * string having no NUL terminator within the remaining d_size - offset bytes
 * (strnlen() reaching the limit).  Presumably NULL is returned in both cases
 * -- confirm against the full source.
 */
1019 static char *elfdata_strptr(Elf_Data
*data
, size_t offset
)
1023 if (offset
>= data
->d_size
)
1026 p
= (char *)data
->d_buf
+ offset
;
1027 if (strnlen(p
, data
->d_size
- offset
) >= data
->d_size
- offset
)
/* Loads entries dynamic relocation records from reldata into the file-level
 * container w->dynrelocs.  typ selects the record layout (ELF_T_REL or
 * ELF_T_RELA); symdata and strdata are the dynamic symbol and string tables
 * located by elffile_load().
 *
 * Each record is wrapped in a newly allocated ELFReloc object.  REL records
 * are stored in RELA form; when their r_offset can be mapped to a file offset
 * the pointer-sized value found there is converted with gelf_xlatetom() and
 * used as the addend, and the relative flag is cleared again.  Symbol name,
 * type validity, unresolved flag and value are copied from the symbol, or
 * cleared in the alternative branch.  Symbol names are read through
 * elfdata_strptr(), i.e. with bounds checking against strdata.
 */
1033 static void elffile_add_dynreloc(struct elffile
*w
, Elf_Data
*reldata
,
1034 size_t entries
, Elf_Data
*symdata
,
1035 Elf_Data
*strdata
, Elf_Type typ
)
1039 for (i
= 0; i
< entries
; i
++) {
1040 struct elfreloc
*relw
;
1044 GElf_Addr rel_offs
= 0;
1046 relw
= (struct elfreloc
*)typeobj_elfreloc
.tp_alloc(
1047 &typeobj_elfreloc
, 0);
1050 if (typ
== ELF_T_REL
) {
1051 GElf_Rel _rel
, *rel
;
1054 rel
= gelf_getrel(reldata
, i
, &_rel
);
1055 relw
->rela
= &relw
->_rela
;
1056 relw
->rela
->r_offset
= rel
->r_offset
;
1057 relw
->rela
->r_info
= rel
->r_info
;
1058 relw
->rela
->r_addend
= 0;
1059 relw
->relative
= true;
1061 /* REL uses the pointer contents itself instead of the
1062 * RELA addend field :( ... theoretically this could
1063 * be some weird platform specific encoding, but since
1064 * we only care about data relocations it should
1065 * always be a pointer...
1067 if (elffile_virt2file(w
, rel
->r_offset
, &offs
)) {
1068 Elf_Data
*ptr
, *conv
;
1071 .d_buf
= (void *)&tmp
,
1072 .d_type
= ELF_T_ADDR
,
1073 .d_version
= EV_CURRENT
,
1074 .d_size
= sizeof(tmp
),
1079 ptr
= elf_getdata_rawchunk(w
->elf
, offs
,
1083 conv
= gelf_xlatetom(w
->elf
, &mem
, ptr
,
1086 memcpy(&rel_offs
, conv
->d_buf
,
1089 relw
->relative
= false;
1090 relw
->rela
->r_addend
= rel_offs
;
1094 relw
->rela
= gelf_getrela(reldata
, i
, &relw
->_rela
);
1097 symidx
= relw
->symidx
= GELF_R_SYM(rela
->r_info
);
1098 sym
= relw
->sym
= gelf_getsym(symdata
, symidx
, &relw
->_sym
);
1100 relw
->symname
= elfdata_strptr(strdata
, sym
->st_name
);
1101 relw
->symvalid
= GELF_ST_TYPE(sym
->st_info
)
1103 relw
->unresolved
= sym
->st_shndx
== SHN_UNDEF
;
1104 relw
->st_value
= sym
->st_value
;
1106 relw
->symname
= NULL
;
1107 relw
->symvalid
= false;
1108 relw
->unresolved
= false;
1112 if (typ
== ELF_T_RELA
)
1113 debugf("dynrela @ %016llx sym %5llu %016llx %s\n",
1114 (long long)rela
->r_offset
,
1115 (unsigned long long)symidx
,
1116 (long long)rela
->r_addend
, relw
->symname
);
1118 debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n",
1119 (long long)rela
->r_offset
,
1120 (unsigned long long)symidx
,
1121 (unsigned long long)rel_offs
, relw
->symname
);
1123 elfrelocs_add(&w
->dynrelocs
, relw
);
1128 /* primary (only, really) entry point to anything in this module */
/* tp_new of ELFFile: opens the file named by the filename argument, maps it
 * read-only and prepares all lookup state of the returned object.
 *
 * Steps visible here, in order:
 *  - allocate the object and parse the single string argument (keyword name
 *    filename);
 *  - open() and fstat() the file, raising OSError with the file name on
 *    failure, then mmap() st_size bytes read-only;
 *  - check the ELF magic, then switch on the class byte and the data
 *    encoding byte; unknown values raise ELFFormatError;
 *  - hand the mapping to libelf with elf_memory() and read the ELF header;
 *  - scan the section headers for SHT_SYMTAB and record symbol count, symbol
 *    data and the index of the linked string table;
 *  - when HAVE_ELF_GETDATA_RAWCHUNK is defined: for every PT_DYNAMIC program
 *    header, walk the dynamic entries up to DT_NULL collecting addresses and
 *    sizes of the symbol table, string table and REL/RELA tables, map them
 *    to file offsets with elffile_virt2file() and load the relocations with
 *    elffile_add_dynreloc();
 *  - allocate the section wrapper cache (one slot per section header).
 * The last visible statement reports a libelf error as ELFFormatError.
 *
 * NOTE(review): the strdup() result is used without a check before open()
 * is called -- confirm whether allocation failure should be reported.
 * NOTE(review): nsym divides by sh_entsize with no zero check visible; the
 * file header states this code is not meant for untrusted ELF input.
 * NOTE(review): calloc() is called as (element size, count); the usual
 * argument order is (count, element size).  The product is the same.
 */
1129 static PyObject
*elffile_load(PyTypeObject
*type
, PyObject
*args
,
1132 const char *filename
;
1133 static const char * const kwnames
[] = {"filename", NULL
};
1138 w
= (struct elffile
*)typeobj_elffile
.tp_alloc(&typeobj_elffile
, 0);
1142 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "s", (char **)kwnames
,
1146 w
->filename
= strdup(filename
);
1147 fd
= open(filename
, O_RDONLY
| O_NOCTTY
);
1148 if (fd
< 0 || fstat(fd
, &st
)) {
1149 PyErr_SetFromErrnoWithFilename(PyExc_OSError
, filename
);
1153 w
->len
= st
.st_size
;
1154 w
->mmap
= mmap(NULL
, st
.st_size
, PROT_READ
, MAP_SHARED
, fd
, 0);
1156 PyErr_SetFromErrnoWithFilename(PyExc_IOError
, filename
);
1161 w
->mmend
= w
->mmap
+ st
.st_size
;
/* minimal sanity check of the identification bytes before libelf sees them */
1163 if (w
->len
< EI_NIDENT
|| memcmp(w
->mmap
, ELFMAG
, SELFMAG
)) {
1164 PyErr_SetString(ELFFormatError
, "invalid ELF signature");
1168 switch (w
->mmap
[EI_CLASS
]) {
1176 PyErr_SetString(ELFFormatError
, "invalid ELF class");
1179 switch (w
->mmap
[EI_DATA
]) {
1181 w
->bigendian
= false;
1184 w
->bigendian
= true;
1187 PyErr_SetString(ELFFormatError
, "invalid ELF byte order");
1191 w
->elf
= elf_memory(w
->mmap
, w
->len
);
1194 w
->ehdr
= gelf_getehdr(w
->elf
, &w
->_ehdr
);
/* locate the static symbol table, if the file has one */
1198 for (size_t i
= 0; i
< w
->ehdr
->e_shnum
; i
++) {
1199 Elf_Scn
*scn
= elf_getscn(w
->elf
, i
);
1200 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
1202 if (shdr
->sh_type
== SHT_SYMTAB
) {
1204 w
->nsym
= shdr
->sh_size
/ shdr
->sh_entsize
;
1205 w
->symdata
= elf_getdata(scn
, NULL
);
1206 w
->symstridx
= shdr
->sh_link
;
1210 w
->has_symbols
= w
->symtab
&& w
->symstridx
;
1211 elfrelocs_init(&w
->dynrelocs
);
1213 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
/* dynamic relocations are found through the PT_DYNAMIC program header */
1214 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
1215 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
1217 if (phdr
->p_type
!= PT_DYNAMIC
)
1220 Elf_Data
*dyndata
= elf_getdata_rawchunk(w
->elf
,
1221 phdr
->p_offset
, phdr
->p_filesz
, ELF_T_DYN
);
1223 GElf_Addr dynrela
= 0, dynrel
= 0, symtab
= 0, strtab
= 0;
1224 size_t dynrelasz
= 0, dynrelaent
= 0;
1225 size_t dynrelsz
= 0, dynrelent
= 0;
1227 GElf_Dyn _dyn
, *dyn
;
/* collect table addresses and sizes until the DT_NULL terminator */
1229 for (size_t j
= 0;; j
++) {
1230 dyn
= gelf_getdyn(dyndata
, j
, &_dyn
);
1232 if (dyn
->d_tag
== DT_NULL
)
1235 switch (dyn
->d_tag
) {
1237 symtab
= dyn
->d_un
.d_ptr
;
1241 strtab
= dyn
->d_un
.d_ptr
;
1244 strsz
= dyn
->d_un
.d_val
;
1248 dynrela
= dyn
->d_un
.d_ptr
;
1251 dynrelasz
= dyn
->d_un
.d_val
;
1254 dynrelaent
= dyn
->d_un
.d_val
;
1258 dynrel
= dyn
->d_un
.d_ptr
;
1261 dynrelsz
= dyn
->d_un
.d_val
;
1264 dynrelent
= dyn
->d_un
.d_val
;
1270 Elf_Data
*symdata
= NULL
, *strdata
= NULL
;
1272 if (elffile_virt2file(w
, symtab
, &offset
))
1273 symdata
= elf_getdata_rawchunk(w
->elf
, offset
,
1276 if (elffile_virt2file(w
, strtab
, &offset
))
1277 strdata
= elf_getdata_rawchunk(w
->elf
, offset
,
/* RELA table: needs address, total size and a non-zero entry size */
1282 if (dynrela
&& dynrelasz
&& dynrelaent
1283 && elffile_virt2file(w
, dynrela
, &offset
)) {
1284 Elf_Data
*reladata
= NULL
;
1286 debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela
,
1287 (long long)offset
, (long long)dynrelasz
);
1289 reladata
= elf_getdata_rawchunk(w
->elf
, offset
,
1290 dynrelasz
, ELF_T_RELA
);
1292 c
= dynrelasz
/ dynrelaent
;
1293 elffile_add_dynreloc(w
, reladata
, c
, symdata
, strdata
,
/* REL table: same conditions as for the RELA table */
1297 if (dynrel
&& dynrelsz
&& dynrelent
1298 && elffile_virt2file(w
, dynrel
, &offset
)) {
1299 Elf_Data
*reldata
= NULL
;
1301 debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel
,
1302 (long long)offset
, (long long)dynrelsz
);
1304 reldata
= elf_getdata_rawchunk(w
->elf
, offset
, dynrelsz
,
1307 c
= dynrelsz
/ dynrelent
;
1308 elffile_add_dynreloc(w
, reldata
, c
, symdata
, strdata
,
/* cache of ELFSection wrappers, filled lazily by elffile_secbyidx() */
1314 w
->sects
= calloc(sizeof(PyObject
*), w
->ehdr
->e_shnum
);
1315 w
->n_sect
= w
->ehdr
->e_shnum
;
1317 return (PyObject
*)w
;
1322 PyErr_Format(ELFFormatError
, "libelf error %d: %s",
1323 err
, elf_errmsg(err
));
/* Module-level function elfpy_debug(flag): parses one boolean argument
 * (format p).  Per its entry in methods_elfpy it switches the debug output of
 * this file on or off.
 */
1331 static PyObject
*elfpy_debug(PyObject
*self
, PyObject
*args
)
1335 if (!PyArg_ParseTuple(args
, "p", &arg
))
/* Module-level function table; registered by elf_py_init() through
 * PyModule_AddFunctions() when the Python version check there passes.
 */
1343 static PyMethodDef methods_elfpy
[] = {
1344 {"elfpy_debug", elfpy_debug
, METH_VARARGS
, "switch debuging on/off"},
/* Registers everything this file provides with the Python module pymod.
 *
 * Readies the three type objects, initialises libelf with
 * elf_version(EV_CURRENT), adds the module-level functions (only for Python
 * 3 with minor version 5 or later; otherwise the table is merely referenced
 * to keep it used), creates the two exception classes (ELFFormatError derives
 * from ValueError, ELFAccessError from IndexError) and adds the exceptions
 * and the three types to the module.
 *
 * NOTE(review): the results of PyErr_NewException() are passed straight to
 * PyModule_AddObject(), and the return values of the PyModule_AddObject()
 * calls are not examined -- confirm whether failures here should make the
 * function report an error.
 */
1348 bool elf_py_init(PyObject
*pymod
)
1350 if (PyType_Ready(&typeobj_elffile
) < 0)
1352 if (PyType_Ready(&typeobj_elfsect
) < 0)
1354 if (PyType_Ready(&typeobj_elfreloc
) < 0)
1356 if (elf_version(EV_CURRENT
) == EV_NONE
)
1359 #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5
1360 PyModule_AddFunctions(pymod
, methods_elfpy
);
1362 (void)methods_elfpy
;
1365 ELFFormatError
= PyErr_NewException("_clippy.ELFFormatError",
1366 PyExc_ValueError
, NULL
);
1367 PyModule_AddObject(pymod
, "ELFFormatError", ELFFormatError
);
1368 ELFAccessError
= PyErr_NewException("_clippy.ELFAccessError",
1369 PyExc_IndexError
, NULL
);
1370 PyModule_AddObject(pymod
, "ELFAccessError", ELFAccessError
);
/* the type objects are static: take a reference before handing them over */
1372 Py_INCREF(&typeobj_elffile
);
1373 PyModule_AddObject(pymod
, "ELFFile", (PyObject
*)&typeobj_elffile
);
1374 Py_INCREF(&typeobj_elfsect
);
1375 PyModule_AddObject(pymod
, "ELFSection", (PyObject
*)&typeobj_elfsect
);
1376 Py_INCREF(&typeobj_elfreloc
);
1377 PyModule_AddObject(pymod
, "ELFReloc", (PyObject
*)&typeobj_elfreloc
);