2 * fast ELF file accessor
3 * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along
16 * with this program; see the file COPYING; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/* Note: this wrapper is intended to be used as a build-time helper.  While
21 * it should be generally correct and proper, there may be the occasional
22 * memory leak or SEGV for things that haven't been well-tested.
24 * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used
25 * / ! \ in FRR to read files created by its own build. Don't take it out
26 * /_____\ of FRR and use it to parse random ELF files you found somewhere.
28 * If you're working with this code (or even reading it), you really need to
29 * read a bunch of the ELF specs. There's no way around it, things in here
30 * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
34 * https://refspecs.linuxfoundation.org/elf/elf.pdf
35 * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
36 * Recommended reading:
37 * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
39 * The core ELF spec is *not* enough, you should read at least one of the
40 * processor specific (psABI) docs. They define what & how relocations work.
41 * Luckily we don't need to care about the processor specifics since this only
42 * does data relocations, but without looking at the psABI, some things aren't
46 /* the API of this module roughly follows a very small subset of the one
47 * provided by the python elfutils package, which unfortunately is painfully
51 #define PY_SSIZE_T_CLEAN
57 #include "structmember.h"
61 #include <sys/types.h>
66 #if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
67 /* Solaris libelf bails otherwise ... */
68 #undef _FILE_OFFSET_BITS
69 #define _FILE_OFFSET_BITS 32
85 fprintf(stderr, __VA_ARGS__); \
89 static PyObject
*ELFFormatError
;
90 static PyObject
*ELFAccessError
;
92 /* most objects can only be created as return values from one of the methods */
93 static PyObject
*refuse_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
95 PyErr_SetString(PyExc_ValueError
,
96 "cannot create instances of this type");
103 PREDECL_HASH(elfrelocs
);
105 /* ELFFile and ELFSection intentionally share some behaviour, particularly
106 * subscript[123:456] access to file data. This is because relocatables
107 * (.o files) do things section-based, but linked executables/libraries do
108 * things file-based. Having the two behave similar allows simplifying the
114 * overall entry point, instantiated by reading in an ELF file
124 /* note from here on there are several instances of
126 * GElf_Something *x, _x;
128 * this is a pattern used by libelf's generic ELF routines; the _x
129 * field is used to create a copy of the ELF structure from the file
130 * with 32/64bit and endianness adjusted.
133 GElf_Ehdr
*ehdr
, _ehdr
;
135 size_t nsym
, symstridx
;
141 struct elfrelocs_head dynrelocs
;
150 * note that executables and shared libraries can have their section headers
151 * removed, though in practice this is only used as an obfuscation technique.
159 GElf_Shdr _shdr
, *shdr
;
161 unsigned long idx
, len
;
163 struct elfrelocs_head relocs
;
168 * note: relocations in object files (.o) are section-based while relocations
169 * in executables and shared libraries are file-based.
171 * Whenever accessing something that is a pointer in the ELF file, the Python
172 * code needs to check for a relocation; if the pointer is pointing to some
173 * unresolved symbol the file will generally contain 0 bytes. The relocation
174 * will tell what the pointer is actually pointing to.
176 * This represents both static (.o file) and dynamic (.so/exec) relocations.
181 struct elfrelocs_item elfrelocs_item
;
186 /* there's also old-fashioned GElf_Rel; we're converting that to
187 * GElf_Rela in elfsect_add_relocations()
189 GElf_Rela _rela
, *rela
;
194 /* documented below in python docstrings */
195 bool symvalid
, unresolved
, relative
;
196 unsigned long long st_value
;
199 static int elfreloc_cmp(const struct elfreloc
*a
, const struct elfreloc
*b
);
200 static uint32_t elfreloc_hash(const struct elfreloc
*reloc
);
202 DECLARE_HASH(elfrelocs
, struct elfreloc
, elfrelocs_item
,
203 elfreloc_cmp
, elfreloc_hash
);
205 static Elf_Scn
*elf_find_addr(struct elffile
*ef
, uint64_t addr
, size_t *idx
);
206 static PyObject
*elffile_secbyidx(struct elffile
*w
, Elf_Scn
*scn
, size_t idx
);
207 static PyObject
*elfreloc_getsection(PyObject
*self
, PyObject
*args
);
208 static PyObject
*elfreloc_getaddend(PyObject
*obj
, void *closure
);
210 /* --- end of declarations -------------------------------------------------- */
216 static const char elfreloc_doc
[] =
217 "Represents an ELF relocation record\n"
219 "(struct elfreloc * in elf_py.c)";
221 #define member(name, type, doc) \
223 (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
224 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
226 static PyMemberDef members_elfreloc
[] = {
227 member(symname
, T_STRING
,
228 "Name of symbol this relocation refers to.\n"
230 "Will frequently be `None` in executables and shared libraries."
232 member(symvalid
, T_BOOL
,
233 "Target symbol has a valid type, i.e. not STT_NOTYPE"),
234 member(unresolved
, T_BOOL
,
235 "Target symbol refers to an existing section"),
236 member(relative
, T_BOOL
,
237 "Relocation is a REL (not RELA) record and thus relative."),
238 member(st_value
, T_ULONGLONG
,
239 "Target symbol's value, if known\n\n"
240 "Will be zero for unresolved/external symbols."),
245 static PyGetSetDef getset_elfreloc
[] = {
246 { .name
= (char *)"r_addend", .get
= elfreloc_getaddend
, .doc
=
247 (char *)"Relocation addend value"},
251 static PyMethodDef methods_elfreloc
[] = {
252 {"getsection", elfreloc_getsection
, METH_VARARGS
,
253 "Find relocation target's ELF section\n\n"
254 "Args: address of relocatee (TODO: fix/remove?)\n"
255 "Returns: ELFSection or None\n\n"
256 "Not possible if section headers have been stripped."},
260 static int elfreloc_cmp(const struct elfreloc
*a
, const struct elfreloc
*b
)
262 if (a
->rela
->r_offset
< b
->rela
->r_offset
)
264 if (a
->rela
->r_offset
> b
->rela
->r_offset
)
269 static uint32_t elfreloc_hash(const struct elfreloc
*reloc
)
271 return jhash(&reloc
->rela
->r_offset
, sizeof(reloc
->rela
->r_offset
),
275 static struct elfreloc
*elfrelocs_get(struct elfrelocs_head
*head
,
278 struct elfreloc dummy
;
280 dummy
.rela
= &dummy
._rela
;
281 dummy
.rela
->r_offset
= offset
;
282 return elfrelocs_find(head
, &dummy
);
285 static PyObject
*elfreloc_getsection(PyObject
*self
, PyObject
*args
)
287 struct elfreloc
*w
= (struct elfreloc
*)self
;
290 if (!PyArg_ParseTuple(args
, "k", &data
))
296 if (w
->symidx
== 0) {
300 data
= (w
->relative
? data
: 0) + w
->rela
->r_addend
;
301 scn
= elf_find_addr(w
->es
->ef
, data
, &idx
);
304 return elffile_secbyidx(w
->es
->ef
, scn
, idx
);
306 return elffile_secbyidx(w
->es
->ef
, NULL
, w
->sym
->st_shndx
);
309 static PyObject
*elfreloc_getaddend(PyObject
*obj
, void *closure
)
311 struct elfreloc
*w
= (struct elfreloc
*)obj
;
313 return Py_BuildValue("K", (unsigned long long)w
->rela
->r_addend
);
316 static PyObject
*elfreloc_repr(PyObject
*arg
)
318 struct elfreloc
*w
= (struct elfreloc
*)arg
;
320 return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>",
321 (unsigned long)w
->rela
->r_offset
,
322 (w
->symname
&& w
->symname
[0]) ? w
->symname
324 (unsigned long)w
->rela
->r_addend
);
327 static void elfreloc_free(void *arg
)
329 struct elfreloc
*w
= arg
;
334 static PyTypeObject typeobj_elfreloc
= {
335 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFReloc",
336 .tp_basicsize
= sizeof(struct elfreloc
),
337 .tp_flags
= Py_TPFLAGS_DEFAULT
,
338 .tp_doc
= elfreloc_doc
,
339 .tp_new
= refuse_new
,
340 .tp_free
= elfreloc_free
,
341 .tp_repr
= elfreloc_repr
,
342 .tp_members
= members_elfreloc
,
343 .tp_methods
= methods_elfreloc
,
344 .tp_getset
= getset_elfreloc
,
351 static const char elfsect_doc
[] =
352 "Represents an ELF section\n"
354 "To access section contents, use subscript notation, e.g.\n"
355 " section[123:456]\n"
356 "To read null terminated C strings, replace the end with str:\n"
357 " section[123:str]\n\n"
358 "(struct elfsect * in elf_py.c)";
360 static PyObject
*elfsect_getaddr(PyObject
*self
, void *closure
);
362 #define member(name, type, doc) \
364 (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
365 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
367 static PyMemberDef members_elfsect
[] = {
368 member(name
, T_STRING
,
369 "Section name, e.g. \".text\""),
371 "Section index in file"),
373 "Section length in bytes"),
378 static PyGetSetDef getset_elfsect
[] = {
379 { .name
= (char *)"sh_addr", .get
= elfsect_getaddr
, .doc
=
380 (char *)"Section virtual address (mapped program view)"},
384 static PyObject
*elfsect_getaddr(PyObject
*self
, void *closure
)
386 struct elfsect
*w
= (struct elfsect
*)self
;
388 return Py_BuildValue("K", (unsigned long long)w
->shdr
->sh_addr
);
392 static PyObject
*elfsect_getreloc(PyObject
*self
, PyObject
*args
)
394 struct elfsect
*w
= (struct elfsect
*)self
;
395 struct elfreloc
*relw
;
399 if (!PyArg_ParseTuple(args
, "k", &offs
))
402 relw
= elfrelocs_get(&w
->relocs
, offs
+ w
->shdr
->sh_addr
);
406 ret
= (PyObject
*)relw
;
411 static PyMethodDef methods_elfsect
[] = {
412 {"getreloc", elfsect_getreloc
, METH_VARARGS
,
413 "Check for / get relocation at offset into section\n\n"
414 "Args: byte offset into section to check\n"
415 "Returns: ELFReloc or None"},
419 static PyObject
*elfsect_subscript(PyObject
*self
, PyObject
*key
)
421 Py_ssize_t start
, stop
, step
, sllen
;
422 struct elfsect
*w
= (struct elfsect
*)self
;
423 PySliceObject
*slice
;
424 unsigned long offs
, len
= ~0UL;
426 if (!PySlice_Check(key
)) {
427 PyErr_SetString(PyExc_IndexError
,
428 "ELFSection subscript must be slice");
431 slice
= (PySliceObject
*)key
;
432 if (PyLong_Check(slice
->stop
)) {
433 if (PySlice_GetIndicesEx(key
, w
->shdr
->sh_size
,
434 &start
, &stop
, &step
, &sllen
))
438 PyErr_SetString(PyExc_IndexError
,
439 "ELFSection subscript slice step must be 1");
442 if ((GElf_Xword
)stop
> w
->shdr
->sh_size
) {
443 PyErr_Format(ELFAccessError
,
444 "access (%lu) beyond end of section %lu/%s (%lu)",
445 stop
, w
->idx
, w
->name
, w
->shdr
->sh_size
);
452 if (slice
->stop
!= (void *)&PyUnicode_Type
453 || !PyLong_Check(slice
->start
)) {
454 PyErr_SetString(PyExc_IndexError
, "invalid slice");
458 offs
= PyLong_AsUnsignedLongLong(slice
->start
);
462 offs
+= w
->shdr
->sh_offset
;
463 if (offs
> w
->ef
->len
) {
464 PyErr_Format(ELFAccessError
,
465 "access (%lu) beyond end of file (%lu)",
470 len
= strnlen(w
->ef
->mmap
+ offs
, w
->ef
->len
- offs
);
472 Py_ssize_t pylen
= len
;
474 #if PY_MAJOR_VERSION >= 3
475 return Py_BuildValue("y#", w
->ef
->mmap
+ offs
, pylen
);
477 return Py_BuildValue("s#", w
->ef
->mmap
+ offs
, pylen
);
481 static PyMappingMethods mp_elfsect
= {
482 .mp_subscript
= elfsect_subscript
,
485 static void elfsect_free(void *arg
)
487 struct elfsect
*w
= arg
;
492 static PyObject
*elfsect_repr(PyObject
*arg
)
494 struct elfsect
*w
= (struct elfsect
*)arg
;
496 return PyUnicode_FromFormat("<ELFSection %s>", w
->name
);
499 static PyTypeObject typeobj_elfsect
= {
500 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFSection",
501 .tp_basicsize
= sizeof(struct elfsect
),
502 .tp_flags
= Py_TPFLAGS_DEFAULT
,
503 .tp_doc
= elfsect_doc
,
504 .tp_new
= refuse_new
,
505 .tp_free
= elfsect_free
,
506 .tp_repr
= elfsect_repr
,
507 .tp_as_mapping
= &mp_elfsect
,
508 .tp_members
= members_elfsect
,
509 .tp_methods
= methods_elfsect
,
510 .tp_getset
= getset_elfsect
,
513 static void elfsect_add_relocations(struct elfsect
*w
, Elf_Scn
*rel
,
517 Elf_Scn
*symtab
= elf_getscn(w
->ef
->elf
, relhdr
->sh_link
);
518 GElf_Shdr _symhdr
, *symhdr
= gelf_getshdr(symtab
, &_symhdr
);
519 Elf_Data
*symdata
= elf_getdata(symtab
, NULL
);
520 Elf_Data
*reldata
= elf_getdata(rel
, NULL
);
522 entries
= relhdr
->sh_size
/ relhdr
->sh_entsize
;
523 for (i
= 0; i
< entries
; i
++) {
524 struct elfreloc
*relw
;
529 relw
= (struct elfreloc
*)typeobj_elfreloc
.tp_alloc(
530 &typeobj_elfreloc
, 0);
533 if (relhdr
->sh_type
== SHT_REL
) {
536 rel
= gelf_getrel(reldata
, i
, &_rel
);
537 relw
->rela
= &relw
->_rela
;
538 relw
->rela
->r_offset
= rel
->r_offset
;
539 relw
->rela
->r_info
= rel
->r_info
;
540 relw
->rela
->r_addend
= 0;
541 relw
->relative
= true;
543 relw
->rela
= gelf_getrela(reldata
, i
, &relw
->_rela
);
546 if (rela
->r_offset
< w
->shdr
->sh_addr
547 || rela
->r_offset
>= w
->shdr
->sh_addr
+ w
->shdr
->sh_size
)
550 symidx
= relw
->symidx
= GELF_R_SYM(rela
->r_info
);
551 sym
= relw
->sym
= gelf_getsym(symdata
, symidx
, &relw
->_sym
);
553 relw
->symname
= elf_strptr(w
->ef
->elf
, symhdr
->sh_link
,
555 relw
->symvalid
= GELF_ST_TYPE(sym
->st_info
)
557 relw
->unresolved
= sym
->st_shndx
== SHN_UNDEF
;
558 relw
->st_value
= sym
->st_value
;
560 relw
->symname
= NULL
;
561 relw
->symvalid
= false;
562 relw
->unresolved
= false;
566 debugf("reloc @ %016llx sym %5llu %016llx %s\n",
567 (long long)rela
->r_offset
, (unsigned long long)symidx
,
568 (long long)rela
->r_addend
, relw
->symname
);
570 elfrelocs_add(&w
->relocs
, relw
);
575 * bindings & loading code between ELFFile and ELFSection
578 static PyObject
*elfsect_wrap(struct elffile
*ef
, Elf_Scn
*scn
, size_t idx
,
584 w
= (struct elfsect
*)typeobj_elfsect
.tp_alloc(&typeobj_elfsect
, 0);
591 w
->shdr
= gelf_getshdr(scn
, &w
->_shdr
);
592 w
->len
= w
->shdr
->sh_size
;
594 elfrelocs_init(&w
->relocs
);
596 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
597 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
598 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
600 if (shdr
->sh_type
!= SHT_RELA
&& shdr
->sh_type
!= SHT_REL
)
602 if (shdr
->sh_info
&& shdr
->sh_info
!= idx
)
604 elfsect_add_relocations(w
, scn
, shdr
);
607 return (PyObject
*)w
;
610 static Elf_Scn
*elf_find_section(struct elffile
*ef
, const char *name
,
616 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
617 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
618 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
620 secname
= elf_strptr(ef
->elf
, ef
->ehdr
->e_shstrndx
,
622 if (strcmp(secname
, name
))
631 static Elf_Scn
*elf_find_addr(struct elffile
*ef
, uint64_t addr
, size_t *idx
)
635 for (i
= 0; i
< ef
->ehdr
->e_shnum
; i
++) {
636 Elf_Scn
*scn
= elf_getscn(ef
->elf
, i
);
637 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
639 /* virtual address is kinda meaningless for TLS sections */
640 if (shdr
->sh_flags
& SHF_TLS
)
642 if (addr
< shdr
->sh_addr
||
643 addr
>= shdr
->sh_addr
+ shdr
->sh_size
)
657 static const char elffile_doc
[] =
658 "Represents an ELF file\n"
660 "Args: filename to load\n"
662 "To access raw file contents, use subscript notation, e.g.\n"
664 "To read null terminated C strings, replace the end with str:\n"
666 "(struct elffile * in elf_py.c)";
669 #define member(name, type, doc) \
671 (char *)#name, type, offsetof(struct elffile, name), READONLY, \
672 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
674 static PyMemberDef members_elffile
[] = {
675 member(filename
, T_STRING
,
676 "Original file name as given when opening"),
677 member(elfclass
, T_INT
,
678 "ELF class (architecture bit size)\n\n"
679 "Either 32 or 64, straight integer."),
680 member(bigendian
, T_BOOL
,
681 "ELF file is big-endian\n\n"
682 "All internal ELF structures are automatically converted."),
683 member(has_symbols
, T_BOOL
,
684 "A symbol section is present\n\n"
685 "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
691 static PyObject
*elffile_secbyidx(struct elffile
*w
, Elf_Scn
*scn
, size_t idx
)
697 scn
= elf_getscn(w
->elf
, idx
);
698 if (!scn
|| idx
>= w
->n_sect
)
701 if (!w
->sects
[idx
]) {
702 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
704 name
= elf_strptr(w
->elf
, w
->ehdr
->e_shstrndx
, shdr
->sh_name
);
705 w
->sects
[idx
] = elfsect_wrap(w
, scn
, idx
, name
);
713 static PyObject
*elffile_get_section(PyObject
*self
, PyObject
*args
)
716 struct elffile
*w
= (struct elffile
*)self
;
720 if (!PyArg_ParseTuple(args
, "s", &name
))
723 scn
= elf_find_section(w
, name
, &idx
);
724 return elffile_secbyidx(w
, scn
, idx
);
727 static PyObject
*elffile_get_section_addr(PyObject
*self
, PyObject
*args
)
729 unsigned long long addr
;
730 struct elffile
*w
= (struct elffile
*)self
;
734 if (!PyArg_ParseTuple(args
, "K", &addr
))
737 scn
= elf_find_addr(w
, addr
, &idx
);
738 return elffile_secbyidx(w
, scn
, idx
);
741 static PyObject
*elffile_get_section_idx(PyObject
*self
, PyObject
*args
)
743 unsigned long long idx
;
744 struct elffile
*w
= (struct elffile
*)self
;
746 if (!PyArg_ParseTuple(args
, "K", &idx
))
749 return elffile_secbyidx(w
, NULL
, idx
);
752 static PyObject
*elffile_get_symbol(PyObject
*self
, PyObject
*args
)
754 const char *name
, *symname
;
755 struct elffile
*w
= (struct elffile
*)self
;
759 if (!PyArg_ParseTuple(args
, "s", &name
))
762 for (i
= 0; i
< w
->nsym
; i
++) {
763 sym
= gelf_getsym(w
->symdata
, i
, &_sym
);
764 if (sym
->st_name
== 0)
766 symname
= elf_strptr(w
->elf
, w
->symstridx
, sym
->st_name
);
767 if (strcmp(symname
, name
))
771 Elf_Scn
*scn
= elf_getscn(w
->elf
, sym
->st_shndx
);
774 pysect
= elffile_secbyidx(w
, scn
, sym
->st_shndx
);
779 return Py_BuildValue("sKN", symname
,
780 (unsigned long long)sym
->st_value
, pysect
);
785 static PyObject
*elffile_getreloc(PyObject
*self
, PyObject
*args
)
787 struct elffile
*w
= (struct elffile
*)self
;
788 struct elfreloc
*relw
;
792 if (!PyArg_ParseTuple(args
, "k", &offs
))
795 relw
= elfrelocs_get(&w
->dynrelocs
, offs
);
799 ret
= (PyObject
*)relw
;
804 static PyObject
*elffile_find_note(PyObject
*self
, PyObject
*args
)
806 #if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
810 struct elffile
*w
= (struct elffile
*)self
;
813 if (!PyArg_ParseTuple(args
, "ss", &owner
, &ids
))
816 if (strlen((char *)ids
) != 4) {
817 PyErr_SetString(PyExc_ValueError
,
818 "ELF note ID must be exactly 4-byte string");
822 id
= (ids
[0] << 24) | (ids
[1] << 16) | (ids
[2] << 8) | ids
[3];
824 id
= (ids
[3] << 24) | (ids
[2] << 16) | (ids
[1] << 8) | ids
[0];
826 for (i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
827 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
831 if (phdr
->p_type
!= PT_NOTE
)
834 notedata
= elf_getdata_rawchunk(w
->elf
, phdr
->p_offset
,
835 phdr
->p_filesz
, ELF_T_NHDR
);
838 size_t nameoffs
, dataoffs
;
841 while ((offset
= gelf_getnote(notedata
, offset
, nhdr
,
842 &nameoffs
, &dataoffs
))) {
843 if (phdr
->p_offset
+ nameoffs
>= w
->len
)
846 const char *name
= w
->mmap
+ phdr
->p_offset
+ nameoffs
;
848 if (strcmp(name
, owner
))
850 if (id
!= nhdr
->n_type
)
855 s
= PyLong_FromUnsignedLongLong(
856 phdr
->p_vaddr
+ dataoffs
);
857 e
= PyLong_FromUnsignedLongLong(
858 phdr
->p_vaddr
+ dataoffs
+ nhdr
->n_descsz
);
859 return PySlice_New(s
, e
, NULL
);
866 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
867 static bool elffile_virt2file(struct elffile
*w
, GElf_Addr virt
,
872 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
873 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
875 if (phdr
->p_type
!= PT_LOAD
)
878 if (virt
< phdr
->p_vaddr
879 || virt
>= phdr
->p_vaddr
+ phdr
->p_memsz
)
882 if (virt
>= phdr
->p_vaddr
+ phdr
->p_filesz
)
885 *offs
= virt
- phdr
->p_vaddr
+ phdr
->p_offset
;
891 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
893 static PyObject
*elffile_subscript(PyObject
*self
, PyObject
*key
)
895 Py_ssize_t start
, stop
, step
;
896 PySliceObject
*slice
;
897 struct elffile
*w
= (struct elffile
*)self
;
900 if (!PySlice_Check(key
)) {
901 PyErr_SetString(PyExc_IndexError
,
902 "ELFFile subscript must be slice");
905 slice
= (PySliceObject
*)key
;
908 if (PyLong_Check(slice
->stop
)) {
909 start
= PyLong_AsSsize_t(slice
->start
);
910 if (PyErr_Occurred())
912 if (slice
->stop
!= Py_None
) {
913 stop
= PyLong_AsSsize_t(slice
->stop
);
914 if (PyErr_Occurred())
917 if (slice
->step
!= Py_None
) {
918 step
= PyLong_AsSsize_t(slice
->step
);
919 if (PyErr_Occurred())
923 if (slice
->stop
!= (void *)&PyUnicode_Type
924 || !PyLong_Check(slice
->start
)) {
925 PyErr_SetString(PyExc_IndexError
, "invalid slice");
930 start
= PyLong_AsUnsignedLongLong(slice
->start
);
933 PyErr_SetString(PyExc_IndexError
,
934 "ELFFile subscript slice step must be 1");
938 GElf_Addr xstart
= start
, xstop
= stop
;
940 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
941 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
943 if (phdr
->p_type
!= PT_LOAD
)
946 if (xstart
< phdr
->p_vaddr
947 || xstart
>= phdr
->p_vaddr
+ phdr
->p_memsz
)
949 if (!str
&& (xstop
< phdr
->p_vaddr
950 || xstop
> phdr
->p_vaddr
+ phdr
->p_memsz
)) {
951 PyErr_Format(ELFAccessError
,
952 "access (%llu) beyond end of program header (%llu)",
954 (long long)(phdr
->p_vaddr
+
959 xstart
= xstart
- phdr
->p_vaddr
+ phdr
->p_offset
;
962 xstop
= strlen(w
->mmap
+ xstart
);
964 xstop
= xstop
- phdr
->p_vaddr
+ phdr
->p_offset
;
966 Py_ssize_t pylen
= xstop
- xstart
;
968 #if PY_MAJOR_VERSION >= 3
969 return Py_BuildValue("y#", w
->mmap
+ xstart
, pylen
);
971 return Py_BuildValue("s#", w
->mmap
+ xstart
, pylen
);
975 return PyErr_Format(ELFAccessError
,
976 "virtual address (%llu) not found in program headers",
980 static PyMethodDef methods_elffile
[] = {
981 {"find_note", elffile_find_note
, METH_VARARGS
,
982 "find specific note entry"},
983 {"getreloc", elffile_getreloc
, METH_VARARGS
,
985 {"get_symbol", elffile_get_symbol
, METH_VARARGS
,
986 "find symbol by name"},
987 {"get_section", elffile_get_section
, METH_VARARGS
,
988 "find section by name"},
989 {"get_section_addr", elffile_get_section_addr
, METH_VARARGS
,
990 "find section by address"},
991 {"get_section_idx", elffile_get_section_idx
, METH_VARARGS
,
992 "find section by index"},
996 static PyObject
*elffile_load(PyTypeObject
*type
, PyObject
*args
,
999 static void elffile_free(void *arg
)
1001 struct elffile
*w
= arg
;
1004 munmap(w
->mmap
, w
->len
);
1008 static PyMappingMethods mp_elffile
= {
1009 .mp_subscript
= elffile_subscript
,
1012 static PyTypeObject typeobj_elffile
= {
1013 PyVarObject_HEAD_INIT(NULL
, 0).tp_name
= "_clippy.ELFFile",
1014 .tp_basicsize
= sizeof(struct elffile
),
1015 .tp_flags
= Py_TPFLAGS_DEFAULT
,
1016 .tp_doc
= elffile_doc
,
1017 .tp_new
= elffile_load
,
1018 .tp_free
= elffile_free
,
1019 .tp_as_mapping
= &mp_elffile
,
1020 .tp_members
= members_elffile
,
1021 .tp_methods
= methods_elffile
,
1024 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1025 static char *elfdata_strptr(Elf_Data
*data
, size_t offset
)
1029 if (offset
>= data
->d_size
)
1032 p
= (char *)data
->d_buf
+ offset
;
1033 if (strnlen(p
, data
->d_size
- offset
) >= data
->d_size
- offset
)
1039 static void elffile_add_dynreloc(struct elffile
*w
, Elf_Data
*reldata
,
1040 size_t entries
, Elf_Data
*symdata
,
1041 Elf_Data
*strdata
, Elf_Type typ
)
1045 for (i
= 0; i
< entries
; i
++) {
1046 struct elfreloc
*relw
;
1050 GElf_Addr rel_offs
= 0;
1052 relw
= (struct elfreloc
*)typeobj_elfreloc
.tp_alloc(
1053 &typeobj_elfreloc
, 0);
1056 if (typ
== ELF_T_REL
) {
1057 GElf_Rel _rel
, *rel
;
1060 rel
= gelf_getrel(reldata
, i
, &_rel
);
1061 relw
->rela
= &relw
->_rela
;
1062 relw
->rela
->r_offset
= rel
->r_offset
;
1063 relw
->rela
->r_info
= rel
->r_info
;
1064 relw
->rela
->r_addend
= 0;
1065 relw
->relative
= true;
1067 /* REL uses the pointer contents itself instead of the
1068 * RELA addend field :( ... theoretically this could
1069 * be some weird platform specific encoding, but since
1070 * we only care about data relocations it should
1071 * always be a pointer...
1073 if (elffile_virt2file(w
, rel
->r_offset
, &offs
)) {
1074 Elf_Data
*ptr
, *conv
;
1077 .d_buf
= (void *)&tmp
,
1078 .d_type
= ELF_T_ADDR
,
1079 .d_version
= EV_CURRENT
,
1080 .d_size
= sizeof(tmp
),
1085 ptr
= elf_getdata_rawchunk(w
->elf
, offs
,
1089 conv
= gelf_xlatetom(w
->elf
, &mem
, ptr
,
1092 memcpy(&rel_offs
, conv
->d_buf
,
1095 relw
->relative
= false;
1096 relw
->rela
->r_addend
= rel_offs
;
1100 relw
->rela
= gelf_getrela(reldata
, i
, &relw
->_rela
);
1103 symidx
= relw
->symidx
= GELF_R_SYM(rela
->r_info
);
1104 sym
= relw
->sym
= gelf_getsym(symdata
, symidx
, &relw
->_sym
);
1106 relw
->symname
= elfdata_strptr(strdata
, sym
->st_name
);
1107 relw
->symvalid
= GELF_ST_TYPE(sym
->st_info
)
1109 relw
->unresolved
= sym
->st_shndx
== SHN_UNDEF
;
1110 relw
->st_value
= sym
->st_value
;
1112 relw
->symname
= NULL
;
1113 relw
->symvalid
= false;
1114 relw
->unresolved
= false;
1118 if (typ
== ELF_T_RELA
)
1119 debugf("dynrela @ %016llx sym %5llu %016llx %s\n",
1120 (long long)rela
->r_offset
,
1121 (unsigned long long)symidx
,
1122 (long long)rela
->r_addend
, relw
->symname
);
1124 debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n",
1125 (long long)rela
->r_offset
,
1126 (unsigned long long)symidx
,
1127 (unsigned long long)rel_offs
, relw
->symname
);
1129 elfrelocs_add(&w
->dynrelocs
, relw
);
1133 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
1135 /* primary (only, really) entry point to anything in this module */
1136 static PyObject
*elffile_load(PyTypeObject
*type
, PyObject
*args
,
1139 const char *filename
;
1140 static const char * const kwnames
[] = {"filename", NULL
};
1145 w
= (struct elffile
*)typeobj_elffile
.tp_alloc(&typeobj_elffile
, 0);
1149 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "s", (char **)kwnames
,
1153 w
->filename
= strdup(filename
);
1154 fd
= open(filename
, O_RDONLY
| O_NOCTTY
);
1155 if (fd
< 0 || fstat(fd
, &st
)) {
1156 PyErr_SetFromErrnoWithFilename(PyExc_OSError
, filename
);
1160 w
->len
= st
.st_size
;
1161 w
->mmap
= mmap(NULL
, st
.st_size
, PROT_READ
, MAP_SHARED
, fd
, 0);
1163 PyErr_SetFromErrnoWithFilename(PyExc_IOError
, filename
);
1168 w
->mmend
= w
->mmap
+ st
.st_size
;
1170 if (w
->len
< EI_NIDENT
|| memcmp(w
->mmap
, ELFMAG
, SELFMAG
)) {
1171 PyErr_SetString(ELFFormatError
, "invalid ELF signature");
1175 switch (w
->mmap
[EI_CLASS
]) {
1183 PyErr_SetString(ELFFormatError
, "invalid ELF class");
1186 switch (w
->mmap
[EI_DATA
]) {
1188 w
->bigendian
= false;
1191 w
->bigendian
= true;
1194 PyErr_SetString(ELFFormatError
, "invalid ELF byte order");
1198 w
->elf
= elf_memory(w
->mmap
, w
->len
);
1201 w
->ehdr
= gelf_getehdr(w
->elf
, &w
->_ehdr
);
1205 for (size_t i
= 0; i
< w
->ehdr
->e_shnum
; i
++) {
1206 Elf_Scn
*scn
= elf_getscn(w
->elf
, i
);
1207 GElf_Shdr _shdr
, *shdr
= gelf_getshdr(scn
, &_shdr
);
1209 if (shdr
->sh_type
== SHT_SYMTAB
) {
1211 w
->nsym
= shdr
->sh_size
/ shdr
->sh_entsize
;
1212 w
->symdata
= elf_getdata(scn
, NULL
);
1213 w
->symstridx
= shdr
->sh_link
;
1217 w
->has_symbols
= w
->symtab
&& w
->symstridx
;
1218 elfrelocs_init(&w
->dynrelocs
);
1220 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1221 for (size_t i
= 0; i
< w
->ehdr
->e_phnum
; i
++) {
1222 GElf_Phdr _phdr
, *phdr
= gelf_getphdr(w
->elf
, i
, &_phdr
);
1224 if (phdr
->p_type
!= PT_DYNAMIC
)
1227 Elf_Data
*dyndata
= elf_getdata_rawchunk(w
->elf
,
1228 phdr
->p_offset
, phdr
->p_filesz
, ELF_T_DYN
);
1230 GElf_Addr dynrela
= 0, dynrel
= 0, symtab
= 0, strtab
= 0;
1231 size_t dynrelasz
= 0, dynrelaent
= 0;
1232 size_t dynrelsz
= 0, dynrelent
= 0;
1234 GElf_Dyn _dyn
, *dyn
;
1236 for (size_t j
= 0;; j
++) {
1237 dyn
= gelf_getdyn(dyndata
, j
, &_dyn
);
1239 if (dyn
->d_tag
== DT_NULL
)
1242 switch (dyn
->d_tag
) {
1244 symtab
= dyn
->d_un
.d_ptr
;
1248 strtab
= dyn
->d_un
.d_ptr
;
1251 strsz
= dyn
->d_un
.d_val
;
1255 dynrela
= dyn
->d_un
.d_ptr
;
1258 dynrelasz
= dyn
->d_un
.d_val
;
1261 dynrelaent
= dyn
->d_un
.d_val
;
1265 dynrel
= dyn
->d_un
.d_ptr
;
1268 dynrelsz
= dyn
->d_un
.d_val
;
1271 dynrelent
= dyn
->d_un
.d_val
;
1277 Elf_Data
*symdata
= NULL
, *strdata
= NULL
;
1279 if (elffile_virt2file(w
, symtab
, &offset
))
1280 symdata
= elf_getdata_rawchunk(w
->elf
, offset
,
1283 if (elffile_virt2file(w
, strtab
, &offset
))
1284 strdata
= elf_getdata_rawchunk(w
->elf
, offset
,
1289 if (dynrela
&& dynrelasz
&& dynrelaent
1290 && elffile_virt2file(w
, dynrela
, &offset
)) {
1291 Elf_Data
*reladata
= NULL
;
1293 debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela
,
1294 (long long)offset
, (long long)dynrelasz
);
1296 reladata
= elf_getdata_rawchunk(w
->elf
, offset
,
1297 dynrelasz
, ELF_T_RELA
);
1299 c
= dynrelasz
/ dynrelaent
;
1300 elffile_add_dynreloc(w
, reladata
, c
, symdata
, strdata
,
1304 if (dynrel
&& dynrelsz
&& dynrelent
1305 && elffile_virt2file(w
, dynrel
, &offset
)) {
1306 Elf_Data
*reldata
= NULL
;
1308 debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel
,
1309 (long long)offset
, (long long)dynrelsz
);
1311 reldata
= elf_getdata_rawchunk(w
->elf
, offset
, dynrelsz
,
1314 c
= dynrelsz
/ dynrelent
;
1315 elffile_add_dynreloc(w
, reldata
, c
, symdata
, strdata
,
1321 w
->sects
= calloc(sizeof(PyObject
*), w
->ehdr
->e_shnum
);
1322 w
->n_sect
= w
->ehdr
->e_shnum
;
1324 return (PyObject
*)w
;
1329 PyErr_Format(ELFFormatError
, "libelf error %d: %s",
1330 err
, elf_errmsg(err
));
1338 static PyObject
*elfpy_debug(PyObject
*self
, PyObject
*args
)
1342 if (!PyArg_ParseTuple(args
, "p", &arg
))
1350 static PyMethodDef methods_elfpy
[] = {
1351 {"elfpy_debug", elfpy_debug
, METH_VARARGS
, "switch debuging on/off"},
1355 bool elf_py_init(PyObject
*pymod
)
1357 if (PyType_Ready(&typeobj_elffile
) < 0)
1359 if (PyType_Ready(&typeobj_elfsect
) < 0)
1361 if (PyType_Ready(&typeobj_elfreloc
) < 0)
1363 if (elf_version(EV_CURRENT
) == EV_NONE
)
1366 #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5
1367 PyModule_AddFunctions(pymod
, methods_elfpy
);
1369 (void)methods_elfpy
;
1372 ELFFormatError
= PyErr_NewException("_clippy.ELFFormatError",
1373 PyExc_ValueError
, NULL
);
1374 PyModule_AddObject(pymod
, "ELFFormatError", ELFFormatError
);
1375 ELFAccessError
= PyErr_NewException("_clippy.ELFAccessError",
1376 PyExc_IndexError
, NULL
);
1377 PyModule_AddObject(pymod
, "ELFAccessError", ELFAccessError
);
1379 Py_INCREF(&typeobj_elffile
);
1380 PyModule_AddObject(pymod
, "ELFFile", (PyObject
*)&typeobj_elffile
);
1381 Py_INCREF(&typeobj_elfsect
);
1382 PyModule_AddObject(pymod
, "ELFSection", (PyObject
*)&typeobj_elfsect
);
1383 Py_INCREF(&typeobj_elfreloc
);
1384 PyModule_AddObject(pymod
, "ELFReloc", (PyObject
*)&typeobj_elfreloc
);