]> git.proxmox.com Git - mirror_frr.git/blob - lib/elf_py.c
*: auto-convert to SPDX License IDs
[mirror_frr.git] / lib / elf_py.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * fast ELF file accessor
4 * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
5 */
6
7 /* Note: this wrapper is intended to be used as build-time helper. While
8 * it should be generally correct and proper, there may be the occasional
9 * memory leak or SEGV for things that haven't been well-tested.
10 * _
11 * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used
12 * / ! \ in FRR to read files created by its own build. Don't take it out
13 * /_____\ of FRR and use it to parse random ELF files you found somewhere.
14 *
15 * If you're working with this code (or even reading it), you really need to
16 * read a bunch of the ELF specs. There's no way around it, things in here
17 * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
18 * your friends.
19 *
20 * Required reading:
21 * https://refspecs.linuxfoundation.org/elf/elf.pdf
22 * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
23 * Recommended reading:
24 * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
25 *
26 * The core ELF spec is *not* enough, you should read at least one of the
27 * processor specific (psABI) docs. They define what & how relocations work.
28 * Luckily we don't need to care about the processor specifics since this only
29 * does data relocations, but without looking at the psABI, some things aren't
30 * quite clear.
31 */
32
33 /* the API of this module roughly follows a very small subset of the one
34 * provided by the python elfutils package, which unfortunately is painfully
35 * slow.
36 */
37
38 #define PY_SSIZE_T_CLEAN
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43 #include <Python.h>
44 #include "structmember.h"
45 #include <string.h>
46 #include <stdlib.h>
47 #include <unistd.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <sys/mman.h>
51 #include <fcntl.h>
52
53 #if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
54 /* Solaris libelf bails otherwise ... */
55 #undef _FILE_OFFSET_BITS
56 #define _FILE_OFFSET_BITS 32
57 #endif
58
59 #include <elf.h>
60 #include <libelf.h>
61 #include <gelf.h>
62
63 #include "typesafe.h"
64 #include "jhash.h"
65 #include "clippy.h"
66
/* switch for diagnostic output; debugf() prints nothing while this is false */
static bool debug;

/* printf-style diagnostics to stderr, emitted only when `debug` is set */
#define debugf(...)                                                            \
	do {                                                                   \
		if (!debug)                                                    \
			break;                                                 \
		fprintf(stderr, __VA_ARGS__);                                  \
	} while (0)
74
75 /* Exceptions */
76 static PyObject *ELFFormatError;
77 static PyObject *ELFAccessError;
78
79 /* most objects can only be created as return values from one of the methods */
80 static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
81 {
82 PyErr_SetString(PyExc_ValueError,
83 "cannot create instances of this type");
84 return NULL;
85 }
86
87 struct elfreloc;
88 struct elfsect;
89
90 PREDECL_HASH(elfrelocs);
91
92 /* ELFFile and ELFSection intentionally share some behaviour, particularly
93 * subscript[123:456] access to file data. This is because relocatables
94 * (.o files) do things section-based, but linked executables/libraries do
95 * things file-based. Having the two behave similar allows simplifying the
96 * Python code.
97 */
98
99 /* class ELFFile:
100 *
101 * overall entry point, instantiated by reading in an ELF file
102 */
103 struct elffile {
104 PyObject_HEAD
105
106 char *filename;
107 char *mmap, *mmend;
108 size_t len;
109 Elf *elf;
110
111 /* note from here on there are several instances of
112 *
113 * GElf_Something *x, _x;
114 *
115 * this is a pattern used by libelf's generic ELF routines; the _x
116 * field is used to create a copy of the ELF structure from the file
117 * with 32/64bit and endianness adjusted.
118 */
119
120 GElf_Ehdr *ehdr, _ehdr;
121 Elf_Scn *symtab;
122 size_t nsym, symstridx;
123 Elf_Data *symdata;
124
125 PyObject **sects;
126 size_t n_sect;
127
128 struct elfrelocs_head dynrelocs;
129
130 int elfclass;
131 bool bigendian;
132 bool has_symbols;
133 };
134
135 /* class ELFSection:
136 *
137 * note that executables and shared libraries can have their section headers
138 * removed, though in practice this is only used as an obfuscation technique.
139 */
140 struct elfsect {
141 PyObject_HEAD
142
143 const char *name;
144 struct elffile *ef;
145
146 GElf_Shdr _shdr, *shdr;
147 Elf_Scn *scn;
148 unsigned long idx, len;
149
150 struct elfrelocs_head relocs;
151 };
152
153 /* class ELFReloc:
154 *
155 * note: relocations in object files (.o) are section-based while relocations
156 * in executables and shared libraries are file-based.
157 *
158 * Whenever accessing something that is a pointer in the ELF file, the Python
159 * code needs to check for a relocation; if the pointer is pointing to some
160 * unresolved symbol the file will generally contain 0 bytes. The relocation
161 * will tell what the pointer is actually pointing to.
162 *
163 * This represents both static (.o file) and dynamic (.so/exec) relocations.
164 */
165 struct elfreloc {
166 PyObject_HEAD
167
168 struct elfrelocs_item elfrelocs_item;
169
170 struct elfsect *es;
171 struct elffile *ef;
172
173 /* there's also old-fashioned GElf_Rel; we're converting that to
174 * GElf_Rela in elfsect_add_relocations()
175 */
176 GElf_Rela _rela, *rela;
177 GElf_Sym _sym, *sym;
178 size_t symidx;
179 const char *symname;
180
181 /* documented below in python docstrings */
182 bool symvalid, unresolved, relative;
183 unsigned long long st_value;
184 };
185
186 static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b);
187 static uint32_t elfreloc_hash(const struct elfreloc *reloc);
188
189 DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item,
190 elfreloc_cmp, elfreloc_hash);
191
192 static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx);
193 static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx);
194 static PyObject *elfreloc_getsection(PyObject *self, PyObject *args);
195 static PyObject *elfreloc_getaddend(PyObject *obj, void *closure);
196
197 /* --- end of declarations -------------------------------------------------- */
198
199 /*
200 * class ELFReloc:
201 */
202
/* tp_doc text of the ELFReloc type */
static const char elfreloc_doc[] =
	"Represents an ELF relocation record\n\n"
	"(struct elfreloc * in elf_py.c)";
207
208 #define member(name, type, doc) \
209 { \
210 (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
211 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
212 }
213 static PyMemberDef members_elfreloc[] = {
214 member(symname, T_STRING,
215 "Name of symbol this relocation refers to.\n"
216 "\n"
217 "Will frequently be `None` in executables and shared libraries."
218 ),
219 member(symvalid, T_BOOL,
220 "Target symbol has a valid type, i.e. not STT_NOTYPE"),
221 member(unresolved, T_BOOL,
222 "Target symbol refers to an existing section"),
223 member(relative, T_BOOL,
224 "Relocation is a REL (not RELA) record and thus relative."),
225 member(st_value, T_ULONGLONG,
226 "Target symbol's value, if known\n\n"
227 "Will be zero for unresolved/external symbols."),
228 {}
229 };
230 #undef member
231
232 static PyGetSetDef getset_elfreloc[] = {
233 { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc =
234 (char *)"Relocation addend value"},
235 {}
236 };
237
238 static PyMethodDef methods_elfreloc[] = {
239 {"getsection", elfreloc_getsection, METH_VARARGS,
240 "Find relocation target's ELF section\n\n"
241 "Args: address of relocatee (TODO: fix/remove?)\n"
242 "Returns: ELFSection or None\n\n"
243 "Not possible if section headers have been stripped."},
244 {}
245 };
246
247 static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b)
248 {
249 if (a->rela->r_offset < b->rela->r_offset)
250 return -1;
251 if (a->rela->r_offset > b->rela->r_offset)
252 return 1;
253 return 0;
254 }
255
256 static uint32_t elfreloc_hash(const struct elfreloc *reloc)
257 {
258 return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset),
259 0xc9a2b7f4);
260 }
261
262 static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head,
263 GElf_Addr offset)
264 {
265 struct elfreloc dummy;
266
267 dummy.rela = &dummy._rela;
268 dummy.rela->r_offset = offset;
269 return elfrelocs_find(head, &dummy);
270 }
271
272 static PyObject *elfreloc_getsection(PyObject *self, PyObject *args)
273 {
274 struct elfreloc *w = (struct elfreloc *)self;
275 long data;
276
277 if (!PyArg_ParseTuple(args, "k", &data))
278 return NULL;
279
280 if (!w->es)
281 Py_RETURN_NONE;
282
283 if (!w->symvalid || w->symidx == 0) {
284 size_t idx = 0;
285 Elf_Scn *scn;
286
287 data = (w->relative ? data : 0) + w->rela->r_addend;
288 scn = elf_find_addr(w->es->ef, data, &idx);
289 if (!scn)
290 Py_RETURN_NONE;
291 return elffile_secbyidx(w->es->ef, scn, idx);
292 }
293 return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx);
294 }
295
296 static PyObject *elfreloc_getaddend(PyObject *obj, void *closure)
297 {
298 struct elfreloc *w = (struct elfreloc *)obj;
299
300 return Py_BuildValue("K", (unsigned long long)w->rela->r_addend);
301 }
302
303 static PyObject *elfreloc_repr(PyObject *arg)
304 {
305 struct elfreloc *w = (struct elfreloc *)arg;
306
307 return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>",
308 (unsigned long)w->rela->r_offset,
309 (w->symname && w->symname[0]) ? w->symname
310 : "[0]",
311 (unsigned long)w->rela->r_addend);
312 }
313
/* tp_free of ELFReloc; performs no work, nothing is released here */
static void elfreloc_free(void *arg)
{
	(void)arg;
}
320
321 static PyTypeObject typeobj_elfreloc = {
322 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc",
323 .tp_basicsize = sizeof(struct elfreloc),
324 .tp_flags = Py_TPFLAGS_DEFAULT,
325 .tp_doc = elfreloc_doc,
326 .tp_new = refuse_new,
327 .tp_free = elfreloc_free,
328 .tp_repr = elfreloc_repr,
329 .tp_members = members_elfreloc,
330 .tp_methods = methods_elfreloc,
331 .tp_getset = getset_elfreloc,
332 };
333
334 /*
335 * class ELFSection:
336 */
337
/* tp_doc text of the ELFSection type */
static const char elfsect_doc[] =
	"Represents an ELF section\n\n"
	"To access section contents, use subscript notation, e.g.\n"
	" section[123:456]\n"
	"To read null terminated C strings, replace the end with str:\n"
	" section[123:str]\n"
	"\n"
	"(struct elfsect * in elf_py.c)";
346
347 static PyObject *elfsect_getaddr(PyObject *self, void *closure);
348
349 #define member(name, type, doc) \
350 { \
351 (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
352 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
353 }
354 static PyMemberDef members_elfsect[] = {
355 member(name, T_STRING,
356 "Section name, e.g. \".text\""),
357 member(idx, T_ULONG,
358 "Section index in file"),
359 member(len, T_ULONG,
360 "Section length in bytes"),
361 {},
362 };
363 #undef member
364
365 static PyGetSetDef getset_elfsect[] = {
366 { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc =
367 (char *)"Section virtual address (mapped program view)"},
368 {}
369 };
370
371 static PyObject *elfsect_getaddr(PyObject *self, void *closure)
372 {
373 struct elfsect *w = (struct elfsect *)self;
374
375 return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr);
376 }
377
378
379 static PyObject *elfsect_getreloc(PyObject *self, PyObject *args)
380 {
381 struct elfsect *w = (struct elfsect *)self;
382 struct elfreloc *relw;
383 unsigned long offs;
384 PyObject *ret;
385
386 if (!PyArg_ParseTuple(args, "k", &offs))
387 return NULL;
388
389 relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr);
390 if (!relw)
391 Py_RETURN_NONE;
392
393 ret = (PyObject *)relw;
394 Py_INCREF(ret);
395 return ret;
396 }
397
398 static PyMethodDef methods_elfsect[] = {
399 {"getreloc", elfsect_getreloc, METH_VARARGS,
400 "Check for / get relocation at offset into section\n\n"
401 "Args: byte offset into section to check\n"
402 "Returns: ELFReloc or None"},
403 {}
404 };
405
406 static PyObject *elfsect_subscript(PyObject *self, PyObject *key)
407 {
408 Py_ssize_t start, stop, step, sllen;
409 struct elfsect *w = (struct elfsect *)self;
410 PySliceObject *slice;
411 unsigned long offs, len = ~0UL;
412
413 if (!PySlice_Check(key)) {
414 PyErr_SetString(PyExc_IndexError,
415 "ELFSection subscript must be slice");
416 return NULL;
417 }
418 slice = (PySliceObject *)key;
419 if (PyLong_Check(slice->stop)) {
420 if (PySlice_GetIndicesEx(key, w->shdr->sh_size,
421 &start, &stop, &step, &sllen))
422 return NULL;
423
424 if (step != 1) {
425 PyErr_SetString(PyExc_IndexError,
426 "ELFSection subscript slice step must be 1");
427 return NULL;
428 }
429 if ((GElf_Xword)stop > w->shdr->sh_size) {
430 PyErr_Format(ELFAccessError,
431 "access (%lu) beyond end of section %lu/%s (%lu)",
432 stop, w->idx, w->name, w->shdr->sh_size);
433 return NULL;
434 }
435
436 offs = start;
437 len = sllen;
438 } else {
439 if (slice->stop != (void *)&PyUnicode_Type
440 || !PyLong_Check(slice->start)) {
441 PyErr_SetString(PyExc_IndexError, "invalid slice");
442 return NULL;
443 }
444
445 offs = PyLong_AsUnsignedLongLong(slice->start);
446 len = ~0UL;
447 }
448
449 offs += w->shdr->sh_offset;
450 if (offs > w->ef->len) {
451 PyErr_Format(ELFAccessError,
452 "access (%lu) beyond end of file (%lu)",
453 offs, w->ef->len);
454 return NULL;
455 }
456 if (len == ~0UL)
457 len = strnlen(w->ef->mmap + offs, w->ef->len - offs);
458
459 Py_ssize_t pylen = len;
460
461 #if PY_MAJOR_VERSION >= 3
462 return Py_BuildValue("y#", w->ef->mmap + offs, pylen);
463 #else
464 return Py_BuildValue("s#", w->ef->mmap + offs, pylen);
465 #endif
466 }
467
468 static PyMappingMethods mp_elfsect = {
469 .mp_subscript = elfsect_subscript,
470 };
471
/* tp_free of ELFSection; performs no work, nothing is released here */
static void elfsect_free(void *arg)
{
	(void)arg;
}
478
479 static PyObject *elfsect_repr(PyObject *arg)
480 {
481 struct elfsect *w = (struct elfsect *)arg;
482
483 return PyUnicode_FromFormat("<ELFSection %s>", w->name);
484 }
485
486 static PyTypeObject typeobj_elfsect = {
487 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection",
488 .tp_basicsize = sizeof(struct elfsect),
489 .tp_flags = Py_TPFLAGS_DEFAULT,
490 .tp_doc = elfsect_doc,
491 .tp_new = refuse_new,
492 .tp_free = elfsect_free,
493 .tp_repr = elfsect_repr,
494 .tp_as_mapping = &mp_elfsect,
495 .tp_members = members_elfsect,
496 .tp_methods = methods_elfsect,
497 .tp_getset = getset_elfsect,
498 };
499
500 static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel,
501 GElf_Shdr *relhdr)
502 {
503 size_t i, entries;
504 Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link);
505 GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr);
506 Elf_Data *symdata = elf_getdata(symtab, NULL);
507 Elf_Data *reldata = elf_getdata(rel, NULL);
508
509 entries = relhdr->sh_size / relhdr->sh_entsize;
510 for (i = 0; i < entries; i++) {
511 struct elfreloc *relw;
512 size_t symidx;
513 GElf_Rela *rela;
514 GElf_Sym *sym;
515
516 relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
517 &typeobj_elfreloc, 0);
518 relw->es = w;
519
520 if (relhdr->sh_type == SHT_REL) {
521 GElf_Rel _rel, *rel;
522
523 rel = gelf_getrel(reldata, i, &_rel);
524 relw->rela = &relw->_rela;
525 relw->rela->r_offset = rel->r_offset;
526 relw->rela->r_info = rel->r_info;
527 relw->rela->r_addend = 0;
528 relw->relative = true;
529 } else
530 relw->rela = gelf_getrela(reldata, i, &relw->_rela);
531
532 rela = relw->rela;
533 if (rela->r_offset < w->shdr->sh_addr
534 || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size)
535 continue;
536
537 symidx = relw->symidx = GELF_R_SYM(rela->r_info);
538 sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
539 if (sym) {
540 relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link,
541 sym->st_name);
542 relw->symvalid = GELF_ST_TYPE(sym->st_info)
543 != STT_NOTYPE;
544 relw->unresolved = sym->st_shndx == SHN_UNDEF;
545 relw->st_value = sym->st_value;
546 } else {
547 relw->symname = NULL;
548 relw->symvalid = false;
549 relw->unresolved = false;
550 relw->st_value = 0;
551 }
552
553 debugf("reloc @ %016llx sym %5llu %016llx %s\n",
554 (long long)rela->r_offset, (unsigned long long)symidx,
555 (long long)rela->r_addend, relw->symname);
556
557 elfrelocs_add(&w->relocs, relw);
558 }
559 }
560
561 /*
562 * bindings & loading code between ELFFile and ELFSection
563 */
564
565 static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx,
566 const char *name)
567 {
568 struct elfsect *w;
569 size_t i;
570
571 w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0);
572 if (!w)
573 return NULL;
574
575 w->name = name;
576 w->ef = ef;
577 w->scn = scn;
578 w->shdr = gelf_getshdr(scn, &w->_shdr);
579 w->len = w->shdr->sh_size;
580 w->idx = idx;
581 elfrelocs_init(&w->relocs);
582
583 for (i = 0; i < ef->ehdr->e_shnum; i++) {
584 Elf_Scn *scn = elf_getscn(ef->elf, i);
585 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
586
587 if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL)
588 continue;
589 if (shdr->sh_info && shdr->sh_info != idx)
590 continue;
591 elfsect_add_relocations(w, scn, shdr);
592 }
593
594 return (PyObject *)w;
595 }
596
597 static Elf_Scn *elf_find_section(struct elffile *ef, const char *name,
598 size_t *idx)
599 {
600 size_t i;
601 const char *secname;
602
603 for (i = 0; i < ef->ehdr->e_shnum; i++) {
604 Elf_Scn *scn = elf_getscn(ef->elf, i);
605 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
606
607 secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx,
608 shdr->sh_name);
609 if (strcmp(secname, name))
610 continue;
611 if (idx)
612 *idx = i;
613 return scn;
614 }
615 return NULL;
616 }
617
618 static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx)
619 {
620 size_t i;
621
622 for (i = 0; i < ef->ehdr->e_shnum; i++) {
623 Elf_Scn *scn = elf_getscn(ef->elf, i);
624 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
625
626 /* virtual address is kinda meaningless for TLS sections */
627 if (shdr->sh_flags & SHF_TLS)
628 continue;
629 if (addr < shdr->sh_addr ||
630 addr >= shdr->sh_addr + shdr->sh_size)
631 continue;
632
633 if (idx)
634 *idx = i;
635 return scn;
636 }
637 return NULL;
638 }
639
640 /*
641 * class ELFFile:
642 */
643
/* tp_doc text of the ELFFile type */
static const char elffile_doc[] =
	"Represents an ELF file\n\n"
	"Args: filename to load\n\n"
	"To access raw file contents, use subscript notation, e.g.\n"
	" file[123:456]\n"
	"To read null terminated C strings, replace the end with str:\n"
	" file[123:str]\n"
	"\n"
	"(struct elffile * in elf_py.c)";
654
655
656 #define member(name, type, doc) \
657 { \
658 (char *)#name, type, offsetof(struct elffile, name), READONLY, \
659 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
660 }
661 static PyMemberDef members_elffile[] = {
662 member(filename, T_STRING,
663 "Original file name as given when opening"),
664 member(elfclass, T_INT,
665 "ELF class (architecture bit size)\n\n"
666 "Either 32 or 64, straight integer."),
667 member(bigendian, T_BOOL,
668 "ELF file is big-endian\n\n"
669 "All internal ELF structures are automatically converted."),
670 member(has_symbols, T_BOOL,
671 "A symbol section is present\n\n"
672 "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
673 ),
674 {},
675 };
676 #undef member
677
678 static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx)
679 {
680 const char *name;
681 PyObject *ret;
682
683 if (!scn)
684 scn = elf_getscn(w->elf, idx);
685 if (!scn || idx >= w->n_sect)
686 Py_RETURN_NONE;
687
688 if (!w->sects[idx]) {
689 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
690
691 name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name);
692 w->sects[idx] = elfsect_wrap(w, scn, idx, name);
693 }
694
695 ret = w->sects[idx];
696 Py_INCREF(ret);
697 return ret;
698 }
699
700 static PyObject *elffile_get_section(PyObject *self, PyObject *args)
701 {
702 const char *name;
703 struct elffile *w = (struct elffile *)self;
704 Elf_Scn *scn;
705 size_t idx = 0;
706
707 if (!PyArg_ParseTuple(args, "s", &name))
708 return NULL;
709
710 scn = elf_find_section(w, name, &idx);
711 return elffile_secbyidx(w, scn, idx);
712 }
713
714 static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args)
715 {
716 unsigned long long addr;
717 struct elffile *w = (struct elffile *)self;
718 Elf_Scn *scn;
719 size_t idx = 0;
720
721 if (!PyArg_ParseTuple(args, "K", &addr))
722 return NULL;
723
724 scn = elf_find_addr(w, addr, &idx);
725 return elffile_secbyidx(w, scn, idx);
726 }
727
728 static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args)
729 {
730 unsigned long long idx;
731 struct elffile *w = (struct elffile *)self;
732
733 if (!PyArg_ParseTuple(args, "K", &idx))
734 return NULL;
735
736 return elffile_secbyidx(w, NULL, idx);
737 }
738
739 static PyObject *elffile_get_symbol(PyObject *self, PyObject *args)
740 {
741 const char *name, *symname;
742 struct elffile *w = (struct elffile *)self;
743 GElf_Sym _sym, *sym;
744 size_t i;
745
746 if (!PyArg_ParseTuple(args, "s", &name))
747 return NULL;
748
749 for (i = 0; i < w->nsym; i++) {
750 sym = gelf_getsym(w->symdata, i, &_sym);
751 if (sym->st_name == 0)
752 continue;
753 symname = elf_strptr(w->elf, w->symstridx, sym->st_name);
754 if (strcmp(symname, name))
755 continue;
756
757 PyObject *pysect;
758 Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx);
759
760 if (scn)
761 pysect = elffile_secbyidx(w, scn, sym->st_shndx);
762 else {
763 pysect = Py_None;
764 Py_INCREF(pysect);
765 }
766 return Py_BuildValue("sKN", symname,
767 (unsigned long long)sym->st_value, pysect);
768 }
769 Py_RETURN_NONE;
770 }
771
772 static PyObject *elffile_getreloc(PyObject *self, PyObject *args)
773 {
774 struct elffile *w = (struct elffile *)self;
775 struct elfreloc *relw;
776 unsigned long offs;
777 PyObject *ret;
778
779 if (!PyArg_ParseTuple(args, "k", &offs))
780 return NULL;
781
782 relw = elfrelocs_get(&w->dynrelocs, offs);
783 if (!relw)
784 Py_RETURN_NONE;
785
786 ret = (PyObject *)relw;
787 Py_INCREF(ret);
788 return ret;
789 }
790
791 static PyObject *elffile_find_note(PyObject *self, PyObject *args)
792 {
793 #if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
794 const char *owner;
795 const uint8_t *ids;
796 GElf_Word id;
797 struct elffile *w = (struct elffile *)self;
798 size_t i;
799
800 if (!PyArg_ParseTuple(args, "ss", &owner, &ids))
801 return NULL;
802
803 if (strlen((char *)ids) != 4) {
804 PyErr_SetString(PyExc_ValueError,
805 "ELF note ID must be exactly 4-byte string");
806 return NULL;
807 }
808 if (w->bigendian)
809 id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3];
810 else
811 id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0];
812
813 for (i = 0; i < w->ehdr->e_phnum; i++) {
814 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
815 Elf_Data *notedata;
816 size_t offset;
817
818 if (phdr->p_type != PT_NOTE)
819 continue;
820
821 notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset,
822 phdr->p_filesz, ELF_T_NHDR);
823
824 GElf_Nhdr nhdr[1];
825 size_t nameoffs, dataoffs;
826
827 offset = 0;
828 while ((offset = gelf_getnote(notedata, offset, nhdr,
829 &nameoffs, &dataoffs))) {
830 if (phdr->p_offset + nameoffs >= w->len)
831 continue;
832
833 const char *name = w->mmap + phdr->p_offset + nameoffs;
834
835 if (strcmp(name, owner))
836 continue;
837 if (id != nhdr->n_type)
838 continue;
839
840 PyObject *s, *e;
841
842 s = PyLong_FromUnsignedLongLong(
843 phdr->p_vaddr + dataoffs);
844 e = PyLong_FromUnsignedLongLong(
845 phdr->p_vaddr + dataoffs + nhdr->n_descsz);
846 return PySlice_New(s, e, NULL);
847 }
848 }
849 #endif
850 Py_RETURN_NONE;
851 }
852
853 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
854 static bool elffile_virt2file(struct elffile *w, GElf_Addr virt,
855 GElf_Addr *offs)
856 {
857 *offs = 0;
858
859 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
860 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
861
862 if (phdr->p_type != PT_LOAD)
863 continue;
864
865 if (virt < phdr->p_vaddr
866 || virt >= phdr->p_vaddr + phdr->p_memsz)
867 continue;
868
869 if (virt >= phdr->p_vaddr + phdr->p_filesz)
870 return false;
871
872 *offs = virt - phdr->p_vaddr + phdr->p_offset;
873 return true;
874 }
875
876 return false;
877 }
878 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
879
880 static PyObject *elffile_subscript(PyObject *self, PyObject *key)
881 {
882 Py_ssize_t start, stop, step;
883 PySliceObject *slice;
884 struct elffile *w = (struct elffile *)self;
885 bool str = false;
886
887 if (!PySlice_Check(key)) {
888 PyErr_SetString(PyExc_IndexError,
889 "ELFFile subscript must be slice");
890 return NULL;
891 }
892 slice = (PySliceObject *)key;
893 stop = -1;
894 step = 1;
895 if (PyLong_Check(slice->stop)) {
896 start = PyLong_AsSsize_t(slice->start);
897 if (PyErr_Occurred())
898 return NULL;
899 if (slice->stop != Py_None) {
900 stop = PyLong_AsSsize_t(slice->stop);
901 if (PyErr_Occurred())
902 return NULL;
903 }
904 if (slice->step != Py_None) {
905 step = PyLong_AsSsize_t(slice->step);
906 if (PyErr_Occurred())
907 return NULL;
908 }
909 } else {
910 if (slice->stop != (void *)&PyUnicode_Type
911 || !PyLong_Check(slice->start)) {
912 PyErr_SetString(PyExc_IndexError, "invalid slice");
913 return NULL;
914 }
915
916 str = true;
917 start = PyLong_AsUnsignedLongLong(slice->start);
918 }
919 if (step != 1) {
920 PyErr_SetString(PyExc_IndexError,
921 "ELFFile subscript slice step must be 1");
922 return NULL;
923 }
924
925 GElf_Addr xstart = start, xstop = stop;
926
927 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
928 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
929
930 if (phdr->p_type != PT_LOAD)
931 continue;
932
933 if (xstart < phdr->p_vaddr
934 || xstart >= phdr->p_vaddr + phdr->p_memsz)
935 continue;
936 if (!str && (xstop < phdr->p_vaddr
937 || xstop > phdr->p_vaddr + phdr->p_memsz)) {
938 PyErr_Format(ELFAccessError,
939 "access (%llu) beyond end of program header (%llu)",
940 (long long)xstop,
941 (long long)(phdr->p_vaddr +
942 phdr->p_memsz));
943 return NULL;
944 }
945
946 xstart = xstart - phdr->p_vaddr + phdr->p_offset;
947
948 if (str)
949 xstop = strlen(w->mmap + xstart);
950 else
951 xstop = xstop - phdr->p_vaddr + phdr->p_offset;
952
953 Py_ssize_t pylen = xstop - xstart;
954
955 #if PY_MAJOR_VERSION >= 3
956 return Py_BuildValue("y#", w->mmap + xstart, pylen);
957 #else
958 return Py_BuildValue("s#", w->mmap + xstart, pylen);
959 #endif
960 };
961
962 return PyErr_Format(ELFAccessError,
963 "virtual address (%llu) not found in program headers",
964 (long long)start);
965 }
966
967 static PyMethodDef methods_elffile[] = {
968 {"find_note", elffile_find_note, METH_VARARGS,
969 "find specific note entry"},
970 {"getreloc", elffile_getreloc, METH_VARARGS,
971 "find relocation"},
972 {"get_symbol", elffile_get_symbol, METH_VARARGS,
973 "find symbol by name"},
974 {"get_section", elffile_get_section, METH_VARARGS,
975 "find section by name"},
976 {"get_section_addr", elffile_get_section_addr, METH_VARARGS,
977 "find section by address"},
978 {"get_section_idx", elffile_get_section_idx, METH_VARARGS,
979 "find section by index"},
980 {}
981 };
982
983 static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
984 PyObject *kwds);
985
986 static void elffile_free(void *arg)
987 {
988 struct elffile *w = arg;
989
990 elf_end(w->elf);
991 munmap(w->mmap, w->len);
992 free(w->filename);
993 }
994
995 static PyMappingMethods mp_elffile = {
996 .mp_subscript = elffile_subscript,
997 };
998
999 static PyTypeObject typeobj_elffile = {
1000 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile",
1001 .tp_basicsize = sizeof(struct elffile),
1002 .tp_flags = Py_TPFLAGS_DEFAULT,
1003 .tp_doc = elffile_doc,
1004 .tp_new = elffile_load,
1005 .tp_free = elffile_free,
1006 .tp_as_mapping = &mp_elffile,
1007 .tp_members = members_elffile,
1008 .tp_methods = methods_elffile,
1009 };
1010
1011 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1012 static char *elfdata_strptr(Elf_Data *data, size_t offset)
1013 {
1014 char *p;
1015
1016 if (offset >= data->d_size)
1017 return NULL;
1018
1019 p = (char *)data->d_buf + offset;
1020 if (strnlen(p, data->d_size - offset) >= data->d_size - offset)
1021 return NULL;
1022
1023 return p;
1024 }
1025
1026 static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata,
1027 size_t entries, Elf_Data *symdata,
1028 Elf_Data *strdata, Elf_Type typ)
1029 {
1030 size_t i;
1031
1032 for (i = 0; i < entries; i++) {
1033 struct elfreloc *relw;
1034 size_t symidx;
1035 GElf_Rela *rela;
1036 GElf_Sym *sym;
1037 GElf_Addr rel_offs = 0;
1038
1039 relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
1040 &typeobj_elfreloc, 0);
1041 relw->ef = w;
1042
1043 if (typ == ELF_T_REL) {
1044 GElf_Rel _rel, *rel;
1045 GElf_Addr offs;
1046
1047 rel = gelf_getrel(reldata, i, &_rel);
1048 relw->rela = &relw->_rela;
1049 relw->rela->r_offset = rel->r_offset;
1050 relw->rela->r_info = rel->r_info;
1051 relw->rela->r_addend = 0;
1052 relw->relative = true;
1053
1054 /* REL uses the pointer contents itself instead of the
1055 * RELA addend field :( ... theoretically this could
1056 * be some weird platform specific encoding, but since
1057 * we only care about data relocations it should
1058 * always be a pointer...
1059 */
1060 if (elffile_virt2file(w, rel->r_offset, &offs)) {
1061 Elf_Data *ptr;
1062
1063 /* NB: this endian-converts! */
1064 ptr = elf_getdata_rawchunk(w->elf, offs,
1065 w->elfclass / 8,
1066 ELF_T_ADDR);
1067
1068 if (ptr) {
1069 char *dst = (char *)&rel_offs;
1070
1071 /* sigh. it endian-converts. but
1072 * doesn't size-convert.
1073 */
1074 if (BYTE_ORDER == BIG_ENDIAN &&
1075 ptr->d_size < sizeof(rel_offs))
1076 dst += sizeof(rel_offs) -
1077 ptr->d_size;
1078
1079 memcpy(dst, ptr->d_buf, ptr->d_size);
1080
1081 relw->relative = false;
1082 relw->rela->r_addend = rel_offs;
1083 }
1084 }
1085 } else
1086 relw->rela = gelf_getrela(reldata, i, &relw->_rela);
1087
1088 rela = relw->rela;
1089 symidx = relw->symidx = GELF_R_SYM(rela->r_info);
1090 sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
1091 if (sym) {
1092 relw->symname = elfdata_strptr(strdata, sym->st_name);
1093 relw->symvalid = GELF_ST_TYPE(sym->st_info)
1094 != STT_NOTYPE;
1095 relw->unresolved = sym->st_shndx == SHN_UNDEF;
1096 relw->st_value = sym->st_value;
1097 } else {
1098 relw->symname = NULL;
1099 relw->symvalid = false;
1100 relw->unresolved = false;
1101 relw->st_value = 0;
1102 }
1103
1104 if (typ == ELF_T_RELA)
1105 debugf("dynrela @ %016llx sym %5llu %016llx %s\n",
1106 (long long)rela->r_offset,
1107 (unsigned long long)symidx,
1108 (long long)rela->r_addend, relw->symname);
1109 else
1110 debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n",
1111 (long long)rela->r_offset,
1112 (unsigned long long)symidx,
1113 (unsigned long long)rel_offs, relw->symname);
1114
1115 elfrelocs_add(&w->dynrelocs, relw);
1116 }
1117
1118 }
1119 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
1120
1121 /* primary (only, really) entry point to anything in this module */
1122 static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
1123 PyObject *kwds)
1124 {
1125 const char *filename;
1126 static const char * const kwnames[] = {"filename", NULL};
1127 struct elffile *w;
1128 struct stat st;
1129 int fd, err;
1130
1131 w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0);
1132 if (!w)
1133 return NULL;
1134
1135 if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames,
1136 &filename))
1137 return NULL;
1138
1139 w->filename = strdup(filename);
1140 fd = open(filename, O_RDONLY | O_NOCTTY);
1141 if (fd < 0 || fstat(fd, &st)) {
1142 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1143 close(fd);
1144 goto out;
1145 }
1146 w->len = st.st_size;
1147 w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
1148 if (!w->mmap) {
1149 PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
1150 close(fd);
1151 goto out;
1152 }
1153 close(fd);
1154 w->mmend = w->mmap + st.st_size;
1155
1156 if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) {
1157 PyErr_SetString(ELFFormatError, "invalid ELF signature");
1158 goto out;
1159 }
1160
1161 switch (w->mmap[EI_CLASS]) {
1162 case ELFCLASS32:
1163 w->elfclass = 32;
1164 break;
1165 case ELFCLASS64:
1166 w->elfclass = 64;
1167 break;
1168 default:
1169 PyErr_SetString(ELFFormatError, "invalid ELF class");
1170 goto out;
1171 }
1172 switch (w->mmap[EI_DATA]) {
1173 case ELFDATA2LSB:
1174 w->bigendian = false;
1175 break;
1176 case ELFDATA2MSB:
1177 w->bigendian = true;
1178 break;
1179 default:
1180 PyErr_SetString(ELFFormatError, "invalid ELF byte order");
1181 goto out;
1182 }
1183
1184 w->elf = elf_memory(w->mmap, w->len);
1185 if (!w->elf)
1186 goto out_elferr;
1187 w->ehdr = gelf_getehdr(w->elf, &w->_ehdr);
1188 if (!w->ehdr)
1189 goto out_elferr;
1190
1191 for (size_t i = 0; i < w->ehdr->e_shnum; i++) {
1192 Elf_Scn *scn = elf_getscn(w->elf, i);
1193 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
1194
1195 if (shdr->sh_type == SHT_SYMTAB) {
1196 w->symtab = scn;
1197 w->nsym = shdr->sh_size / shdr->sh_entsize;
1198 w->symdata = elf_getdata(scn, NULL);
1199 w->symstridx = shdr->sh_link;
1200 break;
1201 }
1202 }
1203 w->has_symbols = w->symtab && w->symstridx;
1204 elfrelocs_init(&w->dynrelocs);
1205
1206 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1207 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
1208 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
1209
1210 if (phdr->p_type != PT_DYNAMIC)
1211 continue;
1212
1213 Elf_Data *dyndata = elf_getdata_rawchunk(w->elf,
1214 phdr->p_offset, phdr->p_filesz, ELF_T_DYN);
1215
1216 GElf_Addr dynrela = 0, dynrel = 0, symtab = 0, strtab = 0;
1217 size_t dynrelasz = 0, dynrelaent = 0;
1218 size_t dynrelsz = 0, dynrelent = 0;
1219 size_t strsz = 0;
1220 GElf_Dyn _dyn, *dyn;
1221
1222 for (size_t j = 0;; j++) {
1223 dyn = gelf_getdyn(dyndata, j, &_dyn);
1224
1225 if (dyn->d_tag == DT_NULL)
1226 break;
1227
1228 switch (dyn->d_tag) {
1229 case DT_SYMTAB:
1230 symtab = dyn->d_un.d_ptr;
1231 break;
1232
1233 case DT_STRTAB:
1234 strtab = dyn->d_un.d_ptr;
1235 break;
1236 case DT_STRSZ:
1237 strsz = dyn->d_un.d_val;
1238 break;
1239
1240 case DT_RELA:
1241 dynrela = dyn->d_un.d_ptr;
1242 break;
1243 case DT_RELASZ:
1244 dynrelasz = dyn->d_un.d_val;
1245 break;
1246 case DT_RELAENT:
1247 dynrelaent = dyn->d_un.d_val;
1248 break;
1249
1250 case DT_REL:
1251 dynrel = dyn->d_un.d_ptr;
1252 break;
1253 case DT_RELSZ:
1254 dynrelsz = dyn->d_un.d_val;
1255 break;
1256 case DT_RELENT:
1257 dynrelent = dyn->d_un.d_val;
1258 break;
1259 }
1260 }
1261
1262 GElf_Addr offset;
1263 Elf_Data *symdata = NULL, *strdata = NULL;
1264
1265 if (elffile_virt2file(w, symtab, &offset))
1266 symdata = elf_getdata_rawchunk(w->elf, offset,
1267 w->len - offset,
1268 ELF_T_SYM);
1269 if (elffile_virt2file(w, strtab, &offset))
1270 strdata = elf_getdata_rawchunk(w->elf, offset,
1271 strsz, ELF_T_BYTE);
1272
1273 size_t c;
1274
1275 if (dynrela && dynrelasz && dynrelaent
1276 && elffile_virt2file(w, dynrela, &offset)) {
1277 Elf_Data *reladata = NULL;
1278
1279 debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela,
1280 (long long)offset, (long long)dynrelasz);
1281
1282 reladata = elf_getdata_rawchunk(w->elf, offset,
1283 dynrelasz, ELF_T_RELA);
1284
1285 c = dynrelasz / dynrelaent;
1286 elffile_add_dynreloc(w, reladata, c, symdata, strdata,
1287 ELF_T_RELA);
1288 }
1289
1290 if (dynrel && dynrelsz && dynrelent
1291 && elffile_virt2file(w, dynrel, &offset)) {
1292 Elf_Data *reldata = NULL;
1293
1294 debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel,
1295 (long long)offset, (long long)dynrelsz);
1296
1297 reldata = elf_getdata_rawchunk(w->elf, offset, dynrelsz,
1298 ELF_T_REL);
1299
1300 c = dynrelsz / dynrelent;
1301 elffile_add_dynreloc(w, reldata, c, symdata, strdata,
1302 ELF_T_REL);
1303 }
1304 }
1305 #endif
1306
1307 w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum);
1308 w->n_sect = w->ehdr->e_shnum;
1309
1310 return (PyObject *)w;
1311
1312 out_elferr:
1313 err = elf_errno();
1314
1315 PyErr_Format(ELFFormatError, "libelf error %d: %s",
1316 err, elf_errmsg(err));
1317 out:
1318 if (w->elf)
1319 elf_end(w->elf);
1320 free(w->filename);
1321 return NULL;
1322 }
1323
1324 static PyObject *elfpy_debug(PyObject *self, PyObject *args)
1325 {
1326 int arg;
1327
1328 if (!PyArg_ParseTuple(args, "p", &arg))
1329 return NULL;
1330
1331 debug = arg;
1332
1333 Py_RETURN_NONE;
1334 }
1335
1336 static PyMethodDef methods_elfpy[] = {
1337 {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"},
1338 {}
1339 };
1340
1341 bool elf_py_init(PyObject *pymod)
1342 {
1343 if (PyType_Ready(&typeobj_elffile) < 0)
1344 return false;
1345 if (PyType_Ready(&typeobj_elfsect) < 0)
1346 return false;
1347 if (PyType_Ready(&typeobj_elfreloc) < 0)
1348 return false;
1349 if (elf_version(EV_CURRENT) == EV_NONE)
1350 return false;
1351
1352 #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5
1353 PyModule_AddFunctions(pymod, methods_elfpy);
1354 #else
1355 (void)methods_elfpy;
1356 #endif
1357
1358 ELFFormatError = PyErr_NewException("_clippy.ELFFormatError",
1359 PyExc_ValueError, NULL);
1360 PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError);
1361 ELFAccessError = PyErr_NewException("_clippy.ELFAccessError",
1362 PyExc_IndexError, NULL);
1363 PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError);
1364
1365 Py_INCREF(&typeobj_elffile);
1366 PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile);
1367 Py_INCREF(&typeobj_elfsect);
1368 PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect);
1369 Py_INCREF(&typeobj_elfreloc);
1370 PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc);
1371 return true;
1372 }