]> git.proxmox.com Git - mirror_frr.git/blob - lib/elf_py.c
Merge pull request #8426 from idryzhov/fix-interface-nb-stale-pointers
[mirror_frr.git] / lib / elf_py.c
1 /*
2 * fast ELF file accessor
3 * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; see the file COPYING; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 /* Note: this wrapper is intended to be used as build-time helper. While
21 * it should be generally correct and proper, there may be the occasional
22 * memory leak or SEGV for things that haven't been well-tested.
23 * _
24 * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used
25 * / ! \ in FRR to read files created by its own build. Don't take it out
26 * /_____\ of FRR and use it to parse random ELF files you found somewhere.
27 *
28 * If you're working with this code (or even reading it), you really need to
29 * read a bunch of the ELF specs. There's no way around it, things in here
30 * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
31 * your friends.
32 *
33 * Required reading:
34 * https://refspecs.linuxfoundation.org/elf/elf.pdf
35 * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
36 * Recommended reading:
37 * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
38 *
39 * The core ELF spec is *not* enough, you should read at least one of the
40 * processor specific (psABI) docs. They define what & how relocations work.
41 * Luckily we don't need to care about the processor specifics since this only
42 * does data relocations, but without looking at the psABI, some things aren't
43 * quite clear.
44 */
45
46 /* the API of this module roughly follows a very small subset of the one
47 * provided by the python elfutils package, which unfortunately is painfully
48 * slow.
49 */
50
51 #define PY_SSIZE_T_CLEAN
52
53 #include <Python.h>
54 #ifdef HAVE_CONFIG_H
55 #include "config.h"
56 #endif
57 #include "structmember.h"
58 #include <string.h>
59 #include <stdlib.h>
60 #include <unistd.h>
61 #include <sys/types.h>
62 #include <sys/stat.h>
63 #include <sys/mman.h>
64 #include <fcntl.h>
65
66 #if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
67 /* Solaris libelf bails otherwise ... */
68 #undef _FILE_OFFSET_BITS
69 #define _FILE_OFFSET_BITS 32
70 #endif
71
72 #include <elf.h>
73 #include <libelf.h>
74 #include <gelf.h>
75
76 #include "typesafe.h"
77 #include "jhash.h"
78 #include "clippy.h"
79
/* Switch for the debugf() trace output below; zero-initialised, i.e. off. */
static bool debug;

/* printf-style trace helper: hands its arguments to fprintf(stderr, ...)
 * while "debug" is set and does nothing otherwise.  The do/while wrapper
 * makes the expansion behave like one ordinary statement.
 */
#define debugf(...)                                                            \
	do {                                                                   \
		if (debug) {                                                   \
			fprintf(stderr, __VA_ARGS__);                          \
		}                                                              \
	} while (0)
87
/* Exceptions
 *
 * Both are passed as exception type to PyErr_SetString()/PyErr_Format() in
 * this file: ELFFormatError when the file fails the ELF sanity checks while
 * loading, ELFAccessError when a subscript reaches outside of a section,
 * program header or the file.
 */
static PyObject *ELFFormatError;
static PyObject *ELFAccessError;
91
92 /* most objects can only be created as return values from one of the methods */
93 static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
94 {
95 PyErr_SetString(PyExc_ValueError,
96 "cannot create instances of this type");
97 return NULL;
98 }
99
100 struct elfreloc;
101 struct elfsect;
102
103 PREDECL_HASH(elfrelocs);
104
105 /* ELFFile and ELFSection intentionally share some behaviour, particularly
106 * subscript[123:456] access to file data. This is because relocatables
107 * (.o files) do things section-based, but linked executables/libraries do
108 * things file-based. Having the two behave similar allows simplifying the
109 * Python code.
110 */
111
112 /* class ELFFile:
113 *
114 * overall entry point, instantiated by reading in an ELF file
115 */
116 struct elffile {
117 PyObject_HEAD
118
119 char *filename;
120 char *mmap, *mmend;
121 size_t len;
122 Elf *elf;
123
124 /* note from here on there are several instances of
125 *
126 * GElf_Something *x, _x;
127 *
128 * this is a pattern used by libelf's generic ELF routines; the _x
129 * field is used to create a copy of the ELF structure from the file
130 * with 32/64bit and endianness adjusted.
131 */
132
133 GElf_Ehdr *ehdr, _ehdr;
134 Elf_Scn *symtab;
135 size_t nsym, symstridx;
136 Elf_Data *symdata;
137
138 PyObject **sects;
139 size_t n_sect;
140
141 struct elfrelocs_head dynrelocs;
142
143 int elfclass;
144 bool bigendian;
145 bool has_symbols;
146 };
147
148 /* class ELFSection:
149 *
150 * note that executables and shared libraries can have their section headers
151 * removed, though in practice this is only used as an obfuscation technique.
152 */
153 struct elfsect {
154 PyObject_HEAD
155
156 const char *name;
157 struct elffile *ef;
158
159 GElf_Shdr _shdr, *shdr;
160 Elf_Scn *scn;
161 unsigned long idx, len;
162
163 struct elfrelocs_head relocs;
164 };
165
166 /* class ELFReloc:
167 *
168 * note: relocations in object files (.o) are section-based while relocations
169 * in executables and shared libraries are file-based.
170 *
171 * Whenever accessing something that is a pointer in the ELF file, the Python
172 * code needs to check for a relocation; if the pointer is pointing to some
173 * unresolved symbol the file will generally contain 0 bytes. The relocation
174 * will tell what the pointer is actually pointing to.
175 *
176 * This represents both static (.o file) and dynamic (.so/exec) relocations.
177 */
178 struct elfreloc {
179 PyObject_HEAD
180
181 struct elfrelocs_item elfrelocs_item;
182
183 struct elfsect *es;
184 struct elffile *ef;
185
186 /* there's also old-fashioned GElf_Rel; we're converting that to
187 * GElf_Rela in elfsect_add_relocations()
188 */
189 GElf_Rela _rela, *rela;
190 GElf_Sym _sym, *sym;
191 size_t symidx;
192 const char *symname;
193
194 /* documented below in python docstrings */
195 bool symvalid, unresolved, relative;
196 unsigned long long st_value;
197 };
198
199 static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b);
200 static uint32_t elfreloc_hash(const struct elfreloc *reloc);
201
202 DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item,
203 elfreloc_cmp, elfreloc_hash);
204
205 static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx);
206 static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx);
207 static PyObject *elfreloc_getsection(PyObject *self, PyObject *args);
208 static PyObject *elfreloc_getaddend(PyObject *obj, void *closure);
209
210 /* --- end of declarations -------------------------------------------------- */
211
212 /*
213 * class ELFReloc:
214 */
215
216 static const char elfreloc_doc[] =
217 "Represents an ELF relocation record\n"
218 "\n"
219 "(struct elfreloc * in elf_py.c)";
220
221 #define member(name, type, doc) \
222 { \
223 (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
224 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
225 }
226 static PyMemberDef members_elfreloc[] = {
227 member(symname, T_STRING,
228 "Name of symbol this relocation refers to.\n"
229 "\n"
230 "Will frequently be `None` in executables and shared libraries."
231 ),
232 member(symvalid, T_BOOL,
233 "Target symbol has a valid type, i.e. not STT_NOTYPE"),
234 member(unresolved, T_BOOL,
235 "Target symbol refers to an existing section"),
236 member(relative, T_BOOL,
237 "Relocation is a REL (not RELA) record and thus relative."),
238 member(st_value, T_ULONGLONG,
239 "Target symbol's value, if known\n\n"
240 "Will be zero for unresolved/external symbols."),
241 {}
242 };
243 #undef member
244
245 static PyGetSetDef getset_elfreloc[] = {
246 { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc =
247 (char *)"Relocation addend value"},
248 {}
249 };
250
251 static PyMethodDef methods_elfreloc[] = {
252 {"getsection", elfreloc_getsection, METH_VARARGS,
253 "Find relocation target's ELF section\n\n"
254 "Args: address of relocatee (TODO: fix/remove?)\n"
255 "Returns: ELFSection or None\n\n"
256 "Not possible if section headers have been stripped."},
257 {}
258 };
259
260 static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b)
261 {
262 if (a->rela->r_offset < b->rela->r_offset)
263 return -1;
264 if (a->rela->r_offset > b->rela->r_offset)
265 return 1;
266 return 0;
267 }
268
269 static uint32_t elfreloc_hash(const struct elfreloc *reloc)
270 {
271 return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset),
272 0xc9a2b7f4);
273 }
274
275 static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head,
276 GElf_Addr offset)
277 {
278 struct elfreloc dummy;
279
280 dummy.rela = &dummy._rela;
281 dummy.rela->r_offset = offset;
282 return elfrelocs_find(head, &dummy);
283 }
284
285 static PyObject *elfreloc_getsection(PyObject *self, PyObject *args)
286 {
287 struct elfreloc *w = (struct elfreloc *)self;
288 long data;
289
290 if (!PyArg_ParseTuple(args, "k", &data))
291 return NULL;
292
293 if (!w->es)
294 Py_RETURN_NONE;
295
296 if (w->symidx == 0) {
297 size_t idx = 0;
298 Elf_Scn *scn;
299
300 data = (w->relative ? data : 0) + w->rela->r_addend;
301 scn = elf_find_addr(w->es->ef, data, &idx);
302 if (!scn)
303 Py_RETURN_NONE;
304 return elffile_secbyidx(w->es->ef, scn, idx);
305 }
306 return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx);
307 }
308
309 static PyObject *elfreloc_getaddend(PyObject *obj, void *closure)
310 {
311 struct elfreloc *w = (struct elfreloc *)obj;
312
313 return Py_BuildValue("K", (unsigned long long)w->rela->r_addend);
314 }
315
316 static PyObject *elfreloc_repr(PyObject *arg)
317 {
318 struct elfreloc *w = (struct elfreloc *)arg;
319
320 return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>",
321 (unsigned long)w->rela->r_offset,
322 (w->symname && w->symname[0]) ? w->symname
323 : "[0]",
324 (unsigned long)w->rela->r_addend);
325 }
326
/* tp_free for ELFReloc.  Intentionally does nothing: the object is neither
 * released nor unlinked here.
 */
static void elfreloc_free(void *arg)
{
	(void)arg;
}
333
334 static PyTypeObject typeobj_elfreloc = {
335 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc",
336 .tp_basicsize = sizeof(struct elfreloc),
337 .tp_flags = Py_TPFLAGS_DEFAULT,
338 .tp_doc = elfreloc_doc,
339 .tp_new = refuse_new,
340 .tp_free = elfreloc_free,
341 .tp_repr = elfreloc_repr,
342 .tp_members = members_elfreloc,
343 .tp_methods = methods_elfreloc,
344 .tp_getset = getset_elfreloc,
345 };
346
347 /*
348 * class ELFSection:
349 */
350
351 static const char elfsect_doc[] =
352 "Represents an ELF section\n"
353 "\n"
354 "To access section contents, use subscript notation, e.g.\n"
355 " section[123:456]\n"
356 "To read null terminated C strings, replace the end with str:\n"
357 " section[123:str]\n\n"
358 "(struct elfsect * in elf_py.c)";
359
360 static PyObject *elfsect_getaddr(PyObject *self, void *closure);
361
362 #define member(name, type, doc) \
363 { \
364 (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
365 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
366 }
367 static PyMemberDef members_elfsect[] = {
368 member(name, T_STRING,
369 "Section name, e.g. \".text\""),
370 member(idx, T_ULONG,
371 "Section index in file"),
372 member(len, T_ULONG,
373 "Section length in bytes"),
374 {},
375 };
376 #undef member
377
378 static PyGetSetDef getset_elfsect[] = {
379 { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc =
380 (char *)"Section virtual address (mapped program view)"},
381 {}
382 };
383
384 static PyObject *elfsect_getaddr(PyObject *self, void *closure)
385 {
386 struct elfsect *w = (struct elfsect *)self;
387
388 return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr);
389 }
390
391
392 static PyObject *elfsect_getreloc(PyObject *self, PyObject *args)
393 {
394 struct elfsect *w = (struct elfsect *)self;
395 struct elfreloc *relw;
396 unsigned long offs;
397 PyObject *ret;
398
399 if (!PyArg_ParseTuple(args, "k", &offs))
400 return NULL;
401
402 relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr);
403 if (!relw)
404 Py_RETURN_NONE;
405
406 ret = (PyObject *)relw;
407 Py_INCREF(ret);
408 return ret;
409 }
410
411 static PyMethodDef methods_elfsect[] = {
412 {"getreloc", elfsect_getreloc, METH_VARARGS,
413 "Check for / get relocation at offset into section\n\n"
414 "Args: byte offset into section to check\n"
415 "Returns: ELFReloc or None"},
416 {}
417 };
418
419 static PyObject *elfsect_subscript(PyObject *self, PyObject *key)
420 {
421 Py_ssize_t start, stop, step, sllen;
422 struct elfsect *w = (struct elfsect *)self;
423 PySliceObject *slice;
424 unsigned long offs, len = ~0UL;
425
426 if (!PySlice_Check(key)) {
427 PyErr_SetString(PyExc_IndexError,
428 "ELFSection subscript must be slice");
429 return NULL;
430 }
431 slice = (PySliceObject *)key;
432 if (PyLong_Check(slice->stop)) {
433 if (PySlice_GetIndicesEx(key, w->shdr->sh_size,
434 &start, &stop, &step, &sllen))
435 return NULL;
436
437 if (step != 1) {
438 PyErr_SetString(PyExc_IndexError,
439 "ELFSection subscript slice step must be 1");
440 return NULL;
441 }
442 if ((GElf_Xword)stop > w->shdr->sh_size) {
443 PyErr_Format(ELFAccessError,
444 "access (%lu) beyond end of section %lu/%s (%lu)",
445 stop, w->idx, w->name, w->shdr->sh_size);
446 return NULL;
447 }
448
449 offs = start;
450 len = sllen;
451 } else {
452 if (slice->stop != (void *)&PyUnicode_Type
453 || !PyLong_Check(slice->start)) {
454 PyErr_SetString(PyExc_IndexError, "invalid slice");
455 return NULL;
456 }
457
458 offs = PyLong_AsUnsignedLongLong(slice->start);
459 len = ~0UL;
460 }
461
462 offs += w->shdr->sh_offset;
463 if (offs > w->ef->len) {
464 PyErr_Format(ELFAccessError,
465 "access (%lu) beyond end of file (%lu)",
466 offs, w->ef->len);
467 return NULL;
468 }
469 if (len == ~0UL)
470 len = strnlen(w->ef->mmap + offs, w->ef->len - offs);
471
472 Py_ssize_t pylen = len;
473
474 #if PY_MAJOR_VERSION >= 3
475 return Py_BuildValue("y#", w->ef->mmap + offs, pylen);
476 #else
477 return Py_BuildValue("s#", w->ef->mmap + offs, pylen);
478 #endif
479 }
480
/* Mapping protocol for ELFSection: only reading via section[a:b] is
 * provided, no length and no item assignment.
 */
static PyMappingMethods mp_elfsect = {
	.mp_subscript = elfsect_subscript,
};
484
/* tp_free for ELFSection.  Intentionally does nothing: the object is not
 * released here.
 */
static void elfsect_free(void *arg)
{
	(void)arg;
}
491
492 static PyObject *elfsect_repr(PyObject *arg)
493 {
494 struct elfsect *w = (struct elfsect *)arg;
495
496 return PyUnicode_FromFormat("<ELFSection %s>", w->name);
497 }
498
499 static PyTypeObject typeobj_elfsect = {
500 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection",
501 .tp_basicsize = sizeof(struct elfsect),
502 .tp_flags = Py_TPFLAGS_DEFAULT,
503 .tp_doc = elfsect_doc,
504 .tp_new = refuse_new,
505 .tp_free = elfsect_free,
506 .tp_repr = elfsect_repr,
507 .tp_as_mapping = &mp_elfsect,
508 .tp_members = members_elfsect,
509 .tp_methods = methods_elfsect,
510 .tp_getset = getset_elfsect,
511 };
512
513 static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel,
514 GElf_Shdr *relhdr)
515 {
516 size_t i, entries;
517 Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link);
518 GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr);
519 Elf_Data *symdata = elf_getdata(symtab, NULL);
520 Elf_Data *reldata = elf_getdata(rel, NULL);
521
522 entries = relhdr->sh_size / relhdr->sh_entsize;
523 for (i = 0; i < entries; i++) {
524 struct elfreloc *relw;
525 size_t symidx;
526 GElf_Rela *rela;
527 GElf_Sym *sym;
528
529 relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
530 &typeobj_elfreloc, 0);
531 relw->es = w;
532
533 if (relhdr->sh_type == SHT_REL) {
534 GElf_Rel _rel, *rel;
535
536 rel = gelf_getrel(reldata, i, &_rel);
537 relw->rela = &relw->_rela;
538 relw->rela->r_offset = rel->r_offset;
539 relw->rela->r_info = rel->r_info;
540 relw->rela->r_addend = 0;
541 relw->relative = true;
542 } else
543 relw->rela = gelf_getrela(reldata, i, &relw->_rela);
544
545 rela = relw->rela;
546 if (rela->r_offset < w->shdr->sh_addr
547 || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size)
548 continue;
549
550 symidx = relw->symidx = GELF_R_SYM(rela->r_info);
551 sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
552 if (sym) {
553 relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link,
554 sym->st_name);
555 relw->symvalid = GELF_ST_TYPE(sym->st_info)
556 != STT_NOTYPE;
557 relw->unresolved = sym->st_shndx == SHN_UNDEF;
558 relw->st_value = sym->st_value;
559 } else {
560 relw->symname = NULL;
561 relw->symvalid = false;
562 relw->unresolved = false;
563 relw->st_value = 0;
564 }
565
566 debugf("reloc @ %016llx sym %5llu %016llx %s\n",
567 (long long)rela->r_offset, (unsigned long long)symidx,
568 (long long)rela->r_addend, relw->symname);
569
570 elfrelocs_add(&w->relocs, relw);
571 }
572 }
573
574 /*
575 * bindings & loading code between ELFFile and ELFSection
576 */
577
578 static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx,
579 const char *name)
580 {
581 struct elfsect *w;
582 size_t i;
583
584 w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0);
585 if (!w)
586 return NULL;
587
588 w->name = name;
589 w->ef = ef;
590 w->scn = scn;
591 w->shdr = gelf_getshdr(scn, &w->_shdr);
592 w->len = w->shdr->sh_size;
593 w->idx = idx;
594 elfrelocs_init(&w->relocs);
595
596 for (i = 0; i < ef->ehdr->e_shnum; i++) {
597 Elf_Scn *scn = elf_getscn(ef->elf, i);
598 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
599
600 if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL)
601 continue;
602 if (shdr->sh_info && shdr->sh_info != idx)
603 continue;
604 elfsect_add_relocations(w, scn, shdr);
605 }
606
607 return (PyObject *)w;
608 }
609
610 static Elf_Scn *elf_find_section(struct elffile *ef, const char *name,
611 size_t *idx)
612 {
613 size_t i;
614 const char *secname;
615
616 for (i = 0; i < ef->ehdr->e_shnum; i++) {
617 Elf_Scn *scn = elf_getscn(ef->elf, i);
618 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
619
620 secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx,
621 shdr->sh_name);
622 if (strcmp(secname, name))
623 continue;
624 if (idx)
625 *idx = i;
626 return scn;
627 }
628 return NULL;
629 }
630
631 static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx)
632 {
633 size_t i;
634
635 for (i = 0; i < ef->ehdr->e_shnum; i++) {
636 Elf_Scn *scn = elf_getscn(ef->elf, i);
637 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
638
639 if (addr < shdr->sh_addr ||
640 addr >= shdr->sh_addr + shdr->sh_size)
641 continue;
642
643 if (idx)
644 *idx = i;
645 return scn;
646 }
647 return NULL;
648 }
649
650 /*
651 * class ELFFile:
652 */
653
654 static const char elffile_doc[] =
655 "Represents an ELF file\n"
656 "\n"
657 "Args: filename to load\n"
658 "\n"
659 "To access raw file contents, use subscript notation, e.g.\n"
660 " file[123:456]\n"
661 "To read null terminated C strings, replace the end with str:\n"
662 " file[123:str]\n\n"
663 "(struct elffile * in elf_py.c)";
664
665
666 #define member(name, type, doc) \
667 { \
668 (char *)#name, type, offsetof(struct elffile, name), READONLY, \
669 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
670 }
671 static PyMemberDef members_elffile[] = {
672 member(filename, T_STRING,
673 "Original file name as given when opening"),
674 member(elfclass, T_INT,
675 "ELF class (architecture bit size)\n\n"
676 "Either 32 or 64, straight integer."),
677 member(bigendian, T_BOOL,
678 "ELF file is big-endian\n\n"
679 "All internal ELF structures are automatically converted."),
680 member(has_symbols, T_BOOL,
681 "A symbol section is present\n\n"
682 "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
683 ),
684 {},
685 };
686 #undef member
687
688 static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx)
689 {
690 const char *name;
691 PyObject *ret;
692
693 if (!scn)
694 scn = elf_getscn(w->elf, idx);
695 if (!scn || idx >= w->n_sect)
696 Py_RETURN_NONE;
697
698 if (!w->sects[idx]) {
699 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
700
701 name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name);
702 w->sects[idx] = elfsect_wrap(w, scn, idx, name);
703 }
704
705 ret = w->sects[idx];
706 Py_INCREF(ret);
707 return ret;
708 }
709
710 static PyObject *elffile_get_section(PyObject *self, PyObject *args)
711 {
712 const char *name;
713 struct elffile *w = (struct elffile *)self;
714 Elf_Scn *scn;
715 size_t idx = 0;
716
717 if (!PyArg_ParseTuple(args, "s", &name))
718 return NULL;
719
720 scn = elf_find_section(w, name, &idx);
721 return elffile_secbyidx(w, scn, idx);
722 }
723
724 static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args)
725 {
726 unsigned long long addr;
727 struct elffile *w = (struct elffile *)self;
728 Elf_Scn *scn;
729 size_t idx = 0;
730
731 if (!PyArg_ParseTuple(args, "K", &addr))
732 return NULL;
733
734 scn = elf_find_addr(w, addr, &idx);
735 return elffile_secbyidx(w, scn, idx);
736 }
737
738 static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args)
739 {
740 unsigned long long idx;
741 struct elffile *w = (struct elffile *)self;
742
743 if (!PyArg_ParseTuple(args, "K", &idx))
744 return NULL;
745
746 return elffile_secbyidx(w, NULL, idx);
747 }
748
749 static PyObject *elffile_get_symbol(PyObject *self, PyObject *args)
750 {
751 const char *name, *symname;
752 struct elffile *w = (struct elffile *)self;
753 GElf_Sym _sym, *sym;
754 size_t i;
755
756 if (!PyArg_ParseTuple(args, "s", &name))
757 return NULL;
758
759 for (i = 0; i < w->nsym; i++) {
760 sym = gelf_getsym(w->symdata, i, &_sym);
761 if (sym->st_name == 0)
762 continue;
763 symname = elf_strptr(w->elf, w->symstridx, sym->st_name);
764 if (strcmp(symname, name))
765 continue;
766
767 PyObject *pysect;
768 Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx);
769
770 if (scn)
771 pysect = elffile_secbyidx(w, scn, sym->st_shndx);
772 else {
773 pysect = Py_None;
774 Py_INCREF(pysect);
775 }
776 return Py_BuildValue("sKN", symname,
777 (unsigned long long)sym->st_value, pysect);
778 }
779 Py_RETURN_NONE;
780 }
781
782 static PyObject *elffile_getreloc(PyObject *self, PyObject *args)
783 {
784 struct elffile *w = (struct elffile *)self;
785 struct elfreloc *relw;
786 unsigned long offs;
787 PyObject *ret;
788
789 if (!PyArg_ParseTuple(args, "k", &offs))
790 return NULL;
791
792 relw = elfrelocs_get(&w->dynrelocs, offs);
793 if (!relw)
794 Py_RETURN_NONE;
795
796 ret = (PyObject *)relw;
797 Py_INCREF(ret);
798 return ret;
799 }
800
801 static PyObject *elffile_find_note(PyObject *self, PyObject *args)
802 {
803 #if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
804 const char *owner;
805 const uint8_t *ids;
806 GElf_Word id;
807 struct elffile *w = (struct elffile *)self;
808 size_t i;
809
810 if (!PyArg_ParseTuple(args, "ss", &owner, &ids))
811 return NULL;
812
813 if (strlen((char *)ids) != 4) {
814 PyErr_SetString(PyExc_ValueError,
815 "ELF note ID must be exactly 4-byte string");
816 return NULL;
817 }
818 if (w->bigendian)
819 id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3];
820 else
821 id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0];
822
823 for (i = 0; i < w->ehdr->e_phnum; i++) {
824 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
825 Elf_Data *notedata;
826 size_t offset;
827
828 if (phdr->p_type != PT_NOTE)
829 continue;
830
831 notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset,
832 phdr->p_filesz, ELF_T_NHDR);
833
834 GElf_Nhdr nhdr[1];
835 size_t nameoffs, dataoffs;
836
837 offset = 0;
838 while ((offset = gelf_getnote(notedata, offset, nhdr,
839 &nameoffs, &dataoffs))) {
840 if (phdr->p_offset + nameoffs >= w->len)
841 continue;
842
843 const char *name = w->mmap + phdr->p_offset + nameoffs;
844
845 if (strcmp(name, owner))
846 continue;
847 if (id != nhdr->n_type)
848 continue;
849
850 PyObject *s, *e;
851
852 s = PyLong_FromUnsignedLongLong(
853 phdr->p_vaddr + dataoffs);
854 e = PyLong_FromUnsignedLongLong(
855 phdr->p_vaddr + dataoffs + nhdr->n_descsz);
856 return PySlice_New(s, e, NULL);
857 }
858 }
859 #endif
860 Py_RETURN_NONE;
861 }
862
863 static bool elffile_virt2file(struct elffile *w, GElf_Addr virt,
864 GElf_Addr *offs)
865 {
866 *offs = 0;
867
868 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
869 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
870
871 if (phdr->p_type != PT_LOAD)
872 continue;
873
874 if (virt < phdr->p_vaddr
875 || virt >= phdr->p_vaddr + phdr->p_memsz)
876 continue;
877
878 if (virt >= phdr->p_vaddr + phdr->p_filesz)
879 return false;
880
881 *offs = virt - phdr->p_vaddr + phdr->p_offset;
882 return true;
883 }
884
885 return false;
886 }
887
888 static PyObject *elffile_subscript(PyObject *self, PyObject *key)
889 {
890 Py_ssize_t start, stop, step;
891 PySliceObject *slice;
892 struct elffile *w = (struct elffile *)self;
893 bool str = false;
894
895 if (!PySlice_Check(key)) {
896 PyErr_SetString(PyExc_IndexError,
897 "ELFFile subscript must be slice");
898 return NULL;
899 }
900 slice = (PySliceObject *)key;
901 stop = -1;
902 step = 1;
903 if (PyLong_Check(slice->stop)) {
904 start = PyLong_AsSsize_t(slice->start);
905 if (PyErr_Occurred())
906 return NULL;
907 if (slice->stop != Py_None) {
908 stop = PyLong_AsSsize_t(slice->stop);
909 if (PyErr_Occurred())
910 return NULL;
911 }
912 if (slice->step != Py_None) {
913 step = PyLong_AsSsize_t(slice->step);
914 if (PyErr_Occurred())
915 return NULL;
916 }
917 } else {
918 if (slice->stop != (void *)&PyUnicode_Type
919 || !PyLong_Check(slice->start)) {
920 PyErr_SetString(PyExc_IndexError, "invalid slice");
921 return NULL;
922 }
923
924 str = true;
925 start = PyLong_AsUnsignedLongLong(slice->start);
926 }
927 if (step != 1) {
928 PyErr_SetString(PyExc_IndexError,
929 "ELFFile subscript slice step must be 1");
930 return NULL;
931 }
932
933 GElf_Addr xstart = start, xstop = stop;
934
935 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
936 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
937
938 if (phdr->p_type != PT_LOAD)
939 continue;
940
941 if (xstart < phdr->p_vaddr
942 || xstart >= phdr->p_vaddr + phdr->p_memsz)
943 continue;
944 if (!str && (xstop < phdr->p_vaddr
945 || xstop > phdr->p_vaddr + phdr->p_memsz)) {
946 PyErr_Format(ELFAccessError,
947 "access (%llu) beyond end of program header (%llu)",
948 (long long)xstop,
949 (long long)(phdr->p_vaddr +
950 phdr->p_memsz));
951 return NULL;
952 }
953
954 xstart = xstart - phdr->p_vaddr + phdr->p_offset;
955
956 if (str)
957 xstop = strlen(w->mmap + xstart);
958 else
959 xstop = xstop - phdr->p_vaddr + phdr->p_offset;
960
961 Py_ssize_t pylen = xstop - xstart;
962
963 #if PY_MAJOR_VERSION >= 3
964 return Py_BuildValue("y#", w->mmap + xstart, pylen);
965 #else
966 return Py_BuildValue("s#", w->mmap + xstart, pylen);
967 #endif
968 };
969
970 return PyErr_Format(ELFAccessError,
971 "virtual address (%llu) not found in program headers",
972 (long long)start);
973 }
974
975 static PyMethodDef methods_elffile[] = {
976 {"find_note", elffile_find_note, METH_VARARGS,
977 "find specific note entry"},
978 {"getreloc", elffile_getreloc, METH_VARARGS,
979 "find relocation"},
980 {"get_symbol", elffile_get_symbol, METH_VARARGS,
981 "find symbol by name"},
982 {"get_section", elffile_get_section, METH_VARARGS,
983 "find section by name"},
984 {"get_section_addr", elffile_get_section_addr, METH_VARARGS,
985 "find section by address"},
986 {"get_section_idx", elffile_get_section_idx, METH_VARARGS,
987 "find section by index"},
988 {}
989 };
990
991 static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
992 PyObject *kwds);
993
994 static void elffile_free(void *arg)
995 {
996 struct elffile *w = arg;
997
998 elf_end(w->elf);
999 munmap(w->mmap, w->len);
1000 free(w->filename);
1001 }
1002
/* Mapping protocol for ELFFile: only reading via file[a:b] is provided,
 * no length and no item assignment.
 */
static PyMappingMethods mp_elffile = {
	.mp_subscript = elffile_subscript,
};
1006
1007 static PyTypeObject typeobj_elffile = {
1008 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile",
1009 .tp_basicsize = sizeof(struct elffile),
1010 .tp_flags = Py_TPFLAGS_DEFAULT,
1011 .tp_doc = elffile_doc,
1012 .tp_new = elffile_load,
1013 .tp_free = elffile_free,
1014 .tp_as_mapping = &mp_elffile,
1015 .tp_members = members_elffile,
1016 .tp_methods = methods_elffile,
1017 };
1018
1019 static char *elfdata_strptr(Elf_Data *data, size_t offset)
1020 {
1021 char *p;
1022
1023 if (offset >= data->d_size)
1024 return NULL;
1025
1026 p = (char *)data->d_buf + offset;
1027 if (strnlen(p, data->d_size - offset) >= data->d_size - offset)
1028 return NULL;
1029
1030 return p;
1031 }
1032
1033 static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata,
1034 size_t entries, Elf_Data *symdata,
1035 Elf_Data *strdata, Elf_Type typ)
1036 {
1037 size_t i;
1038
1039 for (i = 0; i < entries; i++) {
1040 struct elfreloc *relw;
1041 size_t symidx;
1042 GElf_Rela *rela;
1043 GElf_Sym *sym;
1044 GElf_Addr rel_offs = 0;
1045
1046 relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
1047 &typeobj_elfreloc, 0);
1048 relw->ef = w;
1049
1050 if (typ == ELF_T_REL) {
1051 GElf_Rel _rel, *rel;
1052 GElf_Addr offs;
1053
1054 rel = gelf_getrel(reldata, i, &_rel);
1055 relw->rela = &relw->_rela;
1056 relw->rela->r_offset = rel->r_offset;
1057 relw->rela->r_info = rel->r_info;
1058 relw->rela->r_addend = 0;
1059 relw->relative = true;
1060
1061 /* REL uses the pointer contents itself instead of the
1062 * RELA addend field :( ... theoretically this could
1063 * be some weird platform specific encoding, but since
1064 * we only care about data relocations it should
1065 * always be a pointer...
1066 */
1067 if (elffile_virt2file(w, rel->r_offset, &offs)) {
1068 Elf_Data *ptr, *conv;
1069 GElf_Addr tmp;
1070 Elf_Data mem = {
1071 .d_buf = (void *)&tmp,
1072 .d_type = ELF_T_ADDR,
1073 .d_version = EV_CURRENT,
1074 .d_size = sizeof(tmp),
1075 .d_off = 0,
1076 .d_align = 0,
1077 };
1078
1079 ptr = elf_getdata_rawchunk(w->elf, offs,
1080 w->elfclass / 8,
1081 ELF_T_ADDR);
1082
1083 conv = gelf_xlatetom(w->elf, &mem, ptr,
1084 w->mmap[EI_DATA]);
1085 if (conv) {
1086 memcpy(&rel_offs, conv->d_buf,
1087 conv->d_size);
1088
1089 relw->relative = false;
1090 relw->rela->r_addend = rel_offs;
1091 }
1092 }
1093 } else
1094 relw->rela = gelf_getrela(reldata, i, &relw->_rela);
1095
1096 rela = relw->rela;
1097 symidx = relw->symidx = GELF_R_SYM(rela->r_info);
1098 sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
1099 if (sym) {
1100 relw->symname = elfdata_strptr(strdata, sym->st_name);
1101 relw->symvalid = GELF_ST_TYPE(sym->st_info)
1102 != STT_NOTYPE;
1103 relw->unresolved = sym->st_shndx == SHN_UNDEF;
1104 relw->st_value = sym->st_value;
1105 } else {
1106 relw->symname = NULL;
1107 relw->symvalid = false;
1108 relw->unresolved = false;
1109 relw->st_value = 0;
1110 }
1111
1112 if (typ == ELF_T_RELA)
1113 debugf("dynrela @ %016llx sym %5llu %016llx %s\n",
1114 (long long)rela->r_offset,
1115 (unsigned long long)symidx,
1116 (long long)rela->r_addend, relw->symname);
1117 else
1118 debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n",
1119 (long long)rela->r_offset,
1120 (unsigned long long)symidx,
1121 (unsigned long long)rel_offs, relw->symname);
1122
1123 elfrelocs_add(&w->dynrelocs, relw);
1124 }
1125
1126 }
1127
1128 /* primary (only, really) entry point to anything in this module */
1129 static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
1130 PyObject *kwds)
1131 {
1132 const char *filename;
1133 static const char * const kwnames[] = {"filename", NULL};
1134 struct elffile *w;
1135 struct stat st;
1136 int fd, err;
1137
1138 w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0);
1139 if (!w)
1140 return NULL;
1141
1142 if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames,
1143 &filename))
1144 return NULL;
1145
1146 w->filename = strdup(filename);
1147 fd = open(filename, O_RDONLY | O_NOCTTY);
1148 if (fd < 0 || fstat(fd, &st)) {
1149 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1150 close(fd);
1151 goto out;
1152 }
1153 w->len = st.st_size;
1154 w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
1155 if (!w->mmap) {
1156 PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
1157 close(fd);
1158 goto out;
1159 }
1160 close(fd);
1161 w->mmend = w->mmap + st.st_size;
1162
1163 if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) {
1164 PyErr_SetString(ELFFormatError, "invalid ELF signature");
1165 goto out;
1166 }
1167
1168 switch (w->mmap[EI_CLASS]) {
1169 case ELFCLASS32:
1170 w->elfclass = 32;
1171 break;
1172 case ELFCLASS64:
1173 w->elfclass = 64;
1174 break;
1175 default:
1176 PyErr_SetString(ELFFormatError, "invalid ELF class");
1177 goto out;
1178 }
1179 switch (w->mmap[EI_DATA]) {
1180 case ELFDATA2LSB:
1181 w->bigendian = false;
1182 break;
1183 case ELFDATA2MSB:
1184 w->bigendian = true;
1185 break;
1186 default:
1187 PyErr_SetString(ELFFormatError, "invalid ELF byte order");
1188 goto out;
1189 }
1190
1191 w->elf = elf_memory(w->mmap, w->len);
1192 if (!w->elf)
1193 goto out_elferr;
1194 w->ehdr = gelf_getehdr(w->elf, &w->_ehdr);
1195 if (!w->ehdr)
1196 goto out_elferr;
1197
1198 for (size_t i = 0; i < w->ehdr->e_shnum; i++) {
1199 Elf_Scn *scn = elf_getscn(w->elf, i);
1200 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
1201
1202 if (shdr->sh_type == SHT_SYMTAB) {
1203 w->symtab = scn;
1204 w->nsym = shdr->sh_size / shdr->sh_entsize;
1205 w->symdata = elf_getdata(scn, NULL);
1206 w->symstridx = shdr->sh_link;
1207 break;
1208 }
1209 }
1210 w->has_symbols = w->symtab && w->symstridx;
1211 elfrelocs_init(&w->dynrelocs);
1212
1213 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1214 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
1215 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
1216
1217 if (phdr->p_type != PT_DYNAMIC)
1218 continue;
1219
1220 Elf_Data *dyndata = elf_getdata_rawchunk(w->elf,
1221 phdr->p_offset, phdr->p_filesz, ELF_T_DYN);
1222
1223 GElf_Addr dynrela = 0, dynrel = 0, symtab = 0, strtab = 0;
1224 size_t dynrelasz = 0, dynrelaent = 0;
1225 size_t dynrelsz = 0, dynrelent = 0;
1226 size_t strsz = 0;
1227 GElf_Dyn _dyn, *dyn;
1228
1229 for (size_t j = 0;; j++) {
1230 dyn = gelf_getdyn(dyndata, j, &_dyn);
1231
1232 if (dyn->d_tag == DT_NULL)
1233 break;
1234
1235 switch (dyn->d_tag) {
1236 case DT_SYMTAB:
1237 symtab = dyn->d_un.d_ptr;
1238 break;
1239
1240 case DT_STRTAB:
1241 strtab = dyn->d_un.d_ptr;
1242 break;
1243 case DT_STRSZ:
1244 strsz = dyn->d_un.d_val;
1245 break;
1246
1247 case DT_RELA:
1248 dynrela = dyn->d_un.d_ptr;
1249 break;
1250 case DT_RELASZ:
1251 dynrelasz = dyn->d_un.d_val;
1252 break;
1253 case DT_RELAENT:
1254 dynrelaent = dyn->d_un.d_val;
1255 break;
1256
1257 case DT_REL:
1258 dynrel = dyn->d_un.d_ptr;
1259 break;
1260 case DT_RELSZ:
1261 dynrelsz = dyn->d_un.d_val;
1262 break;
1263 case DT_RELENT:
1264 dynrelent = dyn->d_un.d_val;
1265 break;
1266 }
1267 }
1268
1269 GElf_Addr offset;
1270 Elf_Data *symdata = NULL, *strdata = NULL;
1271
1272 if (elffile_virt2file(w, symtab, &offset))
1273 symdata = elf_getdata_rawchunk(w->elf, offset,
1274 w->len - offset,
1275 ELF_T_SYM);
1276 if (elffile_virt2file(w, strtab, &offset))
1277 strdata = elf_getdata_rawchunk(w->elf, offset,
1278 strsz, ELF_T_BYTE);
1279
1280 size_t c;
1281
1282 if (dynrela && dynrelasz && dynrelaent
1283 && elffile_virt2file(w, dynrela, &offset)) {
1284 Elf_Data *reladata = NULL;
1285
1286 debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela,
1287 (long long)offset, (long long)dynrelasz);
1288
1289 reladata = elf_getdata_rawchunk(w->elf, offset,
1290 dynrelasz, ELF_T_RELA);
1291
1292 c = dynrelasz / dynrelaent;
1293 elffile_add_dynreloc(w, reladata, c, symdata, strdata,
1294 ELF_T_RELA);
1295 }
1296
1297 if (dynrel && dynrelsz && dynrelent
1298 && elffile_virt2file(w, dynrel, &offset)) {
1299 Elf_Data *reldata = NULL;
1300
1301 debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel,
1302 (long long)offset, (long long)dynrelsz);
1303
1304 reldata = elf_getdata_rawchunk(w->elf, offset, dynrelsz,
1305 ELF_T_REL);
1306
1307 c = dynrelsz / dynrelent;
1308 elffile_add_dynreloc(w, reldata, c, symdata, strdata,
1309 ELF_T_REL);
1310 }
1311 }
1312 #endif
1313
1314 w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum);
1315 w->n_sect = w->ehdr->e_shnum;
1316
1317 return (PyObject *)w;
1318
1319 out_elferr:
1320 err = elf_errno();
1321
1322 PyErr_Format(ELFFormatError, "libelf error %d: %s",
1323 err, elf_errmsg(err));
1324 out:
1325 if (w->elf)
1326 elf_end(w->elf);
1327 free(w->filename);
1328 return NULL;
1329 }
1330
1331 static PyObject *elfpy_debug(PyObject *self, PyObject *args)
1332 {
1333 int arg;
1334
1335 if (!PyArg_ParseTuple(args, "p", &arg))
1336 return NULL;
1337
1338 debug = arg;
1339
1340 Py_RETURN_NONE;
1341 }
1342
1343 static PyMethodDef methods_elfpy[] = {
1344 {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"},
1345 {}
1346 };
1347
1348 bool elf_py_init(PyObject *pymod)
1349 {
1350 if (PyType_Ready(&typeobj_elffile) < 0)
1351 return false;
1352 if (PyType_Ready(&typeobj_elfsect) < 0)
1353 return false;
1354 if (PyType_Ready(&typeobj_elfreloc) < 0)
1355 return false;
1356 if (elf_version(EV_CURRENT) == EV_NONE)
1357 return false;
1358
1359 #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5
1360 PyModule_AddFunctions(pymod, methods_elfpy);
1361 #else
1362 (void)methods_elfpy;
1363 #endif
1364
1365 ELFFormatError = PyErr_NewException("_clippy.ELFFormatError",
1366 PyExc_ValueError, NULL);
1367 PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError);
1368 ELFAccessError = PyErr_NewException("_clippy.ELFAccessError",
1369 PyExc_IndexError, NULL);
1370 PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError);
1371
1372 Py_INCREF(&typeobj_elffile);
1373 PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile);
1374 Py_INCREF(&typeobj_elfsect);
1375 PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect);
1376 Py_INCREF(&typeobj_elfreloc);
1377 PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc);
1378 return true;
1379 }