]> git.proxmox.com Git - mirror_frr.git/blob - lib/elf_py.c
Merge pull request #9708 from mobash-rasool/new_b
[mirror_frr.git] / lib / elf_py.c
1 /*
2 * fast ELF file accessor
3 * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; see the file COPYING; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
/* Note: this wrapper is intended to be used as a build-time helper.  While
 * it should be generally correct and proper, there may be the occasional
 * memory leak or SEGV for things that haven't been well-tested.
 *    _
 *   / \    This code is NOT SUITABLE FOR UNTRUSTED ELF FILES.  It's used
 *  / ! \   in FRR to read files created by its own build.  Don't take it out
 * /_____\  of FRR and use it to parse random ELF files you found somewhere.
27 *
28 * If you're working with this code (or even reading it), you really need to
29 * read a bunch of the ELF specs. There's no way around it, things in here
30 * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
31 * your friends.
32 *
33 * Required reading:
34 * https://refspecs.linuxfoundation.org/elf/elf.pdf
35 * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
36 * Recommended reading:
37 * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
38 *
39 * The core ELF spec is *not* enough, you should read at least one of the
40 * processor specific (psABI) docs. They define what & how relocations work.
41 * Luckily we don't need to care about the processor specifics since this only
42 * does data relocations, but without looking at the psABI, some things aren't
43 * quite clear.
44 */
45
46 /* the API of this module roughly follows a very small subset of the one
47 * provided by the python elfutils package, which unfortunately is painfully
48 * slow.
49 */
50
51 #define PY_SSIZE_T_CLEAN
52
53 #include <Python.h>
54 #ifdef HAVE_CONFIG_H
55 #include "config.h"
56 #endif
57 #include "structmember.h"
58 #include <string.h>
59 #include <stdlib.h>
60 #include <unistd.h>
61 #include <sys/types.h>
62 #include <sys/stat.h>
63 #include <sys/mman.h>
64 #include <fcntl.h>
65
66 #if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
67 /* Solaris libelf bails otherwise ... */
68 #undef _FILE_OFFSET_BITS
69 #define _FILE_OFFSET_BITS 32
70 #endif
71
72 #include <elf.h>
73 #include <libelf.h>
74 #include <gelf.h>
75
76 #include "typesafe.h"
77 #include "jhash.h"
78 #include "clippy.h"
79
80 static bool debug;
81
82 #define debugf(...) \
83 do { \
84 if (debug) \
85 fprintf(stderr, __VA_ARGS__); \
86 } while (0)
87
88 /* Exceptions */
89 static PyObject *ELFFormatError;
90 static PyObject *ELFAccessError;
91
92 /* most objects can only be created as return values from one of the methods */
93 static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
94 {
95 PyErr_SetString(PyExc_ValueError,
96 "cannot create instances of this type");
97 return NULL;
98 }
99
100 struct elfreloc;
101 struct elfsect;
102
103 PREDECL_HASH(elfrelocs);
104
105 /* ELFFile and ELFSection intentionally share some behaviour, particularly
106 * subscript[123:456] access to file data. This is because relocatables
107 * (.o files) do things section-based, but linked executables/libraries do
108 * things file-based. Having the two behave similar allows simplifying the
109 * Python code.
110 */
111
112 /* class ELFFile:
113 *
114 * overall entry point, instantiated by reading in an ELF file
115 */
116 struct elffile {
117 PyObject_HEAD
118
119 char *filename;
120 char *mmap, *mmend;
121 size_t len;
122 Elf *elf;
123
124 /* note from here on there are several instances of
125 *
126 * GElf_Something *x, _x;
127 *
128 * this is a pattern used by libelf's generic ELF routines; the _x
129 * field is used to create a copy of the ELF structure from the file
130 * with 32/64bit and endianness adjusted.
131 */
132
133 GElf_Ehdr *ehdr, _ehdr;
134 Elf_Scn *symtab;
135 size_t nsym, symstridx;
136 Elf_Data *symdata;
137
138 PyObject **sects;
139 size_t n_sect;
140
141 struct elfrelocs_head dynrelocs;
142
143 int elfclass;
144 bool bigendian;
145 bool has_symbols;
146 };
147
148 /* class ELFSection:
149 *
150 * note that executables and shared libraries can have their section headers
151 * removed, though in practice this is only used as an obfuscation technique.
152 */
153 struct elfsect {
154 PyObject_HEAD
155
156 const char *name;
157 struct elffile *ef;
158
159 GElf_Shdr _shdr, *shdr;
160 Elf_Scn *scn;
161 unsigned long idx, len;
162
163 struct elfrelocs_head relocs;
164 };
165
166 /* class ELFReloc:
167 *
168 * note: relocations in object files (.o) are section-based while relocations
169 * in executables and shared libraries are file-based.
170 *
171 * Whenever accessing something that is a pointer in the ELF file, the Python
172 * code needs to check for a relocation; if the pointer is pointing to some
173 * unresolved symbol the file will generally contain 0 bytes. The relocation
174 * will tell what the pointer is actually pointing to.
175 *
176 * This represents both static (.o file) and dynamic (.so/exec) relocations.
177 */
178 struct elfreloc {
179 PyObject_HEAD
180
181 struct elfrelocs_item elfrelocs_item;
182
183 struct elfsect *es;
184 struct elffile *ef;
185
186 /* there's also old-fashioned GElf_Rel; we're converting that to
187 * GElf_Rela in elfsect_add_relocations()
188 */
189 GElf_Rela _rela, *rela;
190 GElf_Sym _sym, *sym;
191 size_t symidx;
192 const char *symname;
193
194 /* documented below in python docstrings */
195 bool symvalid, unresolved, relative;
196 unsigned long long st_value;
197 };
198
199 static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b);
200 static uint32_t elfreloc_hash(const struct elfreloc *reloc);
201
202 DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item,
203 elfreloc_cmp, elfreloc_hash);
204
205 static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx);
206 static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx);
207 static PyObject *elfreloc_getsection(PyObject *self, PyObject *args);
208 static PyObject *elfreloc_getaddend(PyObject *obj, void *closure);
209
210 /* --- end of declarations -------------------------------------------------- */
211
212 /*
213 * class ELFReloc:
214 */
215
216 static const char elfreloc_doc[] =
217 "Represents an ELF relocation record\n"
218 "\n"
219 "(struct elfreloc * in elf_py.c)";
220
221 #define member(name, type, doc) \
222 { \
223 (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
224 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
225 }
226 static PyMemberDef members_elfreloc[] = {
227 member(symname, T_STRING,
228 "Name of symbol this relocation refers to.\n"
229 "\n"
230 "Will frequently be `None` in executables and shared libraries."
231 ),
232 member(symvalid, T_BOOL,
233 "Target symbol has a valid type, i.e. not STT_NOTYPE"),
234 member(unresolved, T_BOOL,
235 "Target symbol refers to an existing section"),
236 member(relative, T_BOOL,
237 "Relocation is a REL (not RELA) record and thus relative."),
238 member(st_value, T_ULONGLONG,
239 "Target symbol's value, if known\n\n"
240 "Will be zero for unresolved/external symbols."),
241 {}
242 };
243 #undef member
244
245 static PyGetSetDef getset_elfreloc[] = {
246 { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc =
247 (char *)"Relocation addend value"},
248 {}
249 };
250
251 static PyMethodDef methods_elfreloc[] = {
252 {"getsection", elfreloc_getsection, METH_VARARGS,
253 "Find relocation target's ELF section\n\n"
254 "Args: address of relocatee (TODO: fix/remove?)\n"
255 "Returns: ELFSection or None\n\n"
256 "Not possible if section headers have been stripped."},
257 {}
258 };
259
260 static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b)
261 {
262 if (a->rela->r_offset < b->rela->r_offset)
263 return -1;
264 if (a->rela->r_offset > b->rela->r_offset)
265 return 1;
266 return 0;
267 }
268
269 static uint32_t elfreloc_hash(const struct elfreloc *reloc)
270 {
271 return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset),
272 0xc9a2b7f4);
273 }
274
275 static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head,
276 GElf_Addr offset)
277 {
278 struct elfreloc dummy;
279
280 dummy.rela = &dummy._rela;
281 dummy.rela->r_offset = offset;
282 return elfrelocs_find(head, &dummy);
283 }
284
285 static PyObject *elfreloc_getsection(PyObject *self, PyObject *args)
286 {
287 struct elfreloc *w = (struct elfreloc *)self;
288 long data;
289
290 if (!PyArg_ParseTuple(args, "k", &data))
291 return NULL;
292
293 if (!w->es)
294 Py_RETURN_NONE;
295
296 if (w->symidx == 0) {
297 size_t idx = 0;
298 Elf_Scn *scn;
299
300 data = (w->relative ? data : 0) + w->rela->r_addend;
301 scn = elf_find_addr(w->es->ef, data, &idx);
302 if (!scn)
303 Py_RETURN_NONE;
304 return elffile_secbyidx(w->es->ef, scn, idx);
305 }
306 return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx);
307 }
308
309 static PyObject *elfreloc_getaddend(PyObject *obj, void *closure)
310 {
311 struct elfreloc *w = (struct elfreloc *)obj;
312
313 return Py_BuildValue("K", (unsigned long long)w->rela->r_addend);
314 }
315
316 static PyObject *elfreloc_repr(PyObject *arg)
317 {
318 struct elfreloc *w = (struct elfreloc *)arg;
319
320 return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>",
321 (unsigned long)w->rela->r_offset,
322 (w->symname && w->symname[0]) ? w->symname
323 : "[0]",
324 (unsigned long)w->rela->r_addend);
325 }
326
/* tp_free slot for ELFReloc; deliberately does nothing, the object is left
 * as it is.
 */
static void elfreloc_free(void *arg)
{
	(void)arg;
}
333
334 static PyTypeObject typeobj_elfreloc = {
335 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc",
336 .tp_basicsize = sizeof(struct elfreloc),
337 .tp_flags = Py_TPFLAGS_DEFAULT,
338 .tp_doc = elfreloc_doc,
339 .tp_new = refuse_new,
340 .tp_free = elfreloc_free,
341 .tp_repr = elfreloc_repr,
342 .tp_members = members_elfreloc,
343 .tp_methods = methods_elfreloc,
344 .tp_getset = getset_elfreloc,
345 };
346
347 /*
348 * class ELFSection:
349 */
350
351 static const char elfsect_doc[] =
352 "Represents an ELF section\n"
353 "\n"
354 "To access section contents, use subscript notation, e.g.\n"
355 " section[123:456]\n"
356 "To read null terminated C strings, replace the end with str:\n"
357 " section[123:str]\n\n"
358 "(struct elfsect * in elf_py.c)";
359
360 static PyObject *elfsect_getaddr(PyObject *self, void *closure);
361
362 #define member(name, type, doc) \
363 { \
364 (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
365 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
366 }
367 static PyMemberDef members_elfsect[] = {
368 member(name, T_STRING,
369 "Section name, e.g. \".text\""),
370 member(idx, T_ULONG,
371 "Section index in file"),
372 member(len, T_ULONG,
373 "Section length in bytes"),
374 {},
375 };
376 #undef member
377
378 static PyGetSetDef getset_elfsect[] = {
379 { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc =
380 (char *)"Section virtual address (mapped program view)"},
381 {}
382 };
383
384 static PyObject *elfsect_getaddr(PyObject *self, void *closure)
385 {
386 struct elfsect *w = (struct elfsect *)self;
387
388 return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr);
389 }
390
391
392 static PyObject *elfsect_getreloc(PyObject *self, PyObject *args)
393 {
394 struct elfsect *w = (struct elfsect *)self;
395 struct elfreloc *relw;
396 unsigned long offs;
397 PyObject *ret;
398
399 if (!PyArg_ParseTuple(args, "k", &offs))
400 return NULL;
401
402 relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr);
403 if (!relw)
404 Py_RETURN_NONE;
405
406 ret = (PyObject *)relw;
407 Py_INCREF(ret);
408 return ret;
409 }
410
411 static PyMethodDef methods_elfsect[] = {
412 {"getreloc", elfsect_getreloc, METH_VARARGS,
413 "Check for / get relocation at offset into section\n\n"
414 "Args: byte offset into section to check\n"
415 "Returns: ELFReloc or None"},
416 {}
417 };
418
419 static PyObject *elfsect_subscript(PyObject *self, PyObject *key)
420 {
421 Py_ssize_t start, stop, step, sllen;
422 struct elfsect *w = (struct elfsect *)self;
423 PySliceObject *slice;
424 unsigned long offs, len = ~0UL;
425
426 if (!PySlice_Check(key)) {
427 PyErr_SetString(PyExc_IndexError,
428 "ELFSection subscript must be slice");
429 return NULL;
430 }
431 slice = (PySliceObject *)key;
432 if (PyLong_Check(slice->stop)) {
433 if (PySlice_GetIndicesEx(key, w->shdr->sh_size,
434 &start, &stop, &step, &sllen))
435 return NULL;
436
437 if (step != 1) {
438 PyErr_SetString(PyExc_IndexError,
439 "ELFSection subscript slice step must be 1");
440 return NULL;
441 }
442 if ((GElf_Xword)stop > w->shdr->sh_size) {
443 PyErr_Format(ELFAccessError,
444 "access (%lu) beyond end of section %lu/%s (%lu)",
445 stop, w->idx, w->name, w->shdr->sh_size);
446 return NULL;
447 }
448
449 offs = start;
450 len = sllen;
451 } else {
452 if (slice->stop != (void *)&PyUnicode_Type
453 || !PyLong_Check(slice->start)) {
454 PyErr_SetString(PyExc_IndexError, "invalid slice");
455 return NULL;
456 }
457
458 offs = PyLong_AsUnsignedLongLong(slice->start);
459 len = ~0UL;
460 }
461
462 offs += w->shdr->sh_offset;
463 if (offs > w->ef->len) {
464 PyErr_Format(ELFAccessError,
465 "access (%lu) beyond end of file (%lu)",
466 offs, w->ef->len);
467 return NULL;
468 }
469 if (len == ~0UL)
470 len = strnlen(w->ef->mmap + offs, w->ef->len - offs);
471
472 Py_ssize_t pylen = len;
473
474 #if PY_MAJOR_VERSION >= 3
475 return Py_BuildValue("y#", w->ef->mmap + offs, pylen);
476 #else
477 return Py_BuildValue("s#", w->ef->mmap + offs, pylen);
478 #endif
479 }
480
481 static PyMappingMethods mp_elfsect = {
482 .mp_subscript = elfsect_subscript,
483 };
484
/* tp_free slot for ELFSection; deliberately does nothing, the object is
 * left as it is.
 */
static void elfsect_free(void *arg)
{
	(void)arg;
}
491
492 static PyObject *elfsect_repr(PyObject *arg)
493 {
494 struct elfsect *w = (struct elfsect *)arg;
495
496 return PyUnicode_FromFormat("<ELFSection %s>", w->name);
497 }
498
499 static PyTypeObject typeobj_elfsect = {
500 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection",
501 .tp_basicsize = sizeof(struct elfsect),
502 .tp_flags = Py_TPFLAGS_DEFAULT,
503 .tp_doc = elfsect_doc,
504 .tp_new = refuse_new,
505 .tp_free = elfsect_free,
506 .tp_repr = elfsect_repr,
507 .tp_as_mapping = &mp_elfsect,
508 .tp_members = members_elfsect,
509 .tp_methods = methods_elfsect,
510 .tp_getset = getset_elfsect,
511 };
512
513 static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel,
514 GElf_Shdr *relhdr)
515 {
516 size_t i, entries;
517 Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link);
518 GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr);
519 Elf_Data *symdata = elf_getdata(symtab, NULL);
520 Elf_Data *reldata = elf_getdata(rel, NULL);
521
522 entries = relhdr->sh_size / relhdr->sh_entsize;
523 for (i = 0; i < entries; i++) {
524 struct elfreloc *relw;
525 size_t symidx;
526 GElf_Rela *rela;
527 GElf_Sym *sym;
528
529 relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
530 &typeobj_elfreloc, 0);
531 relw->es = w;
532
533 if (relhdr->sh_type == SHT_REL) {
534 GElf_Rel _rel, *rel;
535
536 rel = gelf_getrel(reldata, i, &_rel);
537 relw->rela = &relw->_rela;
538 relw->rela->r_offset = rel->r_offset;
539 relw->rela->r_info = rel->r_info;
540 relw->rela->r_addend = 0;
541 relw->relative = true;
542 } else
543 relw->rela = gelf_getrela(reldata, i, &relw->_rela);
544
545 rela = relw->rela;
546 if (rela->r_offset < w->shdr->sh_addr
547 || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size)
548 continue;
549
550 symidx = relw->symidx = GELF_R_SYM(rela->r_info);
551 sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
552 if (sym) {
553 relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link,
554 sym->st_name);
555 relw->symvalid = GELF_ST_TYPE(sym->st_info)
556 != STT_NOTYPE;
557 relw->unresolved = sym->st_shndx == SHN_UNDEF;
558 relw->st_value = sym->st_value;
559 } else {
560 relw->symname = NULL;
561 relw->symvalid = false;
562 relw->unresolved = false;
563 relw->st_value = 0;
564 }
565
566 debugf("reloc @ %016llx sym %5llu %016llx %s\n",
567 (long long)rela->r_offset, (unsigned long long)symidx,
568 (long long)rela->r_addend, relw->symname);
569
570 elfrelocs_add(&w->relocs, relw);
571 }
572 }
573
574 /*
575 * bindings & loading code between ELFFile and ELFSection
576 */
577
578 static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx,
579 const char *name)
580 {
581 struct elfsect *w;
582 size_t i;
583
584 w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0);
585 if (!w)
586 return NULL;
587
588 w->name = name;
589 w->ef = ef;
590 w->scn = scn;
591 w->shdr = gelf_getshdr(scn, &w->_shdr);
592 w->len = w->shdr->sh_size;
593 w->idx = idx;
594 elfrelocs_init(&w->relocs);
595
596 for (i = 0; i < ef->ehdr->e_shnum; i++) {
597 Elf_Scn *scn = elf_getscn(ef->elf, i);
598 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
599
600 if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL)
601 continue;
602 if (shdr->sh_info && shdr->sh_info != idx)
603 continue;
604 elfsect_add_relocations(w, scn, shdr);
605 }
606
607 return (PyObject *)w;
608 }
609
610 static Elf_Scn *elf_find_section(struct elffile *ef, const char *name,
611 size_t *idx)
612 {
613 size_t i;
614 const char *secname;
615
616 for (i = 0; i < ef->ehdr->e_shnum; i++) {
617 Elf_Scn *scn = elf_getscn(ef->elf, i);
618 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
619
620 secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx,
621 shdr->sh_name);
622 if (strcmp(secname, name))
623 continue;
624 if (idx)
625 *idx = i;
626 return scn;
627 }
628 return NULL;
629 }
630
631 static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx)
632 {
633 size_t i;
634
635 for (i = 0; i < ef->ehdr->e_shnum; i++) {
636 Elf_Scn *scn = elf_getscn(ef->elf, i);
637 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
638
639 /* virtual address is kinda meaningless for TLS sections */
640 if (shdr->sh_flags & SHF_TLS)
641 continue;
642 if (addr < shdr->sh_addr ||
643 addr >= shdr->sh_addr + shdr->sh_size)
644 continue;
645
646 if (idx)
647 *idx = i;
648 return scn;
649 }
650 return NULL;
651 }
652
653 /*
654 * class ELFFile:
655 */
656
657 static const char elffile_doc[] =
658 "Represents an ELF file\n"
659 "\n"
660 "Args: filename to load\n"
661 "\n"
662 "To access raw file contents, use subscript notation, e.g.\n"
663 " file[123:456]\n"
664 "To read null terminated C strings, replace the end with str:\n"
665 " file[123:str]\n\n"
666 "(struct elffile * in elf_py.c)";
667
668
669 #define member(name, type, doc) \
670 { \
671 (char *)#name, type, offsetof(struct elffile, name), READONLY, \
672 (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
673 }
674 static PyMemberDef members_elffile[] = {
675 member(filename, T_STRING,
676 "Original file name as given when opening"),
677 member(elfclass, T_INT,
678 "ELF class (architecture bit size)\n\n"
679 "Either 32 or 64, straight integer."),
680 member(bigendian, T_BOOL,
681 "ELF file is big-endian\n\n"
682 "All internal ELF structures are automatically converted."),
683 member(has_symbols, T_BOOL,
684 "A symbol section is present\n\n"
685 "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
686 ),
687 {},
688 };
689 #undef member
690
691 static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx)
692 {
693 const char *name;
694 PyObject *ret;
695
696 if (!scn)
697 scn = elf_getscn(w->elf, idx);
698 if (!scn || idx >= w->n_sect)
699 Py_RETURN_NONE;
700
701 if (!w->sects[idx]) {
702 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
703
704 name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name);
705 w->sects[idx] = elfsect_wrap(w, scn, idx, name);
706 }
707
708 ret = w->sects[idx];
709 Py_INCREF(ret);
710 return ret;
711 }
712
713 static PyObject *elffile_get_section(PyObject *self, PyObject *args)
714 {
715 const char *name;
716 struct elffile *w = (struct elffile *)self;
717 Elf_Scn *scn;
718 size_t idx = 0;
719
720 if (!PyArg_ParseTuple(args, "s", &name))
721 return NULL;
722
723 scn = elf_find_section(w, name, &idx);
724 return elffile_secbyidx(w, scn, idx);
725 }
726
727 static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args)
728 {
729 unsigned long long addr;
730 struct elffile *w = (struct elffile *)self;
731 Elf_Scn *scn;
732 size_t idx = 0;
733
734 if (!PyArg_ParseTuple(args, "K", &addr))
735 return NULL;
736
737 scn = elf_find_addr(w, addr, &idx);
738 return elffile_secbyidx(w, scn, idx);
739 }
740
741 static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args)
742 {
743 unsigned long long idx;
744 struct elffile *w = (struct elffile *)self;
745
746 if (!PyArg_ParseTuple(args, "K", &idx))
747 return NULL;
748
749 return elffile_secbyidx(w, NULL, idx);
750 }
751
752 static PyObject *elffile_get_symbol(PyObject *self, PyObject *args)
753 {
754 const char *name, *symname;
755 struct elffile *w = (struct elffile *)self;
756 GElf_Sym _sym, *sym;
757 size_t i;
758
759 if (!PyArg_ParseTuple(args, "s", &name))
760 return NULL;
761
762 for (i = 0; i < w->nsym; i++) {
763 sym = gelf_getsym(w->symdata, i, &_sym);
764 if (sym->st_name == 0)
765 continue;
766 symname = elf_strptr(w->elf, w->symstridx, sym->st_name);
767 if (strcmp(symname, name))
768 continue;
769
770 PyObject *pysect;
771 Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx);
772
773 if (scn)
774 pysect = elffile_secbyidx(w, scn, sym->st_shndx);
775 else {
776 pysect = Py_None;
777 Py_INCREF(pysect);
778 }
779 return Py_BuildValue("sKN", symname,
780 (unsigned long long)sym->st_value, pysect);
781 }
782 Py_RETURN_NONE;
783 }
784
785 static PyObject *elffile_getreloc(PyObject *self, PyObject *args)
786 {
787 struct elffile *w = (struct elffile *)self;
788 struct elfreloc *relw;
789 unsigned long offs;
790 PyObject *ret;
791
792 if (!PyArg_ParseTuple(args, "k", &offs))
793 return NULL;
794
795 relw = elfrelocs_get(&w->dynrelocs, offs);
796 if (!relw)
797 Py_RETURN_NONE;
798
799 ret = (PyObject *)relw;
800 Py_INCREF(ret);
801 return ret;
802 }
803
804 static PyObject *elffile_find_note(PyObject *self, PyObject *args)
805 {
806 #if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
807 const char *owner;
808 const uint8_t *ids;
809 GElf_Word id;
810 struct elffile *w = (struct elffile *)self;
811 size_t i;
812
813 if (!PyArg_ParseTuple(args, "ss", &owner, &ids))
814 return NULL;
815
816 if (strlen((char *)ids) != 4) {
817 PyErr_SetString(PyExc_ValueError,
818 "ELF note ID must be exactly 4-byte string");
819 return NULL;
820 }
821 if (w->bigendian)
822 id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3];
823 else
824 id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0];
825
826 for (i = 0; i < w->ehdr->e_phnum; i++) {
827 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
828 Elf_Data *notedata;
829 size_t offset;
830
831 if (phdr->p_type != PT_NOTE)
832 continue;
833
834 notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset,
835 phdr->p_filesz, ELF_T_NHDR);
836
837 GElf_Nhdr nhdr[1];
838 size_t nameoffs, dataoffs;
839
840 offset = 0;
841 while ((offset = gelf_getnote(notedata, offset, nhdr,
842 &nameoffs, &dataoffs))) {
843 if (phdr->p_offset + nameoffs >= w->len)
844 continue;
845
846 const char *name = w->mmap + phdr->p_offset + nameoffs;
847
848 if (strcmp(name, owner))
849 continue;
850 if (id != nhdr->n_type)
851 continue;
852
853 PyObject *s, *e;
854
855 s = PyLong_FromUnsignedLongLong(
856 phdr->p_vaddr + dataoffs);
857 e = PyLong_FromUnsignedLongLong(
858 phdr->p_vaddr + dataoffs + nhdr->n_descsz);
859 return PySlice_New(s, e, NULL);
860 }
861 }
862 #endif
863 Py_RETURN_NONE;
864 }
865
866 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
867 static bool elffile_virt2file(struct elffile *w, GElf_Addr virt,
868 GElf_Addr *offs)
869 {
870 *offs = 0;
871
872 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
873 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
874
875 if (phdr->p_type != PT_LOAD)
876 continue;
877
878 if (virt < phdr->p_vaddr
879 || virt >= phdr->p_vaddr + phdr->p_memsz)
880 continue;
881
882 if (virt >= phdr->p_vaddr + phdr->p_filesz)
883 return false;
884
885 *offs = virt - phdr->p_vaddr + phdr->p_offset;
886 return true;
887 }
888
889 return false;
890 }
891 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
892
893 static PyObject *elffile_subscript(PyObject *self, PyObject *key)
894 {
895 Py_ssize_t start, stop, step;
896 PySliceObject *slice;
897 struct elffile *w = (struct elffile *)self;
898 bool str = false;
899
900 if (!PySlice_Check(key)) {
901 PyErr_SetString(PyExc_IndexError,
902 "ELFFile subscript must be slice");
903 return NULL;
904 }
905 slice = (PySliceObject *)key;
906 stop = -1;
907 step = 1;
908 if (PyLong_Check(slice->stop)) {
909 start = PyLong_AsSsize_t(slice->start);
910 if (PyErr_Occurred())
911 return NULL;
912 if (slice->stop != Py_None) {
913 stop = PyLong_AsSsize_t(slice->stop);
914 if (PyErr_Occurred())
915 return NULL;
916 }
917 if (slice->step != Py_None) {
918 step = PyLong_AsSsize_t(slice->step);
919 if (PyErr_Occurred())
920 return NULL;
921 }
922 } else {
923 if (slice->stop != (void *)&PyUnicode_Type
924 || !PyLong_Check(slice->start)) {
925 PyErr_SetString(PyExc_IndexError, "invalid slice");
926 return NULL;
927 }
928
929 str = true;
930 start = PyLong_AsUnsignedLongLong(slice->start);
931 }
932 if (step != 1) {
933 PyErr_SetString(PyExc_IndexError,
934 "ELFFile subscript slice step must be 1");
935 return NULL;
936 }
937
938 GElf_Addr xstart = start, xstop = stop;
939
940 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
941 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
942
943 if (phdr->p_type != PT_LOAD)
944 continue;
945
946 if (xstart < phdr->p_vaddr
947 || xstart >= phdr->p_vaddr + phdr->p_memsz)
948 continue;
949 if (!str && (xstop < phdr->p_vaddr
950 || xstop > phdr->p_vaddr + phdr->p_memsz)) {
951 PyErr_Format(ELFAccessError,
952 "access (%llu) beyond end of program header (%llu)",
953 (long long)xstop,
954 (long long)(phdr->p_vaddr +
955 phdr->p_memsz));
956 return NULL;
957 }
958
959 xstart = xstart - phdr->p_vaddr + phdr->p_offset;
960
961 if (str)
962 xstop = strlen(w->mmap + xstart);
963 else
964 xstop = xstop - phdr->p_vaddr + phdr->p_offset;
965
966 Py_ssize_t pylen = xstop - xstart;
967
968 #if PY_MAJOR_VERSION >= 3
969 return Py_BuildValue("y#", w->mmap + xstart, pylen);
970 #else
971 return Py_BuildValue("s#", w->mmap + xstart, pylen);
972 #endif
973 };
974
975 return PyErr_Format(ELFAccessError,
976 "virtual address (%llu) not found in program headers",
977 (long long)start);
978 }
979
980 static PyMethodDef methods_elffile[] = {
981 {"find_note", elffile_find_note, METH_VARARGS,
982 "find specific note entry"},
983 {"getreloc", elffile_getreloc, METH_VARARGS,
984 "find relocation"},
985 {"get_symbol", elffile_get_symbol, METH_VARARGS,
986 "find symbol by name"},
987 {"get_section", elffile_get_section, METH_VARARGS,
988 "find section by name"},
989 {"get_section_addr", elffile_get_section_addr, METH_VARARGS,
990 "find section by address"},
991 {"get_section_idx", elffile_get_section_idx, METH_VARARGS,
992 "find section by index"},
993 {}
994 };
995
996 static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
997 PyObject *kwds);
998
999 static void elffile_free(void *arg)
1000 {
1001 struct elffile *w = arg;
1002
1003 elf_end(w->elf);
1004 munmap(w->mmap, w->len);
1005 free(w->filename);
1006 }
1007
1008 static PyMappingMethods mp_elffile = {
1009 .mp_subscript = elffile_subscript,
1010 };
1011
1012 static PyTypeObject typeobj_elffile = {
1013 PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile",
1014 .tp_basicsize = sizeof(struct elffile),
1015 .tp_flags = Py_TPFLAGS_DEFAULT,
1016 .tp_doc = elffile_doc,
1017 .tp_new = elffile_load,
1018 .tp_free = elffile_free,
1019 .tp_as_mapping = &mp_elffile,
1020 .tp_members = members_elffile,
1021 .tp_methods = methods_elffile,
1022 };
1023
1024 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1025 static char *elfdata_strptr(Elf_Data *data, size_t offset)
1026 {
1027 char *p;
1028
1029 if (offset >= data->d_size)
1030 return NULL;
1031
1032 p = (char *)data->d_buf + offset;
1033 if (strnlen(p, data->d_size - offset) >= data->d_size - offset)
1034 return NULL;
1035
1036 return p;
1037 }
1038
1039 static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata,
1040 size_t entries, Elf_Data *symdata,
1041 Elf_Data *strdata, Elf_Type typ)
1042 {
1043 size_t i;
1044
1045 for (i = 0; i < entries; i++) {
1046 struct elfreloc *relw;
1047 size_t symidx;
1048 GElf_Rela *rela;
1049 GElf_Sym *sym;
1050 GElf_Addr rel_offs = 0;
1051
1052 relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
1053 &typeobj_elfreloc, 0);
1054 relw->ef = w;
1055
1056 if (typ == ELF_T_REL) {
1057 GElf_Rel _rel, *rel;
1058 GElf_Addr offs;
1059
1060 rel = gelf_getrel(reldata, i, &_rel);
1061 relw->rela = &relw->_rela;
1062 relw->rela->r_offset = rel->r_offset;
1063 relw->rela->r_info = rel->r_info;
1064 relw->rela->r_addend = 0;
1065 relw->relative = true;
1066
1067 /* REL uses the pointer contents itself instead of the
1068 * RELA addend field :( ... theoretically this could
1069 * be some weird platform specific encoding, but since
1070 * we only care about data relocations it should
1071 * always be a pointer...
1072 */
1073 if (elffile_virt2file(w, rel->r_offset, &offs)) {
1074 Elf_Data *ptr, *conv;
1075 GElf_Addr tmp;
1076 Elf_Data mem = {
1077 .d_buf = (void *)&tmp,
1078 .d_type = ELF_T_ADDR,
1079 .d_version = EV_CURRENT,
1080 .d_size = sizeof(tmp),
1081 .d_off = 0,
1082 .d_align = 0,
1083 };
1084
1085 ptr = elf_getdata_rawchunk(w->elf, offs,
1086 w->elfclass / 8,
1087 ELF_T_ADDR);
1088
1089 conv = gelf_xlatetom(w->elf, &mem, ptr,
1090 w->mmap[EI_DATA]);
1091 if (conv) {
1092 memcpy(&rel_offs, conv->d_buf,
1093 conv->d_size);
1094
1095 relw->relative = false;
1096 relw->rela->r_addend = rel_offs;
1097 }
1098 }
1099 } else
1100 relw->rela = gelf_getrela(reldata, i, &relw->_rela);
1101
1102 rela = relw->rela;
1103 symidx = relw->symidx = GELF_R_SYM(rela->r_info);
1104 sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
1105 if (sym) {
1106 relw->symname = elfdata_strptr(strdata, sym->st_name);
1107 relw->symvalid = GELF_ST_TYPE(sym->st_info)
1108 != STT_NOTYPE;
1109 relw->unresolved = sym->st_shndx == SHN_UNDEF;
1110 relw->st_value = sym->st_value;
1111 } else {
1112 relw->symname = NULL;
1113 relw->symvalid = false;
1114 relw->unresolved = false;
1115 relw->st_value = 0;
1116 }
1117
1118 if (typ == ELF_T_RELA)
1119 debugf("dynrela @ %016llx sym %5llu %016llx %s\n",
1120 (long long)rela->r_offset,
1121 (unsigned long long)symidx,
1122 (long long)rela->r_addend, relw->symname);
1123 else
1124 debugf("dynrel @ %016llx sym %5llu (%016llx) %s\n",
1125 (long long)rela->r_offset,
1126 (unsigned long long)symidx,
1127 (unsigned long long)rel_offs, relw->symname);
1128
1129 elfrelocs_add(&w->dynrelocs, relw);
1130 }
1131
1132 }
1133 #endif /* HAVE_ELF_GETDATA_RAWCHUNK */
1134
1135 /* primary (only, really) entry point to anything in this module */
1136 static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
1137 PyObject *kwds)
1138 {
1139 const char *filename;
1140 static const char * const kwnames[] = {"filename", NULL};
1141 struct elffile *w;
1142 struct stat st;
1143 int fd, err;
1144
1145 w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0);
1146 if (!w)
1147 return NULL;
1148
1149 if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames,
1150 &filename))
1151 return NULL;
1152
1153 w->filename = strdup(filename);
1154 fd = open(filename, O_RDONLY | O_NOCTTY);
1155 if (fd < 0 || fstat(fd, &st)) {
1156 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1157 close(fd);
1158 goto out;
1159 }
1160 w->len = st.st_size;
1161 w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
1162 if (!w->mmap) {
1163 PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
1164 close(fd);
1165 goto out;
1166 }
1167 close(fd);
1168 w->mmend = w->mmap + st.st_size;
1169
1170 if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) {
1171 PyErr_SetString(ELFFormatError, "invalid ELF signature");
1172 goto out;
1173 }
1174
1175 switch (w->mmap[EI_CLASS]) {
1176 case ELFCLASS32:
1177 w->elfclass = 32;
1178 break;
1179 case ELFCLASS64:
1180 w->elfclass = 64;
1181 break;
1182 default:
1183 PyErr_SetString(ELFFormatError, "invalid ELF class");
1184 goto out;
1185 }
1186 switch (w->mmap[EI_DATA]) {
1187 case ELFDATA2LSB:
1188 w->bigendian = false;
1189 break;
1190 case ELFDATA2MSB:
1191 w->bigendian = true;
1192 break;
1193 default:
1194 PyErr_SetString(ELFFormatError, "invalid ELF byte order");
1195 goto out;
1196 }
1197
1198 w->elf = elf_memory(w->mmap, w->len);
1199 if (!w->elf)
1200 goto out_elferr;
1201 w->ehdr = gelf_getehdr(w->elf, &w->_ehdr);
1202 if (!w->ehdr)
1203 goto out_elferr;
1204
1205 for (size_t i = 0; i < w->ehdr->e_shnum; i++) {
1206 Elf_Scn *scn = elf_getscn(w->elf, i);
1207 GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
1208
1209 if (shdr->sh_type == SHT_SYMTAB) {
1210 w->symtab = scn;
1211 w->nsym = shdr->sh_size / shdr->sh_entsize;
1212 w->symdata = elf_getdata(scn, NULL);
1213 w->symstridx = shdr->sh_link;
1214 break;
1215 }
1216 }
1217 w->has_symbols = w->symtab && w->symstridx;
1218 elfrelocs_init(&w->dynrelocs);
1219
1220 #ifdef HAVE_ELF_GETDATA_RAWCHUNK
1221 for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
1222 GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
1223
1224 if (phdr->p_type != PT_DYNAMIC)
1225 continue;
1226
1227 Elf_Data *dyndata = elf_getdata_rawchunk(w->elf,
1228 phdr->p_offset, phdr->p_filesz, ELF_T_DYN);
1229
1230 GElf_Addr dynrela = 0, dynrel = 0, symtab = 0, strtab = 0;
1231 size_t dynrelasz = 0, dynrelaent = 0;
1232 size_t dynrelsz = 0, dynrelent = 0;
1233 size_t strsz = 0;
1234 GElf_Dyn _dyn, *dyn;
1235
1236 for (size_t j = 0;; j++) {
1237 dyn = gelf_getdyn(dyndata, j, &_dyn);
1238
1239 if (dyn->d_tag == DT_NULL)
1240 break;
1241
1242 switch (dyn->d_tag) {
1243 case DT_SYMTAB:
1244 symtab = dyn->d_un.d_ptr;
1245 break;
1246
1247 case DT_STRTAB:
1248 strtab = dyn->d_un.d_ptr;
1249 break;
1250 case DT_STRSZ:
1251 strsz = dyn->d_un.d_val;
1252 break;
1253
1254 case DT_RELA:
1255 dynrela = dyn->d_un.d_ptr;
1256 break;
1257 case DT_RELASZ:
1258 dynrelasz = dyn->d_un.d_val;
1259 break;
1260 case DT_RELAENT:
1261 dynrelaent = dyn->d_un.d_val;
1262 break;
1263
1264 case DT_REL:
1265 dynrel = dyn->d_un.d_ptr;
1266 break;
1267 case DT_RELSZ:
1268 dynrelsz = dyn->d_un.d_val;
1269 break;
1270 case DT_RELENT:
1271 dynrelent = dyn->d_un.d_val;
1272 break;
1273 }
1274 }
1275
1276 GElf_Addr offset;
1277 Elf_Data *symdata = NULL, *strdata = NULL;
1278
1279 if (elffile_virt2file(w, symtab, &offset))
1280 symdata = elf_getdata_rawchunk(w->elf, offset,
1281 w->len - offset,
1282 ELF_T_SYM);
1283 if (elffile_virt2file(w, strtab, &offset))
1284 strdata = elf_getdata_rawchunk(w->elf, offset,
1285 strsz, ELF_T_BYTE);
1286
1287 size_t c;
1288
1289 if (dynrela && dynrelasz && dynrelaent
1290 && elffile_virt2file(w, dynrela, &offset)) {
1291 Elf_Data *reladata = NULL;
1292
1293 debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela,
1294 (long long)offset, (long long)dynrelasz);
1295
1296 reladata = elf_getdata_rawchunk(w->elf, offset,
1297 dynrelasz, ELF_T_RELA);
1298
1299 c = dynrelasz / dynrelaent;
1300 elffile_add_dynreloc(w, reladata, c, symdata, strdata,
1301 ELF_T_RELA);
1302 }
1303
1304 if (dynrel && dynrelsz && dynrelent
1305 && elffile_virt2file(w, dynrel, &offset)) {
1306 Elf_Data *reldata = NULL;
1307
1308 debugf("dynrel @%llx/%llx+%llx\n", (long long)dynrel,
1309 (long long)offset, (long long)dynrelsz);
1310
1311 reldata = elf_getdata_rawchunk(w->elf, offset, dynrelsz,
1312 ELF_T_REL);
1313
1314 c = dynrelsz / dynrelent;
1315 elffile_add_dynreloc(w, reldata, c, symdata, strdata,
1316 ELF_T_REL);
1317 }
1318 }
1319 #endif
1320
1321 w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum);
1322 w->n_sect = w->ehdr->e_shnum;
1323
1324 return (PyObject *)w;
1325
1326 out_elferr:
1327 err = elf_errno();
1328
1329 PyErr_Format(ELFFormatError, "libelf error %d: %s",
1330 err, elf_errmsg(err));
1331 out:
1332 if (w->elf)
1333 elf_end(w->elf);
1334 free(w->filename);
1335 return NULL;
1336 }
1337
1338 static PyObject *elfpy_debug(PyObject *self, PyObject *args)
1339 {
1340 int arg;
1341
1342 if (!PyArg_ParseTuple(args, "p", &arg))
1343 return NULL;
1344
1345 debug = arg;
1346
1347 Py_RETURN_NONE;
1348 }
1349
1350 static PyMethodDef methods_elfpy[] = {
1351 {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"},
1352 {}
1353 };
1354
1355 bool elf_py_init(PyObject *pymod)
1356 {
1357 if (PyType_Ready(&typeobj_elffile) < 0)
1358 return false;
1359 if (PyType_Ready(&typeobj_elfsect) < 0)
1360 return false;
1361 if (PyType_Ready(&typeobj_elfreloc) < 0)
1362 return false;
1363 if (elf_version(EV_CURRENT) == EV_NONE)
1364 return false;
1365
1366 #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5
1367 PyModule_AddFunctions(pymod, methods_elfpy);
1368 #else
1369 (void)methods_elfpy;
1370 #endif
1371
1372 ELFFormatError = PyErr_NewException("_clippy.ELFFormatError",
1373 PyExc_ValueError, NULL);
1374 PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError);
1375 ELFAccessError = PyErr_NewException("_clippy.ELFAccessError",
1376 PyExc_IndexError, NULL);
1377 PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError);
1378
1379 Py_INCREF(&typeobj_elffile);
1380 PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile);
1381 Py_INCREF(&typeobj_elfsect);
1382 PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect);
1383 Py_INCREF(&typeobj_elfreloc);
1384 PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc);
1385 return true;
1386 }