]> git.proxmox.com Git - mirror_iproute2.git/blob - lib/bpf.c
bpf: consolidate dumps to use bpf_dump_prog_info
[mirror_iproute2.git] / lib / bpf.c
1 /*
2 * bpf.c BPF common code
3 *
4 * This program is free software; you can distribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Daniel Borkmann <daniel@iogearbox.net>
10 * Jiri Pirko <jiri@resnulli.us>
11 * Alexei Starovoitov <ast@kernel.org>
12 */
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <stdbool.h>
19 #include <stdint.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <stdarg.h>
23 #include <limits.h>
24 #include <assert.h>
25
26 #ifdef HAVE_ELF
27 #include <libelf.h>
28 #include <gelf.h>
29 #endif
30
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include <sys/un.h>
34 #include <sys/vfs.h>
35 #include <sys/mount.h>
36 #include <sys/syscall.h>
37 #include <sys/sendfile.h>
38 #include <sys/resource.h>
39
40 #include <arpa/inet.h>
41
42 #include "utils.h"
43
44 #include "bpf_util.h"
45 #include "bpf_elf.h"
46 #include "bpf_scm.h"
47
/* Per program-type metadata: the textual type name accepted on the command
 * line, the bpffs subdirectory used for pinning, the default ELF section
 * name, and whether programs of this type may be exported via a Unix
 * domain socket (see may_uds_export use in bpf_parse()).
 */
struct bpf_prog_meta {
	const char *type;	/* CLI name, e.g. "cls", "xdp" */
	const char *subdir;	/* bpffs pinning subdirectory */
	const char *section;	/* default ELF section */
	bool may_uds_export;	/* eligible for UDS export */
};
54
/* Program types for which a pinning directory hierarchy is created under
 * the bpf filesystem mount; the first entry is the master directory, the
 * rest are set up as slaves pointing at it (see bpf_gen_hierarchy()).
 */
static const enum bpf_prog_type __bpf_types[] = {
	BPF_PROG_TYPE_SCHED_CLS,
	BPF_PROG_TYPE_SCHED_ACT,
	BPF_PROG_TYPE_XDP,
	BPF_PROG_TYPE_LWT_IN,
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
};
63
/* Metadata table indexed directly by enum bpf_prog_type; sparse — entries
 * for types not listed here have a NULL .type and are skipped by lookups.
 */
static const struct bpf_prog_meta __bpf_prog_meta[] = {
	[BPF_PROG_TYPE_SCHED_CLS] = {
		.type		= "cls",
		.subdir		= "tc",
		.section	= ELF_SECTION_CLASSIFIER,
		.may_uds_export	= true,
	},
	[BPF_PROG_TYPE_SCHED_ACT] = {
		.type		= "act",
		.subdir		= "tc",
		.section	= ELF_SECTION_ACTION,
		.may_uds_export	= true,
	},
	[BPF_PROG_TYPE_XDP] = {
		.type		= "xdp",
		.subdir		= "xdp",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_IN] = {
		.type		= "lwt_in",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_OUT] = {
		.type		= "lwt_out",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_XMIT] = {
		.type		= "lwt_xmit",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
};
98
99 static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
100 {
101 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
102 __bpf_prog_meta[type].subdir);
103 return __bpf_prog_meta[type].subdir;
104 }
105
106 const char *bpf_prog_to_default_section(enum bpf_prog_type type)
107 {
108 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
109 __bpf_prog_meta[type].section);
110 return __bpf_prog_meta[type].section;
111 }
112
#ifdef HAVE_ELF
/* Real implementation lives further down in the HAVE_ELF section. */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose);
#else
/* Stub when libelf support is compiled out: object loading is impossible. */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose)
{
	fprintf(stderr, "No ELF library support compiled in.\n");
	errno = ENOSYS;
	return -1;
}
#endif
125
/* Convert a userspace pointer into the 64-bit representation that
 * union bpf_attr expects for pointer-valued fields.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	return (__u64)(uintptr_t)ptr;
}
130
/* Thin wrapper around the bpf(2) syscall; fails with ENOSYS when built
 * against kernel headers that do not define __NR_bpf.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	return syscall(__NR_bpf, cmd, attr, size);
#else
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#endif
}
141
142 static int bpf_map_update(int fd, const void *key, const void *value,
143 uint64_t flags)
144 {
145 union bpf_attr attr = {};
146
147 attr.map_fd = fd;
148 attr.key = bpf_ptr_to_u64(key);
149 attr.value = bpf_ptr_to_u64(value);
150 attr.flags = flags;
151
152 return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
153 }
154
155 static int bpf_prog_fd_by_id(uint32_t id)
156 {
157 union bpf_attr attr = {};
158
159 attr.prog_id = id;
160
161 return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
162 }
163
164 static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
165 uint32_t *info_len)
166 {
167 union bpf_attr attr = {};
168 int ret;
169
170 attr.info.bpf_fd = fd;
171 attr.info.info = bpf_ptr_to_u64(info);
172 attr.info.info_len = *info_len;
173
174 *info_len = 0;
175 ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
176 if (!ret)
177 *info_len = attr.info.info_len;
178
179 return ret;
180 }
181
/* Dump "id <id> [tag <tag> [jited]]" for the program with the given id to f.
 * Returns 1 when tag/jited info could be retrieved from the kernel, 0
 * otherwise (the id alone is always printed). Needs CAP_SYS_ADMIN for
 * BPF_PROG_GET_FD_BY_ID; silently degrades when that fails.
 */
int bpf_dump_prog_info(FILE *f, uint32_t id)
{
	struct bpf_prog_info info = {};
	uint32_t len = sizeof(info);
	int fd, ret, dump_ok = 0;
	SPRINT_BUF(tmp);

	fprintf(f, "id %u ", id);

	fd = bpf_prog_fd_by_id(id);
	if (fd < 0)
		return dump_ok;

	ret = bpf_prog_info_by_fd(fd, &info, &len);
	if (!ret && len) {
		/* Program tag is a short fingerprint of the instructions. */
		fprintf(f, "tag %s ",
			hexstring_n2a(info.tag, sizeof(info.tag),
				      tmp, sizeof(tmp)));
		if (info.jited_prog_len)
			fprintf(f, "jited ");
		dump_ok = 1;
	}

	close(fd);
	return dump_ok;
}
208
/* Prepare a classic-BPF opcode string for parsing. When from_file is set,
 * arg names a file whose contents are read and canonicalized (newlines and
 * tab/space runs condensed into single separators) into a freshly allocated
 * buffer; otherwise arg itself is used in place. On success *bpf_string
 * points at the string, *need_release tells the caller whether to free it,
 * and *bpf_len holds the instruction count parsed from the leading
 * "<len><separator>" header. Returns 0 or a negative errno.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string, *pos, c_prev = ' ';
		FILE *fp;
		int c;

		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		/* Allocate one extra byte for the NUL terminator: a file
		 * that condenses to exactly tmp_len bytes and then hits
		 * EOF would otherwise make the '*pos = 0' below write one
		 * byte past the end of the buffer.
		 */
		tmp_string = pos = calloc(1, tmp_len + 1);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		while ((c = fgetc(fp)) != EOF) {
			switch (c) {
			case '\n':
				/* Newlines act as instruction separators. */
				if (c_prev != ',')
					*(pos++) = ',';
				c_prev = ',';
				break;
			case ' ':
			case '\t':
				/* Collapse runs of whitespace to one blank. */
				if (c_prev != ' ')
					*(pos++) = c;
				c_prev = ' ';
				break;
			default:
				*(pos++) = c;
				c_prev = c;
			}
			if (pos - tmp_string == tmp_len)
				break;
		}

		/* Buffer filled before the file ended: input too large. */
		if (!feof(fp)) {
			free(tmp_string);
			fclose(fp);
			return -E2BIG;
		}

		fclose(fp);
		*pos = 0;

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*need_release = false;
		*bpf_string = arg;
	}

	/* The string must start with "<count><separator>". */
	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		return -EINVAL;
	}

	return 0;
}
279
/* Parse classic BPF bytecode in "len,code jt jf k,..." form from argv[0]
 * (or from a file when from_file is set) into bpf_ops, which must have
 * room for BPF_MAXINSNS entries. Returns the number of instructions
 * parsed, or a negative errno on malformed input or length mismatch.
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/* Walk separator-delimited tokens; each token is one instruction
	 * of the form "code jt jf k".
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	/* The announced count must match what was actually present. */
	if (i != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
329
330 void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
331 {
332 struct sock_filter *ops = RTA_DATA(bpf_ops);
333 int i;
334
335 if (len == 0)
336 return;
337
338 fprintf(f, "bytecode \'%u,", len);
339
340 for (i = 0; i < len - 1; i++)
341 fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
342 ops[i].jf, ops[i].k);
343
344 fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
345 ops[i].jf, ops[i].k);
346 }
347
348 static void bpf_map_pin_report(const struct bpf_elf_map *pin,
349 const struct bpf_elf_map *obj)
350 {
351 fprintf(stderr, "Map specification differs from pinned file!\n");
352
353 if (obj->type != pin->type)
354 fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
355 obj->type, pin->type);
356 if (obj->size_key != pin->size_key)
357 fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
358 obj->size_key, pin->size_key);
359 if (obj->size_value != pin->size_value)
360 fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
361 obj->size_value, pin->size_value);
362 if (obj->max_elem != pin->max_elem)
363 fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
364 obj->max_elem, pin->max_elem);
365 if (obj->flags != pin->flags)
366 fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
367 obj->flags, pin->flags);
368
369 fprintf(stderr, "\n");
370 }
371
/* Owner program attributes parsed from a map's fdinfo. */
struct bpf_prog_data {
	unsigned int type;	/* owner_prog_type from fdinfo */
	unsigned int jited;	/* owner_jited from fdinfo */
};

/* Extra per-map state not covered by struct bpf_elf_map. */
struct bpf_map_ext {
	struct bpf_prog_data owner;
};
380
/* Reconstruct a map's specification by parsing /proc/self/fdinfo/<fd>.
 * Fills *map (zeroed first) with type/key/value/max/flags lines found,
 * and, when ext is non-NULL, the owner program type/jited flags. Fields
 * the kernel does not report simply stay zero. Returns 0 or -EIO when
 * procfs is unavailable.
 */
static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
					  struct bpf_map_ext *ext)
{
	unsigned int val, owner_type = 0, owner_jited = 0;
	char file[PATH_MAX], buff[4096];
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(map, 0, sizeof(*map));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	/* Each fdinfo line is "key:\t<value>"; match the keys we know. */
	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "map_type:\t%u", &val) == 1)
			map->type = val;
		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
			map->size_key = val;
		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
			map->size_value = val;
		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
			map->max_elem = val;
		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
			map->flags = val;
		else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
			owner_type = val;
		else if (sscanf(buff, "owner_jited:\t%i", &val) == 1)
			owner_jited = val;
	}

	fclose(fp);
	if (ext) {
		memset(ext, 0, sizeof(*ext));
		ext->owner.type = owner_type;
		ext->owner.jited = owner_jited;
	}

	return 0;
}
423
424 static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
425 struct bpf_map_ext *ext, int length,
426 enum bpf_prog_type type)
427 {
428 struct bpf_elf_map tmp, zero = {};
429 int ret;
430
431 ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext);
432 if (ret < 0)
433 return ret;
434
435 /* The decision to reject this is on kernel side eventually, but
436 * at least give the user a chance to know what's wrong.
437 */
438 if (ext->owner.type && ext->owner.type != type)
439 fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
440 type, ext->owner.type);
441
442 if (!memcmp(&tmp, map, length)) {
443 return 0;
444 } else {
445 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
446 * so just accept it. We know we do have an eBPF fd and in this
447 * case, everything is 0. It is guaranteed that no such map exists
448 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
449 */
450 if (!memcmp(&tmp, &zero, length))
451 return 0;
452
453 bpf_map_pin_report(&tmp, map);
454 return -EINVAL;
455 }
456 }
457
/* Mount a bpf filesystem at target. The mount point is first made private
 * (bind-mounting it onto itself once if the make-private call reports
 * EINVAL, i.e. it was not a mount point yet) so the bpf mount does not
 * propagate, then "bpf" is mounted with mode 0700. Returns 0 or -1 with
 * a message on stderr.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		if (errno != EINVAL || bind_done) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		/* Turn target into a mount point, then retry make-private. */
		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700")) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
486
/* Ensure the mount target exists, creating it (mode 0700) if missing.
 * Returns 0 on success or mkdir's failure value with a message printed.
 */
static int bpf_mnt_check_target(const char *target)
{
	struct stat sb = {};
	int ret;

	if (stat(target, &sb) == 0)
		return 0;

	ret = mkdir(target, S_IRWXU);
	if (ret) {
		fprintf(stderr, "mkdir %s failed: %s\n", target,
			strerror(errno));
		return ret;
	}

	return 0;
}
504
/* Check whether mnt hosts a filesystem with the given magic number;
 * returns 0 if so, -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs st_fs;

	if (statfs(mnt, &st_fs) < 0 ||
	    (unsigned long)st_fs.f_type != magic)
		return -ENOENT;

	return 0;
}
516
517 static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
518 int len, const char *mntpt)
519 {
520 int ret;
521
522 ret = bpf_valid_mntpt(mntpt, magic);
523 if (!ret) {
524 strlcpy(mnt, mntpt, len);
525 return mnt;
526 }
527
528 return NULL;
529 }
530
/* Locate a mount point of the given filesystem type. The optional
 * NULL-terminated known_mnts list is probed first by magic number; as a
 * fallback /proc/mounts is scanned by fstype name, which requires the
 * caller's buffer to be exactly PATH_MAX bytes (enforced below, since the
 * fscanf width is hard-coded via textify(PATH_MAX)). Returns mnt on
 * success or NULL.
 */
static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
				  char *mnt, int len,
				  const char * const *known_mnts)
{
	const char * const *ptr;
	char type[100];
	FILE *fp;

	if (known_mnts) {
		ptr = known_mnts;
		while (*ptr) {
			if (bpf_find_mntpt_single(magic, mnt, len, *ptr))
				return mnt;
			ptr++;
		}
	}

	/* The scan below writes up to PATH_MAX bytes into mnt. */
	if (len != PATH_MAX)
		return NULL;

	fp = fopen("/proc/mounts", "r");
	if (fp == NULL)
		return NULL;

	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
		      mnt, type) == 2) {
		if (strcmp(type, fstype) == 0)
			break;
	}

	fclose(fp);
	/* NOTE(review): if /proc/mounts is empty, 'type' is read
	 * uninitialized here — presumed unreachable on a real system.
	 */
	if (strcmp(type, fstype) != 0)
		return NULL;

	return mnt;
}
567
/* Stream the kernel's trace_pipe (bpf_trace_printk() output) to stderr
 * until read/write fails or the user interrupts. Locates tracefs among a
 * set of well-known mount points. Always returns -1 (only exits on error
 * or signal).
 */
int bpf_trace_pipe(void)
{
	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
	static const char * const tracefs_known_mnts[] = {
		TRACE_DIR_MNT,
		"/sys/kernel/debug/tracing",
		"/tracing",
		"/trace",
		0,
	};
	int fd_in, fd_out = STDERR_FILENO;
	char tpipe[PATH_MAX];
	const char *mnt;

	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
			     sizeof(tracefs_mnt), tracefs_known_mnts);
	if (!mnt) {
		fprintf(stderr, "tracefs not mounted?\n");
		return -1;
	}

	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);

	fd_in = open(tpipe, O_RDONLY);
	if (fd_in < 0)
		return -1;

	fprintf(stderr, "Running! Hang up with ^C!\n\n");
	/* Blocking copy loop: trace_pipe reads block until data arrives. */
	while (1) {
		static char buff[4096];
		ssize_t ret;

		ret = read(fd_in, buff, sizeof(buff));
		if (ret > 0 && write(fd_out, buff, ret) == ret)
			continue;
		break;
	}

	close(fd_in);
	return -1;
}
609
610 static int bpf_gen_global(const char *bpf_sub_dir)
611 {
612 char bpf_glo_dir[PATH_MAX];
613 int ret;
614
615 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/",
616 bpf_sub_dir, BPF_DIR_GLOBALS);
617
618 ret = mkdir(bpf_glo_dir, S_IRWXU);
619 if (ret && errno != EEXIST) {
620 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
621 strerror(errno));
622 return ret;
623 }
624
625 return 0;
626 }
627
628 static int bpf_gen_master(const char *base, const char *name)
629 {
630 char bpf_sub_dir[PATH_MAX];
631 int ret;
632
633 snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name);
634
635 ret = mkdir(bpf_sub_dir, S_IRWXU);
636 if (ret && errno != EEXIST) {
637 fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir,
638 strerror(errno));
639 return ret;
640 }
641
642 return bpf_gen_global(bpf_sub_dir);
643 }
644
/* Fallback for filesystems that refuse symlinks (EPERM): create the slave
 * directory and bind-mount the master onto it. The directory is removed
 * again if the bind mount fails.
 */
static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int ret;

	ret = mkdir(full_name, S_IRWXU);
	if (ret) {
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return ret;
	}

	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (!ret)
		return 0;

	rmdir(full_name);
	fprintf(stderr, "mount --bind %s %s failed: %s\n",
		full_link, full_name, strerror(errno));
	return ret;
}
667
/* Create slave pinning entry "<base><name>" pointing at master directory
 * "<base><link>/". Preferred mechanism is a symlink; EPERM falls back to a
 * bind mount, EEXIST is accepted if the existing entry is a symlink, and
 * an existing real directory just gets its globals dir (re)created.
 */
static int bpf_gen_slave(const char *base, const char *name,
			 const char *link)
{
	char bpf_lnk_dir[PATH_MAX];
	char bpf_sub_dir[PATH_MAX];
	struct stat sb = {};
	int ret;

	snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);

	ret = symlink(bpf_lnk_dir, bpf_sub_dir);
	if (ret) {
		if (errno != EEXIST) {
			if (errno != EPERM) {
				fprintf(stderr, "symlink %s failed: %s\n",
					bpf_sub_dir, strerror(errno));
				return ret;
			}

			/* Filesystem forbids symlinks: bind-mount instead. */
			return bpf_slave_via_bind_mnt(bpf_sub_dir,
						      bpf_lnk_dir);
		}

		/* Entry already exists: find out what it is. */
		ret = lstat(bpf_sub_dir, &sb);
		if (ret) {
			fprintf(stderr, "lstat %s failed: %s\n",
				bpf_sub_dir, strerror(errno));
			return ret;
		}

		/* A pre-existing real directory is used as-is; make sure
		 * its globals subdirectory exists.
		 */
		if ((sb.st_mode & S_IFMT) != S_IFLNK)
			return bpf_gen_global(bpf_sub_dir);
	}

	return 0;
}
705
706 static int bpf_gen_hierarchy(const char *base)
707 {
708 int ret, i;
709
710 ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
711 for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++)
712 ret = bpf_gen_slave(base,
713 bpf_prog_to_subdir(__bpf_types[i]),
714 bpf_prog_to_subdir(__bpf_types[0]));
715 return ret;
716 }
717
/* Return the bpffs working directory for a program type, e.g.
 * "/sys/fs/bpf/tc/", or the mount root for BPF_PROG_TYPE_UNSPEC (0).
 * On first use this finds — or mounts — a bpf filesystem (honouring the
 * BPF_ENV_MNT environment override) and creates the pinning hierarchy;
 * the result is cached in function-static state for later calls.
 * Returns NULL when no bpffs could be found or mounted.
 */
static const char *bpf_get_work_dir(enum bpf_prog_type type)
{
	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
	static char bpf_wrk_dir[PATH_MAX];
	static const char *mnt;
	static bool bpf_mnt_cached;
	const char *mnt_env = getenv(BPF_ENV_MNT);
	static const char * const bpf_known_mnts[] = {
		BPF_DIR_MNT,
		"/bpf",
		0,
	};
	int ret;

	if (bpf_mnt_cached) {
		const char *out = mnt;

		/* Non-zero type selects the per-type subdirectory. */
		if (out && type) {
			snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
				 out, bpf_prog_to_subdir(type));
			out = bpf_tmp;
		}
		return out;
	}

	if (mnt_env)
		mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp,
					    sizeof(bpf_tmp), mnt_env);
	else
		mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp,
				     sizeof(bpf_tmp), bpf_known_mnts);
	if (!mnt) {
		/* Nothing mounted yet: try to mount bpffs ourselves. */
		mnt = mnt_env ? : BPF_DIR_MNT;
		ret = bpf_mnt_check_target(mnt);
		if (!ret)
			ret = bpf_mnt_fs(mnt);
		if (ret) {
			mnt = NULL;
			goto out;
		}
	}

	snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);

	ret = bpf_gen_hierarchy(bpf_wrk_dir);
	if (ret) {
		mnt = NULL;
		goto out;
	}

	mnt = bpf_wrk_dir;
out:
	/* Cache the outcome either way, including failure (mnt == NULL). */
	bpf_mnt_cached = true;
	return mnt;
}
773
774 static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
775 {
776 union bpf_attr attr = {};
777 char tmp[PATH_MAX];
778
779 if (strlen(pathname) > 2 && pathname[0] == 'm' &&
780 pathname[1] == ':' && bpf_get_work_dir(type)) {
781 snprintf(tmp, sizeof(tmp), "%s/%s",
782 bpf_get_work_dir(type), pathname + 2);
783 pathname = tmp;
784 }
785
786 attr.pathname = bpf_ptr_to_u64(pathname);
787
788 return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
789 }
790
791 static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type)
792 {
793 int prog_fd = bpf_obj_get(pathname, type);
794
795 if (prog_fd < 0)
796 fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n",
797 pathname, strerror(errno));
798 return prog_fd;
799 }
800
/* Input modes accepted by bpf_parse(): classic BPF given inline or via
 * file, and eBPF given as an ELF object or as an already pinned fd.
 */
enum bpf_mode {
	CBPF_BYTECODE,	/* "bytecode"/"bc": cBPF opcodes on the cmdline */
	CBPF_FILE,	/* "bytecode-file"/"bcf": cBPF opcodes from file */
	EBPF_OBJECT,	/* "object-file"/"obj": eBPF ELF object */
	EBPF_PINNED,	/* "object-pinned"/"pinned"/"fd": pinned program */
	BPF_MODE_MAX,
};
808
/* Parse the BPF-related command line in cfg->argc/argv. opt_tbl selects
 * which of the bpf_mode keywords are permitted. For eBPF modes, optional
 * "type", "section", "export" and "verbose" keywords are consumed after
 * the object/pin path. On success, loads/opens the program and returns
 * its parse result (instruction count for cBPF, fd for eBPF); updates
 * *type, *mode and the cfg fields. Returns negative on error. Note that
 * NEXT_ARG()/NEXT_ARG_FWD()/PREV_ARG() mutate the local argc/argv cursor.
 */
static int bpf_parse(enum bpf_prog_type *type, enum bpf_mode *mode,
		     struct bpf_cfg_in *cfg, const bool *opt_tbl)
{
	const char *file, *section, *uds_name;
	bool verbose = false;
	int i, ret, argc;
	char **argv;

	argv = cfg->argv;
	argc = cfg->argc;

	/* First keyword selects the input mode, if enabled in opt_tbl. */
	if (opt_tbl[CBPF_BYTECODE] &&
	    (matches(*argv, "bytecode") == 0 ||
	     strcmp(*argv, "bc") == 0)) {
		*mode = CBPF_BYTECODE;
	} else if (opt_tbl[CBPF_FILE] &&
		   (matches(*argv, "bytecode-file") == 0 ||
		    strcmp(*argv, "bcf") == 0)) {
		*mode = CBPF_FILE;
	} else if (opt_tbl[EBPF_OBJECT] &&
		   (matches(*argv, "object-file") == 0 ||
		    strcmp(*argv, "obj") == 0)) {
		*mode = EBPF_OBJECT;
	} else if (opt_tbl[EBPF_PINNED] &&
		   (matches(*argv, "object-pinned") == 0 ||
		    matches(*argv, "pinned") == 0 ||
		    matches(*argv, "fd") == 0)) {
		*mode = EBPF_PINNED;
	} else {
		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
		return -1;
	}

	NEXT_ARG();
	file = section = uds_name = NULL;
	if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
		file = *argv;
		NEXT_ARG_FWD();

		/* Optional "type <name>"; defaults to sched_cls. */
		if (*type == BPF_PROG_TYPE_UNSPEC) {
			if (argc > 0 && matches(*argv, "type") == 0) {
				NEXT_ARG();
				for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
				     i++) {
					if (!__bpf_prog_meta[i].type)
						continue;
					if (!matches(*argv,
						     __bpf_prog_meta[i].type)) {
						*type = i;
						break;
					}
				}

				if (*type == BPF_PROG_TYPE_UNSPEC) {
					fprintf(stderr, "What type is \"%s\"?\n",
						*argv);
					return -1;
				}
				NEXT_ARG_FWD();
			} else {
				*type = BPF_PROG_TYPE_SCHED_CLS;
			}
		}

		/* Optional "section <name>"; defaults per program type. */
		section = bpf_prog_to_default_section(*type);
		if (argc > 0 && matches(*argv, "section") == 0) {
			NEXT_ARG();
			section = *argv;
			NEXT_ARG_FWD();
		}

		/* Optional "export <path>" (or BPF_ENV_UDS env override)
		 * for types that may export maps over a UDS.
		 */
		if (__bpf_prog_meta[*type].may_uds_export) {
			uds_name = getenv(BPF_ENV_UDS);
			if (argc > 0 && !uds_name &&
			    matches(*argv, "export") == 0) {
				NEXT_ARG();
				uds_name = *argv;
				NEXT_ARG_FWD();
			}
		}

		if (argc > 0 && matches(*argv, "verbose") == 0) {
			verbose = true;
			NEXT_ARG_FWD();
		}

		/* Step back so the caller's cursor sits on the last
		 * consumed argument.
		 */
		PREV_ARG();
	}

	if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
		ret = bpf_ops_parse(argc, argv, cfg->ops, *mode == CBPF_FILE);
	else if (*mode == EBPF_OBJECT)
		ret = bpf_obj_open(file, *type, section, verbose);
	else if (*mode == EBPF_PINNED)
		ret = bpf_obj_pinned(file, *type);
	else
		return -1;

	cfg->object = file;
	cfg->section = section;
	cfg->uds = uds_name;
	cfg->argc = argc;
	cfg->argv = argv;

	return ret;
}
915
/* Parse the command line via bpf_parse() and hand the result to the
 * matching callback: cbpf_cb gets the opcode array and its length, ebpf_cb
 * gets the program fd plus an "object:[section]" annotation (or "*fsobj"
 * for pinned programs). Returns 0 on success, negative parse errors
 * through unchanged.
 */
static int bpf_parse_opt_tbl(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
			     const struct bpf_cfg_ops *ops, void *nl,
			     const bool *opt_tbl)
{
	struct sock_filter opcodes[BPF_MAXINSNS];
	char annotation[256];
	enum bpf_mode mode;
	int ret;

	cfg->ops = opcodes;
	ret = bpf_parse(&type, &mode, cfg, opt_tbl);
	cfg->ops = NULL;	/* opcodes is stack-local, don't leak it */
	if (ret < 0)
		return ret;

	if (mode == CBPF_BYTECODE || mode == CBPF_FILE)
		ops->cbpf_cb(nl, opcodes, ret);
	if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
		snprintf(annotation, sizeof(annotation), "%s:[%s]",
			 basename(cfg->object), mode == EBPF_PINNED ?
			 "*fsobj" : cfg->section);
		ops->ebpf_cb(nl, ret, annotation);
	}

	return 0;
}
942
943 int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
944 const struct bpf_cfg_ops *ops, void *nl)
945 {
946 bool opt_tbl[BPF_MODE_MAX] = {};
947
948 if (ops->cbpf_cb) {
949 opt_tbl[CBPF_BYTECODE] = true;
950 opt_tbl[CBPF_FILE] = true;
951 }
952
953 if (ops->ebpf_cb) {
954 opt_tbl[EBPF_OBJECT] = true;
955 opt_tbl[EBPF_PINNED] = true;
956 }
957
958 return bpf_parse_opt_tbl(type, cfg, ops, nl, opt_tbl);
959 }
960
/* Load/lookup an eBPF program from the command line and graft it into the
 * pinned PROG_ARRAY map at map_path under the given key. When key is NULL
 * the key is inferred from the section name ("<x>/<key>"). Returns 0 on
 * success or a negative errno; fds are released on all paths.
 */
int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
{
	enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
	const bool opt_tbl[BPF_MODE_MAX] = {
		[EBPF_OBJECT]	= true,
		[EBPF_PINNED]	= true,
	};
	/* Expected shape of the target map; max_elem/flags are not part
	 * of the comparison (see offsetof() length below).
	 */
	const struct bpf_elf_map test = {
		.type		= BPF_MAP_TYPE_PROG_ARRAY,
		.size_key	= sizeof(int),
		.size_value	= sizeof(int),
	};
	struct bpf_cfg_in cfg = {
		.argc	= argc,
		.argv	= argv,
	};
	struct bpf_map_ext ext = {};
	int ret, prog_fd, map_fd;
	enum bpf_mode mode;
	uint32_t map_key;

	prog_fd = bpf_parse(&type, &mode, &cfg, opt_tbl);
	if (prog_fd < 0)
		return prog_fd;
	if (key) {
		map_key = *key;
	} else {
		/* NOTE(review): "%i" scans into a uint32_t here — relies
		 * on int/uint32_t having the same representation.
		 */
		ret = sscanf(cfg.section, "%*i/%i", &map_key);
		if (ret != 1) {
			fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
			ret = -EINVAL;
			goto out_prog;
		}
	}

	map_fd = bpf_obj_get(map_path, type);
	if (map_fd < 0) {
		fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
			map_path, strerror(errno));
		ret = map_fd;
		goto out_prog;
	}

	ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext,
				       offsetof(struct bpf_elf_map, max_elem),
				       type);
	if (ret < 0) {
		fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
		goto out_map;
	}

	ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
	if (ret < 0)
		fprintf(stderr, "Map update failed: %s\n", strerror(errno));
out_map:
	close(map_fd);
out_prog:
	close(prog_fd);
	return ret;
}
1021
1022 int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
1023 {
1024 union bpf_attr attr = {};
1025
1026 attr.target_fd = target_fd;
1027 attr.attach_bpf_fd = prog_fd;
1028 attr.attach_type = type;
1029
1030 return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
1031 }
1032
1033 int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
1034 {
1035 union bpf_attr attr = {};
1036
1037 attr.target_fd = target_fd;
1038 attr.attach_type = type;
1039
1040 return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
1041 }
1042
1043 int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
1044 size_t size_insns, const char *license, char *log,
1045 size_t size_log)
1046 {
1047 union bpf_attr attr = {};
1048
1049 attr.prog_type = type;
1050 attr.insns = bpf_ptr_to_u64(insns);
1051 attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
1052 attr.license = bpf_ptr_to_u64(license);
1053
1054 if (size_log > 0) {
1055 attr.log_buf = bpf_ptr_to_u64(log);
1056 attr.log_size = size_log;
1057 attr.log_level = 1;
1058 }
1059
1060 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
1061 }
1062
1063 #ifdef HAVE_ELF
/* A program extracted from one ELF section, ready for bpf_prog_load(). */
struct bpf_elf_prog {
	enum bpf_prog_type	type;
	const struct bpf_insn	*insns;
	size_t			size;		/* insns size in bytes */
	const char		*license;
};

/* Hash-table node mapping a numeric pinning id to a custom subpath. */
struct bpf_hash_entry {
	unsigned int		pinning;
	const char		*subpath;
	struct bpf_hash_entry	*next;
};

/* Kernel configuration relevant to loading (JIT state). */
struct bpf_config {
	unsigned int		jit_enabled;
};

/* All state for loading one ELF object: libelf handles, symbol/string
 * tables, per-map fds and specs, collected license, program type,
 * verifier log buffer, and the custom-pinning hash table.
 */
struct bpf_elf_ctx {
	struct bpf_config	cfg;
	Elf			*elf_fd;
	GElf_Ehdr		elf_hdr;
	Elf_Data		*sym_tab;
	Elf_Data		*str_tab;
	int			obj_fd;
	int			map_fds[ELF_MAX_MAPS];
	struct bpf_elf_map	maps[ELF_MAX_MAPS];
	struct bpf_map_ext	maps_ext[ELF_MAX_MAPS];
	int			sym_num;
	int			map_num;
	int			map_len;	/* bytes per map spec in ELF */
	bool			*sec_done;	/* per-section "processed" flags */
	int			sec_maps;	/* index of the maps section */
	char			license[ELF_MAX_LICENSE_LEN];
	enum bpf_prog_type	type;
	bool			verbose;
	struct bpf_elf_st	stat;
	struct bpf_hash_entry	*ht[256];	/* pinning id -> subpath */
	char			*log;		/* verifier log, grown on demand */
	size_t			log_size;
};

/* One ELF section: header, payload and resolved name. */
struct bpf_elf_sec_data {
	GElf_Shdr		sec_hdr;
	Elf_Data		*sec_data;
	const char		*sec_name;
};

/* View of the loaded maps handed out to UDS export. */
struct bpf_map_data {
	int			*fds;
	const char		*obj;
	struct bpf_elf_st	*st;
	struct bpf_elf_map	*ent;
};
1117
/* printf-style error reporter: prints the message to stderr, then dumps
 * the accumulated kernel verifier log (trimmed to the last BPF_MAX_LOG
 * bytes unless verbose mode is on) and clears it for the next attempt.
 */
static __check_format_string(2, 3) void
bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
{
	va_list vl;

	va_start(vl, format);
	vfprintf(stderr, format, vl);
	va_end(vl);

	if (ctx->log && ctx->log[0]) {
		if (ctx->verbose) {
			fprintf(stderr, "%s\n", ctx->log);
		} else {
			unsigned int off = 0, len = strlen(ctx->log);

			/* Show only the tail; the interesting verifier
			 * rejection usually sits at the end of the log.
			 */
			if (len > BPF_MAX_LOG) {
				off = len - BPF_MAX_LOG;
				fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
					off);
			}
			fprintf(stderr, "%s\n", ctx->log + off);
		}

		memset(ctx->log, 0, ctx->log_size);
	}
}
1144
1145 static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
1146 {
1147 const size_t log_max = UINT_MAX >> 8;
1148 size_t log_size = ctx->log_size;
1149 void *ptr;
1150
1151 if (!ctx->log) {
1152 log_size = 65536;
1153 } else if (log_size < log_max) {
1154 log_size <<= 1;
1155 if (log_size > log_max)
1156 log_size = log_max;
1157 } else {
1158 return -EINVAL;
1159 }
1160
1161 ptr = realloc(ctx->log, log_size);
1162 if (!ptr)
1163 return -ENOMEM;
1164
1165 ctx->log = ptr;
1166 ctx->log_size = log_size;
1167
1168 return 0;
1169 }
1170
1171 static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
1172 uint32_t size_value, uint32_t max_elem,
1173 uint32_t flags, int inner_fd)
1174 {
1175 union bpf_attr attr = {};
1176
1177 attr.map_type = type;
1178 attr.key_size = size_key;
1179 attr.value_size = inner_fd ? sizeof(int) : size_value;
1180 attr.max_entries = max_elem;
1181 attr.map_flags = flags;
1182 attr.inner_map_fd = inner_fd;
1183
1184 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
1185 }
1186
1187 static int bpf_obj_pin(int fd, const char *pathname)
1188 {
1189 union bpf_attr attr = {};
1190
1191 attr.pathname = bpf_ptr_to_u64(pathname);
1192 attr.bpf_fd = fd;
1193
1194 return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
1195 }
1196
/* Compute the SHA-1 digest of the file at 'object' using the kernel
 * crypto API (AF_ALG): bind a hash socket, sendfile() the object through
 * it and read back the 20-byte digest into out. len must be exactly 20.
 * Returns 0 on success, negative on any failure.
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	/* accept() yields the operational fd data is pumped through. */
	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	/* Zero-copy the whole file into the hash socket. */
	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
			size, stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	if (size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
1271
/* Return the hex SHA-1 of the object file at pathname, used as its
 * namespace directory name under the bpffs mount. The result is cached in
 * static storage after the first successful call; later callers may pass
 * NULL for pathname to reuse the cached value. Returns NULL on hashing
 * failure (result is then not cached).
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool bpf_uid_cached;
	static char bpf_uid[64];
	uint8_t tmp[20];	/* raw SHA-1 digest */
	int ret;

	if (bpf_uid_cached)
		goto done;

	ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
	if (ret) {
		fprintf(stderr, "Object hashing failed!\n");
		return NULL;
	}

	hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
	bpf_uid_cached = true;
done:
	return bpf_uid;
}
1293
1294 static int bpf_init_env(const char *pathname)
1295 {
1296 struct rlimit limit = {
1297 .rlim_cur = RLIM_INFINITY,
1298 .rlim_max = RLIM_INFINITY,
1299 };
1300
1301 /* Don't bother in case we fail! */
1302 setrlimit(RLIMIT_MEMLOCK, &limit);
1303
1304 if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) {
1305 fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
1306 return 0;
1307 }
1308
1309 if (!bpf_get_obj_uid(pathname))
1310 return -1;
1311
1312 return 0;
1313 }
1314
1315 static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
1316 uint32_t pinning)
1317 {
1318 struct bpf_hash_entry *entry;
1319
1320 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1321 while (entry && entry->pinning != pinning)
1322 entry = entry->next;
1323
1324 return entry ? entry->subpath : NULL;
1325 }
1326
1327 static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
1328 uint32_t pinning)
1329 {
1330 switch (pinning) {
1331 case PIN_OBJECT_NS:
1332 case PIN_GLOBAL_NS:
1333 return false;
1334 case PIN_NONE:
1335 return true;
1336 default:
1337 return !bpf_custom_pinning(ctx, pinning);
1338 }
1339 }
1340
/* Build the pin path for @name into @pathname (at most @len bytes):
 * under the per-object subdirectory for PIN_OBJECT_NS, under the
 * shared globals directory for PIN_GLOBAL_NS, or — for custom pinning
 * ids — under a user subpath one level above the type-specific working
 * directory. Callers of the custom branch must already have verified
 * that bpf_custom_pinning() returns non-NULL.
 */
static void bpf_make_pathname(char *pathname, size_t len, const char *name,
			      const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	switch (pinning) {
	case PIN_OBJECT_NS:
		snprintf(pathname, len, "%s/%s/%s",
			 bpf_get_work_dir(ctx->type),
			 bpf_get_obj_uid(NULL), name);
		break;
	case PIN_GLOBAL_NS:
		snprintf(pathname, len, "%s/%s/%s",
			 bpf_get_work_dir(ctx->type),
			 BPF_DIR_GLOBALS, name);
		break;
	default:
		/* "/../" escapes the per-type subdir so custom subpaths
		 * are shared across program types.
		 */
		snprintf(pathname, len, "%s/../%s/%s",
			 bpf_get_work_dir(ctx->type),
			 bpf_custom_pinning(ctx, pinning), name);
		break;
	}
}
1362
/* Try to fetch an already pinned object called @name from the BPF fs.
 * Returns its fd on success, or 0 when this entry is not meant to be
 * pinned or no BPF fs is available — callers treat 0 as "create anew".
 */
static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
			    uint32_t pinning)
{
	char pathname[PATH_MAX];

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_get(pathname, ctx->type);
}
1374
1375 static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
1376 {
1377 char tmp[PATH_MAX];
1378 int ret;
1379
1380 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type),
1381 bpf_get_obj_uid(NULL));
1382
1383 ret = mkdir(tmp, S_IRWXU);
1384 if (ret && errno != EEXIST) {
1385 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
1386 return ret;
1387 }
1388
1389 return 0;
1390 }
1391
1392 static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
1393 const char *todo)
1394 {
1395 char tmp[PATH_MAX], rem[PATH_MAX], *sub;
1396 int ret;
1397
1398 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
1399 snprintf(rem, sizeof(rem), "%s/", todo);
1400 sub = strtok(rem, "/");
1401
1402 while (sub) {
1403 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
1404 return -EINVAL;
1405
1406 strcat(tmp, sub);
1407 strcat(tmp, "/");
1408
1409 ret = mkdir(tmp, S_IRWXU);
1410 if (ret && errno != EEXIST) {
1411 fprintf(stderr, "mkdir %s failed: %s\n", tmp,
1412 strerror(errno));
1413 return ret;
1414 }
1415
1416 sub = strtok(NULL, "/");
1417 }
1418
1419 return 0;
1420 }
1421
/* Pin @fd in the BPF fs under the path derived from @name/@pinning,
 * creating the object or custom directory hierarchy first as needed.
 * Returns 0 when the entry does not use pinning or no BPF fs is
 * mounted; otherwise the result of bpf_obj_pin().
 */
static int bpf_place_pinned(int fd, const char *name,
			    const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	char pathname[PATH_MAX];
	const char *tmp;
	int ret = 0;

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	/* Make sure the target directory exists before pinning. */
	if (pinning == PIN_OBJECT_NS)
		ret = bpf_make_obj_path(ctx);
	else if ((tmp = bpf_custom_pinning(ctx, pinning)))
		ret = bpf_make_custom_path(ctx, tmp);
	if (ret < 0)
		return ret;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_pin(fd, pathname);
}
1442
/* Print a human-readable load report for program @section — success
 * (with its fd) or rejection (with errno) — followed by the buffered
 * verifier log. Relies on errno still holding the bpf(2) failure code.
 */
static void bpf_prog_report(int fd, const char *section,
			    const struct bpf_elf_prog *prog,
			    struct bpf_elf_ctx *ctx)
{
	unsigned int insns = prog->size / sizeof(struct bpf_insn);

	fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
		fd < 0 ? "rejected: " : "loaded",
		fd < 0 ? strerror(errno) : "",
		fd < 0 ? errno : fd);

	fprintf(stderr, " - Type: %u\n", prog->type);
	fprintf(stderr, " - Instructions: %u (%u over limit)\n",
		insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
	fprintf(stderr, " - License: %s\n\n", prog->license);

	bpf_dump_error(ctx, "Verifier analysis:\n\n");
}
1461
/* Load @prog into the kernel and return its fd. On failure (or always
 * in verbose mode) a report plus the verifier log is printed. If the
 * log buffer was too small (ENOSPC, or no buffer yet), it is grown and
 * the load retried up to 10 times so the user gets a complete log.
 */
static int bpf_prog_attach(const char *section,
			   const struct bpf_elf_prog *prog,
			   struct bpf_elf_ctx *ctx)
{
	int tries = 0, fd;
retry:
	/* Clear errno: the report below distinguishes load failure from
	 * success purely via fd, but prints strerror(errno).
	 */
	errno = 0;
	fd = bpf_prog_load(prog->type, prog->insns, prog->size,
			   prog->license, ctx->log, ctx->log_size);
	if (fd < 0 || ctx->verbose) {
		/* The verifier log is pretty chatty, sometimes so chatty
		 * on larger programs, that we could fail to dump everything
		 * into our buffer. Still, try to give a debuggable error
		 * log for the user, so enlarge it and re-fail.
		 */
		if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
			if (tries++ < 10 && !bpf_log_realloc(ctx))
				goto retry;

			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
				ctx->log_size, tries);
			return fd;
		}

		bpf_prog_report(fd, section, prog, ctx);
	}

	return fd;
}
1491
/* Print a human-readable creation report for map @name — success (fd)
 * or rejection (errno). When @inner_fd is set the map is a map-in-map
 * outer map, whose real value size is sizeof(int) (an fd slot) rather
 * than the ELF-declared one.
 */
static void bpf_map_report(int fd, const char *name,
			   const struct bpf_elf_map *map,
			   struct bpf_elf_ctx *ctx, int inner_fd)
{
	fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
		fd < 0 ? "rejected: " : "loaded",
		fd < 0 ? strerror(errno) : "",
		fd < 0 ? errno : fd);

	fprintf(stderr, " - Type: %u\n", map->type);
	fprintf(stderr, " - Identifier: %u\n", map->id);
	fprintf(stderr, " - Pinning: %u\n", map->pinning);
	fprintf(stderr, " - Size key: %u\n", map->size_key);
	fprintf(stderr, " - Size value: %u\n",
		inner_fd ? (int)sizeof(int) : map->size_value);
	fprintf(stderr, " - Max elems: %u\n", map->max_elem);
	fprintf(stderr, " - Flags: %#x\n\n", map->flags);
}
1510
1511 static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
1512 {
1513 int i;
1514
1515 for (i = 0; i < ctx->map_num; i++) {
1516 if (ctx->maps[i].id != id)
1517 continue;
1518 if (ctx->map_fds[i] < 0)
1519 return -EINVAL;
1520
1521 return ctx->map_fds[i];
1522 }
1523
1524 return -ENOENT;
1525 }
1526
/* Diagnose a failed inner-map insertion into outer map @outer_fd at
 * index @idx: report an out-of-bounds index for array-of-maps if that
 * is provable from fdinfo, otherwise fall back to the generic
 * spec-mismatch hint.
 */
static void bpf_report_map_in_map(int outer_fd, uint32_t idx)
{
	struct bpf_elf_map outer_map;
	int ret;

	fprintf(stderr, "Cannot insert map into map! ");

	ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL);
	if (!ret) {
		if (idx >= outer_map.max_elem &&
		    outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
			fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
				outer_map.max_elem, idx);
			return;
		}
	}

	fprintf(stderr, "Different map specs used for outer and inner map?\n");
}
1546
1547 static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
1548 {
1549 return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1550 map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
1551 }
1552
/* Materialize one ELF-declared map: reuse a pinned instance when one
 * exists (after self-checking its spec against the ELF declaration),
 * otherwise create it, then pin it if requested.
 *
 * Map-in-map handling is two-pass: in the first pass (@have_map_in_map
 * non-NULL) outer maps are only counted and deferred (return 0); in
 * the second pass (@have_map_in_map NULL) the inner map's fd is
 * resolved and passed to map creation.
 *
 * Returns the map fd, 0 for a deferred outer map, or a negative error.
 */
static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
			  const struct bpf_elf_map *map, struct bpf_map_ext *ext,
			  int *have_map_in_map)
{
	int fd, ret, map_inner_fd = 0;

	fd = bpf_probe_pinned(name, ctx, map->pinning);
	if (fd > 0) {
		/* Compare only up to 'id'; trailing fields are local-only. */
		ret = bpf_map_selfcheck_pinned(fd, map, ext,
					       offsetof(struct bpf_elf_map,
							id), ctx->type);
		if (ret < 0) {
			close(fd);
			fprintf(stderr, "Map \'%s\' self-check failed!\n",
				name);
			return ret;
		}
		if (ctx->verbose)
			fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
				name);
		return fd;
	}

	/* First pass: defer outer maps until all inner maps exist. */
	if (have_map_in_map && bpf_is_map_in_map_type(map)) {
		(*have_map_in_map)++;
		if (map->inner_id)
			return 0;
		fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
			name);
		return -EINVAL;
	}

	/* Second pass: resolve the declared inner map to its fd. */
	if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
		map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
		if (map_inner_fd < 0) {
			fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
				name, map->inner_id);
			return -EINVAL;
		}
	}

	errno = 0;	/* bpf_map_report() prints strerror(errno) */
	fd = bpf_map_create(map->type, map->size_key, map->size_value,
			    map->max_elem, map->flags, map_inner_fd);
	if (fd < 0 || ctx->verbose) {
		bpf_map_report(fd, name, map, ctx, map_inner_fd);
		if (fd < 0)
			return fd;
	}

	ret = bpf_place_pinned(fd, name, ctx, map->pinning);
	if (ret < 0 && errno != EEXIST) {
		fprintf(stderr, "Could not pin %s map: %s\n", name,
			strerror(errno));
		close(fd);
		return ret;
	}

	return fd;
}
1613
/* Resolve a symbol's name: st_name is a byte offset into the cached
 * ELF string table section data.
 */
static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
				    const GElf_Sym *sym)
{
	return ctx->str_tab->d_buf + sym->st_name;
}
1619
/* Find the name of the @which'th map (by slot order within the maps
 * section) by scanning the symbol table for the global symbol whose
 * value falls into that slot. Returns NULL when no symbol matches.
 */
static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
{
	GElf_Sym sym;
	int i;

	for (i = 0; i < ctx->sym_num; i++) {
		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		/* Only global, untyped symbols inside the maps section
		 * count; st_value / map_len is the slot index.
		 */
		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
		    sym.st_shndx != ctx->sec_maps ||
		    sym.st_value / ctx->map_len != which)
			continue;

		return bpf_str_tab_name(ctx, &sym);
	}

	return NULL;
}
1640
/* Create/fetch every map declared in the ELF, in three phases:
 *  1) attach all ordinary maps, counting and deferring map-in-map
 *     outer maps (their fd slot is parked as -1);
 *  2) attach the deferred outer maps now that inner fds exist;
 *  3) pre-populate outer maps whose spec requested it (inner_idx)
 *     by inserting the inner map fds.
 * Returns 0 on success or a negative error.
 */
static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
{
	int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
	const char *map_name;

	for (i = 0; i < ctx->map_num; i++) {
		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], &have_map_in_map);
		if (fd < 0)
			return fd;

		/* fd == 0 means "deferred outer map"; mark with -1. */
		ctx->map_fds[i] = !fd ? -1 : fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		if (ctx->map_fds[i] >= 0)
			continue;

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], NULL);
		if (fd < 0)
			return fd;

		ctx->map_fds[i] = fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		/* Only inner maps that asked for a fixed slot. */
		if (!ctx->maps[i].id ||
		    ctx->maps[i].inner_id ||
		    ctx->maps[i].inner_idx == -1)
			continue;

		inner_fd = ctx->map_fds[i];
		inner_idx = ctx->maps[i].inner_idx;

		for (j = 0; j < ctx->map_num; j++) {
			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
				continue;
			if (ctx->maps[j].inner_id != ctx->maps[i].id)
				continue;

			ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
					     &inner_fd, BPF_ANY);
			if (ret < 0) {
				bpf_report_map_in_map(ctx->map_fds[j],
						      inner_idx);
				return ret;
			}
		}
	}

	return 0;
}
1702
/* Count the global, untyped symbols that live in the maps section —
 * i.e. the number of maps actually declared in the ELF object.
 */
static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
{
	int i, num = 0;
	GElf_Sym sym;

	for (i = 0; i < ctx->sym_num; i++) {
		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
		    sym.st_shndx != ctx->sec_maps)
			continue;
		num++;
	}

	return num;
}
1721
/* Fetch header, name and payload of ELF section @section into @data.
 * Returns 0 on success; -ENOENT for unnamed/empty sections, -EINVAL or
 * -EIO on libelf failures. The returned pointers reference libelf
 * internal storage tied to ctx->elf_fd — do not free them.
 */
static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
				 struct bpf_elf_sec_data *data)
{
	Elf_Data *sec_edata;
	GElf_Shdr sec_hdr;
	Elf_Scn *sec_fd;
	char *sec_name;

	memset(data, 0, sizeof(*data));

	sec_fd = elf_getscn(ctx->elf_fd, section);
	if (!sec_fd)
		return -EINVAL;
	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
		return -EIO;

	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
			      sec_hdr.sh_name);
	if (!sec_name || !sec_hdr.sh_size)
		return -ENOENT;

	/* Expect exactly one data descriptor per section. */
	sec_edata = elf_getdata(sec_fd, NULL);
	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
		return -EIO;

	memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));

	data->sec_name = sec_name;
	data->sec_data = sec_edata;
	return 0;
}
1753
/* Minimal, required prefix of struct bpf_elf_map. Objects built
 * against an older/smaller map struct must carry at least these four
 * fields; bpf_fetch_maps_end() zero-pads anything the object omits.
 */
struct bpf_elf_map_min {
	__u32 type;		/* BPF_MAP_TYPE_* */
	__u32 size_key;		/* key size in bytes */
	__u32 size_value;	/* value size in bytes */
	__u32 max_elem;		/* maximum number of entries */
};
1760
/* Record the maps section: remember its index, mark it done, and copy
 * the raw section bytes into ctx->maps. At this point map_num is still
 * a byte count; bpf_fetch_maps_end() later converts it to a map count
 * once the per-entry size is known from the symbol table.
 */
static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
				struct bpf_elf_sec_data *data)
{
	ctx->map_num = data->sec_data->d_size;
	ctx->sec_maps = section;
	ctx->sec_done[section] = true;

	if (ctx->map_num > sizeof(ctx->maps)) {
		fprintf(stderr, "Too many BPF maps in ELF section!\n");
		return -ENOMEM;
	}

	memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
	return 0;
}
1776
/* Sanity-check the inferred per-map entry size (ctx->map_len): every
 * multiple of it below @end must be the st_value of some map symbol.
 * If any expected offset has no matching symbol, the entry size guess
 * is wrong (mixed struct bpf_elf_map versions) and -1 is returned.
 */
static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
{
	GElf_Sym sym;
	int off, i;

	for (off = 0; off < end; off += ctx->map_len) {
		/* Order doesn't need to be linear here, hence we walk
		 * the table again.
		 */
		for (i = 0; i < ctx->sym_num; i++) {
			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
				continue;
			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
			    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
			    sym.st_shndx != ctx->sec_maps)
				continue;
			if (sym.st_value == off)
				break;
			if (i == ctx->sym_num - 1)
				return -1;
		}
	}

	return off == end ? 0 : -1;
}
1802
/* Finalize map parsing once symbols are available: derive the per-map
 * entry size from section bytes / symbol count, verify it, and convert
 * ctx->map_num from bytes to a map count. Objects compiled against a
 * smaller (older) struct bpf_elf_map are fixed up in place by copying
 * each truncated entry into a zero-initialized full-size slot; larger
 * (newer) or undersized layouts are rejected.
 */
static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
	int i, sym_num = bpf_map_num_sym(ctx);
	__u8 *buff;

	if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
		fprintf(stderr, "%u maps not supported in current map section!\n",
			sym_num);
		return -EINVAL;
	}

	/* Section size must split evenly into sym_num aligned entries. */
	if (ctx->map_num % sym_num != 0 ||
	    ctx->map_num % sizeof(__u32) != 0) {
		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
		return -EINVAL;
	}

	ctx->map_len = ctx->map_num / sym_num;
	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
		return -EINVAL;
	}

	if (ctx->map_len == sizeof(struct bpf_elf_map)) {
		ctx->map_num = sym_num;
		return 0;
	} else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
		fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
		return -EINVAL;
	} else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
		fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
		return -EINVAL;
	}

	ctx->map_num = sym_num;
	for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
	     i++, buff += ctx->map_len) {
		/* The fixup leaves the rest of the members as zero, which
		 * is fine currently, but option exist to set some other
		 * default value as well when needed in future.
		 */
		memcpy(&fixup[i], buff, ctx->map_len);
	}

	memcpy(ctx->maps, fixup, sizeof(fixup));

	printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
	       sizeof(struct bpf_elf_map) - ctx->map_len);
	return 0;
}
1854
/* Copy the license section into ctx->license (the kernel checks this
 * string when GPL-only helpers are used) and mark the section done.
 */
static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
			     struct bpf_elf_sec_data *data)
{
	if (data->sec_data->d_size > sizeof(ctx->license))
		return -ENOMEM;

	memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
	ctx->sec_done[section] = true;
	return 0;
}
1865
/* Cache the symbol table section and its entry count for later
 * map-name and relocation lookups.
 */
static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
			    struct bpf_elf_sec_data *data)
{
	ctx->sym_tab = data->sec_data;
	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
	ctx->sec_done[section] = true;
	return 0;
}
1874
/* Cache the string table section used to resolve symbol names. */
static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
			    struct bpf_elf_sec_data *data)
{
	ctx->str_tab = data->sec_data;
	ctx->sec_done[section] = true;
	return 0;
}
1882
1883 static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
1884 {
1885 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
1886 }
1887
/* First parsing pass over all ELF sections: collect maps, license,
 * symbol and string tables, then — if map data is complete — fix up
 * the map structs and create all maps in the kernel. Program sections
 * are handled later. Returns 0/positive on success, negative on error.
 */
static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_sec_data data;
	int i, ret = -1;

	/* Section 0 is the reserved NULL section, hence start at 1. */
	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0)
			continue;

		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
			ret = bpf_fetch_maps_begin(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
			ret = bpf_fetch_license(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
			 !strcmp(data.sec_name, ".symtab"))
			ret = bpf_fetch_symtab(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
			 !strcmp(data.sec_name, ".strtab"))
			ret = bpf_fetch_strtab(ctx, i, &data);
		if (ret < 0) {
			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
				i);
			return ret;
		}
	}

	if (bpf_has_map_data(ctx)) {
		ret = bpf_fetch_maps_end(ctx);
		if (ret < 0) {
			fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
			return ret;
		}

		ret = bpf_maps_attach_all(ctx);
		if (ret < 0) {
			fprintf(stderr, "Error loading maps into kernel!\n");
			return ret;
		}
	}

	return ret;
}
1933
/* Load the program from ELF section @section when it needs no
 * relocations: find the matching executable PROGBITS section, build a
 * bpf_elf_prog from its raw instructions and attach it. Sets *sseen
 * when the section exists at all. Returns the prog fd or negative.
 */
static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
			  bool *sseen)
{
	struct bpf_elf_sec_data data;
	struct bpf_elf_prog prog;
	int ret, i, fd = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		if (ctx->sec_done[i])
			continue;

		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0 ||
		    !(data.sec_hdr.sh_type == SHT_PROGBITS &&
		      data.sec_hdr.sh_flags & SHF_EXECINSTR &&
		      !strcmp(data.sec_name, section)))
			continue;

		*sseen = true;

		memset(&prog, 0, sizeof(prog));
		prog.type = ctx->type;
		prog.insns = data.sec_data->d_buf;
		prog.size = data.sec_data->d_size;
		prog.license = ctx->license;

		fd = bpf_prog_attach(section, &prog, ctx);
		if (fd < 0)
			return fd;

		ctx->sec_done[i] = true;
		break;
	}

	return fd;
}
1970
/* Tail-call map statistics gathered while applying relocations: how
 * many PROG_ARRAY maps the program references, and how many of those
 * have (or will have) a JITed owner — used for JIT-mismatch warnings.
 */
struct bpf_tail_call_props {
	unsigned int total;
	unsigned int jited;
};
1975
/* Apply one relocation section to its instruction section: every relo
 * must target a ld64 (BPF_LD | BPF_IMM | BPF_DW) instruction and refer
 * to a symbol inside the maps section. The instruction is patched in
 * place to a BPF_PSEUDO_MAP_FD load of the created map's fd. While
 * walking, tail-call (PROG_ARRAY) references are tallied in @props.
 */
static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
			       struct bpf_elf_sec_data *data_relo,
			       struct bpf_elf_sec_data *data_insn,
			       struct bpf_tail_call_props *props)
{
	Elf_Data *idata = data_insn->sec_data;
	GElf_Shdr *rhdr = &data_relo->sec_hdr;
	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
	struct bpf_insn *insns = idata->d_buf;
	unsigned int num_insns = idata->d_size / sizeof(*insns);

	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
		unsigned int ioff, rmap;
		GElf_Rel relo;
		GElf_Sym sym;

		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
			return -EIO;

		ioff = relo.r_offset / sizeof(struct bpf_insn);
		if (ioff >= num_insns ||
		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
			fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
				ioff);
			/* Relo on a call usually means a non-inlined
			 * function the loader cannot handle.
			 */
			if (ioff < num_insns &&
			    insns[ioff].code == (BPF_JMP | BPF_CALL))
				fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
			return -EINVAL;
		}

		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
			return -EIO;
		if (sym.st_shndx != ctx->sec_maps) {
			fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
				relo_ent, sym.st_shndx);
			return -EIO;
		}

		/* Slot index of the referenced map in ctx->maps. */
		rmap = sym.st_value / ctx->map_len;
		if (rmap >= ARRAY_SIZE(ctx->map_fds))
			return -EINVAL;
		if (!ctx->map_fds[rmap])
			return -EINVAL;
		if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
			props->total++;
			if (ctx->maps_ext[rmap].owner.jited ||
			    (ctx->maps_ext[rmap].owner.type == 0 &&
			     ctx->cfg.jit_enabled))
				props->jited++;
		}

		if (ctx->verbose)
			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
				data_insn->sec_name, ioff);

		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
		insns[ioff].imm = ctx->map_fds[rmap];
	}

	return 0;
}
2038
/* Load the program from section @section when a relocation section
 * exists for it: find a SHT_REL section whose sh_info points at the
 * matching executable section, patch in map fds, then attach. On a
 * failed load, prints JIT-mismatch hints for tail-call maps. Sets
 * *lderr when the failure is a load/relocation error (so callers do
 * not retry without relocations) and *sseen when the section exists.
 */
static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
			       bool *lderr, bool *sseen)
{
	struct bpf_elf_sec_data data_relo, data_insn;
	struct bpf_elf_prog prog;
	int ret, idx, i, fd = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		struct bpf_tail_call_props props = {};

		ret = bpf_fill_section_data(ctx, i, &data_relo);
		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
			continue;

		/* sh_info of a REL section is the target section index. */
		idx = data_relo.sec_hdr.sh_info;

		ret = bpf_fill_section_data(ctx, idx, &data_insn);
		if (ret < 0 ||
		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
		      data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
		      !strcmp(data_insn.sec_name, section)))
			continue;

		*sseen = true;

		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
		if (ret < 0) {
			*lderr = true;
			return ret;
		}

		memset(&prog, 0, sizeof(prog));
		prog.type = ctx->type;
		prog.insns = data_insn.sec_data->d_buf;
		prog.size = data_insn.sec_data->d_size;
		prog.license = ctx->license;

		fd = bpf_prog_attach(section, &prog, ctx);
		if (fd < 0) {
			*lderr = true;
			/* Tail calls only work when prog and map owner
			 * agree on JITing; give a targeted hint.
			 */
			if (props.total) {
				if (ctx->cfg.jit_enabled &&
				    props.total != props.jited)
					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
						props.jited, props.total);
				if (!ctx->cfg.jit_enabled &&
				    props.jited)
					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
						props.jited, props.total);
			}
			return fd;
		}

		ctx->sec_done[i] = true;
		ctx->sec_done[idx] = true;
		break;
	}

	return fd;
}
2099
/* Load the program in @section: first try the relocation-aware path
 * (only possible when map data exists), then fall back to the plain
 * path — unless the first attempt failed during actual loading
 * (lderr), in which case retrying without relocations is pointless.
 * Complains only when the section was never found at all.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool lderr = false, sseen = false;
	int ret = -1;

	if (bpf_has_map_data(ctx))
		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
	if (ret < 0 && !lderr)
		ret = bpf_fetch_prog(ctx, section, &sseen);
	if (ret < 0 && !sseen)
		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
			section);
	return ret;
}
2114
2115 static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
2116 {
2117 int i;
2118
2119 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
2120 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
2121 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
2122 return i;
2123 return -1;
2124 }
2125
/* Bundle used when diagnosing a failed tail-call map update: the two
 * fds involved plus their fdinfo-derived properties for comparing
 * owner program type and JIT state.
 */
struct bpf_jited_aux {
	int prog_fd;
	int map_fd;
	struct bpf_prog_data prog;
	struct bpf_map_ext map;
};
2132
/* Recover a program's type and JIT state by parsing the kernel's
 * /proc/<pid>/fdinfo/<fd> text for @fd. Unrecognized lines are simply
 * skipped; missing fields stay zero from the memset. Returns -EIO when
 * the fdinfo file cannot be opened, 0 otherwise.
 */
static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
{
	char file[PATH_MAX], buff[4096];
	unsigned int val;
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(prog, 0, sizeof(*prog));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "prog_type:\t%u", &val) == 1)
			prog->type = val;
		else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
			prog->jited = val;
	}

	fclose(fp);
	return 0;
}
2158
2159 static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
2160 {
2161 struct bpf_elf_map tmp;
2162 int ret;
2163
2164 ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
2165 if (!ret)
2166 ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);
2167
2168 return ret;
2169 }
2170
/* Populate tail-call program arrays: every remaining section named
 * "<map_id>/<key>" is loaded as a program and its fd stored at <key>
 * in the PROG_ARRAY map with that ELF id. On update failure, prints
 * diagnostics comparing prog and map-owner type/JIT state.
 */
static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_sec_data data;
	uint32_t map_id, key_id;
	int fd, i, ret, idx;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		if (ctx->sec_done[i])
			continue;

		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0)
			continue;

		/* Only sections named "<id>/<key>" participate. */
		ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
		if (ret != 2)
			continue;

		idx = bpf_find_map_by_id(ctx, map_id);
		if (idx < 0)
			continue;

		fd = bpf_fetch_prog_sec(ctx, data.sec_name);
		if (fd < 0)
			return -EIO;

		ret = bpf_map_update(ctx->map_fds[idx], &key_id,
				     &fd, BPF_ANY);
		if (ret < 0) {
			struct bpf_jited_aux aux = {};

			ret = -errno;
			if (errno == E2BIG) {
				fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
					key_id, map_id);
				return ret;
			}

			aux.map_fd = ctx->map_fds[idx];
			aux.prog_fd = fd;

			/* Best-effort diagnostics; the original update
			 * error code is returned either way.
			 */
			if (bpf_tail_call_get_aux(&aux))
				return ret;
			if (!aux.map.owner.type)
				return ret;

			if (aux.prog.type != aux.map.owner.type)
				fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n",
					aux.map.owner.type, aux.prog.type);
			if (aux.prog.jited != aux.map.owner.jited)
				fprintf(stderr, "Tail call map %s jited, but prog %s!\n",
					aux.map.owner.jited ? "is" : "not",
					aux.prog.jited ? "is" : "not");
			return ret;
		}

		ctx->sec_done[i] = true;
	}

	return 0;
}
2232
/* Cache the ELF file's device/inode pair in the context, used to
 * identify the originating object later (e.g. over the UDS map-export
 * protocol). On fstat failure the cached stat simply stays zeroed.
 */
static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
{
	struct stat st;
	int ret;

	memset(&ctx->stat, 0, sizeof(ctx->stat));

	ret = fstat(ctx->obj_fd, &st);
	if (ret < 0) {
		fprintf(stderr, "Stat of elf file failed: %s\n",
			strerror(errno));
		return;
	}

	ctx->stat.st_dev = st.st_dev;
	ctx->stat.st_ino = st.st_ino;
}
2250
2251 static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
2252 {
2253 char buff[PATH_MAX];
2254
2255 while (fgets(buff, sizeof(buff), fp)) {
2256 char *ptr = buff;
2257
2258 while (*ptr == ' ' || *ptr == '\t')
2259 ptr++;
2260
2261 if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
2262 continue;
2263
2264 if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
2265 sscanf(ptr, "%i %s #", id, path) != 2) {
2266 strcpy(path, ptr);
2267 return -1;
2268 }
2269
2270 return 1;
2271 }
2272
2273 return 0;
2274 }
2275
2276 static bool bpf_pinning_reserved(uint32_t pinning)
2277 {
2278 switch (pinning) {
2279 case PIN_NONE:
2280 case PIN_OBJECT_NS:
2281 case PIN_GLOBAL_NS:
2282 return true;
2283 default:
2284 return false;
2285 }
2286 }
2287
/* Load the custom pinning database @db_file ("<id> <subpath>" lines)
 * into the context's hash table. Missing file is fine (no custom
 * pinnings); a corrupted line aborts parsing; reserved ids and
 * allocation failures skip just the offending entry.
 */
static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
{
	struct bpf_hash_entry *entry;
	char subpath[PATH_MAX] = {};
	uint32_t pinning;
	FILE *fp;
	int ret;

	fp = fopen(db_file, "r");
	if (!fp)
		return;

	while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
		if (ret == -1) {
			fprintf(stderr, "Database %s is corrupted at: %s\n",
				db_file, subpath);
			fclose(fp);
			return;
		}

		if (bpf_pinning_reserved(pinning)) {
			fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
				db_file, pinning);
			continue;
		}

		entry = malloc(sizeof(*entry));
		if (!entry) {
			fprintf(stderr, "No memory left for db entry!\n");
			continue;
		}

		entry->pinning = pinning;
		entry->subpath = strdup(subpath);
		if (!entry->subpath) {
			fprintf(stderr, "No memory left for db entry!\n");
			free(entry);
			continue;
		}

		/* Prepend into the bucket chosen by masking the id. */
		entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
		ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
	}

	fclose(fp);
}
2334
2335 static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
2336 {
2337 struct bpf_hash_entry *entry;
2338 int i;
2339
2340 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
2341 while ((entry = ctx->ht[i]) != NULL) {
2342 ctx->ht[i] = entry->next;
2343 free((char *)entry->subpath);
2344 free(entry);
2345 }
2346 }
2347 }
2348
/* Validate the ELF header for an eBPF object: must be a relocatable
 * file for the BPF (or unspecified) machine with the current ELF
 * version, and its byte order must match the host's (checked via the
 * htons(1) trick). Returns 0 on success, negative on mismatch.
 */
static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
{
	if (ctx->elf_hdr.e_type != ET_REL ||
	    (ctx->elf_hdr.e_machine != EM_NONE &&
	     ctx->elf_hdr.e_machine != EM_BPF) ||
	    ctx->elf_hdr.e_version != EV_CURRENT) {
		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
		return -EINVAL;
	}

	switch (ctx->elf_hdr.e_ident[EI_DATA]) {
	default:
		fprintf(stderr, "ELF format error, wrong endianness info?\n");
		return -EINVAL;
	case ELFDATA2LSB:
		/* htons(1) == 1 only on big-endian hosts. */
		if (htons(1) == 1) {
			fprintf(stderr,
				"We are big endian, eBPF object is little endian!\n");
			return -EIO;
		}
		break;
	case ELFDATA2MSB:
		if (htons(1) != 1) {
			fprintf(stderr,
				"We are little endian, eBPF object is big endian!\n");
			return -EIO;
		}
		break;
	}

	return 0;
}
2381
2382 static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
2383 {
2384 static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
2385 int fd;
2386
2387 fd = open(path_jit, O_RDONLY);
2388 if (fd > 0) {
2389 char tmp[16] = {};
2390
2391 if (read(fd, tmp, sizeof(tmp)) > 0)
2392 ctx->cfg.jit_enabled = atoi(tmp);
2393 close(fd);
2394 }
2395 }
2396
/* Initialize the loader context for ELF object @pathname: set up the
 * environment, open and validate the ELF file, allocate per-section
 * bookkeeping, optionally pre-allocate the verifier log (verbose
 * mode), record the file identity and load the pinning database.
 * On any failure all partially acquired resources are released via
 * the goto-cleanup chain. Returns 0 or a negative error.
 */
static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
			    enum bpf_prog_type type, bool verbose)
{
	int ret = -EINVAL;

	if (elf_version(EV_CURRENT) == EV_NONE ||
	    bpf_init_env(pathname))
		return ret;

	memset(ctx, 0, sizeof(*ctx));
	bpf_get_cfg(ctx);
	ctx->verbose = verbose;
	ctx->type = type;

	ctx->obj_fd = open(pathname, O_RDONLY);
	if (ctx->obj_fd < 0)
		return ctx->obj_fd;

	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
	if (!ctx->elf_fd) {
		ret = -EINVAL;
		goto out_fd;
	}

	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
		ret = -EINVAL;
		goto out_fd;
	}

	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
	    &ctx->elf_hdr) {
		ret = -EIO;
		goto out_elf;
	}

	ret = bpf_elf_check_ehdr(ctx);
	if (ret < 0)
		goto out_elf;

	/* One "done" flag per section, consulted by the fetch passes. */
	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
			       sizeof(*(ctx->sec_done)));
	if (!ctx->sec_done) {
		ret = -ENOMEM;
		goto out_elf;
	}

	/* Verbose mode always wants a verifier log buffer up front. */
	if (ctx->verbose && bpf_log_realloc(ctx)) {
		ret = -ENOMEM;
		goto out_free;
	}

	bpf_save_finfo(ctx);
	bpf_hash_init(ctx, CONFDIR "/bpf_pinning");

	return 0;
out_free:
	free(ctx->sec_done);
out_elf:
	elf_end(ctx->elf_fd);
out_fd:
	close(ctx->obj_fd);
	return ret;
}
2460
2461 static int bpf_maps_count(struct bpf_elf_ctx *ctx)
2462 {
2463 int i, count = 0;
2464
2465 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2466 if (!ctx->map_fds[i])
2467 break;
2468 count++;
2469 }
2470
2471 return count;
2472 }
2473
2474 static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
2475 {
2476 int i;
2477
2478 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2479 if (ctx->map_fds[i])
2480 close(ctx->map_fds[i]);
2481 }
2482 }
2483
/* Release everything the context owns. Map fds are only closed on
 * @failure — on success they must stay open because the loaded program
 * (and possibly a UDS export) still references them.
 */
static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
{
	if (failure)
		bpf_maps_teardown(ctx);

	bpf_hash_destroy(ctx);

	free(ctx->sec_done);
	free(ctx->log);

	elf_end(ctx->elf_fd);
	close(ctx->obj_fd);
}
2497
/* Single, file-scope loader context shared by bpf_obj_open() calls —
 * loading is consequently not re-entrant.
 */
static struct bpf_elf_ctx __ctx;
2499
2500 static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
2501 const char *section, bool verbose)
2502 {
2503 struct bpf_elf_ctx *ctx = &__ctx;
2504 int fd = 0, ret;
2505
2506 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
2507 if (ret < 0) {
2508 fprintf(stderr, "Cannot initialize ELF context!\n");
2509 return ret;
2510 }
2511
2512 ret = bpf_fetch_ancillary(ctx);
2513 if (ret < 0) {
2514 fprintf(stderr, "Error fetching ELF ancillary data!\n");
2515 goto out;
2516 }
2517
2518 fd = bpf_fetch_prog_sec(ctx, section);
2519 if (fd < 0) {
2520 fprintf(stderr, "Error fetching program/map!\n");
2521 ret = fd;
2522 goto out;
2523 }
2524
2525 ret = bpf_fill_prog_arrays(ctx);
2526 if (ret < 0)
2527 fprintf(stderr, "Error filling program arrays!\n");
2528 out:
2529 bpf_elf_ctx_destroy(ctx, ret < 0);
2530 if (ret < 0) {
2531 if (fd)
2532 close(fd);
2533 return ret;
2534 }
2535
2536 return fd;
2537 }
2538
2539 static int
2540 bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
2541 const struct bpf_map_data *aux, unsigned int entries)
2542 {
2543 struct bpf_map_set_msg msg = {
2544 .aux.uds_ver = BPF_SCM_AUX_VER,
2545 .aux.num_ent = entries,
2546 };
2547 int *cmsg_buf, min_fd;
2548 char *amsg_buf;
2549 int i;
2550
2551 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
2552 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
2553
2554 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
2555 amsg_buf = (char *)msg.aux.ent;
2556
2557 for (i = 0; i < entries; i += min_fd) {
2558 int ret;
2559
2560 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
2561 bpf_map_set_init_single(&msg, min_fd);
2562
2563 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
2564 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
2565
2566 ret = sendmsg(fd, &msg.hdr, 0);
2567 if (ret <= 0)
2568 return ret ? : -1;
2569 }
2570
2571 return 0;
2572 }
2573
/* Receive a set of map fds plus auxiliary descriptions from a
 * bpf_map_set_send() peer on socket fd, storing at most entries fds
 * into fds[] and their metadata into aux.
 *
 * Returns 0 on success, a negative errno-style value on protocol
 * errors, the recvmsg() error or -1 otherwise.
 */
static int
bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
		 unsigned int entries)
{
	struct bpf_map_set_msg msg;
	int *cmsg_buf, min_fd;
	char *amsg_buf, *mmsg_buf;
	unsigned int needed = 1;
	int i;

	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
	amsg_buf = (char *)msg.aux.ent;
	mmsg_buf = (char *)&msg.aux;

	/* needed starts at 1 so at least one message is received; it is
	 * then refreshed from the sender's announced aux.num_ent below,
	 * keeping the loop going until all announced entries arrived.
	 */
	for (i = 0; i < min(entries, needed); i += min_fd) {
		struct cmsghdr *cmsg;
		int ret;

		/* NOTE(review): min(entries, entries - i) looks like it
		 * was meant to mirror the BPF_SCM_MAX_FDS cap on the
		 * send side; the real per-batch fd count is recomputed
		 * from cmsg_len after recvmsg() — confirm intent.
		 */
		min_fd = min(entries, entries - i);
		bpf_map_set_init_single(&msg, min_fd);

		ret = recvmsg(fd, &msg.hdr, 0);
		if (ret <= 0)
			return ret ? : -1;

		cmsg = CMSG_FIRSTHDR(&msg.hdr);
		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
			return -EINVAL;
		if (msg.hdr.msg_flags & MSG_CTRUNC)
			return -EIO;
		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
			return -ENOSYS;

		/* Number of fds actually carried in this SCM_RIGHTS
		 * control message.
		 */
		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
		if (min_fd > entries || min_fd <= 0)
			return -EINVAL;

		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
		/* Copy the fixed-size aux header only, not the entries. */
		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));

		needed = aux->num_ent;
	}

	return 0;
}
2620
2621 int bpf_send_map_fds(const char *path, const char *obj)
2622 {
2623 struct bpf_elf_ctx *ctx = &__ctx;
2624 struct sockaddr_un addr = { .sun_family = AF_UNIX };
2625 struct bpf_map_data bpf_aux = {
2626 .fds = ctx->map_fds,
2627 .ent = ctx->maps,
2628 .st = &ctx->stat,
2629 .obj = obj,
2630 };
2631 int fd, ret;
2632
2633 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
2634 if (fd < 0) {
2635 fprintf(stderr, "Cannot open socket: %s\n",
2636 strerror(errno));
2637 return -1;
2638 }
2639
2640 strncpy(addr.sun_path, path, sizeof(addr.sun_path));
2641
2642 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
2643 if (ret < 0) {
2644 fprintf(stderr, "Cannot connect to %s: %s\n",
2645 path, strerror(errno));
2646 return -1;
2647 }
2648
2649 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
2650 bpf_maps_count(ctx));
2651 if (ret < 0)
2652 fprintf(stderr, "Cannot send fds to %s: %s\n",
2653 path, strerror(errno));
2654
2655 bpf_maps_teardown(ctx);
2656 close(fd);
2657 return ret;
2658 }
2659
/* Bind a UNIX domain socket at path and receive up to entries map fds
 * plus their auxiliary descriptions from a bpf_send_map_fds() peer.
 * The socket path is unlinked before returning.
 *
 * Returns 0 on success, -1 or a negative error otherwise.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	/* addr is zero-initialized above, so copying at most
	 * sizeof - 1 bytes keeps sun_path NUL-terminated even for
	 * overlong paths.
	 */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		/* Don't leak the socket fd on the error path. */
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
2691 #endif /* HAVE_ELF */