]> git.proxmox.com Git - mirror_iproute2.git/blame - lib/bpf.c
bpf: improve error reporting around tail calls
[mirror_iproute2.git] / lib / bpf.c
CommitLineData
1d129d19 1/*
e4225669 2 * bpf.c BPF common code
1d129d19
JP
3 *
4 * This program is free software; you can distribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
e4225669 9 * Authors: Daniel Borkmann <daniel@iogearbox.net>
1d129d19 10 * Jiri Pirko <jiri@resnulli.us>
e4225669 11 * Alexei Starovoitov <ast@kernel.org>
1d129d19
JP
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
473d7840 19#include <stdint.h>
1d129d19 20#include <errno.h>
11c39b5e
DB
21#include <fcntl.h>
22#include <stdarg.h>
5c5a0f3d 23#include <limits.h>
e4225669 24#include <assert.h>
1d129d19 25
11c39b5e
DB
26#ifdef HAVE_ELF
27#include <libelf.h>
28#include <gelf.h>
29#endif
30
32e93fb7
DB
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <sys/un.h>
34#include <sys/vfs.h>
35#include <sys/mount.h>
36#include <sys/syscall.h>
37#include <sys/sendfile.h>
38#include <sys/resource.h>
39
8187b012
DB
40#include <arpa/inet.h>
41
1d129d19 42#include "utils.h"
6256f8c9 43
e4225669 44#include "bpf_util.h"
6256f8c9
DB
45#include "bpf_elf.h"
46#include "bpf_scm.h"
47
e4225669
DB
/* Static per-program-type metadata; the table below is indexed by
 * enum bpf_prog_type.
 */
struct bpf_prog_meta {
	/* Keyword compared against the value of the "type" argument. */
	const char *type;
	/* Directory name used for this type below the bpf fs work dir. */
	const char *subdir;
	/* ELF section used when the user names none explicitly. */
	const char *section;
	/* Whether bpf_parse() looks at BPF_ENV_UDS / "export" for this type. */
	bool may_uds_export;
};
1d129d19 54
e4225669
DB
55static const enum bpf_prog_type __bpf_types[] = {
56 BPF_PROG_TYPE_SCHED_CLS,
57 BPF_PROG_TYPE_SCHED_ACT,
c7272ca7 58 BPF_PROG_TYPE_XDP,
b15f440e
TG
59 BPF_PROG_TYPE_LWT_IN,
60 BPF_PROG_TYPE_LWT_OUT,
61 BPF_PROG_TYPE_LWT_XMIT,
e4225669 62};
67584e3a 63
e4225669
DB
64static const struct bpf_prog_meta __bpf_prog_meta[] = {
65 [BPF_PROG_TYPE_SCHED_CLS] = {
66 .type = "cls",
67 .subdir = "tc",
68 .section = ELF_SECTION_CLASSIFIER,
69 .may_uds_export = true,
70 },
71 [BPF_PROG_TYPE_SCHED_ACT] = {
72 .type = "act",
73 .subdir = "tc",
74 .section = ELF_SECTION_ACTION,
75 .may_uds_export = true,
76 },
c7272ca7
DB
77 [BPF_PROG_TYPE_XDP] = {
78 .type = "xdp",
79 .subdir = "xdp",
80 .section = ELF_SECTION_PROG,
81 },
b15f440e
TG
82 [BPF_PROG_TYPE_LWT_IN] = {
83 .type = "lwt_in",
84 .subdir = "ip",
85 .section = ELF_SECTION_PROG,
86 },
87 [BPF_PROG_TYPE_LWT_OUT] = {
88 .type = "lwt_out",
89 .subdir = "ip",
90 .section = ELF_SECTION_PROG,
91 },
92 [BPF_PROG_TYPE_LWT_XMIT] = {
93 .type = "lwt_xmit",
94 .subdir = "ip",
95 .section = ELF_SECTION_PROG,
96 },
e4225669
DB
97};
98
99static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
100{
101 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
102 __bpf_prog_meta[type].subdir);
103 return __bpf_prog_meta[type].subdir;
104}
105
106const char *bpf_prog_to_default_section(enum bpf_prog_type type)
107{
108 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
109 __bpf_prog_meta[type].section);
110 return __bpf_prog_meta[type].section;
111}
e77fa41d 112
32e93fb7
DB
113#ifdef HAVE_ELF
114static int bpf_obj_open(const char *path, enum bpf_prog_type type,
115 const char *sec, bool verbose);
116#else
117static int bpf_obj_open(const char *path, enum bpf_prog_type type,
118 const char *sec, bool verbose)
119{
120 fprintf(stderr, "No ELF library support compiled in.\n");
121 errno = ENOSYS;
122 return -1;
123}
124#endif
125
126static inline __u64 bpf_ptr_to_u64(const void *ptr)
127{
128 return (__u64)(unsigned long)ptr;
129}
130
/* Thin wrapper around the bpf(2) system call.
 *
 * When __NR_bpf is not defined at build time, a message is printed to
 * stderr, errno is set to ENOSYS and -1 is returned instead.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifndef __NR_bpf
	fputs("No bpf syscall, kernel headers too old?\n", stderr);
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_bpf, cmd, attr, size);
#endif
}
141
91d88eeb
DB
142static int bpf_map_update(int fd, const void *key, const void *value,
143 uint64_t flags)
32e93fb7 144{
d17b136f 145 union bpf_attr attr = {};
67584e3a 146
67584e3a
ND
147 attr.map_fd = fd;
148 attr.key = bpf_ptr_to_u64(key);
149 attr.value = bpf_ptr_to_u64(value);
150 attr.flags = flags;
32e93fb7 151
91d88eeb 152 return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
32e93fb7
DB
153}
154
779525cd
DB
155static int bpf_prog_fd_by_id(uint32_t id)
156{
157 union bpf_attr attr = {};
158
159 attr.prog_id = id;
160
161 return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
162}
163
164static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
165 uint32_t *info_len)
166{
167 union bpf_attr attr = {};
168 int ret;
169
170 attr.info.bpf_fd = fd;
171 attr.info.info = bpf_ptr_to_u64(info);
172 attr.info.info_len = *info_len;
173
174 *info_len = 0;
175 ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
176 if (!ret)
177 *info_len = attr.info.info_len;
178
179 return ret;
180}
181
182void bpf_dump_prog_info(FILE *f, uint32_t id)
183{
184 struct bpf_prog_info info = {};
185 uint32_t len = sizeof(info);
186 int fd, ret;
187
188 fprintf(f, "id %u ", id);
189
190 fd = bpf_prog_fd_by_id(id);
191 if (fd < 0)
192 return;
193
194 ret = bpf_prog_info_by_fd(fd, &info, &len);
195 if (!ret && len) {
196 if (info.jited_prog_len)
197 fprintf(f, "jited ");
198 }
199
200 close(fd);
201}
202
32e93fb7
DB
203static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
204 char **bpf_string, bool *need_release,
205 const char separator)
1d129d19
JP
206{
207 char sp;
208
209 if (from_file) {
210 size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
e4225669 211 char *tmp_string, *last;
1d129d19
JP
212 FILE *fp;
213
214 tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
f89bb021 215 tmp_string = calloc(1, tmp_len);
1d129d19
JP
216 if (tmp_string == NULL)
217 return -ENOMEM;
218
1d129d19
JP
219 fp = fopen(arg, "r");
220 if (fp == NULL) {
221 perror("Cannot fopen");
222 free(tmp_string);
223 return -ENOENT;
224 }
225
226 if (!fgets(tmp_string, tmp_len, fp)) {
227 free(tmp_string);
228 fclose(fp);
229 return -EIO;
230 }
231
232 fclose(fp);
233
e4225669
DB
234 last = &tmp_string[strlen(tmp_string) - 1];
235 if (*last == '\n')
236 *last = 0;
237
1d129d19
JP
238 *need_release = true;
239 *bpf_string = tmp_string;
240 } else {
241 *need_release = false;
242 *bpf_string = arg;
243 }
244
245 if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
246 sp != separator) {
247 if (*need_release)
248 free(*bpf_string);
249 return -EINVAL;
250 }
251
252 return 0;
253}
254
32e93fb7
DB
255static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
256 bool from_file)
1d129d19
JP
257{
258 char *bpf_string, *token, separator = ',';
259 int ret = 0, i = 0;
260 bool need_release;
261 __u16 bpf_len = 0;
262
263 if (argc < 1)
264 return -EINVAL;
265 if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
266 &need_release, separator))
267 return -EINVAL;
268 if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
269 ret = -EINVAL;
270 goto out;
271 }
272
273 token = bpf_string;
274 while ((token = strchr(token, separator)) && (++token)[0]) {
275 if (i >= bpf_len) {
32a121cb 276 fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
1d129d19
JP
277 ret = -EINVAL;
278 goto out;
279 }
280
281 if (sscanf(token, "%hu %hhu %hhu %u,",
282 &bpf_ops[i].code, &bpf_ops[i].jt,
283 &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
284 fprintf(stderr, "Error at instruction %d!\n", i);
285 ret = -EINVAL;
286 goto out;
287 }
288
289 i++;
290 }
291
292 if (i != bpf_len) {
afc1a200 293 fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
1d129d19
JP
294 ret = -EINVAL;
295 goto out;
296 }
297 ret = bpf_len;
1d129d19
JP
298out:
299 if (need_release)
300 free(bpf_string);
301
302 return ret;
303}
304
305void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
306{
d896797c 307 struct sock_filter *ops = RTA_DATA(bpf_ops);
1d129d19
JP
308 int i;
309
310 if (len == 0)
311 return;
312
313 fprintf(f, "bytecode \'%u,", len);
314
315 for (i = 0; i < len - 1; i++)
316 fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
317 ops[i].jf, ops[i].k);
318
6256f8c9 319 fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
1d129d19
JP
320 ops[i].jf, ops[i].k);
321}
11c39b5e 322
afc1a200
DB
323static void bpf_map_pin_report(const struct bpf_elf_map *pin,
324 const struct bpf_elf_map *obj)
325{
326 fprintf(stderr, "Map specification differs from pinned file!\n");
327
328 if (obj->type != pin->type)
329 fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
330 obj->type, pin->type);
331 if (obj->size_key != pin->size_key)
332 fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
333 obj->size_key, pin->size_key);
334 if (obj->size_value != pin->size_value)
335 fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
336 obj->size_value, pin->size_value);
337 if (obj->max_elem != pin->max_elem)
338 fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
339 obj->max_elem, pin->max_elem);
4dd3f50a
DB
340 if (obj->flags != pin->flags)
341 fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
342 obj->flags, pin->flags);
afc1a200
DB
343
344 fprintf(stderr, "\n");
345}
346
ecb05c0f
DB
/* Program properties; used below to describe the owner of a map. */
struct bpf_prog_data {
	unsigned int type;	/* value of the "owner_prog_type" fdinfo line */
	unsigned int jited;	/* value of the "owner_jited" fdinfo line */
};

/* Map information that has no slot in struct bpf_elf_map. */
struct bpf_map_ext {
	struct bpf_prog_data owner;
};
355
356static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
357 struct bpf_map_ext *ext)
9e607f2e 358{
ecb05c0f 359 unsigned int val, owner_type = 0, owner_jited = 0;
9e607f2e 360 char file[PATH_MAX], buff[4096];
9e607f2e
DB
361 FILE *fp;
362
363 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
ecb05c0f 364 memset(map, 0, sizeof(*map));
9e607f2e
DB
365
366 fp = fopen(file, "r");
367 if (!fp) {
368 fprintf(stderr, "No procfs support?!\n");
369 return -EIO;
370 }
371
9e607f2e
DB
372 while (fgets(buff, sizeof(buff), fp)) {
373 if (sscanf(buff, "map_type:\t%u", &val) == 1)
ecb05c0f 374 map->type = val;
9e607f2e 375 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
ecb05c0f 376 map->size_key = val;
9e607f2e 377 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
ecb05c0f 378 map->size_value = val;
9e607f2e 379 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
ecb05c0f 380 map->max_elem = val;
4dd3f50a 381 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
ecb05c0f 382 map->flags = val;
fb24802b
DB
383 else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
384 owner_type = val;
ecb05c0f
DB
385 else if (sscanf(buff, "owner_jited:\t%i", &val) == 1)
386 owner_jited = val;
9e607f2e
DB
387 }
388
389 fclose(fp);
ecb05c0f
DB
390 if (ext) {
391 memset(ext, 0, sizeof(*ext));
392 ext->owner.type = owner_type;
393 ext->owner.jited = owner_jited;
394 }
395
396 return 0;
397}
398
399static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
400 struct bpf_map_ext *ext, int length,
401 enum bpf_prog_type type)
402{
403 struct bpf_elf_map tmp, zero = {};
404 int ret;
405
406 ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext);
407 if (ret < 0)
408 return ret;
9e607f2e 409
fb24802b
DB
410 /* The decision to reject this is on kernel side eventually, but
411 * at least give the user a chance to know what's wrong.
412 */
ecb05c0f 413 if (ext->owner.type && ext->owner.type != type)
fb24802b 414 fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
ecb05c0f 415 type, ext->owner.type);
fb24802b 416
91d88eeb 417 if (!memcmp(&tmp, map, length)) {
9e607f2e
DB
418 return 0;
419 } else {
9e607f2e
DB
420 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
421 * so just accept it. We know we do have an eBPF fd and in this
422 * case, everything is 0. It is guaranteed that no such map exists
423 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
424 */
91d88eeb 425 if (!memcmp(&tmp, &zero, length))
9e607f2e
DB
426 return 0;
427
afc1a200 428 bpf_map_pin_report(&tmp, map);
9e607f2e
DB
429 return -EINVAL;
430 }
431}
432
91d88eeb
DB
/* Make @target a private mount and mount a bpf file system on it.
 *
 * If marking @target private fails with EINVAL, @target is bind-mounted
 * onto itself once and the operation is retried.
 *
 * Returns 0 on success, -1 (after printing the failing step) otherwise.
 */
static int bpf_mnt_fs(const char *target)
{
	if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		if (errno != EINVAL)
			goto err_private;

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		/* Second and last attempt, now on top of the bind mount. */
		if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL))
			goto err_private;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700")) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;

err_private:
	fprintf(stderr, "mount --make-private %s failed: %s\n",
		target, strerror(errno));
	return -1;
}
461
32e93fb7
DB
/* Check that @mnt can be statfs()ed and that its file system type equals
 * @magic.
 *
 * Returns 0 if so, -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs st_fs;

	if (statfs(mnt, &st_fs) < 0 ||
	    (unsigned long)st_fs.f_type != magic)
		return -ENOENT;

	return 0;
}
473
474static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
475 char *mnt, int len,
476 const char * const *known_mnts)
477{
478 const char * const *ptr;
479 char type[100];
480 FILE *fp;
481
482 if (known_mnts) {
483 ptr = known_mnts;
484 while (*ptr) {
485 if (bpf_valid_mntpt(*ptr, magic) == 0) {
486 strncpy(mnt, *ptr, len - 1);
487 mnt[len - 1] = 0;
488 return mnt;
489 }
490 ptr++;
491 }
492 }
493
494 fp = fopen("/proc/mounts", "r");
495 if (fp == NULL || len != PATH_MAX)
496 return NULL;
497
498 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
499 mnt, type) == 2) {
500 if (strcmp(type, fstype) == 0)
501 break;
502 }
503
504 fclose(fp);
505 if (strcmp(type, fstype) != 0)
506 return NULL;
507
508 return mnt;
509}
510
511int bpf_trace_pipe(void)
512{
513 char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
514 static const char * const tracefs_known_mnts[] = {
515 TRACE_DIR_MNT,
516 "/sys/kernel/debug/tracing",
517 "/tracing",
518 "/trace",
519 0,
520 };
521 char tpipe[PATH_MAX];
522 const char *mnt;
523 int fd;
524
525 mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
526 sizeof(tracefs_mnt), tracefs_known_mnts);
527 if (!mnt) {
528 fprintf(stderr, "tracefs not mounted?\n");
529 return -1;
530 }
531
532 snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
533
534 fd = open(tpipe, O_RDONLY);
535 if (fd < 0)
536 return -1;
537
538 fprintf(stderr, "Running! Hang up with ^C!\n\n");
539 while (1) {
540 static char buff[4096];
541 ssize_t ret;
542
543 ret = read(fd, buff, sizeof(buff) - 1);
544 if (ret > 0) {
545 write(2, buff, ret);
546 fflush(stderr);
547 }
548 }
549
550 return 0;
551}
552
e4225669 553static int bpf_gen_global(const char *bpf_sub_dir)
91d88eeb 554{
e4225669
DB
555 char bpf_glo_dir[PATH_MAX];
556 int ret;
557
558 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/",
559 bpf_sub_dir, BPF_DIR_GLOBALS);
560
561 ret = mkdir(bpf_glo_dir, S_IRWXU);
562 if (ret && errno != EEXIST) {
563 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
564 strerror(errno));
565 return ret;
566 }
567
568 return 0;
569}
570
571static int bpf_gen_master(const char *base, const char *name)
572{
573 char bpf_sub_dir[PATH_MAX];
574 int ret;
575
576 snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name);
577
578 ret = mkdir(bpf_sub_dir, S_IRWXU);
579 if (ret && errno != EEXIST) {
580 fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir,
581 strerror(errno));
582 return ret;
583 }
584
585 return bpf_gen_global(bpf_sub_dir);
586}
587
/* Create directory @full_name (mode 0700) and bind-mount @full_link onto
 * it.
 *
 * The caller only gets here when @full_name does not exist yet, hence the
 * assertion on EEXIST. If the bind mount fails, the freshly created
 * directory is removed again.
 *
 * Returns 0 on success or the result of the failing mkdir()/mount() call;
 * errno then still describes that failure.
 */
static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int ret;

	ret = mkdir(full_name, S_IRWXU);
	if (ret) {
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return ret;
	}

	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (ret) {
		/* rmdir() may overwrite errno; keep the mount error for the
		 * message and for the caller.
		 */
		int err = errno;

		rmdir(full_name);
		fprintf(stderr, "mount --bind %s %s failed: %s\n",
			full_link, full_name, strerror(err));
		errno = err;
	}

	return ret;
}
610
611static int bpf_gen_slave(const char *base, const char *name,
612 const char *link)
613{
614 char bpf_lnk_dir[PATH_MAX];
615 char bpf_sub_dir[PATH_MAX];
616 struct stat sb = {};
617 int ret;
618
619 snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
620 snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);
621
622 ret = symlink(bpf_lnk_dir, bpf_sub_dir);
623 if (ret) {
624 if (errno != EEXIST) {
625 if (errno != EPERM) {
626 fprintf(stderr, "symlink %s failed: %s\n",
627 bpf_sub_dir, strerror(errno));
628 return ret;
629 }
630
631 return bpf_slave_via_bind_mnt(bpf_sub_dir,
632 bpf_lnk_dir);
633 }
634
635 ret = lstat(bpf_sub_dir, &sb);
636 if (ret) {
637 fprintf(stderr, "lstat %s failed: %s\n",
638 bpf_sub_dir, strerror(errno));
639 return ret;
640 }
641
642 if ((sb.st_mode & S_IFMT) != S_IFLNK)
643 return bpf_gen_global(bpf_sub_dir);
644 }
645
646 return 0;
647}
648
649static int bpf_gen_hierarchy(const char *base)
650{
651 int ret, i;
652
653 ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
654 for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++)
655 ret = bpf_gen_slave(base,
656 bpf_prog_to_subdir(__bpf_types[i]),
657 bpf_prog_to_subdir(__bpf_types[0]));
658 return ret;
659}
660
661static const char *bpf_get_work_dir(enum bpf_prog_type type)
662{
663 static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
664 static char bpf_wrk_dir[PATH_MAX];
91d88eeb 665 static const char *mnt;
e4225669 666 static bool bpf_mnt_cached;
91d88eeb
DB
667 static const char * const bpf_known_mnts[] = {
668 BPF_DIR_MNT,
e4225669 669 "/bpf",
91d88eeb
DB
670 0,
671 };
91d88eeb
DB
672 int ret;
673
e4225669
DB
674 if (bpf_mnt_cached) {
675 const char *out = mnt;
676
51361a9f 677 if (out && type) {
e4225669
DB
678 snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
679 out, bpf_prog_to_subdir(type));
680 out = bpf_tmp;
681 }
682 return out;
683 }
91d88eeb 684
e4225669 685 mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
91d88eeb
DB
686 bpf_known_mnts);
687 if (!mnt) {
688 mnt = getenv(BPF_ENV_MNT);
689 if (!mnt)
690 mnt = BPF_DIR_MNT;
691 ret = bpf_mnt_fs(mnt);
692 if (ret) {
693 mnt = NULL;
694 goto out;
695 }
696 }
697
e4225669 698 snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);
91d88eeb 699
e4225669
DB
700 ret = bpf_gen_hierarchy(bpf_wrk_dir);
701 if (ret) {
91d88eeb
DB
702 mnt = NULL;
703 goto out;
704 }
705
e4225669 706 mnt = bpf_wrk_dir;
91d88eeb
DB
707out:
708 bpf_mnt_cached = true;
91d88eeb
DB
709 return mnt;
710}
711
e4225669 712static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
91d88eeb 713{
d17b136f 714 union bpf_attr attr = {};
91d88eeb
DB
715 char tmp[PATH_MAX];
716
717 if (strlen(pathname) > 2 && pathname[0] == 'm' &&
e4225669 718 pathname[1] == ':' && bpf_get_work_dir(type)) {
91d88eeb 719 snprintf(tmp, sizeof(tmp), "%s/%s",
e4225669 720 bpf_get_work_dir(type), pathname + 2);
91d88eeb
DB
721 pathname = tmp;
722 }
723
91d88eeb
DB
724 attr.pathname = bpf_ptr_to_u64(pathname);
725
726 return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
727}
728
21856018
DB
729static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type)
730{
731 int prog_fd = bpf_obj_get(pathname, type);
732
733 if (prog_fd < 0)
734 fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n",
735 pathname, strerror(errno));
736 return prog_fd;
737}
738
/* Ways a program can be handed over on the command line. The values also
 * index the opt_tbl arrays that enable each mode.
 */
enum bpf_mode {
	CBPF_BYTECODE,	/* "bytecode" / "bc" */
	CBPF_FILE,	/* "bytecode-file" / "bcf" */
	EBPF_OBJECT,	/* "object-file" / "obj" */
	EBPF_PINNED,	/* "object-pinned" / "pinned" / "fd" */
	BPF_MODE_MAX,	/* number of modes; not a mode itself */
};
746
e4225669
DB
747static int bpf_parse(enum bpf_prog_type *type, enum bpf_mode *mode,
748 struct bpf_cfg_in *cfg, const bool *opt_tbl)
32e93fb7 749{
32e93fb7 750 const char *file, *section, *uds_name;
32e93fb7 751 bool verbose = false;
e4225669 752 int i, ret, argc;
91d88eeb
DB
753 char **argv;
754
e4225669
DB
755 argv = cfg->argv;
756 argc = cfg->argc;
91d88eeb
DB
757
758 if (opt_tbl[CBPF_BYTECODE] &&
759 (matches(*argv, "bytecode") == 0 ||
760 strcmp(*argv, "bc") == 0)) {
761 *mode = CBPF_BYTECODE;
762 } else if (opt_tbl[CBPF_FILE] &&
763 (matches(*argv, "bytecode-file") == 0 ||
764 strcmp(*argv, "bcf") == 0)) {
765 *mode = CBPF_FILE;
766 } else if (opt_tbl[EBPF_OBJECT] &&
767 (matches(*argv, "object-file") == 0 ||
768 strcmp(*argv, "obj") == 0)) {
769 *mode = EBPF_OBJECT;
770 } else if (opt_tbl[EBPF_PINNED] &&
771 (matches(*argv, "object-pinned") == 0 ||
772 matches(*argv, "pinned") == 0 ||
773 matches(*argv, "fd") == 0)) {
774 *mode = EBPF_PINNED;
32e93fb7
DB
775 } else {
776 fprintf(stderr, "What mode is \"%s\"?\n", *argv);
777 return -1;
778 }
779
780 NEXT_ARG();
781 file = section = uds_name = NULL;
91d88eeb 782 if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
32e93fb7
DB
783 file = *argv;
784 NEXT_ARG_FWD();
785
91d88eeb
DB
786 if (*type == BPF_PROG_TYPE_UNSPEC) {
787 if (argc > 0 && matches(*argv, "type") == 0) {
788 NEXT_ARG();
e4225669
DB
789 for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
790 i++) {
791 if (!__bpf_prog_meta[i].type)
792 continue;
793 if (!matches(*argv,
794 __bpf_prog_meta[i].type)) {
795 *type = i;
796 break;
797 }
798 }
799
800 if (*type == BPF_PROG_TYPE_UNSPEC) {
91d88eeb
DB
801 fprintf(stderr, "What type is \"%s\"?\n",
802 *argv);
803 return -1;
804 }
805 NEXT_ARG_FWD();
806 } else {
807 *type = BPF_PROG_TYPE_SCHED_CLS;
808 }
809 }
810
e4225669 811 section = bpf_prog_to_default_section(*type);
32e93fb7
DB
812 if (argc > 0 && matches(*argv, "section") == 0) {
813 NEXT_ARG();
814 section = *argv;
815 NEXT_ARG_FWD();
816 }
817
e4225669
DB
818 if (__bpf_prog_meta[*type].may_uds_export) {
819 uds_name = getenv(BPF_ENV_UDS);
820 if (argc > 0 && !uds_name &&
821 matches(*argv, "export") == 0) {
822 NEXT_ARG();
823 uds_name = *argv;
824 NEXT_ARG_FWD();
825 }
32e93fb7
DB
826 }
827
828 if (argc > 0 && matches(*argv, "verbose") == 0) {
829 verbose = true;
830 NEXT_ARG_FWD();
831 }
832
833 PREV_ARG();
834 }
835
91d88eeb 836 if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
e4225669 837 ret = bpf_ops_parse(argc, argv, cfg->ops, *mode == CBPF_FILE);
91d88eeb
DB
838 else if (*mode == EBPF_OBJECT)
839 ret = bpf_obj_open(file, *type, section, verbose);
840 else if (*mode == EBPF_PINNED)
21856018 841 ret = bpf_obj_pinned(file, *type);
91d88eeb 842 else
32e93fb7
DB
843 return -1;
844
e4225669
DB
845 cfg->object = file;
846 cfg->section = section;
847 cfg->uds = uds_name;
848 cfg->argc = argc;
849 cfg->argv = argv;
91d88eeb
DB
850
851 return ret;
852}
853
e4225669
DB
854static int bpf_parse_opt_tbl(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
855 const struct bpf_cfg_ops *ops, void *nl,
856 const bool *opt_tbl)
91d88eeb
DB
857{
858 struct sock_filter opcodes[BPF_MAXINSNS];
91d88eeb 859 char annotation[256];
91d88eeb
DB
860 enum bpf_mode mode;
861 int ret;
862
e4225669
DB
863 cfg->ops = opcodes;
864 ret = bpf_parse(&type, &mode, cfg, opt_tbl);
865 cfg->ops = NULL;
91d88eeb
DB
866 if (ret < 0)
867 return ret;
868
e4225669
DB
869 if (mode == CBPF_BYTECODE || mode == CBPF_FILE)
870 ops->cbpf_cb(nl, opcodes, ret);
91d88eeb 871 if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
32e93fb7 872 snprintf(annotation, sizeof(annotation), "%s:[%s]",
e4225669
DB
873 basename(cfg->object), mode == EBPF_PINNED ?
874 "*fsobj" : cfg->section);
875 ops->ebpf_cb(nl, ret, annotation);
32e93fb7
DB
876 }
877
91d88eeb
DB
878 return 0;
879}
32e93fb7 880
e4225669
DB
881int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
882 const struct bpf_cfg_ops *ops, void *nl)
883{
884 bool opt_tbl[BPF_MODE_MAX] = {};
885
886 if (ops->cbpf_cb) {
887 opt_tbl[CBPF_BYTECODE] = true;
888 opt_tbl[CBPF_FILE] = true;
889 }
890
891 if (ops->ebpf_cb) {
892 opt_tbl[EBPF_OBJECT] = true;
893 opt_tbl[EBPF_PINNED] = true;
894 }
895
896 return bpf_parse_opt_tbl(type, cfg, ops, nl, opt_tbl);
897}
898
91d88eeb
DB
899int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
900{
901 enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
902 const bool opt_tbl[BPF_MODE_MAX] = {
91d88eeb
DB
903 [EBPF_OBJECT] = true,
904 [EBPF_PINNED] = true,
905 };
906 const struct bpf_elf_map test = {
907 .type = BPF_MAP_TYPE_PROG_ARRAY,
908 .size_key = sizeof(int),
909 .size_value = sizeof(int),
910 };
e4225669
DB
911 struct bpf_cfg_in cfg = {
912 .argc = argc,
913 .argv = argv,
914 };
ecb05c0f 915 struct bpf_map_ext ext = {};
91d88eeb 916 int ret, prog_fd, map_fd;
91d88eeb
DB
917 enum bpf_mode mode;
918 uint32_t map_key;
919
e4225669 920 prog_fd = bpf_parse(&type, &mode, &cfg, opt_tbl);
91d88eeb
DB
921 if (prog_fd < 0)
922 return prog_fd;
923 if (key) {
924 map_key = *key;
925 } else {
e4225669 926 ret = sscanf(cfg.section, "%*i/%i", &map_key);
91d88eeb 927 if (ret != 1) {
32a121cb 928 fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
91d88eeb
DB
929 ret = -EINVAL;
930 goto out_prog;
931 }
932 }
32e93fb7 933
e4225669 934 map_fd = bpf_obj_get(map_path, type);
91d88eeb
DB
935 if (map_fd < 0) {
936 fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
937 map_path, strerror(errno));
938 ret = map_fd;
939 goto out_prog;
940 }
941
ecb05c0f 942 ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext,
fb24802b
DB
943 offsetof(struct bpf_elf_map, max_elem),
944 type);
91d88eeb
DB
945 if (ret < 0) {
946 fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
947 goto out_map;
948 }
949
950 ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
951 if (ret < 0)
952 fprintf(stderr, "Map update failed: %s\n", strerror(errno));
953out_map:
954 close(map_fd);
955out_prog:
956 close(prog_fd);
957 return ret;
32e93fb7
DB
958}
959
fc4ccce0
DA
960int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
961{
962 union bpf_attr attr = {};
963
964 attr.target_fd = target_fd;
965 attr.attach_bpf_fd = prog_fd;
966 attr.attach_type = type;
967
968 return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
969}
970
971int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
972{
973 union bpf_attr attr = {};
974
975 attr.target_fd = target_fd;
976 attr.attach_type = type;
977
978 return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
979}
980
869d889e
DA
981int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
982 size_t size_insns, const char *license, char *log,
983 size_t size_log)
984{
985 union bpf_attr attr = {};
986
987 attr.prog_type = type;
988 attr.insns = bpf_ptr_to_u64(insns);
989 attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
990 attr.license = bpf_ptr_to_u64(license);
991
992 if (size_log > 0) {
993 attr.log_buf = bpf_ptr_to_u64(log);
994 attr.log_size = size_log;
995 attr.log_level = 1;
996 }
997
998 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
999}
1000
6256f8c9 1001#ifdef HAVE_ELF
32e93fb7
DB
1002struct bpf_elf_prog {
1003 enum bpf_prog_type type;
1004 const struct bpf_insn *insns;
1005 size_t size;
1006 const char *license;
1007};
1008
f6793eec
DB
/* Chained hash table entry mapping a custom pinning id to a sub-path. */
struct bpf_hash_entry {
	unsigned int pinning;		/* lookup key */
	const char *subpath;		/* value handed out for that key */
	struct bpf_hash_entry *next;	/* next entry in the same bucket */
};
1014
ecb05c0f
DB
/* Settings kept in the ELF loading context. */
struct bpf_config {
	unsigned int jit_enabled;
};
1018
32e93fb7 1019struct bpf_elf_ctx {
ecb05c0f 1020 struct bpf_config cfg;
32e93fb7
DB
1021 Elf *elf_fd;
1022 GElf_Ehdr elf_hdr;
1023 Elf_Data *sym_tab;
1024 Elf_Data *str_tab;
1025 int obj_fd;
1026 int map_fds[ELF_MAX_MAPS];
1027 struct bpf_elf_map maps[ELF_MAX_MAPS];
ecb05c0f 1028 struct bpf_map_ext maps_ext[ELF_MAX_MAPS];
32e93fb7
DB
1029 int sym_num;
1030 int map_num;
e4225669 1031 int map_len;
32e93fb7
DB
1032 bool *sec_done;
1033 int sec_maps;
1034 char license[ELF_MAX_LICENSE_LEN];
1035 enum bpf_prog_type type;
1036 bool verbose;
1037 struct bpf_elf_st stat;
f6793eec 1038 struct bpf_hash_entry *ht[256];
f31645d1
DB
1039 char *log;
1040 size_t log_size;
32e93fb7
DB
1041};
1042
6256f8c9 1043struct bpf_elf_sec_data {
32e93fb7
DB
1044 GElf_Shdr sec_hdr;
1045 Elf_Data *sec_data;
1046 const char *sec_name;
6256f8c9
DB
1047};
1048
/* Bundle of map-related pointers: fds, object name, stat data and map
 * entries. NOTE(review): the users are outside this part of the file;
 * confirm the exact roles there.
 */
struct bpf_map_data {
	int *fds;
	const char *obj;
	struct bpf_elf_st *st;
	struct bpf_elf_map *ent;
};
1055
f31645d1
DB
1056static __check_format_string(2, 3) void
1057bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
11c39b5e
DB
1058{
1059 va_list vl;
1060
1061 va_start(vl, format);
1062 vfprintf(stderr, format, vl);
1063 va_end(vl);
1064
f31645d1 1065 if (ctx->log && ctx->log[0]) {
afc1a200
DB
1066 if (ctx->verbose) {
1067 fprintf(stderr, "%s\n", ctx->log);
1068 } else {
1069 unsigned int off = 0, len = strlen(ctx->log);
1070
1071 if (len > BPF_MAX_LOG) {
1072 off = len - BPF_MAX_LOG;
1073 fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
1074 off);
1075 }
1076 fprintf(stderr, "%s\n", ctx->log + off);
1077 }
1078
f31645d1
DB
1079 memset(ctx->log, 0, ctx->log_size);
1080 }
1081}
1082
1083static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
1084{
0f74d0f3 1085 const size_t log_max = UINT_MAX >> 8;
f31645d1
DB
1086 size_t log_size = ctx->log_size;
1087 void *ptr;
1088
1089 if (!ctx->log) {
1090 log_size = 65536;
0f74d0f3 1091 } else if (log_size < log_max) {
f31645d1 1092 log_size <<= 1;
0f74d0f3
TG
1093 if (log_size > log_max)
1094 log_size = log_max;
1095 } else {
1096 return -EINVAL;
d937a74b 1097 }
f31645d1
DB
1098
1099 ptr = realloc(ctx->log, log_size);
1100 if (!ptr)
1101 return -ENOMEM;
1102
1103 ctx->log = ptr;
1104 ctx->log_size = log_size;
1105
1106 return 0;
11c39b5e
DB
1107}
1108
4dd3f50a
DB
1109static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
1110 uint32_t size_value, uint32_t max_elem,
612ff099 1111 uint32_t flags, int inner_fd)
11c39b5e 1112{
d17b136f 1113 union bpf_attr attr = {};
67584e3a 1114
67584e3a
ND
1115 attr.map_type = type;
1116 attr.key_size = size_key;
612ff099 1117 attr.value_size = inner_fd ? sizeof(int) : size_value;
67584e3a 1118 attr.max_entries = max_elem;
4dd3f50a 1119 attr.map_flags = flags;
612ff099 1120 attr.inner_map_fd = inner_fd;
11c39b5e
DB
1121
1122 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
1123}
11c39b5e 1124
32e93fb7 1125static int bpf_obj_pin(int fd, const char *pathname)
11c39b5e 1126{
d17b136f 1127 union bpf_attr attr = {};
67584e3a 1128
67584e3a
ND
1129 attr.pathname = bpf_ptr_to_u64(pathname);
1130 attr.bpf_fd = fd;
32e93fb7
DB
1131
1132 return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
1133}
11c39b5e 1134
32e93fb7
DB
/*
 * Hash the file at @object with the "sha1" algorithm of an AF_ALG "hash"
 * socket and store the digest in @out.
 *
 * @object: path of the file to hash, must not be NULL
 * @out:    destination buffer for the digest
 * @len:    size of @out, must be exactly 20 bytes
 *
 * Returns 0 on success, -EINVAL on invalid arguments, and a negative
 * value when one of the socket or file operations fails (a diagnostic is
 * printed to stderr in that case).
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	/* Feed the whole file into the hash socket. */
	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is an off_t; convert it to match the %zu below. */
		fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
			size, (size_t)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	/* Reading from the socket yields the digest. Test for a failed
	 * read explicitly instead of comparing a negative ssize_t against
	 * the unsigned @len.
	 */
	size = read(ofd, out, len);
	if (size < 0 || (size_t)size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
1209
/*
 * Return the hex string form of the object hash of @pathname. The string
 * is computed on the first successful call and kept in a static buffer;
 * later calls return that buffer without looking at @pathname again.
 * Returns NULL when hashing fails.
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool uid_ready;
	static char uid_hex[64];

	if (!uid_ready) {
		uint8_t digest[20];

		if (bpf_obj_hash(pathname, digest, sizeof(digest))) {
			fprintf(stderr, "Object hashing failed!\n");
			return NULL;
		}

		hexstring_n2a(digest, sizeof(digest), uid_hex,
			      sizeof(uid_hex));
		uid_ready = true;
	}

	return uid_hex;
}
1231
32e93fb7
DB
1232static int bpf_init_env(const char *pathname)
1233{
1234 struct rlimit limit = {
1235 .rlim_cur = RLIM_INFINITY,
1236 .rlim_max = RLIM_INFINITY,
1237 };
1238
1239 /* Don't bother in case we fail! */
1240 setrlimit(RLIMIT_MEMLOCK, &limit);
1241
e4225669 1242 if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) {
32a121cb 1243 fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
32e93fb7
DB
1244 return 0;
1245 }
1246
1247 if (!bpf_get_obj_uid(pathname))
1248 return -1;
1249
1250 return 0;
6256f8c9
DB
1251}
1252
f6793eec
DB
1253static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
1254 uint32_t pinning)
1255{
1256 struct bpf_hash_entry *entry;
1257
1258 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1259 while (entry && entry->pinning != pinning)
1260 entry = entry->next;
1261
1262 return entry ? entry->subpath : NULL;
1263}
1264
1265static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
1266 uint32_t pinning)
11c39b5e 1267{
32e93fb7
DB
1268 switch (pinning) {
1269 case PIN_OBJECT_NS:
1270 case PIN_GLOBAL_NS:
1271 return false;
1272 case PIN_NONE:
32e93fb7 1273 return true;
f6793eec
DB
1274 default:
1275 return !bpf_custom_pinning(ctx, pinning);
32e93fb7
DB
1276 }
1277}
1278
1279static void bpf_make_pathname(char *pathname, size_t len, const char *name,
f6793eec 1280 const struct bpf_elf_ctx *ctx, uint32_t pinning)
32e93fb7
DB
1281{
1282 switch (pinning) {
1283 case PIN_OBJECT_NS:
e4225669
DB
1284 snprintf(pathname, len, "%s/%s/%s",
1285 bpf_get_work_dir(ctx->type),
32e93fb7
DB
1286 bpf_get_obj_uid(NULL), name);
1287 break;
1288 case PIN_GLOBAL_NS:
e4225669
DB
1289 snprintf(pathname, len, "%s/%s/%s",
1290 bpf_get_work_dir(ctx->type),
32e93fb7
DB
1291 BPF_DIR_GLOBALS, name);
1292 break;
f6793eec 1293 default:
e4225669
DB
1294 snprintf(pathname, len, "%s/../%s/%s",
1295 bpf_get_work_dir(ctx->type),
f6793eec
DB
1296 bpf_custom_pinning(ctx, pinning), name);
1297 break;
32e93fb7
DB
1298 }
1299}
1300
f6793eec
DB
1301static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
1302 uint32_t pinning)
32e93fb7
DB
1303{
1304 char pathname[PATH_MAX];
1305
e4225669 1306 if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
32e93fb7
DB
1307 return 0;
1308
f6793eec 1309 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
e4225669 1310 return bpf_obj_get(pathname, ctx->type);
32e93fb7
DB
1311}
1312
e4225669 1313static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
32e93fb7 1314{
f6793eec 1315 char tmp[PATH_MAX];
32e93fb7
DB
1316 int ret;
1317
e4225669 1318 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type),
f6793eec
DB
1319 bpf_get_obj_uid(NULL));
1320
1321 ret = mkdir(tmp, S_IRWXU);
1322 if (ret && errno != EEXIST) {
1323 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
1324 return ret;
1325 }
1326
1327 return 0;
1328}
1329
e4225669
DB
1330static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
1331 const char *todo)
f6793eec
DB
1332{
1333 char tmp[PATH_MAX], rem[PATH_MAX], *sub;
1334 int ret;
1335
e4225669 1336 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
f6793eec
DB
1337 snprintf(rem, sizeof(rem), "%s/", todo);
1338 sub = strtok(rem, "/");
32e93fb7 1339
f6793eec
DB
1340 while (sub) {
1341 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
1342 return -EINVAL;
1343
1344 strcat(tmp, sub);
1345 strcat(tmp, "/");
32e93fb7 1346
f6793eec 1347 ret = mkdir(tmp, S_IRWXU);
32e93fb7 1348 if (ret && errno != EEXIST) {
f6793eec 1349 fprintf(stderr, "mkdir %s failed: %s\n", tmp,
32e93fb7
DB
1350 strerror(errno));
1351 return ret;
1352 }
f6793eec
DB
1353
1354 sub = strtok(NULL, "/");
32e93fb7
DB
1355 }
1356
f6793eec
DB
1357 return 0;
1358}
1359
1360static int bpf_place_pinned(int fd, const char *name,
1361 const struct bpf_elf_ctx *ctx, uint32_t pinning)
1362{
1363 char pathname[PATH_MAX];
1364 const char *tmp;
1365 int ret = 0;
1366
e4225669 1367 if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
f6793eec
DB
1368 return 0;
1369
1370 if (pinning == PIN_OBJECT_NS)
e4225669 1371 ret = bpf_make_obj_path(ctx);
f6793eec 1372 else if ((tmp = bpf_custom_pinning(ctx, pinning)))
e4225669 1373 ret = bpf_make_custom_path(ctx, tmp);
f6793eec
DB
1374 if (ret < 0)
1375 return ret;
1376
1377 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
32e93fb7
DB
1378 return bpf_obj_pin(fd, pathname);
1379}
1380
f31645d1
DB
1381static void bpf_prog_report(int fd, const char *section,
1382 const struct bpf_elf_prog *prog,
1383 struct bpf_elf_ctx *ctx)
32e93fb7 1384{
afc1a200
DB
1385 unsigned int insns = prog->size / sizeof(struct bpf_insn);
1386
1387 fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
f31645d1
DB
1388 fd < 0 ? "rejected: " : "loaded",
1389 fd < 0 ? strerror(errno) : "",
1390 fd < 0 ? errno : fd);
1391
1392 fprintf(stderr, " - Type: %u\n", prog->type);
afc1a200
DB
1393 fprintf(stderr, " - Instructions: %u (%u over limit)\n",
1394 insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
f31645d1
DB
1395 fprintf(stderr, " - License: %s\n\n", prog->license);
1396
1397 bpf_dump_error(ctx, "Verifier analysis:\n\n");
1398}
32e93fb7 1399
f31645d1
DB
1400static int bpf_prog_attach(const char *section,
1401 const struct bpf_elf_prog *prog,
1402 struct bpf_elf_ctx *ctx)
1403{
1404 int tries = 0, fd;
1405retry:
32e93fb7
DB
1406 errno = 0;
1407 fd = bpf_prog_load(prog->type, prog->insns, prog->size,
f31645d1
DB
1408 prog->license, ctx->log, ctx->log_size);
1409 if (fd < 0 || ctx->verbose) {
1410 /* The verifier log is pretty chatty, sometimes so chatty
1411 * on larger programs, that we could fail to dump everything
1412 * into our buffer. Still, try to give a debuggable error
1413 * log for the user, so enlarge it and re-fail.
1414 */
1415 if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
0f74d0f3 1416 if (tries++ < 10 && !bpf_log_realloc(ctx))
f31645d1
DB
1417 goto retry;
1418
32a121cb 1419 fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
f31645d1
DB
1420 ctx->log_size, tries);
1421 return fd;
1422 }
1423
1424 bpf_prog_report(fd, section, prog, ctx);
32e93fb7
DB
1425 }
1426
1427 return fd;
1428}
1429
f31645d1
DB
1430static void bpf_map_report(int fd, const char *name,
1431 const struct bpf_elf_map *map,
612ff099 1432 struct bpf_elf_ctx *ctx, int inner_fd)
f31645d1
DB
1433{
1434 fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
1435 fd < 0 ? "rejected: " : "loaded",
1436 fd < 0 ? strerror(errno) : "",
1437 fd < 0 ? errno : fd);
1438
1439 fprintf(stderr, " - Type: %u\n", map->type);
1440 fprintf(stderr, " - Identifier: %u\n", map->id);
1441 fprintf(stderr, " - Pinning: %u\n", map->pinning);
1442 fprintf(stderr, " - Size key: %u\n", map->size_key);
612ff099
DB
1443 fprintf(stderr, " - Size value: %u\n",
1444 inner_fd ? (int)sizeof(int) : map->size_value);
4dd3f50a
DB
1445 fprintf(stderr, " - Max elems: %u\n", map->max_elem);
1446 fprintf(stderr, " - Flags: %#x\n\n", map->flags);
f31645d1
DB
1447}
1448
612ff099
DB
1449static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
1450{
1451 int i;
1452
1453 for (i = 0; i < ctx->map_num; i++) {
1454 if (ctx->maps[i].id != id)
1455 continue;
1456 if (ctx->map_fds[i] < 0)
1457 return -EINVAL;
1458
1459 return ctx->map_fds[i];
1460 }
1461
1462 return -ENOENT;
1463}
1464
612ff099
DB
1465static void bpf_report_map_in_map(int outer_fd, int inner_fd, uint32_t idx)
1466{
1467 struct bpf_elf_map outer_map;
1468 int ret;
1469
1470 fprintf(stderr, "Cannot insert map into map! ");
1471
ecb05c0f 1472 ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL);
612ff099
DB
1473 if (!ret) {
1474 if (idx >= outer_map.max_elem &&
1475 outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
1476 fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
1477 outer_map.max_elem, idx);
1478 return;
1479 }
1480 }
1481
1482 fprintf(stderr, "Different map specs used for outer and inner map?\n");
1483}
1484
1485static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
1486{
1487 return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1488 map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
1489}
1490
ecb05c0f
DB
1491static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
1492 const struct bpf_elf_map *map, struct bpf_map_ext *ext,
1493 int *have_map_in_map)
32e93fb7 1494{
612ff099 1495 int fd, ret, map_inner_fd = 0;
32e93fb7 1496
f6793eec 1497 fd = bpf_probe_pinned(name, ctx, map->pinning);
32e93fb7 1498 if (fd > 0) {
ecb05c0f 1499 ret = bpf_map_selfcheck_pinned(fd, map, ext,
91d88eeb 1500 offsetof(struct bpf_elf_map,
fb24802b 1501 id), ctx->type);
9e607f2e
DB
1502 if (ret < 0) {
1503 close(fd);
1504 fprintf(stderr, "Map \'%s\' self-check failed!\n",
1505 name);
1506 return ret;
1507 }
f31645d1 1508 if (ctx->verbose)
32e93fb7
DB
1509 fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
1510 name);
1511 return fd;
1512 }
1513
612ff099
DB
1514 if (have_map_in_map && bpf_is_map_in_map_type(map)) {
1515 (*have_map_in_map)++;
1516 if (map->inner_id)
1517 return 0;
1518 fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
1519 name);
1520 return -EINVAL;
1521 }
1522
1523 if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
1524 map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
1525 if (map_inner_fd < 0) {
1526 fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
1527 name, map->inner_id);
1528 return -EINVAL;
1529 }
1530 }
1531
32e93fb7
DB
1532 errno = 0;
1533 fd = bpf_map_create(map->type, map->size_key, map->size_value,
612ff099 1534 map->max_elem, map->flags, map_inner_fd);
f31645d1 1535 if (fd < 0 || ctx->verbose) {
612ff099 1536 bpf_map_report(fd, name, map, ctx, map_inner_fd);
32e93fb7
DB
1537 if (fd < 0)
1538 return fd;
1539 }
1540
f6793eec 1541 ret = bpf_place_pinned(fd, name, ctx, map->pinning);
32e93fb7
DB
1542 if (ret < 0 && errno != EEXIST) {
1543 fprintf(stderr, "Could not pin %s map: %s\n", name,
1544 strerror(errno));
1545 close(fd);
1546 return ret;
1547 }
1548
1549 return fd;
1550}
1551
32e93fb7
DB
1552static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
1553 const GElf_Sym *sym)
1554{
1555 return ctx->str_tab->d_buf + sym->st_name;
1556}
1557
1558static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
1559{
1560 GElf_Sym sym;
11c39b5e
DB
1561 int i;
1562
32e93fb7
DB
1563 for (i = 0; i < ctx->sym_num; i++) {
1564 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1565 continue;
1566
5230a2ed
DB
1567 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1568 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
32e93fb7 1569 sym.st_shndx != ctx->sec_maps ||
e4225669 1570 sym.st_value / ctx->map_len != which)
32e93fb7
DB
1571 continue;
1572
1573 return bpf_str_tab_name(ctx, &sym);
11c39b5e 1574 }
32e93fb7
DB
1575
1576 return NULL;
11c39b5e
DB
1577}
1578
32e93fb7 1579static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
11c39b5e 1580{
612ff099 1581 int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
32e93fb7 1582 const char *map_name;
11c39b5e 1583
32e93fb7
DB
1584 for (i = 0; i < ctx->map_num; i++) {
1585 map_name = bpf_map_fetch_name(ctx, i);
1586 if (!map_name)
1587 return -EIO;
11c39b5e 1588
ecb05c0f
DB
1589 fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
1590 &ctx->maps_ext[i], &have_map_in_map);
612ff099
DB
1591 if (fd < 0)
1592 return fd;
1593
1594 ctx->map_fds[i] = !fd ? -1 : fd;
1595 }
1596
1597 for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
1598 if (ctx->map_fds[i] >= 0)
1599 continue;
1600
1601 map_name = bpf_map_fetch_name(ctx, i);
1602 if (!map_name)
1603 return -EIO;
1604
ecb05c0f
DB
1605 fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
1606 &ctx->maps_ext[i], NULL);
32e93fb7
DB
1607 if (fd < 0)
1608 return fd;
11c39b5e 1609
32e93fb7 1610 ctx->map_fds[i] = fd;
11c39b5e
DB
1611 }
1612
612ff099
DB
1613 for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
1614 if (!ctx->maps[i].id ||
1615 ctx->maps[i].inner_id ||
1616 ctx->maps[i].inner_idx == -1)
1617 continue;
1618
1619 inner_fd = ctx->map_fds[i];
1620 inner_idx = ctx->maps[i].inner_idx;
1621
1622 for (j = 0; j < ctx->map_num; j++) {
1623 if (!bpf_is_map_in_map_type(&ctx->maps[j]))
1624 continue;
1625 if (ctx->maps[j].inner_id != ctx->maps[i].id)
1626 continue;
1627
1628 ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
1629 &inner_fd, BPF_ANY);
1630 if (ret < 0) {
1631 bpf_report_map_in_map(ctx->map_fds[j],
1632 inner_fd, inner_idx);
1633 return ret;
1634 }
1635 }
1636 }
1637
11c39b5e 1638 return 0;
11c39b5e
DB
1639}
1640
e4225669
DB
1641static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
1642{
1643 int i, num = 0;
1644 GElf_Sym sym;
1645
1646 for (i = 0; i < ctx->sym_num; i++) {
1647 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1648 continue;
1649
1650 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1651 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
1652 sym.st_shndx != ctx->sec_maps)
1653 continue;
1654 num++;
1655 }
1656
1657 return num;
1658}
1659
32e93fb7
DB
1660static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
1661 struct bpf_elf_sec_data *data)
11c39b5e 1662{
32e93fb7 1663 Elf_Data *sec_edata;
11c39b5e
DB
1664 GElf_Shdr sec_hdr;
1665 Elf_Scn *sec_fd;
11c39b5e
DB
1666 char *sec_name;
1667
32e93fb7 1668 memset(data, 0, sizeof(*data));
11c39b5e 1669
32e93fb7 1670 sec_fd = elf_getscn(ctx->elf_fd, section);
11c39b5e
DB
1671 if (!sec_fd)
1672 return -EINVAL;
11c39b5e
DB
1673 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
1674 return -EIO;
1675
32e93fb7 1676 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
11c39b5e
DB
1677 sec_hdr.sh_name);
1678 if (!sec_name || !sec_hdr.sh_size)
1679 return -ENOENT;
1680
1681 sec_edata = elf_getdata(sec_fd, NULL);
1682 if (!sec_edata || elf_getdata(sec_fd, sec_edata))
1683 return -EIO;
1684
32e93fb7 1685 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
11c39b5e 1686
32e93fb7
DB
1687 data->sec_name = sec_name;
1688 data->sec_data = sec_edata;
11c39b5e
DB
1689 return 0;
1690}
1691
e4225669
DB
1692struct bpf_elf_map_min {
1693 __u32 type;
1694 __u32 size_key;
1695 __u32 size_value;
1696 __u32 max_elem;
1697};
11c39b5e 1698
e4225669
DB
1699static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
1700 struct bpf_elf_sec_data *data)
1701{
1702 ctx->map_num = data->sec_data->d_size;
32e93fb7
DB
1703 ctx->sec_maps = section;
1704 ctx->sec_done[section] = true;
11c39b5e 1705
e4225669 1706 if (ctx->map_num > sizeof(ctx->maps)) {
32e93fb7
DB
1707 fprintf(stderr, "Too many BPF maps in ELF section!\n");
1708 return -ENOMEM;
1709 }
11c39b5e 1710
e4225669
DB
1711 memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
1712 return 0;
1713}
1714
1715static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
1716{
1717 GElf_Sym sym;
1718 int off, i;
1719
1720 for (off = 0; off < end; off += ctx->map_len) {
1721 /* Order doesn't need to be linear here, hence we walk
1722 * the table again.
1723 */
1724 for (i = 0; i < ctx->sym_num; i++) {
1725 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1726 continue;
1727 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1728 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
1729 sym.st_shndx != ctx->sec_maps)
1730 continue;
1731 if (sym.st_value == off)
1732 break;
1733 if (i == ctx->sym_num - 1)
1734 return -1;
1735 }
1736 }
1737
1738 return off == end ? 0 : -1;
1739}
1740
1741static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
1742{
1743 struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
1744 int i, sym_num = bpf_map_num_sym(ctx);
1745 __u8 *buff;
1746
1747 if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
1748 fprintf(stderr, "%u maps not supported in current map section!\n",
1749 sym_num);
1750 return -EINVAL;
1751 }
1752
1753 if (ctx->map_num % sym_num != 0 ||
1754 ctx->map_num % sizeof(__u32) != 0) {
1755 fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
1756 return -EINVAL;
1757 }
1758
1759 ctx->map_len = ctx->map_num / sym_num;
1760 if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
1761 fprintf(stderr, "Different struct bpf_elf_map in use!\n");
1762 return -EINVAL;
1763 }
1764
1765 if (ctx->map_len == sizeof(struct bpf_elf_map)) {
1766 ctx->map_num = sym_num;
1767 return 0;
1768 } else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
1769 fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
1770 return -EINVAL;
1771 } else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
1772 fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
1773 return -EINVAL;
1774 }
1775
1776 ctx->map_num = sym_num;
1777 for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
1778 i++, buff += ctx->map_len) {
1779 /* The fixup leaves the rest of the members as zero, which
1780 * is fine currently, but option exist to set some other
1781 * default value as well when needed in future.
1782 */
1783 memcpy(&fixup[i], buff, ctx->map_len);
1784 }
1785
1786 memcpy(ctx->maps, fixup, sizeof(fixup));
1787
1788 printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
1789 sizeof(struct bpf_elf_map) - ctx->map_len);
32e93fb7
DB
1790 return 0;
1791}
11c39b5e 1792
32e93fb7
DB
1793static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
1794 struct bpf_elf_sec_data *data)
1795{
1796 if (data->sec_data->d_size > sizeof(ctx->license))
1797 return -ENOMEM;
11c39b5e 1798
32e93fb7
DB
1799 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
1800 ctx->sec_done[section] = true;
1801 return 0;
1802}
11c39b5e 1803
32e93fb7
DB
1804static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
1805 struct bpf_elf_sec_data *data)
1806{
1807 ctx->sym_tab = data->sec_data;
1808 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
1809 ctx->sec_done[section] = true;
11c39b5e
DB
1810 return 0;
1811}
1812
32e93fb7
DB
1813static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
1814 struct bpf_elf_sec_data *data)
11c39b5e 1815{
32e93fb7
DB
1816 ctx->str_tab = data->sec_data;
1817 ctx->sec_done[section] = true;
1818 return 0;
1819}
11c39b5e 1820
afc1a200
DB
1821static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
1822{
1823 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
1824}
1825
32e93fb7
DB
1826static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
1827{
1828 struct bpf_elf_sec_data data;
1829 int i, ret = -1;
11c39b5e 1830
32e93fb7
DB
1831 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1832 ret = bpf_fill_section_data(ctx, i, &data);
11c39b5e
DB
1833 if (ret < 0)
1834 continue;
1835
cce3d466
DB
1836 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1837 !strcmp(data.sec_name, ELF_SECTION_MAPS))
e4225669 1838 ret = bpf_fetch_maps_begin(ctx, i, &data);
cce3d466
DB
1839 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1840 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
32e93fb7 1841 ret = bpf_fetch_license(ctx, i, &data);
cce3d466
DB
1842 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
1843 !strcmp(data.sec_name, ".symtab"))
32e93fb7
DB
1844 ret = bpf_fetch_symtab(ctx, i, &data);
1845 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
cce3d466 1846 !strcmp(data.sec_name, ".strtab"))
32e93fb7
DB
1847 ret = bpf_fetch_strtab(ctx, i, &data);
1848 if (ret < 0) {
afc1a200 1849 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
32a121cb 1850 i);
e4225669 1851 return ret;
11c39b5e 1852 }
32e93fb7
DB
1853 }
1854
afc1a200 1855 if (bpf_has_map_data(ctx)) {
e4225669
DB
1856 ret = bpf_fetch_maps_end(ctx);
1857 if (ret < 0) {
1858 fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
1859 return ret;
1860 }
1861
32e93fb7
DB
1862 ret = bpf_maps_attach_all(ctx);
1863 if (ret < 0) {
1864 fprintf(stderr, "Error loading maps into kernel!\n");
1865 return ret;
11c39b5e
DB
1866 }
1867 }
1868
1869 return ret;
1870}
1871
e4225669
DB
1872static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
1873 bool *sseen)
11c39b5e 1874{
32e93fb7
DB
1875 struct bpf_elf_sec_data data;
1876 struct bpf_elf_prog prog;
1877 int ret, i, fd = -1;
11c39b5e 1878
32e93fb7
DB
1879 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1880 if (ctx->sec_done[i])
11c39b5e
DB
1881 continue;
1882
32e93fb7 1883 ret = bpf_fill_section_data(ctx, i, &data);
cce3d466
DB
1884 if (ret < 0 ||
1885 !(data.sec_hdr.sh_type == SHT_PROGBITS &&
1886 data.sec_hdr.sh_flags & SHF_EXECINSTR &&
1887 !strcmp(data.sec_name, section)))
11c39b5e
DB
1888 continue;
1889
e4225669
DB
1890 *sseen = true;
1891
32e93fb7
DB
1892 memset(&prog, 0, sizeof(prog));
1893 prog.type = ctx->type;
1894 prog.insns = data.sec_data->d_buf;
1895 prog.size = data.sec_data->d_size;
1896 prog.license = ctx->license;
11c39b5e 1897
f31645d1 1898 fd = bpf_prog_attach(section, &prog, ctx);
32e93fb7 1899 if (fd < 0)
e4225669 1900 return fd;
11c39b5e 1901
32e93fb7 1902 ctx->sec_done[i] = true;
11c39b5e
DB
1903 break;
1904 }
1905
32e93fb7 1906 return fd;
11c39b5e
DB
1907}
1908
ecb05c0f
DB
/* Counters for the prog array (tail call) maps referenced by a program. */
struct bpf_tail_call_props {
	unsigned int total;	/* relocations that point at a prog array */
	unsigned int jited;	/* subset counted as having a JITed owner */
};
1913
32e93fb7
DB
1914static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
1915 struct bpf_elf_sec_data *data_relo,
ecb05c0f
DB
1916 struct bpf_elf_sec_data *data_insn,
1917 struct bpf_tail_call_props *props)
11c39b5e 1918{
32e93fb7
DB
1919 Elf_Data *idata = data_insn->sec_data;
1920 GElf_Shdr *rhdr = &data_relo->sec_hdr;
1921 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
1922 struct bpf_insn *insns = idata->d_buf;
1923 unsigned int num_insns = idata->d_size / sizeof(*insns);
11c39b5e 1924
32e93fb7
DB
1925 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
1926 unsigned int ioff, rmap;
1927 GElf_Rel relo;
1928 GElf_Sym sym;
1929
1930 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
1931 return -EIO;
1932
1933 ioff = relo.r_offset / sizeof(struct bpf_insn);
1934 if (ioff >= num_insns ||
a576c6b9 1935 insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
32a121cb 1936 fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
a576c6b9
DB
1937 ioff);
1938 if (ioff < num_insns &&
1939 insns[ioff].code == (BPF_JMP | BPF_CALL))
32a121cb 1940 fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
32e93fb7 1941 return -EINVAL;
a576c6b9 1942 }
32e93fb7
DB
1943
1944 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
1945 return -EIO;
2486337a 1946 if (sym.st_shndx != ctx->sec_maps) {
32a121cb 1947 fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
2486337a
DB
1948 relo_ent, sym.st_shndx);
1949 return -EIO;
1950 }
32e93fb7 1951
e4225669 1952 rmap = sym.st_value / ctx->map_len;
32e93fb7
DB
1953 if (rmap >= ARRAY_SIZE(ctx->map_fds))
1954 return -EINVAL;
1955 if (!ctx->map_fds[rmap])
1956 return -EINVAL;
ecb05c0f
DB
1957 if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
1958 props->total++;
1959 if (ctx->maps_ext[rmap].owner.jited ||
1960 (ctx->maps_ext[rmap].owner.type == 0 &&
1961 ctx->cfg.jit_enabled))
1962 props->jited++;
1963 }
32e93fb7
DB
1964
1965 if (ctx->verbose)
32a121cb 1966 fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
32e93fb7
DB
1967 bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
1968 data_insn->sec_name, ioff);
11c39b5e 1969
32e93fb7
DB
1970 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
1971 insns[ioff].imm = ctx->map_fds[rmap];
1972 }
1973
1974 return 0;
1975}
1976
afc1a200 1977static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
e4225669 1978 bool *lderr, bool *sseen)
32e93fb7
DB
1979{
1980 struct bpf_elf_sec_data data_relo, data_insn;
1981 struct bpf_elf_prog prog;
1982 int ret, idx, i, fd = -1;
1983
1984 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
ecb05c0f
DB
1985 struct bpf_tail_call_props props = {};
1986
32e93fb7
DB
1987 ret = bpf_fill_section_data(ctx, i, &data_relo);
1988 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
11c39b5e
DB
1989 continue;
1990
32e93fb7 1991 idx = data_relo.sec_hdr.sh_info;
e4225669 1992
32e93fb7 1993 ret = bpf_fill_section_data(ctx, idx, &data_insn);
cce3d466
DB
1994 if (ret < 0 ||
1995 !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
1996 data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
1997 !strcmp(data_insn.sec_name, section)))
11c39b5e 1998 continue;
32e93fb7 1999
e4225669
DB
2000 *sseen = true;
2001
ecb05c0f 2002 ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
c9c3720d
DB
2003 if (ret < 0) {
2004 *lderr = true;
e4225669 2005 return ret;
c9c3720d 2006 }
11c39b5e 2007
32e93fb7
DB
2008 memset(&prog, 0, sizeof(prog));
2009 prog.type = ctx->type;
2010 prog.insns = data_insn.sec_data->d_buf;
2011 prog.size = data_insn.sec_data->d_size;
2012 prog.license = ctx->license;
2013
f31645d1 2014 fd = bpf_prog_attach(section, &prog, ctx);
afc1a200
DB
2015 if (fd < 0) {
2016 *lderr = true;
ecb05c0f
DB
2017 if (props.total) {
2018 if (ctx->cfg.jit_enabled &&
2019 props.total != props.jited)
2020 fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
2021 props.jited, props.total);
2022 if (!ctx->cfg.jit_enabled &&
2023 props.jited)
2024 fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
2025 props.jited, props.total);
2026 }
e4225669 2027 return fd;
afc1a200 2028 }
11c39b5e 2029
32e93fb7
DB
2030 ctx->sec_done[i] = true;
2031 ctx->sec_done[idx] = true;
11c39b5e
DB
2032 break;
2033 }
2034
32e93fb7 2035 return fd;
11c39b5e
DB
2036}
2037
/*
 * Load the program found in @section. With map data present the
 * relocating loader is tried first; the plain loader is the fallback
 * unless the first one reported a load error. Returns the program fd or
 * a negative value; a missing section is reported on stderr.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool load_failed = false, seen = false;
	int fd = -1;

	if (bpf_has_map_data(ctx))
		fd = bpf_fetch_prog_relo(ctx, section, &load_failed, &seen);

	if (fd < 0 && !load_failed)
		fd = bpf_fetch_prog(ctx, section, &seen);

	if (fd < 0 && !seen)
		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
			section);

	return fd;
}
2052
910b543d
DB
2053static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
2054{
2055 int i;
2056
2057 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
2058 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
2059 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
2060 return i;
2061 return -1;
2062}
2063
ecb05c0f
DB
2064struct bpf_jited_aux {
2065 int prog_fd;
2066 int map_fd;
2067 struct bpf_prog_data prog;
2068 struct bpf_map_ext map;
2069};
2070
2071static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
2072{
2073 char file[PATH_MAX], buff[4096];
2074 unsigned int val;
2075 FILE *fp;
2076
2077 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
2078 memset(prog, 0, sizeof(*prog));
2079
2080 fp = fopen(file, "r");
2081 if (!fp) {
2082 fprintf(stderr, "No procfs support?!\n");
2083 return -EIO;
2084 }
2085
2086 while (fgets(buff, sizeof(buff), fp)) {
2087 if (sscanf(buff, "prog_type:\t%u", &val) == 1)
2088 prog->type = val;
2089 else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
2090 prog->jited = val;
2091 }
2092
2093 fclose(fp);
2094 return 0;
2095}
2096
2097static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
2098{
2099 struct bpf_elf_map tmp;
2100 int ret;
2101
2102 ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
2103 if (!ret)
2104 ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);
2105
2106 return ret;
2107}
2108
32e93fb7 2109static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
473d7840 2110{
32e93fb7
DB
2111 struct bpf_elf_sec_data data;
2112 uint32_t map_id, key_id;
910b543d 2113 int fd, i, ret, idx;
473d7840 2114
32e93fb7
DB
2115 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
2116 if (ctx->sec_done[i])
473d7840
DB
2117 continue;
2118
32e93fb7 2119 ret = bpf_fill_section_data(ctx, i, &data);
473d7840
DB
2120 if (ret < 0)
2121 continue;
2122
910b543d
DB
2123 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
2124 if (ret != 2)
32e93fb7 2125 continue;
910b543d
DB
2126
2127 idx = bpf_find_map_by_id(ctx, map_id);
2128 if (idx < 0)
473d7840
DB
2129 continue;
2130
32e93fb7
DB
2131 fd = bpf_fetch_prog_sec(ctx, data.sec_name);
2132 if (fd < 0)
473d7840
DB
2133 return -EIO;
2134
910b543d
DB
2135 ret = bpf_map_update(ctx->map_fds[idx], &key_id,
2136 &fd, BPF_ANY);
afc1a200 2137 if (ret < 0) {
ecb05c0f
DB
2138 struct bpf_jited_aux aux = {};
2139
2140 ret = -errno;
2141 if (errno == E2BIG) {
afc1a200
DB
2142 fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
2143 key_id, map_id);
ecb05c0f
DB
2144 return ret;
2145 }
2146
2147 aux.map_fd = ctx->map_fds[idx];
2148 aux.prog_fd = fd;
2149
2150 if (bpf_tail_call_get_aux(&aux))
2151 return ret;
2152 if (!aux.map.owner.type)
2153 return ret;
2154
2155 if (aux.prog.type != aux.map.owner.type)
2156 fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n",
2157 aux.map.owner.type, aux.prog.type);
2158 if (aux.prog.jited != aux.map.owner.jited)
2159 fprintf(stderr, "Tail call map %s jited, but prog %s!\n",
2160 aux.map.owner.jited ? "is" : "not",
2161 aux.prog.jited ? "is" : "not");
2162 return ret;
afc1a200 2163 }
473d7840 2164
32e93fb7 2165 ctx->sec_done[i] = true;
473d7840
DB
2166 }
2167
2168 return 0;
2169}
2170
32e93fb7 2171static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
11c39b5e 2172{
32e93fb7
DB
2173 struct stat st;
2174 int ret;
11c39b5e 2175
32e93fb7 2176 memset(&ctx->stat, 0, sizeof(ctx->stat));
11c39b5e 2177
32e93fb7
DB
2178 ret = fstat(ctx->obj_fd, &st);
2179 if (ret < 0) {
2180 fprintf(stderr, "Stat of elf file failed: %s\n",
2181 strerror(errno));
2182 return;
2183 }
11c39b5e 2184
32e93fb7
DB
2185 ctx->stat.st_dev = st.st_dev;
2186 ctx->stat.st_ino = st.st_ino;
2187}
2188
f6793eec
DB
/*
 * Read the next "<id> <path>" mapping from @fp, skipping blank lines
 * and lines whose first non-blank character is '#'.
 *
 * Returns 1 with *@id and @path filled in, 0 at end of file, and -1
 * when a line cannot be parsed; the offending line is then copied to
 * @path.
 */
static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
{
	char line[PATH_MAX];

	while (fgets(line, sizeof(line), fp)) {
		/* Skip leading blanks and tabs. */
		char *start = line + strspn(line, " \t");

		if (*start == '#' || *start == '\n' || *start == '\0')
			continue;

		if (sscanf(start, "%i %s\n", id, path) == 2 ||
		    sscanf(start, "%i %s #", id, path) == 2)
			return 1;

		strcpy(path, start);
		return -1;
	}

	return 0;
}
2213
2214static bool bpf_pinning_reserved(uint32_t pinning)
2215{
2216 switch (pinning) {
2217 case PIN_NONE:
2218 case PIN_OBJECT_NS:
2219 case PIN_GLOBAL_NS:
2220 return true;
2221 default:
2222 return false;
2223 }
2224}
2225
2226static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
2227{
2228 struct bpf_hash_entry *entry;
d17b136f 2229 char subpath[PATH_MAX] = {};
f6793eec
DB
2230 uint32_t pinning;
2231 FILE *fp;
2232 int ret;
2233
2234 fp = fopen(db_file, "r");
2235 if (!fp)
2236 return;
2237
f6793eec
DB
2238 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
2239 if (ret == -1) {
2240 fprintf(stderr, "Database %s is corrupted at: %s\n",
2241 db_file, subpath);
2242 fclose(fp);
2243 return;
2244 }
2245
2246 if (bpf_pinning_reserved(pinning)) {
32a121cb
SH
2247 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
2248 db_file, pinning);
f6793eec
DB
2249 continue;
2250 }
2251
2252 entry = malloc(sizeof(*entry));
2253 if (!entry) {
2254 fprintf(stderr, "No memory left for db entry!\n");
2255 continue;
2256 }
2257
2258 entry->pinning = pinning;
2259 entry->subpath = strdup(subpath);
2260 if (!entry->subpath) {
2261 fprintf(stderr, "No memory left for db entry!\n");
2262 free(entry);
2263 continue;
2264 }
2265
2266 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
2267 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
2268 }
2269
2270 fclose(fp);
2271}
2272
2273static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
2274{
2275 struct bpf_hash_entry *entry;
2276 int i;
2277
2278 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
2279 while ((entry = ctx->ht[i]) != NULL) {
2280 ctx->ht[i] = entry->next;
2281 free((char *)entry->subpath);
2282 free(entry);
2283 }
2284 }
2285}
2286
8187b012
DB
2287static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
2288{
2289 if (ctx->elf_hdr.e_type != ET_REL ||
e77fa41d
DB
2290 (ctx->elf_hdr.e_machine != EM_NONE &&
2291 ctx->elf_hdr.e_machine != EM_BPF) ||
8187b012
DB
2292 ctx->elf_hdr.e_version != EV_CURRENT) {
2293 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
2294 return -EINVAL;
2295 }
2296
2297 switch (ctx->elf_hdr.e_ident[EI_DATA]) {
2298 default:
2299 fprintf(stderr, "ELF format error, wrong endianness info?\n");
2300 return -EINVAL;
2301 case ELFDATA2LSB:
2302 if (htons(1) == 1) {
2303 fprintf(stderr,
2304 "We are big endian, eBPF object is little endian!\n");
2305 return -EIO;
2306 }
2307 break;
2308 case ELFDATA2MSB:
2309 if (htons(1) != 1) {
2310 fprintf(stderr,
2311 "We are little endian, eBPF object is big endian!\n");
2312 return -EIO;
2313 }
2314 break;
2315 }
2316
2317 return 0;
2318}
2319
ecb05c0f
DB
2320static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
2321{
2322 static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
2323 int fd;
2324
2325 fd = open(path_jit, O_RDONLY);
2326 if (fd > 0) {
2327 char tmp[16] = {};
2328
2329 if (read(fd, tmp, sizeof(tmp)) > 0)
2330 ctx->cfg.jit_enabled = atoi(tmp);
2331 close(fd);
2332 }
2333}
2334
32e93fb7
DB
2335static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
2336 enum bpf_prog_type type, bool verbose)
2337{
2338 int ret = -EINVAL;
2339
2340 if (elf_version(EV_CURRENT) == EV_NONE ||
2341 bpf_init_env(pathname))
2342 return ret;
2343
2344 memset(ctx, 0, sizeof(*ctx));
ecb05c0f 2345 bpf_get_cfg(ctx);
32e93fb7
DB
2346 ctx->verbose = verbose;
2347 ctx->type = type;
2348
2349 ctx->obj_fd = open(pathname, O_RDONLY);
2350 if (ctx->obj_fd < 0)
2351 return ctx->obj_fd;
2352
2353 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
2354 if (!ctx->elf_fd) {
11c39b5e 2355 ret = -EINVAL;
32e93fb7 2356 goto out_fd;
11c39b5e
DB
2357 }
2358
8187b012
DB
2359 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
2360 ret = -EINVAL;
2361 goto out_fd;
2362 }
2363
32e93fb7
DB
2364 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
2365 &ctx->elf_hdr) {
11c39b5e
DB
2366 ret = -EIO;
2367 goto out_elf;
2368 }
2369
8187b012
DB
2370 ret = bpf_elf_check_ehdr(ctx);
2371 if (ret < 0)
2372 goto out_elf;
2373
32e93fb7
DB
2374 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
2375 sizeof(*(ctx->sec_done)));
2376 if (!ctx->sec_done) {
11c39b5e
DB
2377 ret = -ENOMEM;
2378 goto out_elf;
2379 }
2380
f31645d1
DB
2381 if (ctx->verbose && bpf_log_realloc(ctx)) {
2382 ret = -ENOMEM;
2383 goto out_free;
2384 }
2385
32e93fb7 2386 bpf_save_finfo(ctx);
f6793eec
DB
2387 bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
2388
32e93fb7 2389 return 0;
f31645d1
DB
2390out_free:
2391 free(ctx->sec_done);
32e93fb7
DB
2392out_elf:
2393 elf_end(ctx->elf_fd);
2394out_fd:
2395 close(ctx->obj_fd);
2396 return ret;
2397}
d937a74b 2398
32e93fb7
DB
2399static int bpf_maps_count(struct bpf_elf_ctx *ctx)
2400{
2401 int i, count = 0;
11c39b5e 2402
32e93fb7
DB
2403 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2404 if (!ctx->map_fds[i])
2405 break;
2406 count++;
2407 }
473d7840 2408
32e93fb7
DB
2409 return count;
2410}
6256f8c9 2411
32e93fb7
DB
2412static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
2413{
2414 int i;
2415
2416 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2417 if (ctx->map_fds[i])
2418 close(ctx->map_fds[i]);
473d7840 2419 }
32e93fb7
DB
2420}
2421
2422static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
2423{
2424 if (failure)
2425 bpf_maps_teardown(ctx);
473d7840 2426
f6793eec 2427 bpf_hash_destroy(ctx);
f31645d1 2428
32e93fb7 2429 free(ctx->sec_done);
f31645d1
DB
2430 free(ctx->log);
2431
32e93fb7
DB
2432 elf_end(ctx->elf_fd);
2433 close(ctx->obj_fd);
2434}
6256f8c9 2435
/*
 * File-scope ELF context shared by bpf_obj_open() and
 * bpf_send_map_fds(); the latter reads the map descriptors and map
 * entries out of it.
 */
32e93fb7 2436static struct bpf_elf_ctx __ctx;
6256f8c9 2437
32e93fb7
DB
2438static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
2439 const char *section, bool verbose)
2440{
2441 struct bpf_elf_ctx *ctx = &__ctx;
2442 int fd = 0, ret;
6256f8c9 2443
32e93fb7
DB
2444 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
2445 if (ret < 0) {
2446 fprintf(stderr, "Cannot initialize ELF context!\n");
2447 return ret;
2448 }
6256f8c9 2449
32e93fb7
DB
2450 ret = bpf_fetch_ancillary(ctx);
2451 if (ret < 0) {
2452 fprintf(stderr, "Error fetching ELF ancillary data!\n");
2453 goto out;
2454 }
2455
2456 fd = bpf_fetch_prog_sec(ctx, section);
2457 if (fd < 0) {
2458 fprintf(stderr, "Error fetching program/map!\n");
2459 ret = fd;
2460 goto out;
2461 }
2462
2463 ret = bpf_fill_prog_arrays(ctx);
2464 if (ret < 0)
2465 fprintf(stderr, "Error filling program arrays!\n");
11c39b5e 2466out:
32e93fb7
DB
2467 bpf_elf_ctx_destroy(ctx, ret < 0);
2468 if (ret < 0) {
2469 if (fd)
2470 close(fd);
2471 return ret;
2472 }
2473
2474 return fd;
6256f8c9 2475}
11c39b5e 2476
6256f8c9 2477static int
4bd62446
DB
2478bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
2479 const struct bpf_map_data *aux, unsigned int entries)
6256f8c9 2480{
d17b136f
PS
2481 struct bpf_map_set_msg msg = {
2482 .aux.uds_ver = BPF_SCM_AUX_VER,
2483 .aux.num_ent = entries,
2484 };
6256f8c9
DB
2485 int *cmsg_buf, min_fd;
2486 char *amsg_buf;
2487 int i;
2488
6256f8c9
DB
2489 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
2490 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
2491
2492 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
2493 amsg_buf = (char *)msg.aux.ent;
2494
4bd62446 2495 for (i = 0; i < entries; i += min_fd) {
6256f8c9
DB
2496 int ret;
2497
4bd62446 2498 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
6256f8c9
DB
2499 bpf_map_set_init_single(&msg, min_fd);
2500
2501 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
2502 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
2503
2504 ret = sendmsg(fd, &msg.hdr, 0);
2505 if (ret <= 0)
2506 return ret ? : -1;
2507 }
2508
2509 return 0;
11c39b5e
DB
2510}
2511
4bd62446
DB
2512static int
2513bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
2514 unsigned int entries)
2515{
2516 struct bpf_map_set_msg msg;
2517 int *cmsg_buf, min_fd;
2518 char *amsg_buf, *mmsg_buf;
2519 unsigned int needed = 1;
2520 int i;
2521
2522 cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
2523 amsg_buf = (char *)msg.aux.ent;
2524 mmsg_buf = (char *)&msg.aux;
2525
2526 for (i = 0; i < min(entries, needed); i += min_fd) {
2527 struct cmsghdr *cmsg;
2528 int ret;
2529
2530 min_fd = min(entries, entries - i);
2531 bpf_map_set_init_single(&msg, min_fd);
2532
2533 ret = recvmsg(fd, &msg.hdr, 0);
2534 if (ret <= 0)
2535 return ret ? : -1;
2536
2537 cmsg = CMSG_FIRSTHDR(&msg.hdr);
2538 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
2539 return -EINVAL;
2540 if (msg.hdr.msg_flags & MSG_CTRUNC)
2541 return -EIO;
2542 if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
2543 return -ENOSYS;
2544
2545 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
2546 if (min_fd > entries || min_fd <= 0)
2547 return -EINVAL;
2548
2549 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
2550 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
2551 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
2552
2553 needed = aux->num_ent;
2554 }
2555
2556 return 0;
2557}
2558
2559int bpf_send_map_fds(const char *path, const char *obj)
6256f8c9 2560{
32e93fb7 2561 struct bpf_elf_ctx *ctx = &__ctx;
d17b136f
PS
2562 struct sockaddr_un addr = { .sun_family = AF_UNIX };
2563 struct bpf_map_data bpf_aux = {
2564 .fds = ctx->map_fds,
2565 .ent = ctx->maps,
2566 .st = &ctx->stat,
2567 .obj = obj,
2568 };
6256f8c9
DB
2569 int fd, ret;
2570
2571 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
2572 if (fd < 0) {
2573 fprintf(stderr, "Cannot open socket: %s\n",
2574 strerror(errno));
2575 return -1;
2576 }
2577
6256f8c9
DB
2578 strncpy(addr.sun_path, path, sizeof(addr.sun_path));
2579
2580 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
2581 if (ret < 0) {
2582 fprintf(stderr, "Cannot connect to %s: %s\n",
2583 path, strerror(errno));
2584 return -1;
2585 }
2586
4bd62446 2587 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
32e93fb7 2588 bpf_maps_count(ctx));
6256f8c9 2589 if (ret < 0)
4bd62446
DB
2590 fprintf(stderr, "Cannot send fds to %s: %s\n",
2591 path, strerror(errno));
2592
32e93fb7 2593 bpf_maps_teardown(ctx);
4bd62446
DB
2594 close(fd);
2595 return ret;
2596}
2597
/*
 * Bind a Unix datagram socket at @path and receive map descriptors
 * and their descriptions on it.
 *
 * @path:    filesystem path to bind the socket to
 * @fds:     array receiving the descriptors, room for @entries items
 * @aux:     receives the header fields and the map entries
 * @entries: capacity of @fds and aux->ent
 *
 * After a successful bind the socket path is unlinked again before
 * returning.
 *
 * Returns 0 on success, -1 if the socket could not be created or
 * bound, otherwise the negative result of the receive operation.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	/* addr is zeroed; size - 1 keeps sun_path NUL-terminated. */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		/* Do not leak the socket on this error path. */
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
11c39b5e 2629#endif /* HAVE_ELF */