]> git.proxmox.com Git - mirror_iproute2.git/blame - tc/tc_bpf.c
tc: m_action: Improve conversion to C99 style initializers
[mirror_iproute2.git] / tc / tc_bpf.c
CommitLineData
1d129d19
JP
1/*
2 * tc_bpf.c BPF common code
3 *
4 * This program is free software; you can distribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Daniel Borkmann <dborkman@redhat.com>
10 * Jiri Pirko <jiri@resnulli.us>
11c39b5e 11 * Alexei Starovoitov <ast@plumgrid.com>
1d129d19
JP
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
473d7840 19#include <stdint.h>
1d129d19 20#include <errno.h>
11c39b5e
DB
21#include <fcntl.h>
22#include <stdarg.h>
5c5a0f3d 23#include <limits.h>
1d129d19 24
11c39b5e
DB
25#ifdef HAVE_ELF
26#include <libelf.h>
27#include <gelf.h>
28#endif
29
32e93fb7
DB
30#include <sys/types.h>
31#include <sys/stat.h>
32#include <sys/un.h>
33#include <sys/vfs.h>
34#include <sys/mount.h>
35#include <sys/syscall.h>
36#include <sys/sendfile.h>
37#include <sys/resource.h>
38
39#include <linux/bpf.h>
40#include <linux/filter.h>
41#include <linux/if_alg.h>
42
8187b012
DB
43#include <arpa/inet.h>
44
1d129d19 45#include "utils.h"
6256f8c9
DB
46
47#include "bpf_elf.h"
48#include "bpf_scm.h"
49
1d129d19
JP
50#include "tc_util.h"
51#include "tc_bpf.h"
52
67584e3a
ND
53#ifndef AF_ALG
54#define AF_ALG 38
55#endif
56
e77fa41d
DB
57#ifndef EM_BPF
58#define EM_BPF 247
59#endif
60
32e93fb7
DB
#ifdef HAVE_ELF
/* Real implementation lives further down, next to the ELF loader. */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose);
#else
/* Fallback used when tc is built without libelf: object files cannot be
 * loaded, so report that and fail with ENOSYS.
 *
 * Returns -1 with errno set to ENOSYS; all arguments are ignored.
 */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose)
{
	fputs("No ELF library support compiled in.\n", stderr);
	errno = ENOSYS;

	return -1;
}
#endif
73
/* Convert a user-space pointer into the 64-bit integer representation
 * used for pointer-carrying fields of union bpf_attr.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
78
/* Issue the bpf system call.
 *
 * @cmd:  BPF_* command number
 * @attr: command attributes
 * @size: size of @attr in bytes
 *
 * Returns whatever syscall() returns. When the build headers do not
 * define __NR_bpf, prints a diagnostic and returns -1 with errno set
 * to ENOSYS.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifndef __NR_bpf
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_bpf, cmd, attr, size);
#endif
}
89
91d88eeb
DB
/* Store @value under @key in the map referred to by @fd using the
 * BPF_MAP_UPDATE_ELEM command. @flags is passed through unchanged.
 *
 * Returns the result of the bpf() wrapper.
 */
static int bpf_map_update(int fd, const void *key, const void *value,
			  uint64_t flags)
{
	union bpf_attr req;

	/* Clear the whole union so fields this command does not use are zero. */
	memset(&req, 0, sizeof(req));

	req.flags = flags;
	req.value = bpf_ptr_to_u64(value);
	req.key = bpf_ptr_to_u64(key);
	req.map_fd = fd;

	return bpf(BPF_MAP_UPDATE_ELEM, &req, sizeof(req));
}
103
/* Obtain the textual cBPF program and parse its leading length field.
 *
 * @arg:          the program text itself, or a file name when @from_file
 * @from_file:    read the first line of file @arg instead of using @arg
 * @bpf_len:      out: the leading "<len>" number
 * @bpf_string:   out: program text (either @arg or a heap buffer)
 * @need_release: out: true when *@bpf_string was allocated here and the
 *                caller has to free() it
 * @separator:    character that must directly follow the length
 *
 * Returns 0 on success, -ENOMEM/-ENOENT/-EIO when the file could not be
 * buffered, or -EINVAL when the text does not start with
 * "<len><separator>". On every failure the buffer is already released.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char found_sep;

	if (!from_file) {
		*need_release = false;
		*bpf_string = arg;
	} else {
		/* Room for the length prefix plus BPF_MAXINSNS instructions
		 * in their longest textual form.
		 */
		const size_t op_len = sizeof("65535 255 255 4294967295,");
		const size_t buf_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		char *buf, *line;
		FILE *in;

		buf = calloc(1, buf_len);
		if (buf == NULL)
			return -ENOMEM;

		in = fopen(arg, "r");
		if (in == NULL) {
			perror("Cannot fopen");
			free(buf);
			return -ENOENT;
		}

		line = fgets(buf, buf_len, in);
		fclose(in);
		if (line == NULL) {
			free(buf);
			return -EIO;
		}

		*need_release = true;
		*bpf_string = buf;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &found_sep) == 2 &&
	    found_sep == separator)
		return 0;

	if (*need_release)
		free(*bpf_string);

	return -EINVAL;
}
153
32e93fb7
DB
/* Parse a textual cBPF program ("<len>,<code> <jt> <jf> <k>,...") into
 * @bpf_ops.
 *
 * @argc/@argv: remaining command line; argv[0] holds the program text or,
 *              when @from_file is set, the name of a file containing it
 * @bpf_ops:    destination array; entries are written for at most the
 *              announced instruction count, which is itself capped at
 *              BPF_MAXINSNS
 *
 * Returns the number of instructions on success, -EINVAL otherwise.
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	const char separator = ',';
	char *bpf_string, *cursor;
	bool need_release;
	__u16 bpf_len = 0;
	int ret = -EINVAL;
	int count = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS)
		goto out;

	cursor = bpf_string;
	for (;;) {
		/* Each instruction starts right behind a separator. */
		cursor = strchr(cursor, separator);
		if (!cursor)
			break;
		cursor++;
		if (cursor[0] == '\0')
			break;

		if (count >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			goto out;
		}

		if (sscanf(cursor, "%hu %hhu %hhu %u,",
			   &bpf_ops[count].code, &bpf_ops[count].jt,
			   &bpf_ops[count].jf, &bpf_ops[count].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", count);
			goto out;
		}

		count++;
	}

	if (count != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		goto out;
	}

	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
203
/* Print @len cBPF instructions carried in netlink attribute @bpf_ops to
 * @f as "bytecode '<len>,<code> <jt> <jf> <k>,...'". Nothing is printed
 * when @len is 0.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	const struct sock_filter *insn = RTA_DATA(bpf_ops);
	int idx;

	if (len == 0)
		return;

	fprintf(f, "bytecode \'%u,", len);

	/* All but the last instruction are followed by a comma ... */
	for (idx = 0; idx < len - 1; idx++, insn++)
		fprintf(f, "%hu %hhu %hhu %u,", insn->code, insn->jt,
			insn->jf, insn->k);

	/* ... the last one closes the quote instead. */
	fprintf(f, "%hu %hhu %hhu %u\'", insn->code, insn->jt,
		insn->jf, insn->k);
}
11c39b5e 221
afc1a200
DB
222static void bpf_map_pin_report(const struct bpf_elf_map *pin,
223 const struct bpf_elf_map *obj)
224{
225 fprintf(stderr, "Map specification differs from pinned file!\n");
226
227 if (obj->type != pin->type)
228 fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
229 obj->type, pin->type);
230 if (obj->size_key != pin->size_key)
231 fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
232 obj->size_key, pin->size_key);
233 if (obj->size_value != pin->size_value)
234 fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
235 obj->size_value, pin->size_value);
236 if (obj->max_elem != pin->max_elem)
237 fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
238 obj->max_elem, pin->max_elem);
4dd3f50a
DB
239 if (obj->flags != pin->flags)
240 fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
241 obj->flags, pin->flags);
afc1a200
DB
242
243 fprintf(stderr, "\n");
244}
245
91d88eeb
DB
246static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
247 int length)
9e607f2e
DB
248{
249 char file[PATH_MAX], buff[4096];
250 struct bpf_elf_map tmp, zero;
251 unsigned int val;
252 FILE *fp;
253
254 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
255
256 fp = fopen(file, "r");
257 if (!fp) {
258 fprintf(stderr, "No procfs support?!\n");
259 return -EIO;
260 }
261
262 memset(&tmp, 0, sizeof(tmp));
263 while (fgets(buff, sizeof(buff), fp)) {
264 if (sscanf(buff, "map_type:\t%u", &val) == 1)
265 tmp.type = val;
266 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
267 tmp.size_key = val;
268 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
269 tmp.size_value = val;
270 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
271 tmp.max_elem = val;
4dd3f50a
DB
272 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
273 tmp.flags = val;
9e607f2e
DB
274 }
275
276 fclose(fp);
277
91d88eeb 278 if (!memcmp(&tmp, map, length)) {
9e607f2e
DB
279 return 0;
280 } else {
281 memset(&zero, 0, sizeof(zero));
282 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
283 * so just accept it. We know we do have an eBPF fd and in this
284 * case, everything is 0. It is guaranteed that no such map exists
285 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
286 */
91d88eeb 287 if (!memcmp(&tmp, &zero, length))
9e607f2e
DB
288 return 0;
289
afc1a200 290 bpf_map_pin_report(&tmp, map);
9e607f2e
DB
291 return -EINVAL;
292 }
293}
294
91d88eeb
DB
/* Mount a bpf filesystem on @target.
 *
 * @target is first marked private (recursively). If that fails with
 * EINVAL, @target is bind-mounted onto itself once and the attempt is
 * repeated. Finally the "bpf" filesystem is mounted there.
 *
 * Returns 0 on success, -1 after printing a diagnostic otherwise.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	for (;;) {
		if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL) == 0)
			break;

		/* Only EINVAL is worth one retry after a bind mount. */
		if (bind_done || errno != EINVAL) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL) != 0) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, NULL) != 0) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
323
32e93fb7
DB
/* Check that @mnt can be statfs()'ed and that its filesystem type equals
 * @magic. Returns 0 if so, -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs info;
	bool usable;

	usable = statfs(mnt, &info) >= 0 &&
		 (unsigned long)info.f_type == magic;

	return usable ? 0 : -ENOENT;
}
335
336static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
337 char *mnt, int len,
338 const char * const *known_mnts)
339{
340 const char * const *ptr;
341 char type[100];
342 FILE *fp;
343
344 if (known_mnts) {
345 ptr = known_mnts;
346 while (*ptr) {
347 if (bpf_valid_mntpt(*ptr, magic) == 0) {
348 strncpy(mnt, *ptr, len - 1);
349 mnt[len - 1] = 0;
350 return mnt;
351 }
352 ptr++;
353 }
354 }
355
356 fp = fopen("/proc/mounts", "r");
357 if (fp == NULL || len != PATH_MAX)
358 return NULL;
359
360 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
361 mnt, type) == 2) {
362 if (strcmp(type, fstype) == 0)
363 break;
364 }
365
366 fclose(fp);
367 if (strcmp(type, fstype) != 0)
368 return NULL;
369
370 return mnt;
371}
372
373int bpf_trace_pipe(void)
374{
375 char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
376 static const char * const tracefs_known_mnts[] = {
377 TRACE_DIR_MNT,
378 "/sys/kernel/debug/tracing",
379 "/tracing",
380 "/trace",
381 0,
382 };
383 char tpipe[PATH_MAX];
384 const char *mnt;
385 int fd;
386
387 mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
388 sizeof(tracefs_mnt), tracefs_known_mnts);
389 if (!mnt) {
390 fprintf(stderr, "tracefs not mounted?\n");
391 return -1;
392 }
393
394 snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
395
396 fd = open(tpipe, O_RDONLY);
397 if (fd < 0)
398 return -1;
399
400 fprintf(stderr, "Running! Hang up with ^C!\n\n");
401 while (1) {
402 static char buff[4096];
403 ssize_t ret;
404
405 ret = read(fd, buff, sizeof(buff) - 1);
406 if (ret > 0) {
407 write(2, buff, ret);
408 fflush(stderr);
409 }
410 }
411
412 return 0;
413}
414
91d88eeb
DB
415static const char *bpf_get_tc_dir(void)
416{
32a121cb 417 static bool bpf_mnt_cached;
91d88eeb
DB
418 static char bpf_tc_dir[PATH_MAX];
419 static const char *mnt;
420 static const char * const bpf_known_mnts[] = {
421 BPF_DIR_MNT,
422 0,
423 };
424 char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
425 char bpf_glo_dir[PATH_MAX];
426 int ret;
427
428 if (bpf_mnt_cached)
429 goto done;
430
431 mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
432 bpf_known_mnts);
433 if (!mnt) {
434 mnt = getenv(BPF_ENV_MNT);
435 if (!mnt)
436 mnt = BPF_DIR_MNT;
437 ret = bpf_mnt_fs(mnt);
438 if (ret) {
439 mnt = NULL;
440 goto out;
441 }
442 }
443
444 snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
445 ret = mkdir(bpf_tc_dir, S_IRWXU);
446 if (ret && errno != EEXIST) {
447 fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
448 strerror(errno));
449 mnt = NULL;
450 goto out;
451 }
452
453 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
454 bpf_tc_dir, BPF_DIR_GLOBALS);
455 ret = mkdir(bpf_glo_dir, S_IRWXU);
456 if (ret && errno != EEXIST) {
457 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
458 strerror(errno));
459 mnt = NULL;
460 goto out;
461 }
462
463 mnt = bpf_tc_dir;
464out:
465 bpf_mnt_cached = true;
466done:
467 return mnt;
468}
469
470static int bpf_obj_get(const char *pathname)
471{
472 union bpf_attr attr;
473 char tmp[PATH_MAX];
474
475 if (strlen(pathname) > 2 && pathname[0] == 'm' &&
476 pathname[1] == ':' && bpf_get_tc_dir()) {
477 snprintf(tmp, sizeof(tmp), "%s/%s",
478 bpf_get_tc_dir(), pathname + 2);
479 pathname = tmp;
480 }
481
482 memset(&attr, 0, sizeof(attr));
483 attr.pathname = bpf_ptr_to_u64(pathname);
484
485 return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
486}
487
6256f8c9 488const char *bpf_default_section(const enum bpf_prog_type type)
11c39b5e
DB
489{
490 switch (type) {
491 case BPF_PROG_TYPE_SCHED_CLS:
492 return ELF_SECTION_CLASSIFIER;
6256f8c9
DB
493 case BPF_PROG_TYPE_SCHED_ACT:
494 return ELF_SECTION_ACTION;
11c39b5e
DB
495 default:
496 return NULL;
497 }
498}
499
91d88eeb
DB
/* How a BPF program is handed over on the command line. The values also
 * index the per-caller table of permitted modes (opt_tbl).
 */
enum bpf_mode {
	CBPF_BYTECODE = 0,	/* "bytecode" / "bc" */
	CBPF_FILE,		/* "bytecode-file" / "bcf" */
	EBPF_OBJECT,		/* "object-file" / "obj" */
	EBPF_PINNED,		/* "object-pinned" / "pinned" / "fd" */
	__BPF_MODE_MAX,
};

#define BPF_MODE_MAX	__BPF_MODE_MAX
508
509static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl,
510 enum bpf_prog_type *type, enum bpf_mode *mode,
511 const char **ptr_object, const char **ptr_section,
512 const char **ptr_uds_name, struct sock_filter *opcodes)
32e93fb7 513{
32e93fb7 514 const char *file, *section, *uds_name;
32e93fb7 515 bool verbose = false;
91d88eeb
DB
516 int ret, argc;
517 char **argv;
518
519 argv = *ptr_argv;
520 argc = *ptr_argc;
521
522 if (opt_tbl[CBPF_BYTECODE] &&
523 (matches(*argv, "bytecode") == 0 ||
524 strcmp(*argv, "bc") == 0)) {
525 *mode = CBPF_BYTECODE;
526 } else if (opt_tbl[CBPF_FILE] &&
527 (matches(*argv, "bytecode-file") == 0 ||
528 strcmp(*argv, "bcf") == 0)) {
529 *mode = CBPF_FILE;
530 } else if (opt_tbl[EBPF_OBJECT] &&
531 (matches(*argv, "object-file") == 0 ||
532 strcmp(*argv, "obj") == 0)) {
533 *mode = EBPF_OBJECT;
534 } else if (opt_tbl[EBPF_PINNED] &&
535 (matches(*argv, "object-pinned") == 0 ||
536 matches(*argv, "pinned") == 0 ||
537 matches(*argv, "fd") == 0)) {
538 *mode = EBPF_PINNED;
32e93fb7
DB
539 } else {
540 fprintf(stderr, "What mode is \"%s\"?\n", *argv);
541 return -1;
542 }
543
544 NEXT_ARG();
545 file = section = uds_name = NULL;
91d88eeb 546 if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
32e93fb7
DB
547 file = *argv;
548 NEXT_ARG_FWD();
549
91d88eeb
DB
550 if (*type == BPF_PROG_TYPE_UNSPEC) {
551 if (argc > 0 && matches(*argv, "type") == 0) {
552 NEXT_ARG();
553 if (matches(*argv, "cls") == 0) {
554 *type = BPF_PROG_TYPE_SCHED_CLS;
555 } else if (matches(*argv, "act") == 0) {
556 *type = BPF_PROG_TYPE_SCHED_ACT;
557 } else {
558 fprintf(stderr, "What type is \"%s\"?\n",
559 *argv);
560 return -1;
561 }
562 NEXT_ARG_FWD();
563 } else {
564 *type = BPF_PROG_TYPE_SCHED_CLS;
565 }
566 }
567
568 section = bpf_default_section(*type);
32e93fb7
DB
569 if (argc > 0 && matches(*argv, "section") == 0) {
570 NEXT_ARG();
571 section = *argv;
572 NEXT_ARG_FWD();
573 }
574
575 uds_name = getenv(BPF_ENV_UDS);
576 if (argc > 0 && !uds_name &&
577 matches(*argv, "export") == 0) {
578 NEXT_ARG();
579 uds_name = *argv;
580 NEXT_ARG_FWD();
581 }
582
583 if (argc > 0 && matches(*argv, "verbose") == 0) {
584 verbose = true;
585 NEXT_ARG_FWD();
586 }
587
588 PREV_ARG();
589 }
590
91d88eeb
DB
591 if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
592 ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE);
593 else if (*mode == EBPF_OBJECT)
594 ret = bpf_obj_open(file, *type, section, verbose);
595 else if (*mode == EBPF_PINNED)
32e93fb7 596 ret = bpf_obj_get(file);
91d88eeb 597 else
32e93fb7
DB
598 return -1;
599
91d88eeb
DB
600 if (ptr_object)
601 *ptr_object = file;
602 if (ptr_section)
603 *ptr_section = section;
604 if (ptr_uds_name)
605 *ptr_uds_name = uds_name;
606
607 *ptr_argc = argc;
608 *ptr_argv = argv;
609
610 return ret;
611}
612
613int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
614 enum bpf_prog_type type, const char **ptr_object,
615 const char **ptr_uds_name, struct nlmsghdr *n)
616{
617 struct sock_filter opcodes[BPF_MAXINSNS];
618 const bool opt_tbl[BPF_MODE_MAX] = {
619 [CBPF_BYTECODE] = true,
620 [CBPF_FILE] = true,
621 [EBPF_OBJECT] = true,
622 [EBPF_PINNED] = true,
623 };
624 char annotation[256];
625 const char *section;
626 enum bpf_mode mode;
627 int ret;
628
629 ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode,
630 ptr_object, &section, ptr_uds_name, opcodes);
631 if (ret < 0)
632 return ret;
633
32e93fb7
DB
634 if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
635 addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
636 addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
637 ret * sizeof(struct sock_filter));
91d88eeb
DB
638 }
639
640 if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
32e93fb7 641 snprintf(annotation, sizeof(annotation), "%s:[%s]",
91d88eeb
DB
642 basename(*ptr_object), mode == EBPF_PINNED ?
643 "*fsobj" : section);
32e93fb7
DB
644
645 addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
646 addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
647 }
648
91d88eeb
DB
649 return 0;
650}
32e93fb7 651
91d88eeb
DB
652int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
653{
654 enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
655 const bool opt_tbl[BPF_MODE_MAX] = {
656 [CBPF_BYTECODE] = false,
657 [CBPF_FILE] = false,
658 [EBPF_OBJECT] = true,
659 [EBPF_PINNED] = true,
660 };
661 const struct bpf_elf_map test = {
662 .type = BPF_MAP_TYPE_PROG_ARRAY,
663 .size_key = sizeof(int),
664 .size_value = sizeof(int),
665 };
666 int ret, prog_fd, map_fd;
667 const char *section;
668 enum bpf_mode mode;
669 uint32_t map_key;
670
671 prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode,
672 NULL, &section, NULL, NULL);
673 if (prog_fd < 0)
674 return prog_fd;
675 if (key) {
676 map_key = *key;
677 } else {
678 ret = sscanf(section, "%*i/%i", &map_key);
679 if (ret != 1) {
32a121cb 680 fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
91d88eeb
DB
681 ret = -EINVAL;
682 goto out_prog;
683 }
684 }
32e93fb7 685
91d88eeb
DB
686 map_fd = bpf_obj_get(map_path);
687 if (map_fd < 0) {
688 fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
689 map_path, strerror(errno));
690 ret = map_fd;
691 goto out_prog;
692 }
693
694 ret = bpf_map_selfcheck_pinned(map_fd, &test,
695 offsetof(struct bpf_elf_map, max_elem));
696 if (ret < 0) {
697 fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
698 goto out_map;
699 }
700
701 ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
702 if (ret < 0)
703 fprintf(stderr, "Map update failed: %s\n", strerror(errno));
704out_map:
705 close(map_fd);
706out_prog:
707 close(prog_fd);
708 return ret;
32e93fb7
DB
709}
710
6256f8c9 711#ifdef HAVE_ELF
32e93fb7
DB
/* One program extracted from an object file, ready for BPF_PROG_LOAD. */
struct bpf_elf_prog {
	enum bpf_prog_type	type;		/* program type to load as */
	const struct bpf_insn	*insns;		/* instruction array */
	size_t			size;		/* size of insns in bytes */
	const char		*license;	/* license string */
};
718
f6793eec
DB
/* Node of a singly linked bucket chain that maps a custom pinning id to
 * the sub-path it should be pinned under.
 */
struct bpf_hash_entry {
	unsigned int		pinning;	/* lookup key */
	const char		*subpath;	/* path for this pinning id */
	struct bpf_hash_entry	*next;		/* next entry in the bucket */
};
724
32e93fb7
DB
725struct bpf_elf_ctx {
726 Elf *elf_fd;
727 GElf_Ehdr elf_hdr;
728 Elf_Data *sym_tab;
729 Elf_Data *str_tab;
730 int obj_fd;
731 int map_fds[ELF_MAX_MAPS];
732 struct bpf_elf_map maps[ELF_MAX_MAPS];
733 int sym_num;
734 int map_num;
735 bool *sec_done;
736 int sec_maps;
737 char license[ELF_MAX_LICENSE_LEN];
738 enum bpf_prog_type type;
739 bool verbose;
740 struct bpf_elf_st stat;
f6793eec 741 struct bpf_hash_entry *ht[256];
f31645d1
DB
742 char *log;
743 size_t log_size;
32e93fb7
DB
744};
745
6256f8c9 746struct bpf_elf_sec_data {
32e93fb7
DB
747 GElf_Shdr sec_hdr;
748 Elf_Data *sec_data;
749 const char *sec_name;
6256f8c9
DB
750};
751
/* Bundle of map-related pointers; presumably what gets exported alongside
 * the map fds (users are outside this part of the file - TODO confirm).
 */
struct bpf_map_data {
	int			*fds;
	const char		*obj;
	struct bpf_elf_st	*st;
	struct bpf_elf_map	*ent;
};
758
f31645d1
DB
759static __check_format_string(2, 3) void
760bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
11c39b5e
DB
761{
762 va_list vl;
763
764 va_start(vl, format);
765 vfprintf(stderr, format, vl);
766 va_end(vl);
767
f31645d1 768 if (ctx->log && ctx->log[0]) {
afc1a200
DB
769 if (ctx->verbose) {
770 fprintf(stderr, "%s\n", ctx->log);
771 } else {
772 unsigned int off = 0, len = strlen(ctx->log);
773
774 if (len > BPF_MAX_LOG) {
775 off = len - BPF_MAX_LOG;
776 fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
777 off);
778 }
779 fprintf(stderr, "%s\n", ctx->log + off);
780 }
781
f31645d1
DB
782 memset(ctx->log, 0, ctx->log_size);
783 }
784}
785
786static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
787{
788 size_t log_size = ctx->log_size;
789 void *ptr;
790
791 if (!ctx->log) {
792 log_size = 65536;
793 } else {
794 log_size <<= 1;
795 if (log_size > (UINT_MAX >> 8))
796 return -EINVAL;
d937a74b 797 }
f31645d1
DB
798
799 ptr = realloc(ctx->log, log_size);
800 if (!ptr)
801 return -ENOMEM;
802
803 ctx->log = ptr;
804 ctx->log_size = log_size;
805
806 return 0;
11c39b5e
DB
807}
808
4dd3f50a
DB
/* Create a map with the given type, key/value sizes, capacity and flags
 * through BPF_MAP_CREATE. Returns the result of the bpf() wrapper.
 */
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
			  uint32_t size_value, uint32_t max_elem,
			  uint32_t flags)
{
	union bpf_attr req;

	memset(&req, 0, sizeof(req));

	req.map_flags = flags;
	req.max_entries = max_elem;
	req.value_size = size_value;
	req.key_size = size_key;
	req.map_type = type;

	return bpf(BPF_MAP_CREATE, &req, sizeof(req));
}
824
/* Load a program through BPF_PROG_LOAD.
 *
 * @insns/@size_insns: instructions and their total size in bytes
 * @license:           license string
 * @log/@size_log:     verifier log buffer; a log (level 1) is requested
 *                     only when @size_log is non-zero
 *
 * Returns the result of the bpf() wrapper.
 */
static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
			 size_t size_insns, const char *license, char *log,
			 size_t size_log)
{
	union bpf_attr req;

	memset(&req, 0, sizeof(req));

	req.prog_type = type;
	req.license = bpf_ptr_to_u64(license);
	req.insns = bpf_ptr_to_u64(insns);
	req.insn_cnt = size_insns / sizeof(struct bpf_insn);

	if (size_log > 0) {
		req.log_level = 1;
		req.log_size = size_log;
		req.log_buf = bpf_ptr_to_u64(log);
	}

	return bpf(BPF_PROG_LOAD, &req, sizeof(req));
}
845
/* Pin the object behind @fd at @pathname through BPF_OBJ_PIN.
 * Returns the result of the bpf() wrapper.
 */
static int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr req;

	memset(&req, 0, sizeof(req));

	req.bpf_fd = fd;
	req.pathname = bpf_ptr_to_u64(pathname);

	return bpf(BPF_OBJ_PIN, &req, sizeof(req));
}
11c39b5e 856
32e93fb7
DB
/* Compute the SHA-1 digest of file @object with the kernel's AF_ALG
 * hashing interface.
 *
 * @out/@len: digest destination; @len must be 20
 *
 * Returns 0 on success, -EINVAL for bad arguments, and a negative value
 * (after printing a diagnostic) when a socket or file operation fails.
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	/* Feed the whole file into the hash socket. */
	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is an off_t; print it via long long, not %zu. */
		fprintf(stderr, "Error from sendfile (%zd vs %lld bytes): %s\n",
			size, (long long)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	/* Reading the socket back yields the digest. */
	size = read(ofd, out, len);
	if (size < 0 || (size_t)size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
931
/* Return the object's unique id: the hex form of the hash of file
 * @pathname.
 *
 * The id is computed once and cached in static storage; later calls
 * return the cached string and ignore @pathname. Returns NULL if hashing
 * fails (nothing is cached in that case).
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static char bpf_uid[64];
	static bool bpf_uid_cached;
	uint8_t digest[20];

	if (bpf_uid_cached)
		return bpf_uid;

	if (bpf_obj_hash(pathname, digest, sizeof(digest)) != 0) {
		fprintf(stderr, "Object hashing failed!\n");
		return NULL;
	}

	hexstring_n2a(digest, sizeof(digest), bpf_uid, sizeof(bpf_uid));
	bpf_uid_cached = true;

	return bpf_uid;
}
953
32e93fb7
DB
/* Prepare the process for loading the object at @pathname.
 *
 * Tries to lift the locked-memory limit (failure is ignored), makes sure
 * the tc directory in the bpf filesystem exists, and computes the object
 * id.
 *
 * Returns 0 on success, and also when no bpf filesystem is available;
 * -1 if the object id cannot be computed.
 */
static int bpf_init_env(const char *pathname)
{
	struct rlimit unlimited = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	/* Don't bother in case we fail! */
	setrlimit(RLIMIT_MEMLOCK, &unlimited);

	if (bpf_get_tc_dir() == NULL) {
		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
		return 0;
	}

	return bpf_get_obj_uid(pathname) ? 0 : -1;
}
974
f6793eec
DB
975static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
976 uint32_t pinning)
977{
978 struct bpf_hash_entry *entry;
979
980 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
981 while (entry && entry->pinning != pinning)
982 entry = entry->next;
983
984 return entry ? entry->subpath : NULL;
985}
986
987static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
988 uint32_t pinning)
11c39b5e 989{
32e93fb7
DB
990 switch (pinning) {
991 case PIN_OBJECT_NS:
992 case PIN_GLOBAL_NS:
993 return false;
994 case PIN_NONE:
32e93fb7 995 return true;
f6793eec
DB
996 default:
997 return !bpf_custom_pinning(ctx, pinning);
32e93fb7
DB
998 }
999}
1000
1001static void bpf_make_pathname(char *pathname, size_t len, const char *name,
f6793eec 1002 const struct bpf_elf_ctx *ctx, uint32_t pinning)
32e93fb7
DB
1003{
1004 switch (pinning) {
1005 case PIN_OBJECT_NS:
1006 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
1007 bpf_get_obj_uid(NULL), name);
1008 break;
1009 case PIN_GLOBAL_NS:
1010 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
1011 BPF_DIR_GLOBALS, name);
1012 break;
f6793eec
DB
1013 default:
1014 snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(),
1015 bpf_custom_pinning(ctx, pinning), name);
1016 break;
32e93fb7
DB
1017 }
1018}
1019
f6793eec
DB
1020static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
1021 uint32_t pinning)
32e93fb7
DB
1022{
1023 char pathname[PATH_MAX];
1024
f6793eec 1025 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
32e93fb7
DB
1026 return 0;
1027
f6793eec 1028 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
32e93fb7
DB
1029 return bpf_obj_get(pathname);
1030}
1031
f6793eec 1032static int bpf_make_obj_path(void)
32e93fb7 1033{
f6793eec 1034 char tmp[PATH_MAX];
32e93fb7
DB
1035 int ret;
1036
f6793eec
DB
1037 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(),
1038 bpf_get_obj_uid(NULL));
1039
1040 ret = mkdir(tmp, S_IRWXU);
1041 if (ret && errno != EEXIST) {
1042 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
1043 return ret;
1044 }
1045
1046 return 0;
1047}
1048
1049static int bpf_make_custom_path(const char *todo)
1050{
1051 char tmp[PATH_MAX], rem[PATH_MAX], *sub;
1052 int ret;
1053
1054 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir());
1055 snprintf(rem, sizeof(rem), "%s/", todo);
1056 sub = strtok(rem, "/");
32e93fb7 1057
f6793eec
DB
1058 while (sub) {
1059 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
1060 return -EINVAL;
1061
1062 strcat(tmp, sub);
1063 strcat(tmp, "/");
32e93fb7 1064
f6793eec 1065 ret = mkdir(tmp, S_IRWXU);
32e93fb7 1066 if (ret && errno != EEXIST) {
f6793eec 1067 fprintf(stderr, "mkdir %s failed: %s\n", tmp,
32e93fb7
DB
1068 strerror(errno));
1069 return ret;
1070 }
f6793eec
DB
1071
1072 sub = strtok(NULL, "/");
32e93fb7
DB
1073 }
1074
f6793eec
DB
1075 return 0;
1076}
1077
1078static int bpf_place_pinned(int fd, const char *name,
1079 const struct bpf_elf_ctx *ctx, uint32_t pinning)
1080{
1081 char pathname[PATH_MAX];
1082 const char *tmp;
1083 int ret = 0;
1084
1085 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
1086 return 0;
1087
1088 if (pinning == PIN_OBJECT_NS)
1089 ret = bpf_make_obj_path();
1090 else if ((tmp = bpf_custom_pinning(ctx, pinning)))
1091 ret = bpf_make_custom_path(tmp);
1092 if (ret < 0)
1093 return ret;
1094
1095 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
32e93fb7
DB
1096 return bpf_obj_pin(fd, pathname);
1097}
1098
f31645d1
DB
1099static void bpf_prog_report(int fd, const char *section,
1100 const struct bpf_elf_prog *prog,
1101 struct bpf_elf_ctx *ctx)
32e93fb7 1102{
afc1a200
DB
1103 unsigned int insns = prog->size / sizeof(struct bpf_insn);
1104
1105 fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
f31645d1
DB
1106 fd < 0 ? "rejected: " : "loaded",
1107 fd < 0 ? strerror(errno) : "",
1108 fd < 0 ? errno : fd);
1109
1110 fprintf(stderr, " - Type: %u\n", prog->type);
afc1a200
DB
1111 fprintf(stderr, " - Instructions: %u (%u over limit)\n",
1112 insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
f31645d1
DB
1113 fprintf(stderr, " - License: %s\n\n", prog->license);
1114
1115 bpf_dump_error(ctx, "Verifier analysis:\n\n");
1116}
32e93fb7 1117
f31645d1
DB
1118static int bpf_prog_attach(const char *section,
1119 const struct bpf_elf_prog *prog,
1120 struct bpf_elf_ctx *ctx)
1121{
1122 int tries = 0, fd;
1123retry:
32e93fb7
DB
1124 errno = 0;
1125 fd = bpf_prog_load(prog->type, prog->insns, prog->size,
f31645d1
DB
1126 prog->license, ctx->log, ctx->log_size);
1127 if (fd < 0 || ctx->verbose) {
1128 /* The verifier log is pretty chatty, sometimes so chatty
1129 * on larger programs, that we could fail to dump everything
1130 * into our buffer. Still, try to give a debuggable error
1131 * log for the user, so enlarge it and re-fail.
1132 */
1133 if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
1134 if (tries++ < 6 && !bpf_log_realloc(ctx))
1135 goto retry;
1136
32a121cb 1137 fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
f31645d1
DB
1138 ctx->log_size, tries);
1139 return fd;
1140 }
1141
1142 bpf_prog_report(fd, section, prog, ctx);
32e93fb7
DB
1143 }
1144
1145 return fd;
1146}
1147
f31645d1
DB
1148static void bpf_map_report(int fd, const char *name,
1149 const struct bpf_elf_map *map,
1150 struct bpf_elf_ctx *ctx)
1151{
1152 fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
1153 fd < 0 ? "rejected: " : "loaded",
1154 fd < 0 ? strerror(errno) : "",
1155 fd < 0 ? errno : fd);
1156
1157 fprintf(stderr, " - Type: %u\n", map->type);
1158 fprintf(stderr, " - Identifier: %u\n", map->id);
1159 fprintf(stderr, " - Pinning: %u\n", map->pinning);
1160 fprintf(stderr, " - Size key: %u\n", map->size_key);
1161 fprintf(stderr, " - Size value: %u\n", map->size_value);
4dd3f50a
DB
1162 fprintf(stderr, " - Max elems: %u\n", map->max_elem);
1163 fprintf(stderr, " - Flags: %#x\n\n", map->flags);
f31645d1
DB
1164}
1165
32e93fb7 1166static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
f31645d1 1167 struct bpf_elf_ctx *ctx)
32e93fb7
DB
1168{
1169 int fd, ret;
1170
f6793eec 1171 fd = bpf_probe_pinned(name, ctx, map->pinning);
32e93fb7 1172 if (fd > 0) {
91d88eeb
DB
1173 ret = bpf_map_selfcheck_pinned(fd, map,
1174 offsetof(struct bpf_elf_map,
1175 id));
9e607f2e
DB
1176 if (ret < 0) {
1177 close(fd);
1178 fprintf(stderr, "Map \'%s\' self-check failed!\n",
1179 name);
1180 return ret;
1181 }
f31645d1 1182 if (ctx->verbose)
32e93fb7
DB
1183 fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
1184 name);
1185 return fd;
1186 }
1187
1188 errno = 0;
1189 fd = bpf_map_create(map->type, map->size_key, map->size_value,
4dd3f50a 1190 map->max_elem, map->flags);
f31645d1
DB
1191 if (fd < 0 || ctx->verbose) {
1192 bpf_map_report(fd, name, map, ctx);
32e93fb7
DB
1193 if (fd < 0)
1194 return fd;
1195 }
1196
f6793eec 1197 ret = bpf_place_pinned(fd, name, ctx, map->pinning);
32e93fb7
DB
1198 if (ret < 0 && errno != EEXIST) {
1199 fprintf(stderr, "Could not pin %s map: %s\n", name,
1200 strerror(errno));
1201 close(fd);
1202 return ret;
1203 }
1204
1205 return fd;
1206}
1207
32e93fb7
DB
1208static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
1209 const GElf_Sym *sym)
1210{
1211 return ctx->str_tab->d_buf + sym->st_name;
1212}
1213
1214static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
1215{
1216 GElf_Sym sym;
11c39b5e
DB
1217 int i;
1218
32e93fb7
DB
1219 for (i = 0; i < ctx->sym_num; i++) {
1220 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1221 continue;
1222
5230a2ed
DB
1223 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1224 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
32e93fb7
DB
1225 sym.st_shndx != ctx->sec_maps ||
1226 sym.st_value / sizeof(struct bpf_elf_map) != which)
1227 continue;
1228
1229 return bpf_str_tab_name(ctx, &sym);
11c39b5e 1230 }
32e93fb7
DB
1231
1232 return NULL;
11c39b5e
DB
1233}
1234
32e93fb7 1235static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
11c39b5e 1236{
32e93fb7
DB
1237 const char *map_name;
1238 int i, fd;
11c39b5e 1239
32e93fb7
DB
1240 for (i = 0; i < ctx->map_num; i++) {
1241 map_name = bpf_map_fetch_name(ctx, i);
1242 if (!map_name)
1243 return -EIO;
11c39b5e 1244
f31645d1 1245 fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
32e93fb7
DB
1246 if (fd < 0)
1247 return fd;
11c39b5e 1248
32e93fb7 1249 ctx->map_fds[i] = fd;
11c39b5e
DB
1250 }
1251
1252 return 0;
11c39b5e
DB
1253}
1254
32e93fb7
DB
1255static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
1256 struct bpf_elf_sec_data *data)
11c39b5e 1257{
32e93fb7 1258 Elf_Data *sec_edata;
11c39b5e
DB
1259 GElf_Shdr sec_hdr;
1260 Elf_Scn *sec_fd;
11c39b5e
DB
1261 char *sec_name;
1262
32e93fb7 1263 memset(data, 0, sizeof(*data));
11c39b5e 1264
32e93fb7 1265 sec_fd = elf_getscn(ctx->elf_fd, section);
11c39b5e
DB
1266 if (!sec_fd)
1267 return -EINVAL;
11c39b5e
DB
1268 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
1269 return -EIO;
1270
32e93fb7 1271 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
11c39b5e
DB
1272 sec_hdr.sh_name);
1273 if (!sec_name || !sec_hdr.sh_size)
1274 return -ENOENT;
1275
1276 sec_edata = elf_getdata(sec_fd, NULL);
1277 if (!sec_edata || elf_getdata(sec_fd, sec_edata))
1278 return -EIO;
1279
32e93fb7 1280 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
11c39b5e 1281
32e93fb7
DB
1282 data->sec_name = sec_name;
1283 data->sec_data = sec_edata;
11c39b5e
DB
1284 return 0;
1285}
1286
32e93fb7
DB
1287static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
1288 struct bpf_elf_sec_data *data)
11c39b5e 1289{
32e93fb7
DB
1290 if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
1291 return -EINVAL;
11c39b5e 1292
32e93fb7
DB
1293 ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
1294 ctx->sec_maps = section;
1295 ctx->sec_done[section] = true;
11c39b5e 1296
32e93fb7
DB
1297 if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
1298 fprintf(stderr, "Too many BPF maps in ELF section!\n");
1299 return -ENOMEM;
1300 }
11c39b5e 1301
32e93fb7
DB
1302 memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
1303 return 0;
1304}
11c39b5e 1305
32e93fb7
DB
1306static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
1307 struct bpf_elf_sec_data *data)
1308{
1309 if (data->sec_data->d_size > sizeof(ctx->license))
1310 return -ENOMEM;
11c39b5e 1311
32e93fb7
DB
1312 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
1313 ctx->sec_done[section] = true;
1314 return 0;
1315}
11c39b5e 1316
32e93fb7
DB
1317static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
1318 struct bpf_elf_sec_data *data)
1319{
1320 ctx->sym_tab = data->sec_data;
1321 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
1322 ctx->sec_done[section] = true;
11c39b5e
DB
1323 return 0;
1324}
1325
32e93fb7
DB
1326static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
1327 struct bpf_elf_sec_data *data)
11c39b5e 1328{
32e93fb7
DB
1329 ctx->str_tab = data->sec_data;
1330 ctx->sec_done[section] = true;
1331 return 0;
1332}
11c39b5e 1333
afc1a200
DB
1334static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
1335{
1336 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
1337}
1338
32e93fb7
DB
1339static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
1340{
1341 struct bpf_elf_sec_data data;
1342 int i, ret = -1;
11c39b5e 1343
32e93fb7
DB
1344 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1345 ret = bpf_fill_section_data(ctx, i, &data);
11c39b5e
DB
1346 if (ret < 0)
1347 continue;
1348
cce3d466
DB
1349 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1350 !strcmp(data.sec_name, ELF_SECTION_MAPS))
32e93fb7 1351 ret = bpf_fetch_maps(ctx, i, &data);
cce3d466
DB
1352 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1353 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
32e93fb7 1354 ret = bpf_fetch_license(ctx, i, &data);
cce3d466
DB
1355 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
1356 !strcmp(data.sec_name, ".symtab"))
32e93fb7
DB
1357 ret = bpf_fetch_symtab(ctx, i, &data);
1358 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
cce3d466 1359 !strcmp(data.sec_name, ".strtab"))
32e93fb7
DB
1360 ret = bpf_fetch_strtab(ctx, i, &data);
1361 if (ret < 0) {
afc1a200 1362 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
32a121cb 1363 i);
32e93fb7 1364 break;
11c39b5e 1365 }
32e93fb7
DB
1366 }
1367
afc1a200 1368 if (bpf_has_map_data(ctx)) {
32e93fb7
DB
1369 ret = bpf_maps_attach_all(ctx);
1370 if (ret < 0) {
1371 fprintf(stderr, "Error loading maps into kernel!\n");
1372 return ret;
11c39b5e
DB
1373 }
1374 }
1375
1376 return ret;
1377}
1378
32e93fb7 1379static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
11c39b5e 1380{
32e93fb7
DB
1381 struct bpf_elf_sec_data data;
1382 struct bpf_elf_prog prog;
1383 int ret, i, fd = -1;
11c39b5e 1384
32e93fb7
DB
1385 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1386 if (ctx->sec_done[i])
11c39b5e
DB
1387 continue;
1388
32e93fb7 1389 ret = bpf_fill_section_data(ctx, i, &data);
cce3d466
DB
1390 if (ret < 0 ||
1391 !(data.sec_hdr.sh_type == SHT_PROGBITS &&
1392 data.sec_hdr.sh_flags & SHF_EXECINSTR &&
1393 !strcmp(data.sec_name, section)))
11c39b5e
DB
1394 continue;
1395
32e93fb7
DB
1396 memset(&prog, 0, sizeof(prog));
1397 prog.type = ctx->type;
1398 prog.insns = data.sec_data->d_buf;
1399 prog.size = data.sec_data->d_size;
1400 prog.license = ctx->license;
11c39b5e 1401
f31645d1 1402 fd = bpf_prog_attach(section, &prog, ctx);
32e93fb7 1403 if (fd < 0)
afc1a200 1404 break;
11c39b5e 1405
32e93fb7 1406 ctx->sec_done[i] = true;
11c39b5e
DB
1407 break;
1408 }
1409
32e93fb7 1410 return fd;
11c39b5e
DB
1411}
1412
32e93fb7
DB
1413static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
1414 struct bpf_elf_sec_data *data_relo,
1415 struct bpf_elf_sec_data *data_insn)
11c39b5e 1416{
32e93fb7
DB
1417 Elf_Data *idata = data_insn->sec_data;
1418 GElf_Shdr *rhdr = &data_relo->sec_hdr;
1419 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
1420 struct bpf_insn *insns = idata->d_buf;
1421 unsigned int num_insns = idata->d_size / sizeof(*insns);
11c39b5e 1422
32e93fb7
DB
1423 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
1424 unsigned int ioff, rmap;
1425 GElf_Rel relo;
1426 GElf_Sym sym;
1427
1428 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
1429 return -EIO;
1430
1431 ioff = relo.r_offset / sizeof(struct bpf_insn);
1432 if (ioff >= num_insns ||
a576c6b9 1433 insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
32a121cb 1434 fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
a576c6b9
DB
1435 ioff);
1436 if (ioff < num_insns &&
1437 insns[ioff].code == (BPF_JMP | BPF_CALL))
32a121cb 1438 fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
32e93fb7 1439 return -EINVAL;
a576c6b9 1440 }
32e93fb7
DB
1441
1442 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
1443 return -EIO;
2486337a 1444 if (sym.st_shndx != ctx->sec_maps) {
32a121cb 1445 fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
2486337a
DB
1446 relo_ent, sym.st_shndx);
1447 return -EIO;
1448 }
32e93fb7
DB
1449
1450 rmap = sym.st_value / sizeof(struct bpf_elf_map);
1451 if (rmap >= ARRAY_SIZE(ctx->map_fds))
1452 return -EINVAL;
1453 if (!ctx->map_fds[rmap])
1454 return -EINVAL;
1455
1456 if (ctx->verbose)
32a121cb 1457 fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
32e93fb7
DB
1458 bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
1459 data_insn->sec_name, ioff);
11c39b5e 1460
32e93fb7
DB
1461 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
1462 insns[ioff].imm = ctx->map_fds[rmap];
1463 }
1464
1465 return 0;
1466}
1467
afc1a200
DB
1468static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
1469 bool *lderr)
32e93fb7
DB
1470{
1471 struct bpf_elf_sec_data data_relo, data_insn;
1472 struct bpf_elf_prog prog;
1473 int ret, idx, i, fd = -1;
1474
1475 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1476 ret = bpf_fill_section_data(ctx, i, &data_relo);
1477 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
11c39b5e
DB
1478 continue;
1479
32e93fb7
DB
1480 idx = data_relo.sec_hdr.sh_info;
1481 ret = bpf_fill_section_data(ctx, idx, &data_insn);
cce3d466
DB
1482 if (ret < 0 ||
1483 !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
1484 data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
1485 !strcmp(data_insn.sec_name, section)))
11c39b5e 1486 continue;
32e93fb7
DB
1487
1488 ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
1489 if (ret < 0)
11c39b5e
DB
1490 continue;
1491
32e93fb7
DB
1492 memset(&prog, 0, sizeof(prog));
1493 prog.type = ctx->type;
1494 prog.insns = data_insn.sec_data->d_buf;
1495 prog.size = data_insn.sec_data->d_size;
1496 prog.license = ctx->license;
1497
f31645d1 1498 fd = bpf_prog_attach(section, &prog, ctx);
afc1a200
DB
1499 if (fd < 0) {
1500 *lderr = true;
1501 break;
1502 }
11c39b5e 1503
32e93fb7
DB
1504 ctx->sec_done[i] = true;
1505 ctx->sec_done[idx] = true;
11c39b5e
DB
1506 break;
1507 }
1508
32e93fb7 1509 return fd;
11c39b5e
DB
1510}
1511
/*
 * Load the program in 'section'. The relocating loader is tried first
 * when map data is available; the plain loader is only used if that did
 * not produce a descriptor and did not flag a load error.
 *
 * Returns the program descriptor or a negative value.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool lderr = false;
	int fd = -1;

	if (bpf_has_map_data(ctx))
		fd = bpf_fetch_prog_relo(ctx, section, &lderr);

	if (fd >= 0 || lderr)
		return fd;

	return bpf_fetch_prog(ctx, section);
}
1524
910b543d
DB
1525static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
1526{
1527 int i;
1528
1529 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
1530 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
1531 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
1532 return i;
1533 return -1;
1534}
1535
32e93fb7 1536static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
473d7840 1537{
32e93fb7
DB
1538 struct bpf_elf_sec_data data;
1539 uint32_t map_id, key_id;
910b543d 1540 int fd, i, ret, idx;
473d7840 1541
32e93fb7
DB
1542 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1543 if (ctx->sec_done[i])
473d7840
DB
1544 continue;
1545
32e93fb7 1546 ret = bpf_fill_section_data(ctx, i, &data);
473d7840
DB
1547 if (ret < 0)
1548 continue;
1549
910b543d
DB
1550 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
1551 if (ret != 2)
32e93fb7 1552 continue;
910b543d
DB
1553
1554 idx = bpf_find_map_by_id(ctx, map_id);
1555 if (idx < 0)
473d7840
DB
1556 continue;
1557
32e93fb7
DB
1558 fd = bpf_fetch_prog_sec(ctx, data.sec_name);
1559 if (fd < 0)
473d7840
DB
1560 return -EIO;
1561
910b543d
DB
1562 ret = bpf_map_update(ctx->map_fds[idx], &key_id,
1563 &fd, BPF_ANY);
afc1a200
DB
1564 if (ret < 0) {
1565 if (errno == E2BIG)
1566 fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
1567 key_id, map_id);
1568 return -errno;
1569 }
473d7840 1570
32e93fb7 1571 ctx->sec_done[i] = true;
473d7840
DB
1572 }
1573
1574 return 0;
1575}
1576
32e93fb7 1577static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
11c39b5e 1578{
32e93fb7
DB
1579 struct stat st;
1580 int ret;
11c39b5e 1581
32e93fb7 1582 memset(&ctx->stat, 0, sizeof(ctx->stat));
11c39b5e 1583
32e93fb7
DB
1584 ret = fstat(ctx->obj_fd, &st);
1585 if (ret < 0) {
1586 fprintf(stderr, "Stat of elf file failed: %s\n",
1587 strerror(errno));
1588 return;
1589 }
11c39b5e 1590
32e93fb7
DB
1591 ctx->stat.st_dev = st.st_dev;
1592 ctx->stat.st_ino = st.st_ino;
1593}
1594
f6793eec
DB
/*
 * Read the next "<id> <path>" entry from a pinning database stream.
 *
 * Leading blanks and tabs are ignored; empty lines and lines starting
 * with '#' are skipped. The id is parsed with %i.
 *
 * Returns 1 with *id and path filled in, 0 at end of file, or -1 for a
 * line that does not parse, in which case the offending text (without
 * its leading whitespace) is copied to path.
 *
 * path must be able to hold PATH_MAX bytes.
 */
static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
{
	char line[PATH_MAX];

	while (fgets(line, sizeof(line), fp) != NULL) {
		const char *entry = line + strspn(line, " \t");

		if (*entry == '\0' || *entry == '\n' || *entry == '#')
			continue;

		if (sscanf(entry, "%i %s\n", id, path) == 2)
			return 1;
		if (sscanf(entry, "%i %s #", id, path) == 2)
			return 1;

		strcpy(path, entry);
		return -1;
	}

	return 0;
}
1619
1620static bool bpf_pinning_reserved(uint32_t pinning)
1621{
1622 switch (pinning) {
1623 case PIN_NONE:
1624 case PIN_OBJECT_NS:
1625 case PIN_GLOBAL_NS:
1626 return true;
1627 default:
1628 return false;
1629 }
1630}
1631
1632static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
1633{
1634 struct bpf_hash_entry *entry;
1635 char subpath[PATH_MAX];
1636 uint32_t pinning;
1637 FILE *fp;
1638 int ret;
1639
1640 fp = fopen(db_file, "r");
1641 if (!fp)
1642 return;
1643
1644 memset(subpath, 0, sizeof(subpath));
1645 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
1646 if (ret == -1) {
1647 fprintf(stderr, "Database %s is corrupted at: %s\n",
1648 db_file, subpath);
1649 fclose(fp);
1650 return;
1651 }
1652
1653 if (bpf_pinning_reserved(pinning)) {
32a121cb
SH
1654 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
1655 db_file, pinning);
f6793eec
DB
1656 continue;
1657 }
1658
1659 entry = malloc(sizeof(*entry));
1660 if (!entry) {
1661 fprintf(stderr, "No memory left for db entry!\n");
1662 continue;
1663 }
1664
1665 entry->pinning = pinning;
1666 entry->subpath = strdup(subpath);
1667 if (!entry->subpath) {
1668 fprintf(stderr, "No memory left for db entry!\n");
1669 free(entry);
1670 continue;
1671 }
1672
1673 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1674 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
1675 }
1676
1677 fclose(fp);
1678}
1679
1680static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
1681{
1682 struct bpf_hash_entry *entry;
1683 int i;
1684
1685 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
1686 while ((entry = ctx->ht[i]) != NULL) {
1687 ctx->ht[i] = entry->next;
1688 free((char *)entry->subpath);
1689 free(entry);
1690 }
1691 }
1692}
1693
8187b012
DB
1694static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
1695{
1696 if (ctx->elf_hdr.e_type != ET_REL ||
e77fa41d
DB
1697 (ctx->elf_hdr.e_machine != EM_NONE &&
1698 ctx->elf_hdr.e_machine != EM_BPF) ||
8187b012
DB
1699 ctx->elf_hdr.e_version != EV_CURRENT) {
1700 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
1701 return -EINVAL;
1702 }
1703
1704 switch (ctx->elf_hdr.e_ident[EI_DATA]) {
1705 default:
1706 fprintf(stderr, "ELF format error, wrong endianness info?\n");
1707 return -EINVAL;
1708 case ELFDATA2LSB:
1709 if (htons(1) == 1) {
1710 fprintf(stderr,
1711 "We are big endian, eBPF object is little endian!\n");
1712 return -EIO;
1713 }
1714 break;
1715 case ELFDATA2MSB:
1716 if (htons(1) != 1) {
1717 fprintf(stderr,
1718 "We are little endian, eBPF object is big endian!\n");
1719 return -EIO;
1720 }
1721 break;
1722 }
1723
1724 return 0;
1725}
1726
32e93fb7
DB
1727static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
1728 enum bpf_prog_type type, bool verbose)
1729{
1730 int ret = -EINVAL;
1731
1732 if (elf_version(EV_CURRENT) == EV_NONE ||
1733 bpf_init_env(pathname))
1734 return ret;
1735
1736 memset(ctx, 0, sizeof(*ctx));
1737 ctx->verbose = verbose;
1738 ctx->type = type;
1739
1740 ctx->obj_fd = open(pathname, O_RDONLY);
1741 if (ctx->obj_fd < 0)
1742 return ctx->obj_fd;
1743
1744 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
1745 if (!ctx->elf_fd) {
11c39b5e 1746 ret = -EINVAL;
32e93fb7 1747 goto out_fd;
11c39b5e
DB
1748 }
1749
8187b012
DB
1750 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
1751 ret = -EINVAL;
1752 goto out_fd;
1753 }
1754
32e93fb7
DB
1755 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
1756 &ctx->elf_hdr) {
11c39b5e
DB
1757 ret = -EIO;
1758 goto out_elf;
1759 }
1760
8187b012
DB
1761 ret = bpf_elf_check_ehdr(ctx);
1762 if (ret < 0)
1763 goto out_elf;
1764
32e93fb7
DB
1765 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
1766 sizeof(*(ctx->sec_done)));
1767 if (!ctx->sec_done) {
11c39b5e
DB
1768 ret = -ENOMEM;
1769 goto out_elf;
1770 }
1771
f31645d1
DB
1772 if (ctx->verbose && bpf_log_realloc(ctx)) {
1773 ret = -ENOMEM;
1774 goto out_free;
1775 }
1776
32e93fb7 1777 bpf_save_finfo(ctx);
f6793eec
DB
1778 bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
1779
32e93fb7 1780 return 0;
f31645d1
DB
1781out_free:
1782 free(ctx->sec_done);
32e93fb7
DB
1783out_elf:
1784 elf_end(ctx->elf_fd);
1785out_fd:
1786 close(ctx->obj_fd);
1787 return ret;
1788}
d937a74b 1789
32e93fb7
DB
1790static int bpf_maps_count(struct bpf_elf_ctx *ctx)
1791{
1792 int i, count = 0;
11c39b5e 1793
32e93fb7
DB
1794 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
1795 if (!ctx->map_fds[i])
1796 break;
1797 count++;
1798 }
473d7840 1799
32e93fb7
DB
1800 return count;
1801}
6256f8c9 1802
32e93fb7
DB
1803static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
1804{
1805 int i;
1806
1807 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
1808 if (ctx->map_fds[i])
1809 close(ctx->map_fds[i]);
473d7840 1810 }
32e93fb7
DB
1811}
1812
1813static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
1814{
1815 if (failure)
1816 bpf_maps_teardown(ctx);
473d7840 1817
f6793eec 1818 bpf_hash_destroy(ctx);
f31645d1 1819
32e93fb7 1820 free(ctx->sec_done);
f31645d1
DB
1821 free(ctx->log);
1822
32e93fb7
DB
1823 elf_end(ctx->elf_fd);
1824 close(ctx->obj_fd);
1825}
6256f8c9 1826
/*
 * File-scope ELF loader context. bpf_obj_open() initialises and fills it;
 * bpf_send_map_fds() later reads the map descriptors, map definitions and
 * object stat from it.
 */
32e93fb7 1827static struct bpf_elf_ctx __ctx;
6256f8c9 1828
32e93fb7
DB
1829static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
1830 const char *section, bool verbose)
1831{
1832 struct bpf_elf_ctx *ctx = &__ctx;
1833 int fd = 0, ret;
6256f8c9 1834
32e93fb7
DB
1835 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
1836 if (ret < 0) {
1837 fprintf(stderr, "Cannot initialize ELF context!\n");
1838 return ret;
1839 }
6256f8c9 1840
32e93fb7
DB
1841 ret = bpf_fetch_ancillary(ctx);
1842 if (ret < 0) {
1843 fprintf(stderr, "Error fetching ELF ancillary data!\n");
1844 goto out;
1845 }
1846
1847 fd = bpf_fetch_prog_sec(ctx, section);
1848 if (fd < 0) {
1849 fprintf(stderr, "Error fetching program/map!\n");
1850 ret = fd;
1851 goto out;
1852 }
1853
1854 ret = bpf_fill_prog_arrays(ctx);
1855 if (ret < 0)
1856 fprintf(stderr, "Error filling program arrays!\n");
11c39b5e 1857out:
32e93fb7
DB
1858 bpf_elf_ctx_destroy(ctx, ret < 0);
1859 if (ret < 0) {
1860 if (fd)
1861 close(fd);
1862 return ret;
1863 }
1864
1865 return fd;
6256f8c9 1866}
11c39b5e 1867
6256f8c9 1868static int
4bd62446
DB
1869bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
1870 const struct bpf_map_data *aux, unsigned int entries)
6256f8c9
DB
1871{
1872 struct bpf_map_set_msg msg;
1873 int *cmsg_buf, min_fd;
1874 char *amsg_buf;
1875 int i;
1876
1877 memset(&msg, 0, sizeof(msg));
1878
1879 msg.aux.uds_ver = BPF_SCM_AUX_VER;
4bd62446 1880 msg.aux.num_ent = entries;
6256f8c9
DB
1881
1882 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
1883 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
1884
1885 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
1886 amsg_buf = (char *)msg.aux.ent;
1887
4bd62446 1888 for (i = 0; i < entries; i += min_fd) {
6256f8c9
DB
1889 int ret;
1890
4bd62446 1891 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
6256f8c9
DB
1892 bpf_map_set_init_single(&msg, min_fd);
1893
1894 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
1895 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
1896
1897 ret = sendmsg(fd, &msg.hdr, 0);
1898 if (ret <= 0)
1899 return ret ? : -1;
1900 }
1901
1902 return 0;
11c39b5e
DB
1903}
1904
4bd62446
DB
1905static int
1906bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
1907 unsigned int entries)
1908{
1909 struct bpf_map_set_msg msg;
1910 int *cmsg_buf, min_fd;
1911 char *amsg_buf, *mmsg_buf;
1912 unsigned int needed = 1;
1913 int i;
1914
1915 cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
1916 amsg_buf = (char *)msg.aux.ent;
1917 mmsg_buf = (char *)&msg.aux;
1918
1919 for (i = 0; i < min(entries, needed); i += min_fd) {
1920 struct cmsghdr *cmsg;
1921 int ret;
1922
1923 min_fd = min(entries, entries - i);
1924 bpf_map_set_init_single(&msg, min_fd);
1925
1926 ret = recvmsg(fd, &msg.hdr, 0);
1927 if (ret <= 0)
1928 return ret ? : -1;
1929
1930 cmsg = CMSG_FIRSTHDR(&msg.hdr);
1931 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
1932 return -EINVAL;
1933 if (msg.hdr.msg_flags & MSG_CTRUNC)
1934 return -EIO;
1935 if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
1936 return -ENOSYS;
1937
1938 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
1939 if (min_fd > entries || min_fd <= 0)
1940 return -EINVAL;
1941
1942 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
1943 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
1944 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
1945
1946 needed = aux->num_ent;
1947 }
1948
1949 return 0;
1950}
1951
1952int bpf_send_map_fds(const char *path, const char *obj)
6256f8c9 1953{
32e93fb7 1954 struct bpf_elf_ctx *ctx = &__ctx;
6256f8c9
DB
1955 struct sockaddr_un addr;
1956 struct bpf_map_data bpf_aux;
1957 int fd, ret;
1958
1959 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
1960 if (fd < 0) {
1961 fprintf(stderr, "Cannot open socket: %s\n",
1962 strerror(errno));
1963 return -1;
1964 }
1965
1966 memset(&addr, 0, sizeof(addr));
1967 addr.sun_family = AF_UNIX;
1968 strncpy(addr.sun_path, path, sizeof(addr.sun_path));
1969
1970 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
1971 if (ret < 0) {
1972 fprintf(stderr, "Cannot connect to %s: %s\n",
1973 path, strerror(errno));
1974 return -1;
1975 }
1976
1977 memset(&bpf_aux, 0, sizeof(bpf_aux));
1978
32e93fb7
DB
1979 bpf_aux.fds = ctx->map_fds;
1980 bpf_aux.ent = ctx->maps;
1981 bpf_aux.st = &ctx->stat;
6256f8c9 1982 bpf_aux.obj = obj;
6256f8c9 1983
4bd62446 1984 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
32e93fb7 1985 bpf_maps_count(ctx));
6256f8c9 1986 if (ret < 0)
4bd62446
DB
1987 fprintf(stderr, "Cannot send fds to %s: %s\n",
1988 path, strerror(errno));
1989
32e93fb7 1990 bpf_maps_teardown(ctx);
4bd62446
DB
1991 close(fd);
1992 return ret;
1993}
1994
/*
 * Receive up to 'entries' map descriptors on a Unix datagram socket bound
 * at 'path'. Descriptors go to fds[], the accompanying metadata to *aux.
 *
 * Returns 0 on success, -1 when the socket cannot be created or bound,
 * or the negative result of the transfer. After a transfer attempt the
 * socket path is unlinked again.
 *
 * The socket is closed on every path, and the socket path is copied with
 * room for a terminator so sun_path is always NUL terminated.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
11c39b5e 2028#endif /* HAVE_ELF */