]> git.proxmox.com Git - mirror_iproute2.git/blame - lib/bpf.c
tc: use rta_getattr_u32
[mirror_iproute2.git] / lib / bpf.c
CommitLineData
1d129d19 1/*
e4225669 2 * bpf.c BPF common code
1d129d19
JP
3 *
4 * This program is free software; you can distribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
e4225669 9 * Authors: Daniel Borkmann <daniel@iogearbox.net>
1d129d19 10 * Jiri Pirko <jiri@resnulli.us>
e4225669 11 * Alexei Starovoitov <ast@kernel.org>
1d129d19
JP
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
473d7840 19#include <stdint.h>
1d129d19 20#include <errno.h>
11c39b5e
DB
21#include <fcntl.h>
22#include <stdarg.h>
5c5a0f3d 23#include <limits.h>
e4225669 24#include <assert.h>
1d129d19 25
11c39b5e
DB
26#ifdef HAVE_ELF
27#include <libelf.h>
28#include <gelf.h>
29#endif
30
32e93fb7
DB
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <sys/un.h>
34#include <sys/vfs.h>
35#include <sys/mount.h>
36#include <sys/syscall.h>
37#include <sys/sendfile.h>
38#include <sys/resource.h>
39
8187b012
DB
40#include <arpa/inet.h>
41
1d129d19 42#include "utils.h"
6256f8c9 43
e4225669 44#include "bpf_util.h"
6256f8c9
DB
45#include "bpf_elf.h"
46#include "bpf_scm.h"
47
e4225669
DB
/* Per program type metadata; __bpf_prog_meta[] holds one entry per
 * supported enum bpf_prog_type value.
 */
struct bpf_prog_meta {
	const char *type;	/* name matched against the "type" argument in bpf_parse() */
	const char *subdir;	/* directory name returned by bpf_prog_to_subdir() */
	const char *section;	/* default ELF section, see bpf_prog_to_default_section() */
	bool may_uds_export;	/* bpf_parse() only honours BPF_ENV_UDS / "export" when set */
};
1d129d19 54
e4225669
DB
/* Program types that get a directory below the bpf fs work dir.
 * Order matters: bpf_gen_hierarchy() creates a real directory for
 * entry 0 and links every following entry to it.
 */
static const enum bpf_prog_type __bpf_types[] = {
	BPF_PROG_TYPE_SCHED_CLS,
	BPF_PROG_TYPE_SCHED_ACT,
	BPF_PROG_TYPE_XDP,
	BPF_PROG_TYPE_LWT_IN,
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
};
67584e3a 63
e4225669
DB
/* Metadata table indexed by enum bpf_prog_type. Slots without an
 * initializer stay zeroed; the lookup helpers assert on a NULL
 * subdir/section and bpf_parse() skips entries with a NULL type.
 */
static const struct bpf_prog_meta __bpf_prog_meta[] = {
	[BPF_PROG_TYPE_SCHED_CLS] = {
		.type		= "cls",
		.subdir		= "tc",
		.section	= ELF_SECTION_CLASSIFIER,
		.may_uds_export	= true,
	},
	[BPF_PROG_TYPE_SCHED_ACT] = {
		.type		= "act",
		.subdir		= "tc",
		.section	= ELF_SECTION_ACTION,
		.may_uds_export	= true,
	},
	[BPF_PROG_TYPE_XDP] = {
		.type		= "xdp",
		.subdir		= "xdp",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_IN] = {
		.type		= "lwt_in",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_OUT] = {
		.type		= "lwt_out",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_XMIT] = {
		.type		= "lwt_xmit",
		.subdir		= "ip",
		.section	= ELF_SECTION_PROG,
	},
};
98
99static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
100{
101 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
102 __bpf_prog_meta[type].subdir);
103 return __bpf_prog_meta[type].subdir;
104}
105
106const char *bpf_prog_to_default_section(enum bpf_prog_type type)
107{
108 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
109 __bpf_prog_meta[type].section);
110 return __bpf_prog_meta[type].section;
111}
e77fa41d 112
32e93fb7
DB
113#ifdef HAVE_ELF
114static int bpf_obj_open(const char *path, enum bpf_prog_type type,
115 const char *sec, bool verbose);
116#else
117static int bpf_obj_open(const char *path, enum bpf_prog_type type,
118 const char *sec, bool verbose)
119{
120 fprintf(stderr, "No ELF library support compiled in.\n");
121 errno = ENOSYS;
122 return -1;
123}
124#endif
125
/* Convert a user space pointer into the 64 bit integer form in which
 * pointers are stored in union bpf_attr fields.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	const unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
130
/* Thin wrapper around the bpf(2) system call. When the build headers
 * do not define __NR_bpf, prints a hint and fails with ENOSYS.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifndef __NR_bpf
	fputs("No bpf syscall, kernel headers too old?\n", stderr);
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_bpf, cmd, attr, size);
#endif
}
141
91d88eeb
DB
142static int bpf_map_update(int fd, const void *key, const void *value,
143 uint64_t flags)
32e93fb7 144{
d17b136f 145 union bpf_attr attr = {};
67584e3a 146
67584e3a
ND
147 attr.map_fd = fd;
148 attr.key = bpf_ptr_to_u64(key);
149 attr.value = bpf_ptr_to_u64(value);
150 attr.flags = flags;
32e93fb7 151
91d88eeb 152 return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
32e93fb7
DB
153}
154
/* Fetch the textual cBPF program and parse its leading length field.
 *
 * @arg:          the program text itself, or a file name if @from_file
 * @from_file:    read the first line of file @arg instead of using @arg
 * @bpf_len:      receives the instruction count encoded at the start
 * @bpf_string:   receives the program text (heap buffer if read from file)
 * @need_release: set to true when *@bpf_string must be free()d by the caller
 * @separator:    character expected right after the length field
 *
 * Returns 0 on success or a negative errno value. On failure nothing
 * is left for the caller to release.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (from_file) {
		size_t tmp_len, len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string;
		FILE *fp;

		/* Worst case: length prefix plus BPF_MAXINSNS full-width ops. */
		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		if (!fgets(tmp_string, tmp_len, fp)) {
			free(tmp_string);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);

		/* Strip the trailing newline. The length may be 0 if the
		 * line starts with a NUL byte, so never index at len - 1
		 * unconditionally.
		 */
		len = strlen(tmp_string);
		if (len > 0 && tmp_string[len - 1] == '\n')
			tmp_string[len - 1] = 0;

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*need_release = false;
		*bpf_string = arg;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		/* Do not hand a dangling pointer back to the caller. */
		*bpf_string = NULL;
		*need_release = false;
		return -EINVAL;
	}

	return 0;
}
206
32e93fb7
DB
/* Parse a textual cBPF program ("<len>,<code> <jt> <jf> <k>,...") taken
 * from argv[0], or from the file named by argv[0] when @from_file is
 * set, into @bpf_ops.
 *
 * Returns the number of instructions parsed, or -EINVAL on any error
 * (bad length, malformed instruction, count mismatch).
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	const char separator = ',';
	char *bpf_string, *cursor;
	bool need_release;
	__u16 bpf_len = 0;
	int parsed = 0;
	int ret;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;

	ret = -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS)
		goto out;

	/* Each instruction starts right after a separator; stop at the
	 * end of the string or at a trailing separator.
	 */
	for (cursor = strchr(bpf_string, separator);
	     cursor && *++cursor;
	     cursor = strchr(cursor, separator)) {
		struct sock_filter *op;

		if (parsed >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			goto out;
		}

		op = &bpf_ops[parsed];
		if (sscanf(cursor, "%hu %hhu %hhu %u,",
			   &op->code, &op->jt, &op->jf, &op->k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", parsed);
			goto out;
		}

		parsed++;
	}

	if (parsed != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		goto out;
	}

	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
256
/* Print the @len classic BPF instructions carried in attribute
 * @bpf_ops to @f as: bytecode '<len>,<code> <jt> <jf> <k>,...'.
 * Nothing is printed when @len is 0.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	struct sock_filter *insn = (struct sock_filter *) RTA_DATA(bpf_ops);
	int idx;

	if (!len)
		return;

	fprintf(f, "bytecode \'%u,", len);

	/* Instructions are comma separated; the last one is followed by
	 * the closing quote instead.
	 */
	for (idx = 0; idx < len; idx++, insn++)
		fprintf(f, "%hu %hhu %hhu %u%c", insn->code, insn->jt,
			insn->jf, insn->k, idx + 1 < len ? ',' : '\'');
}
11c39b5e 274
afc1a200
DB
275static void bpf_map_pin_report(const struct bpf_elf_map *pin,
276 const struct bpf_elf_map *obj)
277{
278 fprintf(stderr, "Map specification differs from pinned file!\n");
279
280 if (obj->type != pin->type)
281 fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
282 obj->type, pin->type);
283 if (obj->size_key != pin->size_key)
284 fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
285 obj->size_key, pin->size_key);
286 if (obj->size_value != pin->size_value)
287 fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
288 obj->size_value, pin->size_value);
289 if (obj->max_elem != pin->max_elem)
290 fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
291 obj->max_elem, pin->max_elem);
4dd3f50a
DB
292 if (obj->flags != pin->flags)
293 fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
294 obj->flags, pin->flags);
afc1a200
DB
295
296 fprintf(stderr, "\n");
297}
298
91d88eeb 299static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
fb24802b 300 int length, enum bpf_prog_type type)
9e607f2e
DB
301{
302 char file[PATH_MAX], buff[4096];
d17b136f 303 struct bpf_elf_map tmp = {}, zero = {};
fb24802b 304 unsigned int val, owner_type = 0;
9e607f2e
DB
305 FILE *fp;
306
307 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
308
309 fp = fopen(file, "r");
310 if (!fp) {
311 fprintf(stderr, "No procfs support?!\n");
312 return -EIO;
313 }
314
9e607f2e
DB
315 while (fgets(buff, sizeof(buff), fp)) {
316 if (sscanf(buff, "map_type:\t%u", &val) == 1)
317 tmp.type = val;
318 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
319 tmp.size_key = val;
320 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
321 tmp.size_value = val;
322 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
323 tmp.max_elem = val;
4dd3f50a
DB
324 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
325 tmp.flags = val;
fb24802b
DB
326 else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
327 owner_type = val;
9e607f2e
DB
328 }
329
330 fclose(fp);
331
fb24802b
DB
332 /* The decision to reject this is on kernel side eventually, but
333 * at least give the user a chance to know what's wrong.
334 */
335 if (owner_type && owner_type != type)
336 fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
337 type, owner_type);
338
91d88eeb 339 if (!memcmp(&tmp, map, length)) {
9e607f2e
DB
340 return 0;
341 } else {
9e607f2e
DB
342 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
343 * so just accept it. We know we do have an eBPF fd and in this
344 * case, everything is 0. It is guaranteed that no such map exists
345 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
346 */
91d88eeb 347 if (!memcmp(&tmp, &zero, length))
9e607f2e
DB
348 return 0;
349
afc1a200 350 bpf_map_pin_report(&tmp, map);
9e607f2e
DB
351 return -EINVAL;
352 }
353}
354
91d88eeb
DB
/* Mount a bpf file system on @target. @target is first made a private
 * mount; if that fails with EINVAL it is bind mounted onto itself once
 * and the attempt is repeated. Returns 0 on success, -1 on failure.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bound = false;

	for (;;) {
		if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL) == 0)
			break;

		/* Only EINVAL is retried, and only once after the bind. */
		if (bound || errno != EINVAL) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL) != 0) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bound = true;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700") != 0) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
383
32e93fb7
DB
/* Check that @mnt can be statfs()ed and that its file system type
 * equals @magic. Returns 0 if so, -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs fs_info;

	if (statfs(mnt, &fs_info) >= 0 &&
	    (unsigned long)fs_info.f_type == magic)
		return 0;

	return -ENOENT;
}
395
396static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
397 char *mnt, int len,
398 const char * const *known_mnts)
399{
400 const char * const *ptr;
401 char type[100];
402 FILE *fp;
403
404 if (known_mnts) {
405 ptr = known_mnts;
406 while (*ptr) {
407 if (bpf_valid_mntpt(*ptr, magic) == 0) {
408 strncpy(mnt, *ptr, len - 1);
409 mnt[len - 1] = 0;
410 return mnt;
411 }
412 ptr++;
413 }
414 }
415
416 fp = fopen("/proc/mounts", "r");
417 if (fp == NULL || len != PATH_MAX)
418 return NULL;
419
420 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
421 mnt, type) == 2) {
422 if (strcmp(type, fstype) == 0)
423 break;
424 }
425
426 fclose(fp);
427 if (strcmp(type, fstype) != 0)
428 return NULL;
429
430 return mnt;
431}
432
433int bpf_trace_pipe(void)
434{
435 char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
436 static const char * const tracefs_known_mnts[] = {
437 TRACE_DIR_MNT,
438 "/sys/kernel/debug/tracing",
439 "/tracing",
440 "/trace",
441 0,
442 };
443 char tpipe[PATH_MAX];
444 const char *mnt;
445 int fd;
446
447 mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
448 sizeof(tracefs_mnt), tracefs_known_mnts);
449 if (!mnt) {
450 fprintf(stderr, "tracefs not mounted?\n");
451 return -1;
452 }
453
454 snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
455
456 fd = open(tpipe, O_RDONLY);
457 if (fd < 0)
458 return -1;
459
460 fprintf(stderr, "Running! Hang up with ^C!\n\n");
461 while (1) {
462 static char buff[4096];
463 ssize_t ret;
464
465 ret = read(fd, buff, sizeof(buff) - 1);
466 if (ret > 0) {
467 write(2, buff, ret);
468 fflush(stderr);
469 }
470 }
471
472 return 0;
473}
474
e4225669 475static int bpf_gen_global(const char *bpf_sub_dir)
91d88eeb 476{
e4225669
DB
477 char bpf_glo_dir[PATH_MAX];
478 int ret;
479
480 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/",
481 bpf_sub_dir, BPF_DIR_GLOBALS);
482
483 ret = mkdir(bpf_glo_dir, S_IRWXU);
484 if (ret && errno != EEXIST) {
485 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
486 strerror(errno));
487 return ret;
488 }
489
490 return 0;
491}
492
493static int bpf_gen_master(const char *base, const char *name)
494{
495 char bpf_sub_dir[PATH_MAX];
496 int ret;
497
498 snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name);
499
500 ret = mkdir(bpf_sub_dir, S_IRWXU);
501 if (ret && errno != EEXIST) {
502 fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir,
503 strerror(errno));
504 return ret;
505 }
506
507 return bpf_gen_global(bpf_sub_dir);
508}
509
/* Create directory @full_name and bind mount @full_link onto it; used
 * by bpf_gen_slave() when symlink() fails with EPERM.
 *
 * Returns 0 on success, otherwise the failing call's return value. The
 * directory is removed again when the bind mount fails.
 */
static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int ret;

	ret = mkdir(full_name, S_IRWXU);
	if (ret) {
		/* The caller's symlink() did not fail with EEXIST, so the
		 * name is expected to be free here.
		 */
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return ret;
	}

	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (ret) {
		/* rmdir() may overwrite errno; keep mount()'s value. */
		const int mount_errno = errno;

		rmdir(full_name);
		fprintf(stderr, "mount --bind %s %s failed: %s\n",
			full_link, full_name, strerror(mount_errno));
		errno = mount_errno;
	}

	return ret;
}
532
533static int bpf_gen_slave(const char *base, const char *name,
534 const char *link)
535{
536 char bpf_lnk_dir[PATH_MAX];
537 char bpf_sub_dir[PATH_MAX];
538 struct stat sb = {};
539 int ret;
540
541 snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
542 snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);
543
544 ret = symlink(bpf_lnk_dir, bpf_sub_dir);
545 if (ret) {
546 if (errno != EEXIST) {
547 if (errno != EPERM) {
548 fprintf(stderr, "symlink %s failed: %s\n",
549 bpf_sub_dir, strerror(errno));
550 return ret;
551 }
552
553 return bpf_slave_via_bind_mnt(bpf_sub_dir,
554 bpf_lnk_dir);
555 }
556
557 ret = lstat(bpf_sub_dir, &sb);
558 if (ret) {
559 fprintf(stderr, "lstat %s failed: %s\n",
560 bpf_sub_dir, strerror(errno));
561 return ret;
562 }
563
564 if ((sb.st_mode & S_IFMT) != S_IFLNK)
565 return bpf_gen_global(bpf_sub_dir);
566 }
567
568 return 0;
569}
570
571static int bpf_gen_hierarchy(const char *base)
572{
573 int ret, i;
574
575 ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
576 for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++)
577 ret = bpf_gen_slave(base,
578 bpf_prog_to_subdir(__bpf_types[i]),
579 bpf_prog_to_subdir(__bpf_types[0]));
580 return ret;
581}
582
583static const char *bpf_get_work_dir(enum bpf_prog_type type)
584{
585 static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
586 static char bpf_wrk_dir[PATH_MAX];
91d88eeb 587 static const char *mnt;
e4225669 588 static bool bpf_mnt_cached;
91d88eeb
DB
589 static const char * const bpf_known_mnts[] = {
590 BPF_DIR_MNT,
e4225669 591 "/bpf",
91d88eeb
DB
592 0,
593 };
91d88eeb
DB
594 int ret;
595
e4225669
DB
596 if (bpf_mnt_cached) {
597 const char *out = mnt;
598
599 if (out) {
600 snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
601 out, bpf_prog_to_subdir(type));
602 out = bpf_tmp;
603 }
604 return out;
605 }
91d88eeb 606
e4225669 607 mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
91d88eeb
DB
608 bpf_known_mnts);
609 if (!mnt) {
610 mnt = getenv(BPF_ENV_MNT);
611 if (!mnt)
612 mnt = BPF_DIR_MNT;
613 ret = bpf_mnt_fs(mnt);
614 if (ret) {
615 mnt = NULL;
616 goto out;
617 }
618 }
619
e4225669 620 snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);
91d88eeb 621
e4225669
DB
622 ret = bpf_gen_hierarchy(bpf_wrk_dir);
623 if (ret) {
91d88eeb
DB
624 mnt = NULL;
625 goto out;
626 }
627
e4225669 628 mnt = bpf_wrk_dir;
91d88eeb
DB
629out:
630 bpf_mnt_cached = true;
91d88eeb
DB
631 return mnt;
632}
633
e4225669 634static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
91d88eeb 635{
d17b136f 636 union bpf_attr attr = {};
91d88eeb
DB
637 char tmp[PATH_MAX];
638
639 if (strlen(pathname) > 2 && pathname[0] == 'm' &&
e4225669 640 pathname[1] == ':' && bpf_get_work_dir(type)) {
91d88eeb 641 snprintf(tmp, sizeof(tmp), "%s/%s",
e4225669 642 bpf_get_work_dir(type), pathname + 2);
91d88eeb
DB
643 pathname = tmp;
644 }
645
91d88eeb
DB
646 attr.pathname = bpf_ptr_to_u64(pathname);
647
648 return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
649}
650
/* Ways a program can be specified on the command line, see
 * bpf_parse(). The values also index the opt_tbl[] arrays that say
 * which modes a caller accepts.
 */
enum bpf_mode {
	CBPF_BYTECODE,	/* "bytecode" / "bc": classic BPF given inline */
	CBPF_FILE,	/* "bytecode-file" / "bcf": classic BPF read from a file */
	EBPF_OBJECT,	/* "object-file" / "obj": eBPF ELF object */
	EBPF_PINNED,	/* "object-pinned" / "pinned" / "fd": pinned eBPF object */
	BPF_MODE_MAX,	/* number of modes, used to size opt_tbl[] */
};
658
e4225669
DB
/* Parse a BPF program specification from cfg->argv and load it.
 *
 * @type:    in/out program type; if BPF_PROG_TYPE_UNSPEC on entry it is
 *           taken from an optional "type" argument, defaulting to
 *           BPF_PROG_TYPE_SCHED_CLS (eBPF modes only)
 * @mode:    receives the detected enum bpf_mode
 * @cfg:     supplies argc/argv and the cBPF ops buffer; on return
 *           object, section, uds, argc and argv are updated
 * @opt_tbl: per mode flags (indexed by enum bpf_mode) of accepted modes
 *
 * Returns the result of bpf_ops_parse(), bpf_obj_open() or
 * bpf_obj_get() depending on the mode, or -1 on an unknown mode/type.
 */
static int bpf_parse(enum bpf_prog_type *type, enum bpf_mode *mode,
		     struct bpf_cfg_in *cfg, const bool *opt_tbl)
{
	const char *file, *section, *uds_name;
	bool verbose = false;
	int i, ret, argc;
	char **argv;

	/* Local copies, presumably advanced by the NEXT_ARG*()/PREV_ARG()
	 * macros (defined outside this file) and written back at the end.
	 */
	argv = cfg->argv;
	argc = cfg->argc;

	/* The first word selects the mode, if the caller allows it. */
	if (opt_tbl[CBPF_BYTECODE] &&
	    (matches(*argv, "bytecode") == 0 ||
	     strcmp(*argv, "bc") == 0)) {
		*mode = CBPF_BYTECODE;
	} else if (opt_tbl[CBPF_FILE] &&
		   (matches(*argv, "bytecode-file") == 0 ||
		    strcmp(*argv, "bcf") == 0)) {
		*mode = CBPF_FILE;
	} else if (opt_tbl[EBPF_OBJECT] &&
		   (matches(*argv, "object-file") == 0 ||
		    strcmp(*argv, "obj") == 0)) {
		*mode = EBPF_OBJECT;
	} else if (opt_tbl[EBPF_PINNED] &&
		   (matches(*argv, "object-pinned") == 0 ||
		    matches(*argv, "pinned") == 0 ||
		    matches(*argv, "fd") == 0)) {
		*mode = EBPF_PINNED;
	} else {
		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
		return -1;
	}

	NEXT_ARG();
	file = section = uds_name = NULL;
	if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
		file = *argv;
		NEXT_ARG_FWD();

		if (*type == BPF_PROG_TYPE_UNSPEC) {
			if (argc > 0 && matches(*argv, "type") == 0) {
				NEXT_ARG();
				/* Map the type name to its enum value. */
				for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
				     i++) {
					if (!__bpf_prog_meta[i].type)
						continue;
					if (!matches(*argv,
						     __bpf_prog_meta[i].type)) {
						*type = i;
						break;
					}
				}

				if (*type == BPF_PROG_TYPE_UNSPEC) {
					fprintf(stderr, "What type is \"%s\"?\n",
						*argv);
					return -1;
				}
				NEXT_ARG_FWD();
			} else {
				*type = BPF_PROG_TYPE_SCHED_CLS;
			}
		}

		/* Optional "section" overrides the type's default. */
		section = bpf_prog_to_default_section(*type);
		if (argc > 0 && matches(*argv, "section") == 0) {
			NEXT_ARG();
			section = *argv;
			NEXT_ARG_FWD();
		}

		/* The environment takes precedence over "export". */
		if (__bpf_prog_meta[*type].may_uds_export) {
			uds_name = getenv(BPF_ENV_UDS);
			if (argc > 0 && !uds_name &&
			    matches(*argv, "export") == 0) {
				NEXT_ARG();
				uds_name = *argv;
				NEXT_ARG_FWD();
			}
		}

		if (argc > 0 && matches(*argv, "verbose") == 0) {
			verbose = true;
			NEXT_ARG_FWD();
		}

		PREV_ARG();
	}

	if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
		ret = bpf_ops_parse(argc, argv, cfg->ops, *mode == CBPF_FILE);
	else if (*mode == EBPF_OBJECT)
		ret = bpf_obj_open(file, *type, section, verbose);
	else if (*mode == EBPF_PINNED)
		ret = bpf_obj_get(file, *type);
	else
		return -1;

	cfg->object = file;
	cfg->section = section;
	cfg->uds = uds_name;
	cfg->argc = argc;
	cfg->argv = argv;

	return ret;
}
765
e4225669
DB
766static int bpf_parse_opt_tbl(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
767 const struct bpf_cfg_ops *ops, void *nl,
768 const bool *opt_tbl)
91d88eeb
DB
769{
770 struct sock_filter opcodes[BPF_MAXINSNS];
91d88eeb 771 char annotation[256];
91d88eeb
DB
772 enum bpf_mode mode;
773 int ret;
774
e4225669
DB
775 cfg->ops = opcodes;
776 ret = bpf_parse(&type, &mode, cfg, opt_tbl);
777 cfg->ops = NULL;
91d88eeb
DB
778 if (ret < 0)
779 return ret;
780
e4225669
DB
781 if (mode == CBPF_BYTECODE || mode == CBPF_FILE)
782 ops->cbpf_cb(nl, opcodes, ret);
91d88eeb 783 if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
32e93fb7 784 snprintf(annotation, sizeof(annotation), "%s:[%s]",
e4225669
DB
785 basename(cfg->object), mode == EBPF_PINNED ?
786 "*fsobj" : cfg->section);
787 ops->ebpf_cb(nl, ret, annotation);
32e93fb7
DB
788 }
789
91d88eeb
DB
790 return 0;
791}
32e93fb7 792
e4225669
DB
793int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
794 const struct bpf_cfg_ops *ops, void *nl)
795{
796 bool opt_tbl[BPF_MODE_MAX] = {};
797
798 if (ops->cbpf_cb) {
799 opt_tbl[CBPF_BYTECODE] = true;
800 opt_tbl[CBPF_FILE] = true;
801 }
802
803 if (ops->ebpf_cb) {
804 opt_tbl[EBPF_OBJECT] = true;
805 opt_tbl[EBPF_PINNED] = true;
806 }
807
808 return bpf_parse_opt_tbl(type, cfg, ops, nl, opt_tbl);
809}
810
91d88eeb
DB
811int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
812{
813 enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
814 const bool opt_tbl[BPF_MODE_MAX] = {
91d88eeb
DB
815 [EBPF_OBJECT] = true,
816 [EBPF_PINNED] = true,
817 };
818 const struct bpf_elf_map test = {
819 .type = BPF_MAP_TYPE_PROG_ARRAY,
820 .size_key = sizeof(int),
821 .size_value = sizeof(int),
822 };
e4225669
DB
823 struct bpf_cfg_in cfg = {
824 .argc = argc,
825 .argv = argv,
826 };
91d88eeb 827 int ret, prog_fd, map_fd;
91d88eeb
DB
828 enum bpf_mode mode;
829 uint32_t map_key;
830
e4225669 831 prog_fd = bpf_parse(&type, &mode, &cfg, opt_tbl);
91d88eeb
DB
832 if (prog_fd < 0)
833 return prog_fd;
834 if (key) {
835 map_key = *key;
836 } else {
e4225669 837 ret = sscanf(cfg.section, "%*i/%i", &map_key);
91d88eeb 838 if (ret != 1) {
32a121cb 839 fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
91d88eeb
DB
840 ret = -EINVAL;
841 goto out_prog;
842 }
843 }
32e93fb7 844
e4225669 845 map_fd = bpf_obj_get(map_path, type);
91d88eeb
DB
846 if (map_fd < 0) {
847 fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
848 map_path, strerror(errno));
849 ret = map_fd;
850 goto out_prog;
851 }
852
853 ret = bpf_map_selfcheck_pinned(map_fd, &test,
fb24802b
DB
854 offsetof(struct bpf_elf_map, max_elem),
855 type);
91d88eeb
DB
856 if (ret < 0) {
857 fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
858 goto out_map;
859 }
860
861 ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
862 if (ret < 0)
863 fprintf(stderr, "Map update failed: %s\n", strerror(errno));
864out_map:
865 close(map_fd);
866out_prog:
867 close(prog_fd);
868 return ret;
32e93fb7
DB
869}
870
fc4ccce0
DA
871int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
872{
873 union bpf_attr attr = {};
874
875 attr.target_fd = target_fd;
876 attr.attach_bpf_fd = prog_fd;
877 attr.attach_type = type;
878
879 return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
880}
881
882int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
883{
884 union bpf_attr attr = {};
885
886 attr.target_fd = target_fd;
887 attr.attach_type = type;
888
889 return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
890}
891
869d889e
DA
892int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
893 size_t size_insns, const char *license, char *log,
894 size_t size_log)
895{
896 union bpf_attr attr = {};
897
898 attr.prog_type = type;
899 attr.insns = bpf_ptr_to_u64(insns);
900 attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
901 attr.license = bpf_ptr_to_u64(license);
902
903 if (size_log > 0) {
904 attr.log_buf = bpf_ptr_to_u64(log);
905 attr.log_size = size_log;
906 attr.log_level = 1;
907 }
908
909 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
910}
911
6256f8c9 912#ifdef HAVE_ELF
32e93fb7
DB
/* Description of one eBPF program. The fields mirror the arguments of
 * bpf_prog_load(); the users of this struct are outside this excerpt.
 */
struct bpf_elf_prog {
	enum bpf_prog_type type;	/* program type */
	const struct bpf_insn *insns;	/* instruction array */
	size_t size;			/* presumably size of insns in bytes - TODO confirm */
	const char *license;		/* license string */
};
919
f6793eec
DB
/* Chained hash table entry mapping a custom pinning id to a path, see
 * bpf_custom_pinning().
 */
struct bpf_hash_entry {
	unsigned int pinning;		/* pinning id, the lookup key */
	const char *subpath;		/* path associated with the id */
	struct bpf_hash_entry *next;	/* next entry in the same bucket */
};
925
32e93fb7
DB
/* State kept while processing one ELF object. Only fields whose use is
 * visible in this part of the file are annotated.
 */
struct bpf_elf_ctx {
	Elf *elf_fd;
	GElf_Ehdr elf_hdr;
	Elf_Data *sym_tab;
	Elf_Data *str_tab;
	int obj_fd;
	int map_fds[ELF_MAX_MAPS];
	struct bpf_elf_map maps[ELF_MAX_MAPS];
	int sym_num;
	int map_num;
	int map_len;
	bool *sec_done;
	int sec_maps;
	char license[ELF_MAX_LICENSE_LEN];
	enum bpf_prog_type type;	/* selects the work dir used for pinning paths */
	bool verbose;			/* dump the whole log in bpf_dump_error() */
	struct bpf_elf_st stat;
	struct bpf_hash_entry *ht[256];	/* custom pinning table, see bpf_custom_pinning() */
	char *log;			/* log buffer, grown by bpf_log_realloc() */
	size_t log_size;		/* size of log in bytes */
};
947
/* Header, data and name of one ELF section. Judging by the field
 * names only; the code filling it in is outside this excerpt.
 */
struct bpf_elf_sec_data {
	GElf_Shdr sec_hdr;
	Elf_Data *sec_data;
	const char *sec_name;
};
953
/* Bundle of map related pointers. The users of this struct are outside
 * this excerpt, so the field notes below are assumptions to confirm.
 */
struct bpf_map_data {
	int *fds;			/* presumably map file descriptors - TODO confirm */
	const char *obj;		/* presumably the object file name - TODO confirm */
	struct bpf_elf_st *st;
	struct bpf_elf_map *ent;	/* presumably map specifications - TODO confirm */
};
960
f31645d1
DB
961static __check_format_string(2, 3) void
962bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
11c39b5e
DB
963{
964 va_list vl;
965
966 va_start(vl, format);
967 vfprintf(stderr, format, vl);
968 va_end(vl);
969
f31645d1 970 if (ctx->log && ctx->log[0]) {
afc1a200
DB
971 if (ctx->verbose) {
972 fprintf(stderr, "%s\n", ctx->log);
973 } else {
974 unsigned int off = 0, len = strlen(ctx->log);
975
976 if (len > BPF_MAX_LOG) {
977 off = len - BPF_MAX_LOG;
978 fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
979 off);
980 }
981 fprintf(stderr, "%s\n", ctx->log + off);
982 }
983
f31645d1
DB
984 memset(ctx->log, 0, ctx->log_size);
985 }
986}
987
988static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
989{
0f74d0f3 990 const size_t log_max = UINT_MAX >> 8;
f31645d1
DB
991 size_t log_size = ctx->log_size;
992 void *ptr;
993
994 if (!ctx->log) {
995 log_size = 65536;
0f74d0f3 996 } else if (log_size < log_max) {
f31645d1 997 log_size <<= 1;
0f74d0f3
TG
998 if (log_size > log_max)
999 log_size = log_max;
1000 } else {
1001 return -EINVAL;
d937a74b 1002 }
f31645d1
DB
1003
1004 ptr = realloc(ctx->log, log_size);
1005 if (!ptr)
1006 return -ENOMEM;
1007
1008 ctx->log = ptr;
1009 ctx->log_size = log_size;
1010
1011 return 0;
11c39b5e
DB
1012}
1013
4dd3f50a
DB
1014static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
1015 uint32_t size_value, uint32_t max_elem,
1016 uint32_t flags)
11c39b5e 1017{
d17b136f 1018 union bpf_attr attr = {};
67584e3a 1019
67584e3a
ND
1020 attr.map_type = type;
1021 attr.key_size = size_key;
1022 attr.value_size = size_value;
1023 attr.max_entries = max_elem;
4dd3f50a 1024 attr.map_flags = flags;
11c39b5e
DB
1025
1026 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
1027}
11c39b5e 1028
32e93fb7 1029static int bpf_obj_pin(int fd, const char *pathname)
11c39b5e 1030{
d17b136f 1031 union bpf_attr attr = {};
67584e3a 1032
67584e3a
ND
1033 attr.pathname = bpf_ptr_to_u64(pathname);
1034 attr.bpf_fd = fd;
32e93fb7
DB
1035
1036 return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
1037}
11c39b5e 1038
32e93fb7
DB
/* Compute the SHA-1 digest of file @object using the kernel crypto API
 * (an AF_ALG "hash" socket).
 *
 * @object: path of the file to hash
 * @out:    receives the digest
 * @len:    size of @out; must be 20, the SHA-1 digest size
 *
 * Returns 0 on success, -EINVAL on bad arguments and a negative value
 * if any socket or file operation fails.
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	/* The accepted socket is the one data is fed to and read from. */
	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is an off_t, which has no printf length
		 * modifier of its own; print it as long long.
		 */
		fprintf(stderr, "Error from sendfile (%zd vs %lld bytes): %s\n",
			size, (long long)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	if (size < 0 || (size_t)size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
1113
/* Return the hex string of the hash of object file @pathname. The
 * string is computed on the first successful call and cached; later
 * calls ignore @pathname (callers pass NULL) and return the cached
 * value. Returns NULL if hashing fails.
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool bpf_uid_cached;
	static char bpf_uid[64];

	if (!bpf_uid_cached) {
		uint8_t digest[20];

		if (bpf_obj_hash(pathname, digest, sizeof(digest))) {
			fprintf(stderr, "Object hashing failed!\n");
			return NULL;
		}

		hexstring_n2a(digest, sizeof(digest), bpf_uid,
			      sizeof(bpf_uid));
		bpf_uid_cached = true;
	}

	return bpf_uid;
}
1135
32e93fb7
DB
1136static int bpf_init_env(const char *pathname)
1137{
1138 struct rlimit limit = {
1139 .rlim_cur = RLIM_INFINITY,
1140 .rlim_max = RLIM_INFINITY,
1141 };
1142
1143 /* Don't bother in case we fail! */
1144 setrlimit(RLIMIT_MEMLOCK, &limit);
1145
e4225669 1146 if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) {
32a121cb 1147 fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
32e93fb7
DB
1148 return 0;
1149 }
1150
1151 if (!bpf_get_obj_uid(pathname))
1152 return -1;
1153
1154 return 0;
6256f8c9
DB
1155}
1156
f6793eec
DB
1157static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
1158 uint32_t pinning)
1159{
1160 struct bpf_hash_entry *entry;
1161
1162 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1163 while (entry && entry->pinning != pinning)
1164 entry = entry->next;
1165
1166 return entry ? entry->subpath : NULL;
1167}
1168
1169static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
1170 uint32_t pinning)
11c39b5e 1171{
32e93fb7
DB
1172 switch (pinning) {
1173 case PIN_OBJECT_NS:
1174 case PIN_GLOBAL_NS:
1175 return false;
1176 case PIN_NONE:
32e93fb7 1177 return true;
f6793eec
DB
1178 default:
1179 return !bpf_custom_pinning(ctx, pinning);
32e93fb7
DB
1180 }
1181}
1182
1183static void bpf_make_pathname(char *pathname, size_t len, const char *name,
f6793eec 1184 const struct bpf_elf_ctx *ctx, uint32_t pinning)
32e93fb7
DB
1185{
1186 switch (pinning) {
1187 case PIN_OBJECT_NS:
e4225669
DB
1188 snprintf(pathname, len, "%s/%s/%s",
1189 bpf_get_work_dir(ctx->type),
32e93fb7
DB
1190 bpf_get_obj_uid(NULL), name);
1191 break;
1192 case PIN_GLOBAL_NS:
e4225669
DB
1193 snprintf(pathname, len, "%s/%s/%s",
1194 bpf_get_work_dir(ctx->type),
32e93fb7
DB
1195 BPF_DIR_GLOBALS, name);
1196 break;
f6793eec 1197 default:
e4225669
DB
1198 snprintf(pathname, len, "%s/../%s/%s",
1199 bpf_get_work_dir(ctx->type),
f6793eec
DB
1200 bpf_custom_pinning(ctx, pinning), name);
1201 break;
32e93fb7
DB
1202 }
1203}
1204
f6793eec
DB
1205static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
1206 uint32_t pinning)
32e93fb7
DB
1207{
1208 char pathname[PATH_MAX];
1209
e4225669 1210 if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
32e93fb7
DB
1211 return 0;
1212
f6793eec 1213 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
e4225669 1214 return bpf_obj_get(pathname, ctx->type);
32e93fb7
DB
1215}
1216
e4225669 1217static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
32e93fb7 1218{
f6793eec 1219 char tmp[PATH_MAX];
32e93fb7
DB
1220 int ret;
1221
e4225669 1222 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type),
f6793eec
DB
1223 bpf_get_obj_uid(NULL));
1224
1225 ret = mkdir(tmp, S_IRWXU);
1226 if (ret && errno != EEXIST) {
1227 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
1228 return ret;
1229 }
1230
1231 return 0;
1232}
1233
e4225669
DB
1234static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
1235 const char *todo)
f6793eec
DB
1236{
1237 char tmp[PATH_MAX], rem[PATH_MAX], *sub;
1238 int ret;
1239
e4225669 1240 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
f6793eec
DB
1241 snprintf(rem, sizeof(rem), "%s/", todo);
1242 sub = strtok(rem, "/");
32e93fb7 1243
f6793eec
DB
1244 while (sub) {
1245 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
1246 return -EINVAL;
1247
1248 strcat(tmp, sub);
1249 strcat(tmp, "/");
32e93fb7 1250
f6793eec 1251 ret = mkdir(tmp, S_IRWXU);
32e93fb7 1252 if (ret && errno != EEXIST) {
f6793eec 1253 fprintf(stderr, "mkdir %s failed: %s\n", tmp,
32e93fb7
DB
1254 strerror(errno));
1255 return ret;
1256 }
f6793eec
DB
1257
1258 sub = strtok(NULL, "/");
32e93fb7
DB
1259 }
1260
f6793eec
DB
1261 return 0;
1262}
1263
1264static int bpf_place_pinned(int fd, const char *name,
1265 const struct bpf_elf_ctx *ctx, uint32_t pinning)
1266{
1267 char pathname[PATH_MAX];
1268 const char *tmp;
1269 int ret = 0;
1270
e4225669 1271 if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
f6793eec
DB
1272 return 0;
1273
1274 if (pinning == PIN_OBJECT_NS)
e4225669 1275 ret = bpf_make_obj_path(ctx);
f6793eec 1276 else if ((tmp = bpf_custom_pinning(ctx, pinning)))
e4225669 1277 ret = bpf_make_custom_path(ctx, tmp);
f6793eec
DB
1278 if (ret < 0)
1279 return ret;
1280
1281 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
32e93fb7
DB
1282 return bpf_obj_pin(fd, pathname);
1283}
1284
f31645d1
DB
1285static void bpf_prog_report(int fd, const char *section,
1286 const struct bpf_elf_prog *prog,
1287 struct bpf_elf_ctx *ctx)
32e93fb7 1288{
afc1a200
DB
1289 unsigned int insns = prog->size / sizeof(struct bpf_insn);
1290
1291 fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
f31645d1
DB
1292 fd < 0 ? "rejected: " : "loaded",
1293 fd < 0 ? strerror(errno) : "",
1294 fd < 0 ? errno : fd);
1295
1296 fprintf(stderr, " - Type: %u\n", prog->type);
afc1a200
DB
1297 fprintf(stderr, " - Instructions: %u (%u over limit)\n",
1298 insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
f31645d1
DB
1299 fprintf(stderr, " - License: %s\n\n", prog->license);
1300
1301 bpf_dump_error(ctx, "Verifier analysis:\n\n");
1302}
32e93fb7 1303
f31645d1
DB
1304static int bpf_prog_attach(const char *section,
1305 const struct bpf_elf_prog *prog,
1306 struct bpf_elf_ctx *ctx)
1307{
1308 int tries = 0, fd;
1309retry:
32e93fb7
DB
1310 errno = 0;
1311 fd = bpf_prog_load(prog->type, prog->insns, prog->size,
f31645d1
DB
1312 prog->license, ctx->log, ctx->log_size);
1313 if (fd < 0 || ctx->verbose) {
1314 /* The verifier log is pretty chatty, sometimes so chatty
1315 * on larger programs, that we could fail to dump everything
1316 * into our buffer. Still, try to give a debuggable error
1317 * log for the user, so enlarge it and re-fail.
1318 */
1319 if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
0f74d0f3 1320 if (tries++ < 10 && !bpf_log_realloc(ctx))
f31645d1
DB
1321 goto retry;
1322
32a121cb 1323 fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
f31645d1
DB
1324 ctx->log_size, tries);
1325 return fd;
1326 }
1327
1328 bpf_prog_report(fd, section, prog, ctx);
32e93fb7
DB
1329 }
1330
1331 return fd;
1332}
1333
f31645d1
DB
1334static void bpf_map_report(int fd, const char *name,
1335 const struct bpf_elf_map *map,
1336 struct bpf_elf_ctx *ctx)
1337{
1338 fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
1339 fd < 0 ? "rejected: " : "loaded",
1340 fd < 0 ? strerror(errno) : "",
1341 fd < 0 ? errno : fd);
1342
1343 fprintf(stderr, " - Type: %u\n", map->type);
1344 fprintf(stderr, " - Identifier: %u\n", map->id);
1345 fprintf(stderr, " - Pinning: %u\n", map->pinning);
1346 fprintf(stderr, " - Size key: %u\n", map->size_key);
1347 fprintf(stderr, " - Size value: %u\n", map->size_value);
4dd3f50a
DB
1348 fprintf(stderr, " - Max elems: %u\n", map->max_elem);
1349 fprintf(stderr, " - Flags: %#x\n\n", map->flags);
f31645d1
DB
1350}
1351
32e93fb7 1352static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
f31645d1 1353 struct bpf_elf_ctx *ctx)
32e93fb7
DB
1354{
1355 int fd, ret;
1356
f6793eec 1357 fd = bpf_probe_pinned(name, ctx, map->pinning);
32e93fb7 1358 if (fd > 0) {
91d88eeb
DB
1359 ret = bpf_map_selfcheck_pinned(fd, map,
1360 offsetof(struct bpf_elf_map,
fb24802b 1361 id), ctx->type);
9e607f2e
DB
1362 if (ret < 0) {
1363 close(fd);
1364 fprintf(stderr, "Map \'%s\' self-check failed!\n",
1365 name);
1366 return ret;
1367 }
f31645d1 1368 if (ctx->verbose)
32e93fb7
DB
1369 fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
1370 name);
1371 return fd;
1372 }
1373
1374 errno = 0;
1375 fd = bpf_map_create(map->type, map->size_key, map->size_value,
4dd3f50a 1376 map->max_elem, map->flags);
f31645d1
DB
1377 if (fd < 0 || ctx->verbose) {
1378 bpf_map_report(fd, name, map, ctx);
32e93fb7
DB
1379 if (fd < 0)
1380 return fd;
1381 }
1382
f6793eec 1383 ret = bpf_place_pinned(fd, name, ctx, map->pinning);
32e93fb7
DB
1384 if (ret < 0 && errno != EEXIST) {
1385 fprintf(stderr, "Could not pin %s map: %s\n", name,
1386 strerror(errno));
1387 close(fd);
1388 return ret;
1389 }
1390
1391 return fd;
1392}
1393
32e93fb7
DB
1394static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
1395 const GElf_Sym *sym)
1396{
1397 return ctx->str_tab->d_buf + sym->st_name;
1398}
1399
1400static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
1401{
1402 GElf_Sym sym;
11c39b5e
DB
1403 int i;
1404
32e93fb7
DB
1405 for (i = 0; i < ctx->sym_num; i++) {
1406 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1407 continue;
1408
5230a2ed
DB
1409 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1410 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
32e93fb7 1411 sym.st_shndx != ctx->sec_maps ||
e4225669 1412 sym.st_value / ctx->map_len != which)
32e93fb7
DB
1413 continue;
1414
1415 return bpf_str_tab_name(ctx, &sym);
11c39b5e 1416 }
32e93fb7
DB
1417
1418 return NULL;
11c39b5e
DB
1419}
1420
32e93fb7 1421static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
11c39b5e 1422{
32e93fb7
DB
1423 const char *map_name;
1424 int i, fd;
11c39b5e 1425
32e93fb7
DB
1426 for (i = 0; i < ctx->map_num; i++) {
1427 map_name = bpf_map_fetch_name(ctx, i);
1428 if (!map_name)
1429 return -EIO;
11c39b5e 1430
f31645d1 1431 fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
32e93fb7
DB
1432 if (fd < 0)
1433 return fd;
11c39b5e 1434
32e93fb7 1435 ctx->map_fds[i] = fd;
11c39b5e
DB
1436 }
1437
1438 return 0;
11c39b5e
DB
1439}
1440
e4225669
DB
1441static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
1442{
1443 int i, num = 0;
1444 GElf_Sym sym;
1445
1446 for (i = 0; i < ctx->sym_num; i++) {
1447 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1448 continue;
1449
1450 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1451 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
1452 sym.st_shndx != ctx->sec_maps)
1453 continue;
1454 num++;
1455 }
1456
1457 return num;
1458}
1459
32e93fb7
DB
1460static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
1461 struct bpf_elf_sec_data *data)
11c39b5e 1462{
32e93fb7 1463 Elf_Data *sec_edata;
11c39b5e
DB
1464 GElf_Shdr sec_hdr;
1465 Elf_Scn *sec_fd;
11c39b5e
DB
1466 char *sec_name;
1467
32e93fb7 1468 memset(data, 0, sizeof(*data));
11c39b5e 1469
32e93fb7 1470 sec_fd = elf_getscn(ctx->elf_fd, section);
11c39b5e
DB
1471 if (!sec_fd)
1472 return -EINVAL;
11c39b5e
DB
1473 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
1474 return -EIO;
1475
32e93fb7 1476 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
11c39b5e
DB
1477 sec_hdr.sh_name);
1478 if (!sec_name || !sec_hdr.sh_size)
1479 return -ENOENT;
1480
1481 sec_edata = elf_getdata(sec_fd, NULL);
1482 if (!sec_edata || elf_getdata(sec_fd, sec_edata))
1483 return -EIO;
1484
32e93fb7 1485 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
11c39b5e 1486
32e93fb7
DB
1487 data->sec_name = sec_name;
1488 data->sec_data = sec_edata;
11c39b5e
DB
1489 return 0;
1490}
1491
e4225669
DB
/* Leading members of a map definition. bpf_fetch_maps_end() refuses
 * per-map records that are smaller than this structure.
 */
struct bpf_elf_map_min {
	__u32 type;		/* map type */
	__u32 size_key;		/* key size */
	__u32 size_value;	/* value size */
	__u32 max_elem;		/* maximum number of elements */
};
11c39b5e 1498
e4225669
DB
1499static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
1500 struct bpf_elf_sec_data *data)
1501{
1502 ctx->map_num = data->sec_data->d_size;
32e93fb7
DB
1503 ctx->sec_maps = section;
1504 ctx->sec_done[section] = true;
11c39b5e 1505
e4225669 1506 if (ctx->map_num > sizeof(ctx->maps)) {
32e93fb7
DB
1507 fprintf(stderr, "Too many BPF maps in ELF section!\n");
1508 return -ENOMEM;
1509 }
11c39b5e 1510
e4225669
DB
1511 memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
1512 return 0;
1513}
1514
1515static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
1516{
1517 GElf_Sym sym;
1518 int off, i;
1519
1520 for (off = 0; off < end; off += ctx->map_len) {
1521 /* Order doesn't need to be linear here, hence we walk
1522 * the table again.
1523 */
1524 for (i = 0; i < ctx->sym_num; i++) {
1525 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1526 continue;
1527 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1528 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
1529 sym.st_shndx != ctx->sec_maps)
1530 continue;
1531 if (sym.st_value == off)
1532 break;
1533 if (i == ctx->sym_num - 1)
1534 return -1;
1535 }
1536 }
1537
1538 return off == end ? 0 : -1;
1539}
1540
1541static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
1542{
1543 struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
1544 int i, sym_num = bpf_map_num_sym(ctx);
1545 __u8 *buff;
1546
1547 if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
1548 fprintf(stderr, "%u maps not supported in current map section!\n",
1549 sym_num);
1550 return -EINVAL;
1551 }
1552
1553 if (ctx->map_num % sym_num != 0 ||
1554 ctx->map_num % sizeof(__u32) != 0) {
1555 fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
1556 return -EINVAL;
1557 }
1558
1559 ctx->map_len = ctx->map_num / sym_num;
1560 if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
1561 fprintf(stderr, "Different struct bpf_elf_map in use!\n");
1562 return -EINVAL;
1563 }
1564
1565 if (ctx->map_len == sizeof(struct bpf_elf_map)) {
1566 ctx->map_num = sym_num;
1567 return 0;
1568 } else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
1569 fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
1570 return -EINVAL;
1571 } else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
1572 fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
1573 return -EINVAL;
1574 }
1575
1576 ctx->map_num = sym_num;
1577 for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
1578 i++, buff += ctx->map_len) {
1579 /* The fixup leaves the rest of the members as zero, which
1580 * is fine currently, but option exist to set some other
1581 * default value as well when needed in future.
1582 */
1583 memcpy(&fixup[i], buff, ctx->map_len);
1584 }
1585
1586 memcpy(ctx->maps, fixup, sizeof(fixup));
1587
1588 printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
1589 sizeof(struct bpf_elf_map) - ctx->map_len);
32e93fb7
DB
1590 return 0;
1591}
11c39b5e 1592
32e93fb7
DB
1593static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
1594 struct bpf_elf_sec_data *data)
1595{
1596 if (data->sec_data->d_size > sizeof(ctx->license))
1597 return -ENOMEM;
11c39b5e 1598
32e93fb7
DB
1599 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
1600 ctx->sec_done[section] = true;
1601 return 0;
1602}
11c39b5e 1603
32e93fb7
DB
1604static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
1605 struct bpf_elf_sec_data *data)
1606{
1607 ctx->sym_tab = data->sec_data;
1608 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
1609 ctx->sec_done[section] = true;
11c39b5e
DB
1610 return 0;
1611}
1612
32e93fb7
DB
1613static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
1614 struct bpf_elf_sec_data *data)
11c39b5e 1615{
32e93fb7
DB
1616 ctx->str_tab = data->sec_data;
1617 ctx->sec_done[section] = true;
1618 return 0;
1619}
11c39b5e 1620
afc1a200
DB
1621static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
1622{
1623 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
1624}
1625
32e93fb7
DB
1626static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
1627{
1628 struct bpf_elf_sec_data data;
1629 int i, ret = -1;
11c39b5e 1630
32e93fb7
DB
1631 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1632 ret = bpf_fill_section_data(ctx, i, &data);
11c39b5e
DB
1633 if (ret < 0)
1634 continue;
1635
cce3d466
DB
1636 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1637 !strcmp(data.sec_name, ELF_SECTION_MAPS))
e4225669 1638 ret = bpf_fetch_maps_begin(ctx, i, &data);
cce3d466
DB
1639 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1640 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
32e93fb7 1641 ret = bpf_fetch_license(ctx, i, &data);
cce3d466
DB
1642 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
1643 !strcmp(data.sec_name, ".symtab"))
32e93fb7
DB
1644 ret = bpf_fetch_symtab(ctx, i, &data);
1645 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
cce3d466 1646 !strcmp(data.sec_name, ".strtab"))
32e93fb7
DB
1647 ret = bpf_fetch_strtab(ctx, i, &data);
1648 if (ret < 0) {
afc1a200 1649 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
32a121cb 1650 i);
e4225669 1651 return ret;
11c39b5e 1652 }
32e93fb7
DB
1653 }
1654
afc1a200 1655 if (bpf_has_map_data(ctx)) {
e4225669
DB
1656 ret = bpf_fetch_maps_end(ctx);
1657 if (ret < 0) {
1658 fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
1659 return ret;
1660 }
1661
32e93fb7
DB
1662 ret = bpf_maps_attach_all(ctx);
1663 if (ret < 0) {
1664 fprintf(stderr, "Error loading maps into kernel!\n");
1665 return ret;
11c39b5e
DB
1666 }
1667 }
1668
1669 return ret;
1670}
1671
e4225669
DB
1672static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
1673 bool *sseen)
11c39b5e 1674{
32e93fb7
DB
1675 struct bpf_elf_sec_data data;
1676 struct bpf_elf_prog prog;
1677 int ret, i, fd = -1;
11c39b5e 1678
32e93fb7
DB
1679 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1680 if (ctx->sec_done[i])
11c39b5e
DB
1681 continue;
1682
32e93fb7 1683 ret = bpf_fill_section_data(ctx, i, &data);
cce3d466
DB
1684 if (ret < 0 ||
1685 !(data.sec_hdr.sh_type == SHT_PROGBITS &&
1686 data.sec_hdr.sh_flags & SHF_EXECINSTR &&
1687 !strcmp(data.sec_name, section)))
11c39b5e
DB
1688 continue;
1689
e4225669
DB
1690 *sseen = true;
1691
32e93fb7
DB
1692 memset(&prog, 0, sizeof(prog));
1693 prog.type = ctx->type;
1694 prog.insns = data.sec_data->d_buf;
1695 prog.size = data.sec_data->d_size;
1696 prog.license = ctx->license;
11c39b5e 1697
f31645d1 1698 fd = bpf_prog_attach(section, &prog, ctx);
32e93fb7 1699 if (fd < 0)
e4225669 1700 return fd;
11c39b5e 1701
32e93fb7 1702 ctx->sec_done[i] = true;
11c39b5e
DB
1703 break;
1704 }
1705
32e93fb7 1706 return fd;
11c39b5e
DB
1707}
1708
32e93fb7
DB
1709static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
1710 struct bpf_elf_sec_data *data_relo,
1711 struct bpf_elf_sec_data *data_insn)
11c39b5e 1712{
32e93fb7
DB
1713 Elf_Data *idata = data_insn->sec_data;
1714 GElf_Shdr *rhdr = &data_relo->sec_hdr;
1715 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
1716 struct bpf_insn *insns = idata->d_buf;
1717 unsigned int num_insns = idata->d_size / sizeof(*insns);
11c39b5e 1718
32e93fb7
DB
1719 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
1720 unsigned int ioff, rmap;
1721 GElf_Rel relo;
1722 GElf_Sym sym;
1723
1724 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
1725 return -EIO;
1726
1727 ioff = relo.r_offset / sizeof(struct bpf_insn);
1728 if (ioff >= num_insns ||
a576c6b9 1729 insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
32a121cb 1730 fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
a576c6b9
DB
1731 ioff);
1732 if (ioff < num_insns &&
1733 insns[ioff].code == (BPF_JMP | BPF_CALL))
32a121cb 1734 fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
32e93fb7 1735 return -EINVAL;
a576c6b9 1736 }
32e93fb7
DB
1737
1738 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
1739 return -EIO;
2486337a 1740 if (sym.st_shndx != ctx->sec_maps) {
32a121cb 1741 fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
2486337a
DB
1742 relo_ent, sym.st_shndx);
1743 return -EIO;
1744 }
32e93fb7 1745
e4225669 1746 rmap = sym.st_value / ctx->map_len;
32e93fb7
DB
1747 if (rmap >= ARRAY_SIZE(ctx->map_fds))
1748 return -EINVAL;
1749 if (!ctx->map_fds[rmap])
1750 return -EINVAL;
1751
1752 if (ctx->verbose)
32a121cb 1753 fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
32e93fb7
DB
1754 bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
1755 data_insn->sec_name, ioff);
11c39b5e 1756
32e93fb7
DB
1757 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
1758 insns[ioff].imm = ctx->map_fds[rmap];
1759 }
1760
1761 return 0;
1762}
1763
afc1a200 1764static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
e4225669 1765 bool *lderr, bool *sseen)
32e93fb7
DB
1766{
1767 struct bpf_elf_sec_data data_relo, data_insn;
1768 struct bpf_elf_prog prog;
1769 int ret, idx, i, fd = -1;
1770
1771 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1772 ret = bpf_fill_section_data(ctx, i, &data_relo);
1773 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
11c39b5e
DB
1774 continue;
1775
32e93fb7 1776 idx = data_relo.sec_hdr.sh_info;
e4225669 1777
32e93fb7 1778 ret = bpf_fill_section_data(ctx, idx, &data_insn);
cce3d466
DB
1779 if (ret < 0 ||
1780 !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
1781 data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
1782 !strcmp(data_insn.sec_name, section)))
11c39b5e 1783 continue;
32e93fb7 1784
e4225669
DB
1785 *sseen = true;
1786
32e93fb7
DB
1787 ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
1788 if (ret < 0)
e4225669 1789 return ret;
11c39b5e 1790
32e93fb7
DB
1791 memset(&prog, 0, sizeof(prog));
1792 prog.type = ctx->type;
1793 prog.insns = data_insn.sec_data->d_buf;
1794 prog.size = data_insn.sec_data->d_size;
1795 prog.license = ctx->license;
1796
f31645d1 1797 fd = bpf_prog_attach(section, &prog, ctx);
afc1a200
DB
1798 if (fd < 0) {
1799 *lderr = true;
e4225669 1800 return fd;
afc1a200 1801 }
11c39b5e 1802
32e93fb7
DB
1803 ctx->sec_done[i] = true;
1804 ctx->sec_done[idx] = true;
11c39b5e
DB
1805 break;
1806 }
1807
32e93fb7 1808 return fd;
11c39b5e
DB
1809}
1810
/* Load the program stored in @section.
 *
 * With map data available the relocating loader is tried first. The
 * plain loader is used as a fallback unless the relocating one already
 * attempted and failed the actual program load. A "not found" message
 * is printed when neither saw a matching section.
 *
 * Returns the program fd or a negative value.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool load_failed = false, seen = false;
	int fd = -1;

	if (bpf_has_map_data(ctx)) {
		fd = bpf_fetch_prog_relo(ctx, section, &load_failed, &seen);
		if (fd >= 0)
			return fd;
	}

	if (!load_failed)
		fd = bpf_fetch_prog(ctx, section, &seen);
	if (fd >= 0)
		return fd;

	if (!seen)
		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
			section);
	return fd;
}
1825
910b543d
DB
1826static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
1827{
1828 int i;
1829
1830 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
1831 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
1832 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
1833 return i;
1834 return -1;
1835}
1836
32e93fb7 1837static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
473d7840 1838{
32e93fb7
DB
1839 struct bpf_elf_sec_data data;
1840 uint32_t map_id, key_id;
910b543d 1841 int fd, i, ret, idx;
473d7840 1842
32e93fb7
DB
1843 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1844 if (ctx->sec_done[i])
473d7840
DB
1845 continue;
1846
32e93fb7 1847 ret = bpf_fill_section_data(ctx, i, &data);
473d7840
DB
1848 if (ret < 0)
1849 continue;
1850
910b543d
DB
1851 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
1852 if (ret != 2)
32e93fb7 1853 continue;
910b543d
DB
1854
1855 idx = bpf_find_map_by_id(ctx, map_id);
1856 if (idx < 0)
473d7840
DB
1857 continue;
1858
32e93fb7
DB
1859 fd = bpf_fetch_prog_sec(ctx, data.sec_name);
1860 if (fd < 0)
473d7840
DB
1861 return -EIO;
1862
910b543d
DB
1863 ret = bpf_map_update(ctx->map_fds[idx], &key_id,
1864 &fd, BPF_ANY);
afc1a200
DB
1865 if (ret < 0) {
1866 if (errno == E2BIG)
1867 fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
1868 key_id, map_id);
1869 return -errno;
1870 }
473d7840 1871
32e93fb7 1872 ctx->sec_done[i] = true;
473d7840
DB
1873 }
1874
1875 return 0;
1876}
1877
32e93fb7 1878static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
11c39b5e 1879{
32e93fb7
DB
1880 struct stat st;
1881 int ret;
11c39b5e 1882
32e93fb7 1883 memset(&ctx->stat, 0, sizeof(ctx->stat));
11c39b5e 1884
32e93fb7
DB
1885 ret = fstat(ctx->obj_fd, &st);
1886 if (ret < 0) {
1887 fprintf(stderr, "Stat of elf file failed: %s\n",
1888 strerror(errno));
1889 return;
1890 }
11c39b5e 1891
32e93fb7
DB
1892 ctx->stat.st_dev = st.st_dev;
1893 ctx->stat.st_ino = st.st_ino;
1894}
1895
f6793eec
DB
/* Read the next "<id> <path>" mapping from the pinning database @fp.
 *
 * Leading blanks are skipped; empty lines and lines starting with '#'
 * are ignored. Anything following the path on the same line (such as
 * a trailing comment) is not looked at.
 *
 * The id is parsed into a plain int, which is what the %i conversion
 * expects, and only then stored in *@id. @path must provide room for
 * PATH_MAX bytes: a line never exceeds that size, so neither the
 * parsed path nor the copy of a broken line can overflow it.
 *
 * Returns 1 when a mapping was read, 0 at end of file and -1 for a
 * malformed line, in which case @path holds the offending text.
 */
static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
{
	char buff[PATH_MAX];

	while (fgets(buff, sizeof(buff), fp)) {
		char *ptr = buff;
		int value;

		while (*ptr == ' ' || *ptr == '\t')
			ptr++;

		if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
			continue;

		if (sscanf(ptr, "%i %s", &value, path) != 2) {
			strcpy(path, ptr);
			return -1;
		}

		*id = value;
		return 1;
	}

	return 0;
}
1920
1921static bool bpf_pinning_reserved(uint32_t pinning)
1922{
1923 switch (pinning) {
1924 case PIN_NONE:
1925 case PIN_OBJECT_NS:
1926 case PIN_GLOBAL_NS:
1927 return true;
1928 default:
1929 return false;
1930 }
1931}
1932
1933static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
1934{
1935 struct bpf_hash_entry *entry;
d17b136f 1936 char subpath[PATH_MAX] = {};
f6793eec
DB
1937 uint32_t pinning;
1938 FILE *fp;
1939 int ret;
1940
1941 fp = fopen(db_file, "r");
1942 if (!fp)
1943 return;
1944
f6793eec
DB
1945 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
1946 if (ret == -1) {
1947 fprintf(stderr, "Database %s is corrupted at: %s\n",
1948 db_file, subpath);
1949 fclose(fp);
1950 return;
1951 }
1952
1953 if (bpf_pinning_reserved(pinning)) {
32a121cb
SH
1954 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
1955 db_file, pinning);
f6793eec
DB
1956 continue;
1957 }
1958
1959 entry = malloc(sizeof(*entry));
1960 if (!entry) {
1961 fprintf(stderr, "No memory left for db entry!\n");
1962 continue;
1963 }
1964
1965 entry->pinning = pinning;
1966 entry->subpath = strdup(subpath);
1967 if (!entry->subpath) {
1968 fprintf(stderr, "No memory left for db entry!\n");
1969 free(entry);
1970 continue;
1971 }
1972
1973 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1974 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
1975 }
1976
1977 fclose(fp);
1978}
1979
1980static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
1981{
1982 struct bpf_hash_entry *entry;
1983 int i;
1984
1985 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
1986 while ((entry = ctx->ht[i]) != NULL) {
1987 ctx->ht[i] = entry->next;
1988 free((char *)entry->subpath);
1989 free(entry);
1990 }
1991 }
1992}
1993
8187b012
DB
1994static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
1995{
1996 if (ctx->elf_hdr.e_type != ET_REL ||
e77fa41d
DB
1997 (ctx->elf_hdr.e_machine != EM_NONE &&
1998 ctx->elf_hdr.e_machine != EM_BPF) ||
8187b012
DB
1999 ctx->elf_hdr.e_version != EV_CURRENT) {
2000 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
2001 return -EINVAL;
2002 }
2003
2004 switch (ctx->elf_hdr.e_ident[EI_DATA]) {
2005 default:
2006 fprintf(stderr, "ELF format error, wrong endianness info?\n");
2007 return -EINVAL;
2008 case ELFDATA2LSB:
2009 if (htons(1) == 1) {
2010 fprintf(stderr,
2011 "We are big endian, eBPF object is little endian!\n");
2012 return -EIO;
2013 }
2014 break;
2015 case ELFDATA2MSB:
2016 if (htons(1) != 1) {
2017 fprintf(stderr,
2018 "We are little endian, eBPF object is big endian!\n");
2019 return -EIO;
2020 }
2021 break;
2022 }
2023
2024 return 0;
2025}
2026
32e93fb7
DB
2027static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
2028 enum bpf_prog_type type, bool verbose)
2029{
2030 int ret = -EINVAL;
2031
2032 if (elf_version(EV_CURRENT) == EV_NONE ||
2033 bpf_init_env(pathname))
2034 return ret;
2035
2036 memset(ctx, 0, sizeof(*ctx));
2037 ctx->verbose = verbose;
2038 ctx->type = type;
2039
2040 ctx->obj_fd = open(pathname, O_RDONLY);
2041 if (ctx->obj_fd < 0)
2042 return ctx->obj_fd;
2043
2044 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
2045 if (!ctx->elf_fd) {
11c39b5e 2046 ret = -EINVAL;
32e93fb7 2047 goto out_fd;
11c39b5e
DB
2048 }
2049
8187b012
DB
2050 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
2051 ret = -EINVAL;
2052 goto out_fd;
2053 }
2054
32e93fb7
DB
2055 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
2056 &ctx->elf_hdr) {
11c39b5e
DB
2057 ret = -EIO;
2058 goto out_elf;
2059 }
2060
8187b012
DB
2061 ret = bpf_elf_check_ehdr(ctx);
2062 if (ret < 0)
2063 goto out_elf;
2064
32e93fb7
DB
2065 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
2066 sizeof(*(ctx->sec_done)));
2067 if (!ctx->sec_done) {
11c39b5e
DB
2068 ret = -ENOMEM;
2069 goto out_elf;
2070 }
2071
f31645d1
DB
2072 if (ctx->verbose && bpf_log_realloc(ctx)) {
2073 ret = -ENOMEM;
2074 goto out_free;
2075 }
2076
32e93fb7 2077 bpf_save_finfo(ctx);
f6793eec
DB
2078 bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
2079
32e93fb7 2080 return 0;
f31645d1
DB
2081out_free:
2082 free(ctx->sec_done);
32e93fb7
DB
2083out_elf:
2084 elf_end(ctx->elf_fd);
2085out_fd:
2086 close(ctx->obj_fd);
2087 return ret;
2088}
d937a74b 2089
32e93fb7
DB
2090static int bpf_maps_count(struct bpf_elf_ctx *ctx)
2091{
2092 int i, count = 0;
11c39b5e 2093
32e93fb7
DB
2094 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2095 if (!ctx->map_fds[i])
2096 break;
2097 count++;
2098 }
473d7840 2099
32e93fb7
DB
2100 return count;
2101}
6256f8c9 2102
32e93fb7
DB
2103static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
2104{
2105 int i;
2106
2107 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2108 if (ctx->map_fds[i])
2109 close(ctx->map_fds[i]);
473d7840 2110 }
32e93fb7
DB
2111}
2112
2113static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
2114{
2115 if (failure)
2116 bpf_maps_teardown(ctx);
473d7840 2117
f6793eec 2118 bpf_hash_destroy(ctx);
f31645d1 2119
32e93fb7 2120 free(ctx->sec_done);
f31645d1
DB
2121 free(ctx->log);
2122
32e93fb7
DB
2123 elf_end(ctx->elf_fd);
2124 close(ctx->obj_fd);
2125}
6256f8c9 2126
32e93fb7 2127static struct bpf_elf_ctx __ctx;
6256f8c9 2128
32e93fb7
DB
2129static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
2130 const char *section, bool verbose)
2131{
2132 struct bpf_elf_ctx *ctx = &__ctx;
2133 int fd = 0, ret;
6256f8c9 2134
32e93fb7
DB
2135 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
2136 if (ret < 0) {
2137 fprintf(stderr, "Cannot initialize ELF context!\n");
2138 return ret;
2139 }
6256f8c9 2140
32e93fb7
DB
2141 ret = bpf_fetch_ancillary(ctx);
2142 if (ret < 0) {
2143 fprintf(stderr, "Error fetching ELF ancillary data!\n");
2144 goto out;
2145 }
2146
2147 fd = bpf_fetch_prog_sec(ctx, section);
2148 if (fd < 0) {
2149 fprintf(stderr, "Error fetching program/map!\n");
2150 ret = fd;
2151 goto out;
2152 }
2153
2154 ret = bpf_fill_prog_arrays(ctx);
2155 if (ret < 0)
2156 fprintf(stderr, "Error filling program arrays!\n");
11c39b5e 2157out:
32e93fb7
DB
2158 bpf_elf_ctx_destroy(ctx, ret < 0);
2159 if (ret < 0) {
2160 if (fd)
2161 close(fd);
2162 return ret;
2163 }
2164
2165 return fd;
6256f8c9 2166}
11c39b5e 2167
6256f8c9 2168static int
4bd62446
DB
2169bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
2170 const struct bpf_map_data *aux, unsigned int entries)
6256f8c9 2171{
d17b136f
PS
2172 struct bpf_map_set_msg msg = {
2173 .aux.uds_ver = BPF_SCM_AUX_VER,
2174 .aux.num_ent = entries,
2175 };
6256f8c9
DB
2176 int *cmsg_buf, min_fd;
2177 char *amsg_buf;
2178 int i;
2179
6256f8c9
DB
2180 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
2181 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
2182
2183 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
2184 amsg_buf = (char *)msg.aux.ent;
2185
4bd62446 2186 for (i = 0; i < entries; i += min_fd) {
6256f8c9
DB
2187 int ret;
2188
4bd62446 2189 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
6256f8c9
DB
2190 bpf_map_set_init_single(&msg, min_fd);
2191
2192 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
2193 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
2194
2195 ret = sendmsg(fd, &msg.hdr, 0);
2196 if (ret <= 0)
2197 return ret ? : -1;
2198 }
2199
2200 return 0;
11c39b5e
DB
2201}
2202
4bd62446
DB
2203static int
2204bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
2205 unsigned int entries)
2206{
2207 struct bpf_map_set_msg msg;
2208 int *cmsg_buf, min_fd;
2209 char *amsg_buf, *mmsg_buf;
2210 unsigned int needed = 1;
2211 int i;
2212
2213 cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
2214 amsg_buf = (char *)msg.aux.ent;
2215 mmsg_buf = (char *)&msg.aux;
2216
2217 for (i = 0; i < min(entries, needed); i += min_fd) {
2218 struct cmsghdr *cmsg;
2219 int ret;
2220
2221 min_fd = min(entries, entries - i);
2222 bpf_map_set_init_single(&msg, min_fd);
2223
2224 ret = recvmsg(fd, &msg.hdr, 0);
2225 if (ret <= 0)
2226 return ret ? : -1;
2227
2228 cmsg = CMSG_FIRSTHDR(&msg.hdr);
2229 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
2230 return -EINVAL;
2231 if (msg.hdr.msg_flags & MSG_CTRUNC)
2232 return -EIO;
2233 if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
2234 return -ENOSYS;
2235
2236 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
2237 if (min_fd > entries || min_fd <= 0)
2238 return -EINVAL;
2239
2240 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
2241 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
2242 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
2243
2244 needed = aux->num_ent;
2245 }
2246
2247 return 0;
2248}
2249
2250int bpf_send_map_fds(const char *path, const char *obj)
6256f8c9 2251{
32e93fb7 2252 struct bpf_elf_ctx *ctx = &__ctx;
d17b136f
PS
2253 struct sockaddr_un addr = { .sun_family = AF_UNIX };
2254 struct bpf_map_data bpf_aux = {
2255 .fds = ctx->map_fds,
2256 .ent = ctx->maps,
2257 .st = &ctx->stat,
2258 .obj = obj,
2259 };
6256f8c9
DB
2260 int fd, ret;
2261
2262 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
2263 if (fd < 0) {
2264 fprintf(stderr, "Cannot open socket: %s\n",
2265 strerror(errno));
2266 return -1;
2267 }
2268
6256f8c9
DB
2269 strncpy(addr.sun_path, path, sizeof(addr.sun_path));
2270
2271 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
2272 if (ret < 0) {
2273 fprintf(stderr, "Cannot connect to %s: %s\n",
2274 path, strerror(errno));
2275 return -1;
2276 }
2277
4bd62446 2278 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
32e93fb7 2279 bpf_maps_count(ctx));
6256f8c9 2280 if (ret < 0)
4bd62446
DB
2281 fprintf(stderr, "Cannot send fds to %s: %s\n",
2282 path, strerror(errno));
2283
32e93fb7 2284 bpf_maps_teardown(ctx);
4bd62446
DB
2285 close(fd);
2286 return ret;
2287}
2288
/* Receive up to @entries map descriptors, plus their descriptions,
 * on a Unix datagram socket bound to @path.
 *
 * The socket path is copied with one byte held back so that sun_path
 * stays NUL terminated (it is handed to unlink() later), and the
 * socket is closed on every exit path. The socket file is removed
 * again once receiving was attempted.
 *
 * Returns 0 on success, a negative value otherwise.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		/* Nothing was created by us, so there is nothing to unlink. */
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
11c39b5e 2320#endif /* HAVE_ELF */