/*
 * bpf.c	BPF common code
 *
 * This program is free software; you can distribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Daniel Borkmann <daniel@iogearbox.net>
 *		Jiri Pirko <jiri@resnulli.us>
 *		Alexei Starovoitov <ast@kernel.org>
 */
31 #include <sys/types.h>
35 #include <sys/mount.h>
36 #include <sys/syscall.h>
37 #include <sys/sendfile.h>
38 #include <sys/resource.h>
40 #include <arpa/inet.h>
48 struct bpf_prog_meta
{
55 static const enum bpf_prog_type __bpf_types
[] = {
56 BPF_PROG_TYPE_SCHED_CLS
,
57 BPF_PROG_TYPE_SCHED_ACT
,
60 BPF_PROG_TYPE_LWT_OUT
,
61 BPF_PROG_TYPE_LWT_XMIT
,
64 static const struct bpf_prog_meta __bpf_prog_meta
[] = {
65 [BPF_PROG_TYPE_SCHED_CLS
] = {
68 .section
= ELF_SECTION_CLASSIFIER
,
69 .may_uds_export
= true,
71 [BPF_PROG_TYPE_SCHED_ACT
] = {
74 .section
= ELF_SECTION_ACTION
,
75 .may_uds_export
= true,
77 [BPF_PROG_TYPE_XDP
] = {
80 .section
= ELF_SECTION_PROG
,
82 [BPF_PROG_TYPE_LWT_IN
] = {
85 .section
= ELF_SECTION_PROG
,
87 [BPF_PROG_TYPE_LWT_OUT
] = {
90 .section
= ELF_SECTION_PROG
,
92 [BPF_PROG_TYPE_LWT_XMIT
] = {
95 .section
= ELF_SECTION_PROG
,
99 static const char *bpf_prog_to_subdir(enum bpf_prog_type type
)
101 assert(type
< ARRAY_SIZE(__bpf_prog_meta
) &&
102 __bpf_prog_meta
[type
].subdir
);
103 return __bpf_prog_meta
[type
].subdir
;
106 const char *bpf_prog_to_default_section(enum bpf_prog_type type
)
108 assert(type
< ARRAY_SIZE(__bpf_prog_meta
) &&
109 __bpf_prog_meta
[type
].section
);
110 return __bpf_prog_meta
[type
].section
;
114 static int bpf_obj_open(const char *path
, enum bpf_prog_type type
,
115 const char *sec
, bool verbose
);
117 static int bpf_obj_open(const char *path
, enum bpf_prog_type type
,
118 const char *sec
, bool verbose
)
120 fprintf(stderr
, "No ELF library support compiled in.\n");
126 static inline __u64
bpf_ptr_to_u64(const void *ptr
)
128 return (__u64
)(unsigned long)ptr
;
131 static int bpf(int cmd
, union bpf_attr
*attr
, unsigned int size
)
134 return syscall(__NR_bpf
, cmd
, attr
, size
);
136 fprintf(stderr
, "No bpf syscall, kernel headers too old?\n");
142 static int bpf_map_update(int fd
, const void *key
, const void *value
,
145 union bpf_attr attr
= {};
148 attr
.key
= bpf_ptr_to_u64(key
);
149 attr
.value
= bpf_ptr_to_u64(value
);
152 return bpf(BPF_MAP_UPDATE_ELEM
, &attr
, sizeof(attr
));
155 static int bpf_parse_string(char *arg
, bool from_file
, __u16
*bpf_len
,
156 char **bpf_string
, bool *need_release
,
157 const char separator
)
162 size_t tmp_len
, op_len
= sizeof("65535 255 255 4294967295,");
163 char *tmp_string
, *last
;
166 tmp_len
= sizeof("4096,") + BPF_MAXINSNS
* op_len
;
167 tmp_string
= calloc(1, tmp_len
);
168 if (tmp_string
== NULL
)
171 fp
= fopen(arg
, "r");
173 perror("Cannot fopen");
178 if (!fgets(tmp_string
, tmp_len
, fp
)) {
186 last
= &tmp_string
[strlen(tmp_string
) - 1];
190 *need_release
= true;
191 *bpf_string
= tmp_string
;
193 *need_release
= false;
197 if (sscanf(*bpf_string
, "%hu%c", bpf_len
, &sp
) != 2 ||
207 static int bpf_ops_parse(int argc
, char **argv
, struct sock_filter
*bpf_ops
,
210 char *bpf_string
, *token
, separator
= ',';
217 if (bpf_parse_string(argv
[0], from_file
, &bpf_len
, &bpf_string
,
218 &need_release
, separator
))
220 if (bpf_len
== 0 || bpf_len
> BPF_MAXINSNS
) {
226 while ((token
= strchr(token
, separator
)) && (++token
)[0]) {
228 fprintf(stderr
, "Real program length exceeds encoded length parameter!\n");
233 if (sscanf(token
, "%hu %hhu %hhu %u,",
234 &bpf_ops
[i
].code
, &bpf_ops
[i
].jt
,
235 &bpf_ops
[i
].jf
, &bpf_ops
[i
].k
) != 4) {
236 fprintf(stderr
, "Error at instruction %d!\n", i
);
245 fprintf(stderr
, "Parsed program length is less than encoded length parameter!\n");
257 void bpf_print_ops(FILE *f
, struct rtattr
*bpf_ops
, __u16 len
)
259 struct sock_filter
*ops
= RTA_DATA(bpf_ops
);
265 fprintf(f
, "bytecode \'%u,", len
);
267 for (i
= 0; i
< len
- 1; i
++)
268 fprintf(f
, "%hu %hhu %hhu %u,", ops
[i
].code
, ops
[i
].jt
,
269 ops
[i
].jf
, ops
[i
].k
);
271 fprintf(f
, "%hu %hhu %hhu %u\'", ops
[i
].code
, ops
[i
].jt
,
272 ops
[i
].jf
, ops
[i
].k
);
275 static void bpf_map_pin_report(const struct bpf_elf_map
*pin
,
276 const struct bpf_elf_map
*obj
)
278 fprintf(stderr
, "Map specification differs from pinned file!\n");
280 if (obj
->type
!= pin
->type
)
281 fprintf(stderr
, " - Type: %u (obj) != %u (pin)\n",
282 obj
->type
, pin
->type
);
283 if (obj
->size_key
!= pin
->size_key
)
284 fprintf(stderr
, " - Size key: %u (obj) != %u (pin)\n",
285 obj
->size_key
, pin
->size_key
);
286 if (obj
->size_value
!= pin
->size_value
)
287 fprintf(stderr
, " - Size value: %u (obj) != %u (pin)\n",
288 obj
->size_value
, pin
->size_value
);
289 if (obj
->max_elem
!= pin
->max_elem
)
290 fprintf(stderr
, " - Max elems: %u (obj) != %u (pin)\n",
291 obj
->max_elem
, pin
->max_elem
);
292 if (obj
->flags
!= pin
->flags
)
293 fprintf(stderr
, " - Flags: %#x (obj) != %#x (pin)\n",
294 obj
->flags
, pin
->flags
);
296 fprintf(stderr
, "\n");
299 static int bpf_map_selfcheck_pinned(int fd
, const struct bpf_elf_map
*map
,
300 int length
, enum bpf_prog_type type
)
302 char file
[PATH_MAX
], buff
[4096];
303 struct bpf_elf_map tmp
= {}, zero
= {};
304 unsigned int val
, owner_type
= 0;
307 snprintf(file
, sizeof(file
), "/proc/%d/fdinfo/%d", getpid(), fd
);
309 fp
= fopen(file
, "r");
311 fprintf(stderr
, "No procfs support?!\n");
315 while (fgets(buff
, sizeof(buff
), fp
)) {
316 if (sscanf(buff
, "map_type:\t%u", &val
) == 1)
318 else if (sscanf(buff
, "key_size:\t%u", &val
) == 1)
320 else if (sscanf(buff
, "value_size:\t%u", &val
) == 1)
321 tmp
.size_value
= val
;
322 else if (sscanf(buff
, "max_entries:\t%u", &val
) == 1)
324 else if (sscanf(buff
, "map_flags:\t%i", &val
) == 1)
326 else if (sscanf(buff
, "owner_prog_type:\t%i", &val
) == 1)
332 /* The decision to reject this is on kernel side eventually, but
333 * at least give the user a chance to know what's wrong.
335 if (owner_type
&& owner_type
!= type
)
336 fprintf(stderr
, "Program array map owner types differ: %u (obj) != %u (pin)\n",
339 if (!memcmp(&tmp
, map
, length
)) {
342 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
343 * so just accept it. We know we do have an eBPF fd and in this
344 * case, everything is 0. It is guaranteed that no such map exists
345 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
347 if (!memcmp(&tmp
, &zero
, length
))
350 bpf_map_pin_report(&tmp
, map
);
355 static int bpf_mnt_fs(const char *target
)
357 bool bind_done
= false;
359 while (mount("", target
, "none", MS_PRIVATE
| MS_REC
, NULL
)) {
360 if (errno
!= EINVAL
|| bind_done
) {
361 fprintf(stderr
, "mount --make-private %s failed: %s\n",
362 target
, strerror(errno
));
366 if (mount(target
, target
, "none", MS_BIND
, NULL
)) {
367 fprintf(stderr
, "mount --bind %s %s failed: %s\n",
368 target
, target
, strerror(errno
));
375 if (mount("bpf", target
, "bpf", 0, "mode=0700")) {
376 fprintf(stderr
, "mount -t bpf bpf %s failed: %s\n",
377 target
, strerror(errno
));
384 static int bpf_valid_mntpt(const char *mnt
, unsigned long magic
)
388 if (statfs(mnt
, &st_fs
) < 0)
390 if ((unsigned long)st_fs
.f_type
!= magic
)
396 static const char *bpf_find_mntpt(const char *fstype
, unsigned long magic
,
398 const char * const *known_mnts
)
400 const char * const *ptr
;
407 if (bpf_valid_mntpt(*ptr
, magic
) == 0) {
408 strncpy(mnt
, *ptr
, len
- 1);
416 fp
= fopen("/proc/mounts", "r");
417 if (fp
== NULL
|| len
!= PATH_MAX
)
420 while (fscanf(fp
, "%*s %" textify(PATH_MAX
) "s %99s %*s %*d %*d\n",
422 if (strcmp(type
, fstype
) == 0)
427 if (strcmp(type
, fstype
) != 0)
433 int bpf_trace_pipe(void)
435 char tracefs_mnt
[PATH_MAX
] = TRACE_DIR_MNT
;
436 static const char * const tracefs_known_mnts
[] = {
438 "/sys/kernel/debug/tracing",
443 char tpipe
[PATH_MAX
];
447 mnt
= bpf_find_mntpt("tracefs", TRACEFS_MAGIC
, tracefs_mnt
,
448 sizeof(tracefs_mnt
), tracefs_known_mnts
);
450 fprintf(stderr
, "tracefs not mounted?\n");
454 snprintf(tpipe
, sizeof(tpipe
), "%s/trace_pipe", mnt
);
456 fd
= open(tpipe
, O_RDONLY
);
460 fprintf(stderr
, "Running! Hang up with ^C!\n\n");
462 static char buff
[4096];
465 ret
= read(fd
, buff
, sizeof(buff
) - 1);
475 static int bpf_gen_global(const char *bpf_sub_dir
)
477 char bpf_glo_dir
[PATH_MAX
];
480 snprintf(bpf_glo_dir
, sizeof(bpf_glo_dir
), "%s/%s/",
481 bpf_sub_dir
, BPF_DIR_GLOBALS
);
483 ret
= mkdir(bpf_glo_dir
, S_IRWXU
);
484 if (ret
&& errno
!= EEXIST
) {
485 fprintf(stderr
, "mkdir %s failed: %s\n", bpf_glo_dir
,
493 static int bpf_gen_master(const char *base
, const char *name
)
495 char bpf_sub_dir
[PATH_MAX
];
498 snprintf(bpf_sub_dir
, sizeof(bpf_sub_dir
), "%s%s/", base
, name
);
500 ret
= mkdir(bpf_sub_dir
, S_IRWXU
);
501 if (ret
&& errno
!= EEXIST
) {
502 fprintf(stderr
, "mkdir %s failed: %s\n", bpf_sub_dir
,
507 return bpf_gen_global(bpf_sub_dir
);
510 static int bpf_slave_via_bind_mnt(const char *full_name
,
511 const char *full_link
)
515 ret
= mkdir(full_name
, S_IRWXU
);
517 assert(errno
!= EEXIST
);
518 fprintf(stderr
, "mkdir %s failed: %s\n", full_name
,
523 ret
= mount(full_link
, full_name
, "none", MS_BIND
, NULL
);
526 fprintf(stderr
, "mount --bind %s %s failed: %s\n",
527 full_link
, full_name
, strerror(errno
));
533 static int bpf_gen_slave(const char *base
, const char *name
,
536 char bpf_lnk_dir
[PATH_MAX
];
537 char bpf_sub_dir
[PATH_MAX
];
541 snprintf(bpf_lnk_dir
, sizeof(bpf_lnk_dir
), "%s%s/", base
, link
);
542 snprintf(bpf_sub_dir
, sizeof(bpf_sub_dir
), "%s%s", base
, name
);
544 ret
= symlink(bpf_lnk_dir
, bpf_sub_dir
);
546 if (errno
!= EEXIST
) {
547 if (errno
!= EPERM
) {
548 fprintf(stderr
, "symlink %s failed: %s\n",
549 bpf_sub_dir
, strerror(errno
));
553 return bpf_slave_via_bind_mnt(bpf_sub_dir
,
557 ret
= lstat(bpf_sub_dir
, &sb
);
559 fprintf(stderr
, "lstat %s failed: %s\n",
560 bpf_sub_dir
, strerror(errno
));
564 if ((sb
.st_mode
& S_IFMT
) != S_IFLNK
)
565 return bpf_gen_global(bpf_sub_dir
);
571 static int bpf_gen_hierarchy(const char *base
)
575 ret
= bpf_gen_master(base
, bpf_prog_to_subdir(__bpf_types
[0]));
576 for (i
= 1; i
< ARRAY_SIZE(__bpf_types
) && !ret
; i
++)
577 ret
= bpf_gen_slave(base
,
578 bpf_prog_to_subdir(__bpf_types
[i
]),
579 bpf_prog_to_subdir(__bpf_types
[0]));
583 static const char *bpf_get_work_dir(enum bpf_prog_type type
)
585 static char bpf_tmp
[PATH_MAX
] = BPF_DIR_MNT
;
586 static char bpf_wrk_dir
[PATH_MAX
];
587 static const char *mnt
;
588 static bool bpf_mnt_cached
;
589 static const char * const bpf_known_mnts
[] = {
596 if (bpf_mnt_cached
) {
597 const char *out
= mnt
;
600 snprintf(bpf_tmp
, sizeof(bpf_tmp
), "%s%s/",
601 out
, bpf_prog_to_subdir(type
));
607 mnt
= bpf_find_mntpt("bpf", BPF_FS_MAGIC
, bpf_tmp
, sizeof(bpf_tmp
),
610 mnt
= getenv(BPF_ENV_MNT
);
613 ret
= bpf_mnt_fs(mnt
);
620 snprintf(bpf_wrk_dir
, sizeof(bpf_wrk_dir
), "%s/", mnt
);
622 ret
= bpf_gen_hierarchy(bpf_wrk_dir
);
630 bpf_mnt_cached
= true;
634 static int bpf_obj_get(const char *pathname
, enum bpf_prog_type type
)
636 union bpf_attr attr
= {};
639 if (strlen(pathname
) > 2 && pathname
[0] == 'm' &&
640 pathname
[1] == ':' && bpf_get_work_dir(type
)) {
641 snprintf(tmp
, sizeof(tmp
), "%s/%s",
642 bpf_get_work_dir(type
), pathname
+ 2);
646 attr
.pathname
= bpf_ptr_to_u64(pathname
);
648 return bpf(BPF_OBJ_GET
, &attr
, sizeof(attr
));
651 static int bpf_obj_pinned(const char *pathname
, enum bpf_prog_type type
)
653 int prog_fd
= bpf_obj_get(pathname
, type
);
656 fprintf(stderr
, "Couldn\'t retrieve pinned program \'%s\': %s\n",
657 pathname
, strerror(errno
));
669 static int bpf_parse(enum bpf_prog_type
*type
, enum bpf_mode
*mode
,
670 struct bpf_cfg_in
*cfg
, const bool *opt_tbl
)
672 const char *file
, *section
, *uds_name
;
673 bool verbose
= false;
680 if (opt_tbl
[CBPF_BYTECODE
] &&
681 (matches(*argv
, "bytecode") == 0 ||
682 strcmp(*argv
, "bc") == 0)) {
683 *mode
= CBPF_BYTECODE
;
684 } else if (opt_tbl
[CBPF_FILE
] &&
685 (matches(*argv
, "bytecode-file") == 0 ||
686 strcmp(*argv
, "bcf") == 0)) {
688 } else if (opt_tbl
[EBPF_OBJECT
] &&
689 (matches(*argv
, "object-file") == 0 ||
690 strcmp(*argv
, "obj") == 0)) {
692 } else if (opt_tbl
[EBPF_PINNED
] &&
693 (matches(*argv
, "object-pinned") == 0 ||
694 matches(*argv
, "pinned") == 0 ||
695 matches(*argv
, "fd") == 0)) {
698 fprintf(stderr
, "What mode is \"%s\"?\n", *argv
);
703 file
= section
= uds_name
= NULL
;
704 if (*mode
== EBPF_OBJECT
|| *mode
== EBPF_PINNED
) {
708 if (*type
== BPF_PROG_TYPE_UNSPEC
) {
709 if (argc
> 0 && matches(*argv
, "type") == 0) {
711 for (i
= 0; i
< ARRAY_SIZE(__bpf_prog_meta
);
713 if (!__bpf_prog_meta
[i
].type
)
716 __bpf_prog_meta
[i
].type
)) {
722 if (*type
== BPF_PROG_TYPE_UNSPEC
) {
723 fprintf(stderr
, "What type is \"%s\"?\n",
729 *type
= BPF_PROG_TYPE_SCHED_CLS
;
733 section
= bpf_prog_to_default_section(*type
);
734 if (argc
> 0 && matches(*argv
, "section") == 0) {
740 if (__bpf_prog_meta
[*type
].may_uds_export
) {
741 uds_name
= getenv(BPF_ENV_UDS
);
742 if (argc
> 0 && !uds_name
&&
743 matches(*argv
, "export") == 0) {
750 if (argc
> 0 && matches(*argv
, "verbose") == 0) {
758 if (*mode
== CBPF_BYTECODE
|| *mode
== CBPF_FILE
)
759 ret
= bpf_ops_parse(argc
, argv
, cfg
->ops
, *mode
== CBPF_FILE
);
760 else if (*mode
== EBPF_OBJECT
)
761 ret
= bpf_obj_open(file
, *type
, section
, verbose
);
762 else if (*mode
== EBPF_PINNED
)
763 ret
= bpf_obj_pinned(file
, *type
);
768 cfg
->section
= section
;
776 static int bpf_parse_opt_tbl(enum bpf_prog_type type
, struct bpf_cfg_in
*cfg
,
777 const struct bpf_cfg_ops
*ops
, void *nl
,
780 struct sock_filter opcodes
[BPF_MAXINSNS
];
781 char annotation
[256];
786 ret
= bpf_parse(&type
, &mode
, cfg
, opt_tbl
);
791 if (mode
== CBPF_BYTECODE
|| mode
== CBPF_FILE
)
792 ops
->cbpf_cb(nl
, opcodes
, ret
);
793 if (mode
== EBPF_OBJECT
|| mode
== EBPF_PINNED
) {
794 snprintf(annotation
, sizeof(annotation
), "%s:[%s]",
795 basename(cfg
->object
), mode
== EBPF_PINNED
?
796 "*fsobj" : cfg
->section
);
797 ops
->ebpf_cb(nl
, ret
, annotation
);
803 int bpf_parse_common(enum bpf_prog_type type
, struct bpf_cfg_in
*cfg
,
804 const struct bpf_cfg_ops
*ops
, void *nl
)
806 bool opt_tbl
[BPF_MODE_MAX
] = {};
809 opt_tbl
[CBPF_BYTECODE
] = true;
810 opt_tbl
[CBPF_FILE
] = true;
814 opt_tbl
[EBPF_OBJECT
] = true;
815 opt_tbl
[EBPF_PINNED
] = true;
818 return bpf_parse_opt_tbl(type
, cfg
, ops
, nl
, opt_tbl
);
821 int bpf_graft_map(const char *map_path
, uint32_t *key
, int argc
, char **argv
)
823 enum bpf_prog_type type
= BPF_PROG_TYPE_UNSPEC
;
824 const bool opt_tbl
[BPF_MODE_MAX
] = {
825 [EBPF_OBJECT
] = true,
826 [EBPF_PINNED
] = true,
828 const struct bpf_elf_map test
= {
829 .type
= BPF_MAP_TYPE_PROG_ARRAY
,
830 .size_key
= sizeof(int),
831 .size_value
= sizeof(int),
833 struct bpf_cfg_in cfg
= {
837 int ret
, prog_fd
, map_fd
;
841 prog_fd
= bpf_parse(&type
, &mode
, &cfg
, opt_tbl
);
847 ret
= sscanf(cfg
.section
, "%*i/%i", &map_key
);
849 fprintf(stderr
, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
855 map_fd
= bpf_obj_get(map_path
, type
);
857 fprintf(stderr
, "Couldn\'t retrieve pinned map \'%s\': %s\n",
858 map_path
, strerror(errno
));
863 ret
= bpf_map_selfcheck_pinned(map_fd
, &test
,
864 offsetof(struct bpf_elf_map
, max_elem
),
867 fprintf(stderr
, "Map \'%s\' self-check failed!\n", map_path
);
871 ret
= bpf_map_update(map_fd
, &map_key
, &prog_fd
, BPF_ANY
);
873 fprintf(stderr
, "Map update failed: %s\n", strerror(errno
));
881 int bpf_prog_attach_fd(int prog_fd
, int target_fd
, enum bpf_attach_type type
)
883 union bpf_attr attr
= {};
885 attr
.target_fd
= target_fd
;
886 attr
.attach_bpf_fd
= prog_fd
;
887 attr
.attach_type
= type
;
889 return bpf(BPF_PROG_ATTACH
, &attr
, sizeof(attr
));
892 int bpf_prog_detach_fd(int target_fd
, enum bpf_attach_type type
)
894 union bpf_attr attr
= {};
896 attr
.target_fd
= target_fd
;
897 attr
.attach_type
= type
;
899 return bpf(BPF_PROG_DETACH
, &attr
, sizeof(attr
));
902 int bpf_prog_load(enum bpf_prog_type type
, const struct bpf_insn
*insns
,
903 size_t size_insns
, const char *license
, char *log
,
906 union bpf_attr attr
= {};
908 attr
.prog_type
= type
;
909 attr
.insns
= bpf_ptr_to_u64(insns
);
910 attr
.insn_cnt
= size_insns
/ sizeof(struct bpf_insn
);
911 attr
.license
= bpf_ptr_to_u64(license
);
914 attr
.log_buf
= bpf_ptr_to_u64(log
);
915 attr
.log_size
= size_log
;
919 return bpf(BPF_PROG_LOAD
, &attr
, sizeof(attr
));
923 struct bpf_elf_prog
{
924 enum bpf_prog_type type
;
925 const struct bpf_insn
*insns
;
930 struct bpf_hash_entry
{
931 unsigned int pinning
;
933 struct bpf_hash_entry
*next
;
942 int map_fds
[ELF_MAX_MAPS
];
943 struct bpf_elf_map maps
[ELF_MAX_MAPS
];
949 char license
[ELF_MAX_LICENSE_LEN
];
950 enum bpf_prog_type type
;
952 struct bpf_elf_st stat
;
953 struct bpf_hash_entry
*ht
[256];
958 struct bpf_elf_sec_data
{
961 const char *sec_name
;
964 struct bpf_map_data
{
967 struct bpf_elf_st
*st
;
968 struct bpf_elf_map
*ent
;
971 static __check_format_string(2, 3) void
972 bpf_dump_error(struct bpf_elf_ctx
*ctx
, const char *format
, ...)
976 va_start(vl
, format
);
977 vfprintf(stderr
, format
, vl
);
980 if (ctx
->log
&& ctx
->log
[0]) {
982 fprintf(stderr
, "%s\n", ctx
->log
);
984 unsigned int off
= 0, len
= strlen(ctx
->log
);
986 if (len
> BPF_MAX_LOG
) {
987 off
= len
- BPF_MAX_LOG
;
988 fprintf(stderr
, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
991 fprintf(stderr
, "%s\n", ctx
->log
+ off
);
994 memset(ctx
->log
, 0, ctx
->log_size
);
998 static int bpf_log_realloc(struct bpf_elf_ctx
*ctx
)
1000 const size_t log_max
= UINT_MAX
>> 8;
1001 size_t log_size
= ctx
->log_size
;
1006 } else if (log_size
< log_max
) {
1008 if (log_size
> log_max
)
1014 ptr
= realloc(ctx
->log
, log_size
);
1019 ctx
->log_size
= log_size
;
1024 static int bpf_map_create(enum bpf_map_type type
, uint32_t size_key
,
1025 uint32_t size_value
, uint32_t max_elem
,
1028 union bpf_attr attr
= {};
1030 attr
.map_type
= type
;
1031 attr
.key_size
= size_key
;
1032 attr
.value_size
= size_value
;
1033 attr
.max_entries
= max_elem
;
1034 attr
.map_flags
= flags
;
1036 return bpf(BPF_MAP_CREATE
, &attr
, sizeof(attr
));
1039 static int bpf_obj_pin(int fd
, const char *pathname
)
1041 union bpf_attr attr
= {};
1043 attr
.pathname
= bpf_ptr_to_u64(pathname
);
1046 return bpf(BPF_OBJ_PIN
, &attr
, sizeof(attr
));
1049 static int bpf_obj_hash(const char *object
, uint8_t *out
, size_t len
)
1051 struct sockaddr_alg alg
= {
1052 .salg_family
= AF_ALG
,
1053 .salg_type
= "hash",
1054 .salg_name
= "sha1",
1056 int ret
, cfd
, ofd
, ffd
;
1060 if (!object
|| len
!= 20)
1063 cfd
= socket(AF_ALG
, SOCK_SEQPACKET
, 0);
1065 fprintf(stderr
, "Cannot get AF_ALG socket: %s\n",
1070 ret
= bind(cfd
, (struct sockaddr
*)&alg
, sizeof(alg
));
1072 fprintf(stderr
, "Error binding socket: %s\n", strerror(errno
));
1076 ofd
= accept(cfd
, NULL
, 0);
1078 fprintf(stderr
, "Error accepting socket: %s\n",
1084 ffd
= open(object
, O_RDONLY
);
1086 fprintf(stderr
, "Error opening object %s: %s\n",
1087 object
, strerror(errno
));
1092 ret
= fstat(ffd
, &stbuff
);
1094 fprintf(stderr
, "Error doing fstat: %s\n",
1099 size
= sendfile(ofd
, ffd
, NULL
, stbuff
.st_size
);
1100 if (size
!= stbuff
.st_size
) {
1101 fprintf(stderr
, "Error from sendfile (%zd vs %zu bytes): %s\n",
1102 size
, stbuff
.st_size
, strerror(errno
));
1107 size
= read(ofd
, out
, len
);
1109 fprintf(stderr
, "Error from read (%zd vs %zu bytes): %s\n",
1110 size
, len
, strerror(errno
));
1124 static const char *bpf_get_obj_uid(const char *pathname
)
1126 static bool bpf_uid_cached
;
1127 static char bpf_uid
[64];
1134 ret
= bpf_obj_hash(pathname
, tmp
, sizeof(tmp
));
1136 fprintf(stderr
, "Object hashing failed!\n");
1140 hexstring_n2a(tmp
, sizeof(tmp
), bpf_uid
, sizeof(bpf_uid
));
1141 bpf_uid_cached
= true;
1146 static int bpf_init_env(const char *pathname
)
1148 struct rlimit limit
= {
1149 .rlim_cur
= RLIM_INFINITY
,
1150 .rlim_max
= RLIM_INFINITY
,
1153 /* Don't bother in case we fail! */
1154 setrlimit(RLIMIT_MEMLOCK
, &limit
);
1156 if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC
)) {
1157 fprintf(stderr
, "Continuing without mounted eBPF fs. Too old kernel?\n");
1161 if (!bpf_get_obj_uid(pathname
))
1167 static const char *bpf_custom_pinning(const struct bpf_elf_ctx
*ctx
,
1170 struct bpf_hash_entry
*entry
;
1172 entry
= ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)];
1173 while (entry
&& entry
->pinning
!= pinning
)
1174 entry
= entry
->next
;
1176 return entry
? entry
->subpath
: NULL
;
1179 static bool bpf_no_pinning(const struct bpf_elf_ctx
*ctx
,
1189 return !bpf_custom_pinning(ctx
, pinning
);
1193 static void bpf_make_pathname(char *pathname
, size_t len
, const char *name
,
1194 const struct bpf_elf_ctx
*ctx
, uint32_t pinning
)
1198 snprintf(pathname
, len
, "%s/%s/%s",
1199 bpf_get_work_dir(ctx
->type
),
1200 bpf_get_obj_uid(NULL
), name
);
1203 snprintf(pathname
, len
, "%s/%s/%s",
1204 bpf_get_work_dir(ctx
->type
),
1205 BPF_DIR_GLOBALS
, name
);
1208 snprintf(pathname
, len
, "%s/../%s/%s",
1209 bpf_get_work_dir(ctx
->type
),
1210 bpf_custom_pinning(ctx
, pinning
), name
);
1215 static int bpf_probe_pinned(const char *name
, const struct bpf_elf_ctx
*ctx
,
1218 char pathname
[PATH_MAX
];
1220 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_work_dir(ctx
->type
))
1223 bpf_make_pathname(pathname
, sizeof(pathname
), name
, ctx
, pinning
);
1224 return bpf_obj_get(pathname
, ctx
->type
);
1227 static int bpf_make_obj_path(const struct bpf_elf_ctx
*ctx
)
1232 snprintf(tmp
, sizeof(tmp
), "%s/%s", bpf_get_work_dir(ctx
->type
),
1233 bpf_get_obj_uid(NULL
));
1235 ret
= mkdir(tmp
, S_IRWXU
);
1236 if (ret
&& errno
!= EEXIST
) {
1237 fprintf(stderr
, "mkdir %s failed: %s\n", tmp
, strerror(errno
));
1244 static int bpf_make_custom_path(const struct bpf_elf_ctx
*ctx
,
1247 char tmp
[PATH_MAX
], rem
[PATH_MAX
], *sub
;
1250 snprintf(tmp
, sizeof(tmp
), "%s/../", bpf_get_work_dir(ctx
->type
));
1251 snprintf(rem
, sizeof(rem
), "%s/", todo
);
1252 sub
= strtok(rem
, "/");
1255 if (strlen(tmp
) + strlen(sub
) + 2 > PATH_MAX
)
1261 ret
= mkdir(tmp
, S_IRWXU
);
1262 if (ret
&& errno
!= EEXIST
) {
1263 fprintf(stderr
, "mkdir %s failed: %s\n", tmp
,
1268 sub
= strtok(NULL
, "/");
1274 static int bpf_place_pinned(int fd
, const char *name
,
1275 const struct bpf_elf_ctx
*ctx
, uint32_t pinning
)
1277 char pathname
[PATH_MAX
];
1281 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_work_dir(ctx
->type
))
1284 if (pinning
== PIN_OBJECT_NS
)
1285 ret
= bpf_make_obj_path(ctx
);
1286 else if ((tmp
= bpf_custom_pinning(ctx
, pinning
)))
1287 ret
= bpf_make_custom_path(ctx
, tmp
);
1291 bpf_make_pathname(pathname
, sizeof(pathname
), name
, ctx
, pinning
);
1292 return bpf_obj_pin(fd
, pathname
);
1295 static void bpf_prog_report(int fd
, const char *section
,
1296 const struct bpf_elf_prog
*prog
,
1297 struct bpf_elf_ctx
*ctx
)
1299 unsigned int insns
= prog
->size
/ sizeof(struct bpf_insn
);
1301 fprintf(stderr
, "\nProg section \'%s\' %s%s (%d)!\n", section
,
1302 fd
< 0 ? "rejected: " : "loaded",
1303 fd
< 0 ? strerror(errno
) : "",
1304 fd
< 0 ? errno
: fd
);
1306 fprintf(stderr
, " - Type: %u\n", prog
->type
);
1307 fprintf(stderr
, " - Instructions: %u (%u over limit)\n",
1308 insns
, insns
> BPF_MAXINSNS
? insns
- BPF_MAXINSNS
: 0);
1309 fprintf(stderr
, " - License: %s\n\n", prog
->license
);
1311 bpf_dump_error(ctx
, "Verifier analysis:\n\n");
1314 static int bpf_prog_attach(const char *section
,
1315 const struct bpf_elf_prog
*prog
,
1316 struct bpf_elf_ctx
*ctx
)
1321 fd
= bpf_prog_load(prog
->type
, prog
->insns
, prog
->size
,
1322 prog
->license
, ctx
->log
, ctx
->log_size
);
1323 if (fd
< 0 || ctx
->verbose
) {
1324 /* The verifier log is pretty chatty, sometimes so chatty
1325 * on larger programs, that we could fail to dump everything
1326 * into our buffer. Still, try to give a debuggable error
1327 * log for the user, so enlarge it and re-fail.
1329 if (fd
< 0 && (errno
== ENOSPC
|| !ctx
->log_size
)) {
1330 if (tries
++ < 10 && !bpf_log_realloc(ctx
))
1333 fprintf(stderr
, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
1334 ctx
->log_size
, tries
);
1338 bpf_prog_report(fd
, section
, prog
, ctx
);
1344 static void bpf_map_report(int fd
, const char *name
,
1345 const struct bpf_elf_map
*map
,
1346 struct bpf_elf_ctx
*ctx
)
1348 fprintf(stderr
, "Map object \'%s\' %s%s (%d)!\n", name
,
1349 fd
< 0 ? "rejected: " : "loaded",
1350 fd
< 0 ? strerror(errno
) : "",
1351 fd
< 0 ? errno
: fd
);
1353 fprintf(stderr
, " - Type: %u\n", map
->type
);
1354 fprintf(stderr
, " - Identifier: %u\n", map
->id
);
1355 fprintf(stderr
, " - Pinning: %u\n", map
->pinning
);
1356 fprintf(stderr
, " - Size key: %u\n", map
->size_key
);
1357 fprintf(stderr
, " - Size value: %u\n", map
->size_value
);
1358 fprintf(stderr
, " - Max elems: %u\n", map
->max_elem
);
1359 fprintf(stderr
, " - Flags: %#x\n\n", map
->flags
);
1362 static int bpf_map_attach(const char *name
, const struct bpf_elf_map
*map
,
1363 struct bpf_elf_ctx
*ctx
)
1367 fd
= bpf_probe_pinned(name
, ctx
, map
->pinning
);
1369 ret
= bpf_map_selfcheck_pinned(fd
, map
,
1370 offsetof(struct bpf_elf_map
,
1374 fprintf(stderr
, "Map \'%s\' self-check failed!\n",
1379 fprintf(stderr
, "Map \'%s\' loaded as pinned!\n",
1385 fd
= bpf_map_create(map
->type
, map
->size_key
, map
->size_value
,
1386 map
->max_elem
, map
->flags
);
1387 if (fd
< 0 || ctx
->verbose
) {
1388 bpf_map_report(fd
, name
, map
, ctx
);
1393 ret
= bpf_place_pinned(fd
, name
, ctx
, map
->pinning
);
1394 if (ret
< 0 && errno
!= EEXIST
) {
1395 fprintf(stderr
, "Could not pin %s map: %s\n", name
,
1404 static const char *bpf_str_tab_name(const struct bpf_elf_ctx
*ctx
,
1405 const GElf_Sym
*sym
)
1407 return ctx
->str_tab
->d_buf
+ sym
->st_name
;
1410 static const char *bpf_map_fetch_name(struct bpf_elf_ctx
*ctx
, int which
)
1415 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1416 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1419 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1420 GELF_ST_TYPE(sym
.st_info
) != STT_NOTYPE
||
1421 sym
.st_shndx
!= ctx
->sec_maps
||
1422 sym
.st_value
/ ctx
->map_len
!= which
)
1425 return bpf_str_tab_name(ctx
, &sym
);
1431 static int bpf_maps_attach_all(struct bpf_elf_ctx
*ctx
)
1433 const char *map_name
;
1436 for (i
= 0; i
< ctx
->map_num
; i
++) {
1437 map_name
= bpf_map_fetch_name(ctx
, i
);
1441 fd
= bpf_map_attach(map_name
, &ctx
->maps
[i
], ctx
);
1445 ctx
->map_fds
[i
] = fd
;
1451 static int bpf_map_num_sym(struct bpf_elf_ctx
*ctx
)
1456 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1457 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1460 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1461 GELF_ST_TYPE(sym
.st_info
) != STT_NOTYPE
||
1462 sym
.st_shndx
!= ctx
->sec_maps
)
1470 static int bpf_fill_section_data(struct bpf_elf_ctx
*ctx
, int section
,
1471 struct bpf_elf_sec_data
*data
)
1473 Elf_Data
*sec_edata
;
1478 memset(data
, 0, sizeof(*data
));
1480 sec_fd
= elf_getscn(ctx
->elf_fd
, section
);
1483 if (gelf_getshdr(sec_fd
, &sec_hdr
) != &sec_hdr
)
1486 sec_name
= elf_strptr(ctx
->elf_fd
, ctx
->elf_hdr
.e_shstrndx
,
1488 if (!sec_name
|| !sec_hdr
.sh_size
)
1491 sec_edata
= elf_getdata(sec_fd
, NULL
);
1492 if (!sec_edata
|| elf_getdata(sec_fd
, sec_edata
))
1495 memcpy(&data
->sec_hdr
, &sec_hdr
, sizeof(sec_hdr
));
1497 data
->sec_name
= sec_name
;
1498 data
->sec_data
= sec_edata
;
1502 struct bpf_elf_map_min
{
1509 static int bpf_fetch_maps_begin(struct bpf_elf_ctx
*ctx
, int section
,
1510 struct bpf_elf_sec_data
*data
)
1512 ctx
->map_num
= data
->sec_data
->d_size
;
1513 ctx
->sec_maps
= section
;
1514 ctx
->sec_done
[section
] = true;
1516 if (ctx
->map_num
> sizeof(ctx
->maps
)) {
1517 fprintf(stderr
, "Too many BPF maps in ELF section!\n");
1521 memcpy(ctx
->maps
, data
->sec_data
->d_buf
, ctx
->map_num
);
1525 static int bpf_map_verify_all_offs(struct bpf_elf_ctx
*ctx
, int end
)
1530 for (off
= 0; off
< end
; off
+= ctx
->map_len
) {
1531 /* Order doesn't need to be linear here, hence we walk
1534 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1535 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1537 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1538 GELF_ST_TYPE(sym
.st_info
) != STT_NOTYPE
||
1539 sym
.st_shndx
!= ctx
->sec_maps
)
1541 if (sym
.st_value
== off
)
1543 if (i
== ctx
->sym_num
- 1)
1548 return off
== end
? 0 : -1;
1551 static int bpf_fetch_maps_end(struct bpf_elf_ctx
*ctx
)
1553 struct bpf_elf_map fixup
[ARRAY_SIZE(ctx
->maps
)] = {};
1554 int i
, sym_num
= bpf_map_num_sym(ctx
);
1557 if (sym_num
== 0 || sym_num
> ARRAY_SIZE(ctx
->maps
)) {
1558 fprintf(stderr
, "%u maps not supported in current map section!\n",
1563 if (ctx
->map_num
% sym_num
!= 0 ||
1564 ctx
->map_num
% sizeof(__u32
) != 0) {
1565 fprintf(stderr
, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
1569 ctx
->map_len
= ctx
->map_num
/ sym_num
;
1570 if (bpf_map_verify_all_offs(ctx
, ctx
->map_num
)) {
1571 fprintf(stderr
, "Different struct bpf_elf_map in use!\n");
1575 if (ctx
->map_len
== sizeof(struct bpf_elf_map
)) {
1576 ctx
->map_num
= sym_num
;
1578 } else if (ctx
->map_len
> sizeof(struct bpf_elf_map
)) {
1579 fprintf(stderr
, "struct bpf_elf_map not supported, coming from future version?\n");
1581 } else if (ctx
->map_len
< sizeof(struct bpf_elf_map_min
)) {
1582 fprintf(stderr
, "struct bpf_elf_map too small, not supported!\n");
1586 ctx
->map_num
= sym_num
;
1587 for (i
= 0, buff
= (void *)ctx
->maps
; i
< ctx
->map_num
;
1588 i
++, buff
+= ctx
->map_len
) {
1589 /* The fixup leaves the rest of the members as zero, which
1590 * is fine currently, but option exist to set some other
1591 * default value as well when needed in future.
1593 memcpy(&fixup
[i
], buff
, ctx
->map_len
);
1596 memcpy(ctx
->maps
, fixup
, sizeof(fixup
));
1598 printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
1599 sizeof(struct bpf_elf_map
) - ctx
->map_len
);
1603 static int bpf_fetch_license(struct bpf_elf_ctx
*ctx
, int section
,
1604 struct bpf_elf_sec_data
*data
)
1606 if (data
->sec_data
->d_size
> sizeof(ctx
->license
))
1609 memcpy(ctx
->license
, data
->sec_data
->d_buf
, data
->sec_data
->d_size
);
1610 ctx
->sec_done
[section
] = true;
1614 static int bpf_fetch_symtab(struct bpf_elf_ctx
*ctx
, int section
,
1615 struct bpf_elf_sec_data
*data
)
1617 ctx
->sym_tab
= data
->sec_data
;
1618 ctx
->sym_num
= data
->sec_hdr
.sh_size
/ data
->sec_hdr
.sh_entsize
;
1619 ctx
->sec_done
[section
] = true;
1623 static int bpf_fetch_strtab(struct bpf_elf_ctx
*ctx
, int section
,
1624 struct bpf_elf_sec_data
*data
)
1626 ctx
->str_tab
= data
->sec_data
;
1627 ctx
->sec_done
[section
] = true;
1631 static bool bpf_has_map_data(const struct bpf_elf_ctx
*ctx
)
1633 return ctx
->sym_tab
&& ctx
->str_tab
&& ctx
->sec_maps
;
1636 static int bpf_fetch_ancillary(struct bpf_elf_ctx
*ctx
)
1638 struct bpf_elf_sec_data data
;
1641 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1642 ret
= bpf_fill_section_data(ctx
, i
, &data
);
1646 if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1647 !strcmp(data
.sec_name
, ELF_SECTION_MAPS
))
1648 ret
= bpf_fetch_maps_begin(ctx
, i
, &data
);
1649 else if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1650 !strcmp(data
.sec_name
, ELF_SECTION_LICENSE
))
1651 ret
= bpf_fetch_license(ctx
, i
, &data
);
1652 else if (data
.sec_hdr
.sh_type
== SHT_SYMTAB
&&
1653 !strcmp(data
.sec_name
, ".symtab"))
1654 ret
= bpf_fetch_symtab(ctx
, i
, &data
);
1655 else if (data
.sec_hdr
.sh_type
== SHT_STRTAB
&&
1656 !strcmp(data
.sec_name
, ".strtab"))
1657 ret
= bpf_fetch_strtab(ctx
, i
, &data
);
1659 fprintf(stderr
, "Error parsing section %d! Perhaps check with readelf -a?\n",
1665 if (bpf_has_map_data(ctx
)) {
1666 ret
= bpf_fetch_maps_end(ctx
);
1668 fprintf(stderr
, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
1672 ret
= bpf_maps_attach_all(ctx
);
1674 fprintf(stderr
, "Error loading maps into kernel!\n");
1682 static int bpf_fetch_prog(struct bpf_elf_ctx
*ctx
, const char *section
,
1685 struct bpf_elf_sec_data data
;
1686 struct bpf_elf_prog prog
;
1687 int ret
, i
, fd
= -1;
1689 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1690 if (ctx
->sec_done
[i
])
1693 ret
= bpf_fill_section_data(ctx
, i
, &data
);
1695 !(data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1696 data
.sec_hdr
.sh_flags
& SHF_EXECINSTR
&&
1697 !strcmp(data
.sec_name
, section
)))
1702 memset(&prog
, 0, sizeof(prog
));
1703 prog
.type
= ctx
->type
;
1704 prog
.insns
= data
.sec_data
->d_buf
;
1705 prog
.size
= data
.sec_data
->d_size
;
1706 prog
.license
= ctx
->license
;
1708 fd
= bpf_prog_attach(section
, &prog
, ctx
);
1712 ctx
->sec_done
[i
] = true;
1719 static int bpf_apply_relo_data(struct bpf_elf_ctx
*ctx
,
1720 struct bpf_elf_sec_data
*data_relo
,
1721 struct bpf_elf_sec_data
*data_insn
)
1723 Elf_Data
*idata
= data_insn
->sec_data
;
1724 GElf_Shdr
*rhdr
= &data_relo
->sec_hdr
;
1725 int relo_ent
, relo_num
= rhdr
->sh_size
/ rhdr
->sh_entsize
;
1726 struct bpf_insn
*insns
= idata
->d_buf
;
1727 unsigned int num_insns
= idata
->d_size
/ sizeof(*insns
);
1729 for (relo_ent
= 0; relo_ent
< relo_num
; relo_ent
++) {
1730 unsigned int ioff
, rmap
;
1734 if (gelf_getrel(data_relo
->sec_data
, relo_ent
, &relo
) != &relo
)
1737 ioff
= relo
.r_offset
/ sizeof(struct bpf_insn
);
1738 if (ioff
>= num_insns
||
1739 insns
[ioff
].code
!= (BPF_LD
| BPF_IMM
| BPF_DW
)) {
1740 fprintf(stderr
, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
1742 if (ioff
< num_insns
&&
1743 insns
[ioff
].code
== (BPF_JMP
| BPF_CALL
))
1744 fprintf(stderr
, " - Try to annotate functions with always_inline attribute!\n");
1748 if (gelf_getsym(ctx
->sym_tab
, GELF_R_SYM(relo
.r_info
), &sym
) != &sym
)
1750 if (sym
.st_shndx
!= ctx
->sec_maps
) {
1751 fprintf(stderr
, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
1752 relo_ent
, sym
.st_shndx
);
1756 rmap
= sym
.st_value
/ ctx
->map_len
;
1757 if (rmap
>= ARRAY_SIZE(ctx
->map_fds
))
1759 if (!ctx
->map_fds
[rmap
])
1763 fprintf(stderr
, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
1764 bpf_str_tab_name(ctx
, &sym
), ctx
->map_fds
[rmap
],
1765 data_insn
->sec_name
, ioff
);
1767 insns
[ioff
].src_reg
= BPF_PSEUDO_MAP_FD
;
1768 insns
[ioff
].imm
= ctx
->map_fds
[rmap
];
1774 static int bpf_fetch_prog_relo(struct bpf_elf_ctx
*ctx
, const char *section
,
1775 bool *lderr
, bool *sseen
)
1777 struct bpf_elf_sec_data data_relo
, data_insn
;
1778 struct bpf_elf_prog prog
;
1779 int ret
, idx
, i
, fd
= -1;
1781 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1782 ret
= bpf_fill_section_data(ctx
, i
, &data_relo
);
1783 if (ret
< 0 || data_relo
.sec_hdr
.sh_type
!= SHT_REL
)
1786 idx
= data_relo
.sec_hdr
.sh_info
;
1788 ret
= bpf_fill_section_data(ctx
, idx
, &data_insn
);
1790 !(data_insn
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1791 data_insn
.sec_hdr
.sh_flags
& SHF_EXECINSTR
&&
1792 !strcmp(data_insn
.sec_name
, section
)))
1797 ret
= bpf_apply_relo_data(ctx
, &data_relo
, &data_insn
);
1801 memset(&prog
, 0, sizeof(prog
));
1802 prog
.type
= ctx
->type
;
1803 prog
.insns
= data_insn
.sec_data
->d_buf
;
1804 prog
.size
= data_insn
.sec_data
->d_size
;
1805 prog
.license
= ctx
->license
;
1807 fd
= bpf_prog_attach(section
, &prog
, ctx
);
1813 ctx
->sec_done
[i
] = true;
1814 ctx
->sec_done
[idx
] = true;
/* Load the program in @section: prefer the relocated path when map data
 * exists, fall back to the plain path unless the relocated load failed
 * hard.  Emit a diagnostic only when the section was never seen.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool lderr = false, sseen = false;
	int ret = -1;

	if (bpf_has_map_data(ctx))
		ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
	if (ret < 0 && !lderr)
		ret = bpf_fetch_prog(ctx, section, &sseen);
	if (ret < 0 && !sseen)
		fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
			section);

	return ret;
}
1836 static int bpf_find_map_by_id(struct bpf_elf_ctx
*ctx
, uint32_t id
)
1840 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++)
1841 if (ctx
->map_fds
[i
] && ctx
->maps
[i
].id
== id
&&
1842 ctx
->maps
[i
].type
== BPF_MAP_TYPE_PROG_ARRAY
)
1847 static int bpf_fill_prog_arrays(struct bpf_elf_ctx
*ctx
)
1849 struct bpf_elf_sec_data data
;
1850 uint32_t map_id
, key_id
;
1851 int fd
, i
, ret
, idx
;
1853 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1854 if (ctx
->sec_done
[i
])
1857 ret
= bpf_fill_section_data(ctx
, i
, &data
);
1861 ret
= sscanf(data
.sec_name
, "%i/%i", &map_id
, &key_id
);
1865 idx
= bpf_find_map_by_id(ctx
, map_id
);
1869 fd
= bpf_fetch_prog_sec(ctx
, data
.sec_name
);
1873 ret
= bpf_map_update(ctx
->map_fds
[idx
], &key_id
,
1877 fprintf(stderr
, "Tail call key %u for map %u out of bounds?\n",
1882 ctx
->sec_done
[i
] = true;
1888 static void bpf_save_finfo(struct bpf_elf_ctx
*ctx
)
1893 memset(&ctx
->stat
, 0, sizeof(ctx
->stat
));
1895 ret
= fstat(ctx
->obj_fd
, &st
);
1897 fprintf(stderr
, "Stat of elf file failed: %s\n",
1902 ctx
->stat
.st_dev
= st
.st_dev
;
1903 ctx
->stat
.st_ino
= st
.st_ino
;
1906 static int bpf_read_pin_mapping(FILE *fp
, uint32_t *id
, char *path
)
1908 char buff
[PATH_MAX
];
1910 while (fgets(buff
, sizeof(buff
), fp
)) {
1913 while (*ptr
== ' ' || *ptr
== '\t')
1916 if (*ptr
== '#' || *ptr
== '\n' || *ptr
== 0)
1919 if (sscanf(ptr
, "%i %s\n", id
, path
) != 2 &&
1920 sscanf(ptr
, "%i %s #", id
, path
) != 2) {
1931 static bool bpf_pinning_reserved(uint32_t pinning
)
1943 static void bpf_hash_init(struct bpf_elf_ctx
*ctx
, const char *db_file
)
1945 struct bpf_hash_entry
*entry
;
1946 char subpath
[PATH_MAX
] = {};
1951 fp
= fopen(db_file
, "r");
1955 while ((ret
= bpf_read_pin_mapping(fp
, &pinning
, subpath
))) {
1957 fprintf(stderr
, "Database %s is corrupted at: %s\n",
1963 if (bpf_pinning_reserved(pinning
)) {
1964 fprintf(stderr
, "Database %s, id %u is reserved - ignoring!\n",
1969 entry
= malloc(sizeof(*entry
));
1971 fprintf(stderr
, "No memory left for db entry!\n");
1975 entry
->pinning
= pinning
;
1976 entry
->subpath
= strdup(subpath
);
1977 if (!entry
->subpath
) {
1978 fprintf(stderr
, "No memory left for db entry!\n");
1983 entry
->next
= ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)];
1984 ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)] = entry
;
1990 static void bpf_hash_destroy(struct bpf_elf_ctx
*ctx
)
1992 struct bpf_hash_entry
*entry
;
1995 for (i
= 0; i
< ARRAY_SIZE(ctx
->ht
); i
++) {
1996 while ((entry
= ctx
->ht
[i
]) != NULL
) {
1997 ctx
->ht
[i
] = entry
->next
;
1998 free((char *)entry
->subpath
);
2004 static int bpf_elf_check_ehdr(const struct bpf_elf_ctx
*ctx
)
2006 if (ctx
->elf_hdr
.e_type
!= ET_REL
||
2007 (ctx
->elf_hdr
.e_machine
!= EM_NONE
&&
2008 ctx
->elf_hdr
.e_machine
!= EM_BPF
) ||
2009 ctx
->elf_hdr
.e_version
!= EV_CURRENT
) {
2010 fprintf(stderr
, "ELF format error, ELF file not for eBPF?\n");
2014 switch (ctx
->elf_hdr
.e_ident
[EI_DATA
]) {
2016 fprintf(stderr
, "ELF format error, wrong endianness info?\n");
2019 if (htons(1) == 1) {
2021 "We are big endian, eBPF object is little endian!\n");
2026 if (htons(1) != 1) {
2028 "We are little endian, eBPF object is big endian!\n");
2037 static int bpf_elf_ctx_init(struct bpf_elf_ctx
*ctx
, const char *pathname
,
2038 enum bpf_prog_type type
, bool verbose
)
2042 if (elf_version(EV_CURRENT
) == EV_NONE
||
2043 bpf_init_env(pathname
))
2046 memset(ctx
, 0, sizeof(*ctx
));
2047 ctx
->verbose
= verbose
;
2050 ctx
->obj_fd
= open(pathname
, O_RDONLY
);
2051 if (ctx
->obj_fd
< 0)
2054 ctx
->elf_fd
= elf_begin(ctx
->obj_fd
, ELF_C_READ
, NULL
);
2060 if (elf_kind(ctx
->elf_fd
) != ELF_K_ELF
) {
2065 if (gelf_getehdr(ctx
->elf_fd
, &ctx
->elf_hdr
) !=
2071 ret
= bpf_elf_check_ehdr(ctx
);
2075 ctx
->sec_done
= calloc(ctx
->elf_hdr
.e_shnum
,
2076 sizeof(*(ctx
->sec_done
)));
2077 if (!ctx
->sec_done
) {
2082 if (ctx
->verbose
&& bpf_log_realloc(ctx
)) {
2087 bpf_save_finfo(ctx
);
2088 bpf_hash_init(ctx
, CONFDIR
"/bpf_pinning");
2092 free(ctx
->sec_done
);
2094 elf_end(ctx
->elf_fd
);
2100 static int bpf_maps_count(struct bpf_elf_ctx
*ctx
)
2104 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++) {
2105 if (!ctx
->map_fds
[i
])
2113 static void bpf_maps_teardown(struct bpf_elf_ctx
*ctx
)
2117 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++) {
2118 if (ctx
->map_fds
[i
])
2119 close(ctx
->map_fds
[i
]);
2123 static void bpf_elf_ctx_destroy(struct bpf_elf_ctx
*ctx
, bool failure
)
2126 bpf_maps_teardown(ctx
);
2128 bpf_hash_destroy(ctx
);
2130 free(ctx
->sec_done
);
2133 elf_end(ctx
->elf_fd
);
2137 static struct bpf_elf_ctx __ctx
;
2139 static int bpf_obj_open(const char *pathname
, enum bpf_prog_type type
,
2140 const char *section
, bool verbose
)
2142 struct bpf_elf_ctx
*ctx
= &__ctx
;
2145 ret
= bpf_elf_ctx_init(ctx
, pathname
, type
, verbose
);
2147 fprintf(stderr
, "Cannot initialize ELF context!\n");
2151 ret
= bpf_fetch_ancillary(ctx
);
2153 fprintf(stderr
, "Error fetching ELF ancillary data!\n");
2157 fd
= bpf_fetch_prog_sec(ctx
, section
);
2159 fprintf(stderr
, "Error fetching program/map!\n");
2164 ret
= bpf_fill_prog_arrays(ctx
);
2166 fprintf(stderr
, "Error filling program arrays!\n");
2168 bpf_elf_ctx_destroy(ctx
, ret
< 0);
2179 bpf_map_set_send(int fd
, struct sockaddr_un
*addr
, unsigned int addr_len
,
2180 const struct bpf_map_data
*aux
, unsigned int entries
)
2182 struct bpf_map_set_msg msg
= {
2183 .aux
.uds_ver
= BPF_SCM_AUX_VER
,
2184 .aux
.num_ent
= entries
,
2186 int *cmsg_buf
, min_fd
;
2190 strncpy(msg
.aux
.obj_name
, aux
->obj
, sizeof(msg
.aux
.obj_name
));
2191 memcpy(&msg
.aux
.obj_st
, aux
->st
, sizeof(msg
.aux
.obj_st
));
2193 cmsg_buf
= bpf_map_set_init(&msg
, addr
, addr_len
);
2194 amsg_buf
= (char *)msg
.aux
.ent
;
2196 for (i
= 0; i
< entries
; i
+= min_fd
) {
2199 min_fd
= min(BPF_SCM_MAX_FDS
* 1U, entries
- i
);
2200 bpf_map_set_init_single(&msg
, min_fd
);
2202 memcpy(cmsg_buf
, &aux
->fds
[i
], sizeof(aux
->fds
[0]) * min_fd
);
2203 memcpy(amsg_buf
, &aux
->ent
[i
], sizeof(aux
->ent
[0]) * min_fd
);
2205 ret
= sendmsg(fd
, &msg
.hdr
, 0);
2214 bpf_map_set_recv(int fd
, int *fds
, struct bpf_map_aux
*aux
,
2215 unsigned int entries
)
2217 struct bpf_map_set_msg msg
;
2218 int *cmsg_buf
, min_fd
;
2219 char *amsg_buf
, *mmsg_buf
;
2220 unsigned int needed
= 1;
2223 cmsg_buf
= bpf_map_set_init(&msg
, NULL
, 0);
2224 amsg_buf
= (char *)msg
.aux
.ent
;
2225 mmsg_buf
= (char *)&msg
.aux
;
2227 for (i
= 0; i
< min(entries
, needed
); i
+= min_fd
) {
2228 struct cmsghdr
*cmsg
;
2231 min_fd
= min(entries
, entries
- i
);
2232 bpf_map_set_init_single(&msg
, min_fd
);
2234 ret
= recvmsg(fd
, &msg
.hdr
, 0);
2238 cmsg
= CMSG_FIRSTHDR(&msg
.hdr
);
2239 if (!cmsg
|| cmsg
->cmsg_type
!= SCM_RIGHTS
)
2241 if (msg
.hdr
.msg_flags
& MSG_CTRUNC
)
2243 if (msg
.aux
.uds_ver
!= BPF_SCM_AUX_VER
)
2246 min_fd
= (cmsg
->cmsg_len
- sizeof(*cmsg
)) / sizeof(fd
);
2247 if (min_fd
> entries
|| min_fd
<= 0)
2250 memcpy(&fds
[i
], cmsg_buf
, sizeof(fds
[0]) * min_fd
);
2251 memcpy(&aux
->ent
[i
], amsg_buf
, sizeof(aux
->ent
[0]) * min_fd
);
2252 memcpy(aux
, mmsg_buf
, offsetof(struct bpf_map_aux
, ent
));
2254 needed
= aux
->num_ent
;
2260 int bpf_send_map_fds(const char *path
, const char *obj
)
2262 struct bpf_elf_ctx
*ctx
= &__ctx
;
2263 struct sockaddr_un addr
= { .sun_family
= AF_UNIX
};
2264 struct bpf_map_data bpf_aux
= {
2265 .fds
= ctx
->map_fds
,
2272 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
2274 fprintf(stderr
, "Cannot open socket: %s\n",
2279 strncpy(addr
.sun_path
, path
, sizeof(addr
.sun_path
));
2281 ret
= connect(fd
, (struct sockaddr
*)&addr
, sizeof(addr
));
2283 fprintf(stderr
, "Cannot connect to %s: %s\n",
2284 path
, strerror(errno
));
2288 ret
= bpf_map_set_send(fd
, &addr
, sizeof(addr
), &bpf_aux
,
2289 bpf_maps_count(ctx
));
2291 fprintf(stderr
, "Cannot send fds to %s: %s\n",
2292 path
, strerror(errno
));
2294 bpf_maps_teardown(ctx
);
/* Receive up to @entries map fds plus their aux descriptors from the
 * UNIX socket bound at @path; the socket node is unlinked afterwards.
 *
 * Returns 0 on success, negative/-1 on error.
 *
 * Fixes: the socket fd leaked on the bind() error path, and an overlong
 * @path could leave sun_path without NUL termination — now rejected
 * explicitly before the copy.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	/* Reject paths that would not fit NUL-terminated in sun_path. */
	if (strlen(path) >= sizeof(addr.sun_path)) {
		fprintf(stderr, "Socket path %s too long!\n", path);
		close(fd);
		return -1;
	}
	strcpy(addr.sun_path, path);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
2330 #endif /* HAVE_ELF */