/*
 * bpf.c	BPF common code
 *
 * This program is free software; you can distribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Daniel Borkmann <daniel@iogearbox.net>
 *		Jiri Pirko <jiri@resnulli.us>
 *		Alexei Starovoitov <ast@kernel.org>
 */
31 #include <sys/types.h>
35 #include <sys/mount.h>
36 #include <sys/syscall.h>
37 #include <sys/sendfile.h>
38 #include <sys/resource.h>
40 #include <arpa/inet.h>
43 #include "json_print.h"
49 struct bpf_prog_meta
{
56 static const enum bpf_prog_type __bpf_types
[] = {
57 BPF_PROG_TYPE_SCHED_CLS
,
58 BPF_PROG_TYPE_SCHED_ACT
,
61 BPF_PROG_TYPE_LWT_OUT
,
62 BPF_PROG_TYPE_LWT_XMIT
,
65 static const struct bpf_prog_meta __bpf_prog_meta
[] = {
66 [BPF_PROG_TYPE_SCHED_CLS
] = {
69 .section
= ELF_SECTION_CLASSIFIER
,
70 .may_uds_export
= true,
72 [BPF_PROG_TYPE_SCHED_ACT
] = {
75 .section
= ELF_SECTION_ACTION
,
76 .may_uds_export
= true,
78 [BPF_PROG_TYPE_XDP
] = {
81 .section
= ELF_SECTION_PROG
,
83 [BPF_PROG_TYPE_LWT_IN
] = {
86 .section
= ELF_SECTION_PROG
,
88 [BPF_PROG_TYPE_LWT_OUT
] = {
91 .section
= ELF_SECTION_PROG
,
93 [BPF_PROG_TYPE_LWT_XMIT
] = {
96 .section
= ELF_SECTION_PROG
,
98 [BPF_PROG_TYPE_LWT_SEG6LOCAL
] = {
99 .type
= "lwt_seg6local",
101 .section
= ELF_SECTION_PROG
,
105 static const char *bpf_prog_to_subdir(enum bpf_prog_type type
)
107 assert(type
< ARRAY_SIZE(__bpf_prog_meta
) &&
108 __bpf_prog_meta
[type
].subdir
);
109 return __bpf_prog_meta
[type
].subdir
;
112 const char *bpf_prog_to_default_section(enum bpf_prog_type type
)
114 assert(type
< ARRAY_SIZE(__bpf_prog_meta
) &&
115 __bpf_prog_meta
[type
].section
);
116 return __bpf_prog_meta
[type
].section
;
120 static int bpf_obj_open(const char *path
, enum bpf_prog_type type
,
121 const char *sec
, __u32 ifindex
, bool verbose
);
123 static int bpf_obj_open(const char *path
, enum bpf_prog_type type
,
124 const char *sec
, __u32 ifindex
, bool verbose
)
126 fprintf(stderr
, "No ELF library support compiled in.\n");
/* Convert a user-space pointer into the __u64 form the bpf(2) ABI
 * expects inside union bpf_attr (cast via unsigned long to stay
 * correct on both 32- and 64-bit).
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	return (__u64)(unsigned long)ptr;
}
/* Thin wrapper around the bpf(2) syscall. Falls back to ENOSYS when
 * the build environment does not define __NR_bpf (headers too old).
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	return syscall(__NR_bpf, cmd, attr, size);
#else
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#endif
}
/* Insert or update one map element via BPF_MAP_UPDATE_ELEM.
 * flags takes BPF_ANY/BPF_NOEXIST/BPF_EXIST semantics.
 */
static int bpf_map_update(int fd, const void *key, const void *value,
			  uint64_t flags)
{
	union bpf_attr attr = {};

	attr.map_fd = fd;
	attr.key = bpf_ptr_to_u64(key);
	attr.value = bpf_ptr_to_u64(value);
	attr.flags = flags;

	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
/* Obtain a new fd referencing the loaded program with the given
 * kernel-assigned ID.
 */
static int bpf_prog_fd_by_id(uint32_t id)
{
	union bpf_attr attr = {};

	attr.prog_id = id;

	return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}
/* Query program info for fd via BPF_OBJ_GET_INFO_BY_FD. On success,
 * *info_len is updated to the number of bytes the kernel filled in.
 */
static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
			       uint32_t *info_len)
{
	union bpf_attr attr = {};
	int ret;

	attr.info.bpf_fd = fd;
	attr.info.info = bpf_ptr_to_u64(info);
	attr.info.info_len = *info_len;

	ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	if (!ret)
		*info_len = attr.info.info_len;

	return ret;
}
188 int bpf_dump_prog_info(FILE *f
, uint32_t id
)
190 struct bpf_prog_info info
= {};
191 uint32_t len
= sizeof(info
);
192 int fd
, ret
, dump_ok
= 0;
195 open_json_object("prog");
196 print_uint(PRINT_ANY
, "id", "id %u ", id
);
198 fd
= bpf_prog_fd_by_id(id
);
202 ret
= bpf_prog_info_by_fd(fd
, &info
, &len
);
204 int jited
= !!info
.jited_prog_len
;
206 print_string(PRINT_ANY
, "tag", "tag %s ",
207 hexstring_n2a(info
.tag
, sizeof(info
.tag
),
209 print_uint(PRINT_JSON
, "jited", NULL
, jited
);
210 if (jited
&& !is_json_context())
211 fprintf(f
, "jited ");
221 static int bpf_parse_string(char *arg
, bool from_file
, __u16
*bpf_len
,
222 char **bpf_string
, bool *need_release
,
223 const char separator
)
228 size_t tmp_len
, op_len
= sizeof("65535 255 255 4294967295,");
229 char *tmp_string
, *pos
, c_prev
= ' ';
233 tmp_len
= sizeof("4096,") + BPF_MAXINSNS
* op_len
;
234 tmp_string
= pos
= calloc(1, tmp_len
);
235 if (tmp_string
== NULL
)
238 fp
= fopen(arg
, "r");
240 perror("Cannot fopen");
245 while ((c
= fgetc(fp
)) != EOF
) {
262 if (pos
- tmp_string
== tmp_len
)
275 *need_release
= true;
276 *bpf_string
= tmp_string
;
278 *need_release
= false;
282 if (sscanf(*bpf_string
, "%hu%c", bpf_len
, &sp
) != 2 ||
292 static int bpf_ops_parse(int argc
, char **argv
, struct sock_filter
*bpf_ops
,
295 char *bpf_string
, *token
, separator
= ',';
302 if (bpf_parse_string(argv
[0], from_file
, &bpf_len
, &bpf_string
,
303 &need_release
, separator
))
305 if (bpf_len
== 0 || bpf_len
> BPF_MAXINSNS
) {
311 while ((token
= strchr(token
, separator
)) && (++token
)[0]) {
313 fprintf(stderr
, "Real program length exceeds encoded length parameter!\n");
318 if (sscanf(token
, "%hu %hhu %hhu %u,",
319 &bpf_ops
[i
].code
, &bpf_ops
[i
].jt
,
320 &bpf_ops
[i
].jf
, &bpf_ops
[i
].k
) != 4) {
321 fprintf(stderr
, "Error at instruction %d!\n", i
);
330 fprintf(stderr
, "Parsed program length is less than encoded length parameter!\n");
342 void bpf_print_ops(struct rtattr
*bpf_ops
, __u16 len
)
344 struct sock_filter
*ops
= RTA_DATA(bpf_ops
);
350 open_json_object("bytecode");
351 print_uint(PRINT_ANY
, "length", "bytecode \'%u,", len
);
352 open_json_array(PRINT_JSON
, "insns");
354 for (i
= 0; i
< len
; i
++) {
355 open_json_object(NULL
);
356 print_hu(PRINT_ANY
, "code", "%hu ", ops
[i
].code
);
357 print_hhu(PRINT_ANY
, "jt", "%hhu ", ops
[i
].jt
);
358 print_hhu(PRINT_ANY
, "jf", "%hhu ", ops
[i
].jf
);
360 print_uint(PRINT_ANY
, "k", "%u\'", ops
[i
].k
);
362 print_uint(PRINT_ANY
, "k", "%u,", ops
[i
].k
);
366 close_json_array(PRINT_JSON
, NULL
);
370 static void bpf_map_pin_report(const struct bpf_elf_map
*pin
,
371 const struct bpf_elf_map
*obj
)
373 fprintf(stderr
, "Map specification differs from pinned file!\n");
375 if (obj
->type
!= pin
->type
)
376 fprintf(stderr
, " - Type: %u (obj) != %u (pin)\n",
377 obj
->type
, pin
->type
);
378 if (obj
->size_key
!= pin
->size_key
)
379 fprintf(stderr
, " - Size key: %u (obj) != %u (pin)\n",
380 obj
->size_key
, pin
->size_key
);
381 if (obj
->size_value
!= pin
->size_value
)
382 fprintf(stderr
, " - Size value: %u (obj) != %u (pin)\n",
383 obj
->size_value
, pin
->size_value
);
384 if (obj
->max_elem
!= pin
->max_elem
)
385 fprintf(stderr
, " - Max elems: %u (obj) != %u (pin)\n",
386 obj
->max_elem
, pin
->max_elem
);
387 if (obj
->flags
!= pin
->flags
)
388 fprintf(stderr
, " - Flags: %#x (obj) != %#x (pin)\n",
389 obj
->flags
, pin
->flags
);
391 fprintf(stderr
, "\n");
394 struct bpf_prog_data
{
400 struct bpf_prog_data owner
;
401 unsigned int btf_id_key
;
402 unsigned int btf_id_val
;
405 static int bpf_derive_elf_map_from_fdinfo(int fd
, struct bpf_elf_map
*map
,
406 struct bpf_map_ext
*ext
)
408 unsigned int val
, owner_type
= 0, owner_jited
= 0;
409 char file
[PATH_MAX
], buff
[4096];
412 snprintf(file
, sizeof(file
), "/proc/%d/fdinfo/%d", getpid(), fd
);
413 memset(map
, 0, sizeof(*map
));
415 fp
= fopen(file
, "r");
417 fprintf(stderr
, "No procfs support?!\n");
421 while (fgets(buff
, sizeof(buff
), fp
)) {
422 if (sscanf(buff
, "map_type:\t%u", &val
) == 1)
424 else if (sscanf(buff
, "key_size:\t%u", &val
) == 1)
426 else if (sscanf(buff
, "value_size:\t%u", &val
) == 1)
427 map
->size_value
= val
;
428 else if (sscanf(buff
, "max_entries:\t%u", &val
) == 1)
430 else if (sscanf(buff
, "map_flags:\t%i", &val
) == 1)
432 else if (sscanf(buff
, "owner_prog_type:\t%i", &val
) == 1)
434 else if (sscanf(buff
, "owner_jited:\t%i", &val
) == 1)
440 memset(ext
, 0, sizeof(*ext
));
441 ext
->owner
.type
= owner_type
;
442 ext
->owner
.jited
= owner_jited
;
448 static int bpf_map_selfcheck_pinned(int fd
, const struct bpf_elf_map
*map
,
449 struct bpf_map_ext
*ext
, int length
,
450 enum bpf_prog_type type
)
452 struct bpf_elf_map tmp
, zero
= {};
455 ret
= bpf_derive_elf_map_from_fdinfo(fd
, &tmp
, ext
);
459 /* The decision to reject this is on kernel side eventually, but
460 * at least give the user a chance to know what's wrong.
462 if (ext
->owner
.type
&& ext
->owner
.type
!= type
)
463 fprintf(stderr
, "Program array map owner types differ: %u (obj) != %u (pin)\n",
464 type
, ext
->owner
.type
);
466 if (!memcmp(&tmp
, map
, length
)) {
469 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
470 * so just accept it. We know we do have an eBPF fd and in this
471 * case, everything is 0. It is guaranteed that no such map exists
472 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
474 if (!memcmp(&tmp
, &zero
, length
))
477 bpf_map_pin_report(&tmp
, map
);
/* Mount a bpf filesystem at target. If the target is not yet a mount
 * point, first bind-mount it onto itself once so the subsequent
 * --make-private can succeed, then mount bpffs with mode 0700.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		if (errno != EINVAL || bind_done) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700")) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
/* Make sure the intended mount target directory exists; an already
 * existing directory (EEXIST) is not an error.
 */
static int bpf_mnt_check_target(const char *target)
{
	int ret;

	ret = mkdir(target, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", target,
			strerror(errno));
		return ret;
	}

	return 0;
}
/* Check that mnt is a mount point backed by a filesystem whose
 * statfs magic matches the expected value; -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs st_fs;

	if (statfs(mnt, &st_fs) < 0)
		return -ENOENT;
	if ((unsigned long)st_fs.f_type != magic)
		return -ENOENT;

	return 0;
}
/* Probe a single candidate mount point; on a magic match, copy the
 * path into the caller's buffer and return it, otherwise NULL.
 */
static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
					 int len, const char *mntpt)
{
	int ret;

	ret = bpf_valid_mntpt(mntpt, magic);
	if (!ret) {
		strlcpy(mnt, mntpt, len);
		return mnt;
	}

	return NULL;
}
549 static const char *bpf_find_mntpt(const char *fstype
, unsigned long magic
,
551 const char * const *known_mnts
)
553 const char * const *ptr
;
560 if (bpf_find_mntpt_single(magic
, mnt
, len
, *ptr
))
569 fp
= fopen("/proc/mounts", "r");
573 while (fscanf(fp
, "%*s %" textify(PATH_MAX
) "s %99s %*s %*d %*d\n",
575 if (strcmp(type
, fstype
) == 0)
580 if (strcmp(type
, fstype
) != 0)
586 int bpf_trace_pipe(void)
588 char tracefs_mnt
[PATH_MAX
] = TRACE_DIR_MNT
;
589 static const char * const tracefs_known_mnts
[] = {
591 "/sys/kernel/debug/tracing",
596 int fd_in
, fd_out
= STDERR_FILENO
;
597 char tpipe
[PATH_MAX
];
600 mnt
= bpf_find_mntpt("tracefs", TRACEFS_MAGIC
, tracefs_mnt
,
601 sizeof(tracefs_mnt
), tracefs_known_mnts
);
603 fprintf(stderr
, "tracefs not mounted?\n");
607 snprintf(tpipe
, sizeof(tpipe
), "%s/trace_pipe", mnt
);
609 fd_in
= open(tpipe
, O_RDONLY
);
613 fprintf(stderr
, "Running! Hang up with ^C!\n\n");
615 static char buff
[4096];
618 ret
= read(fd_in
, buff
, sizeof(buff
));
619 if (ret
> 0 && write(fd_out
, buff
, ret
) == ret
)
628 static int bpf_gen_global(const char *bpf_sub_dir
)
630 char bpf_glo_dir
[PATH_MAX
];
633 snprintf(bpf_glo_dir
, sizeof(bpf_glo_dir
), "%s/%s/",
634 bpf_sub_dir
, BPF_DIR_GLOBALS
);
636 ret
= mkdir(bpf_glo_dir
, S_IRWXU
);
637 if (ret
&& errno
!= EEXIST
) {
638 fprintf(stderr
, "mkdir %s failed: %s\n", bpf_glo_dir
,
646 static int bpf_gen_master(const char *base
, const char *name
)
648 char bpf_sub_dir
[PATH_MAX
+ NAME_MAX
+ 1];
651 snprintf(bpf_sub_dir
, sizeof(bpf_sub_dir
), "%s%s/", base
, name
);
653 ret
= mkdir(bpf_sub_dir
, S_IRWXU
);
654 if (ret
&& errno
!= EEXIST
) {
655 fprintf(stderr
, "mkdir %s failed: %s\n", bpf_sub_dir
,
660 return bpf_gen_global(bpf_sub_dir
);
/* Fallback for filesystems where symlinks are not permitted (EPERM):
 * emulate the slave directory with a bind mount of the master dir.
 */
static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int ret;

	ret = mkdir(full_name, S_IRWXU);
	if (ret) {
		/* Callers only reach this path for a fresh directory. */
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return ret;
	}

	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (ret) {
		rmdir(full_name);
		fprintf(stderr, "mount --bind %s %s failed: %s\n",
			full_link, full_name, strerror(errno));
	}

	return ret;
}
686 static int bpf_gen_slave(const char *base
, const char *name
,
689 char bpf_lnk_dir
[PATH_MAX
+ NAME_MAX
+ 1];
690 char bpf_sub_dir
[PATH_MAX
+ NAME_MAX
];
694 snprintf(bpf_lnk_dir
, sizeof(bpf_lnk_dir
), "%s%s/", base
, link
);
695 snprintf(bpf_sub_dir
, sizeof(bpf_sub_dir
), "%s%s", base
, name
);
697 ret
= symlink(bpf_lnk_dir
, bpf_sub_dir
);
699 if (errno
!= EEXIST
) {
700 if (errno
!= EPERM
) {
701 fprintf(stderr
, "symlink %s failed: %s\n",
702 bpf_sub_dir
, strerror(errno
));
706 return bpf_slave_via_bind_mnt(bpf_sub_dir
,
710 ret
= lstat(bpf_sub_dir
, &sb
);
712 fprintf(stderr
, "lstat %s failed: %s\n",
713 bpf_sub_dir
, strerror(errno
));
717 if ((sb
.st_mode
& S_IFMT
) != S_IFLNK
)
718 return bpf_gen_global(bpf_sub_dir
);
724 static int bpf_gen_hierarchy(const char *base
)
728 ret
= bpf_gen_master(base
, bpf_prog_to_subdir(__bpf_types
[0]));
729 for (i
= 1; i
< ARRAY_SIZE(__bpf_types
) && !ret
; i
++)
730 ret
= bpf_gen_slave(base
,
731 bpf_prog_to_subdir(__bpf_types
[i
]),
732 bpf_prog_to_subdir(__bpf_types
[0]));
736 static const char *bpf_get_work_dir(enum bpf_prog_type type
)
738 static char bpf_tmp
[PATH_MAX
] = BPF_DIR_MNT
;
739 static char bpf_wrk_dir
[PATH_MAX
];
740 static const char *mnt
;
741 static bool bpf_mnt_cached
;
742 const char *mnt_env
= getenv(BPF_ENV_MNT
);
743 static const char * const bpf_known_mnts
[] = {
750 if (bpf_mnt_cached
) {
751 const char *out
= mnt
;
754 snprintf(bpf_tmp
, sizeof(bpf_tmp
), "%s%s/",
755 out
, bpf_prog_to_subdir(type
));
762 mnt
= bpf_find_mntpt_single(BPF_FS_MAGIC
, bpf_tmp
,
763 sizeof(bpf_tmp
), mnt_env
);
765 mnt
= bpf_find_mntpt("bpf", BPF_FS_MAGIC
, bpf_tmp
,
766 sizeof(bpf_tmp
), bpf_known_mnts
);
768 mnt
= mnt_env
? : BPF_DIR_MNT
;
769 ret
= bpf_mnt_check_target(mnt
);
771 ret
= bpf_mnt_fs(mnt
);
778 ret
= snprintf(bpf_wrk_dir
, sizeof(bpf_wrk_dir
), "%s/", mnt
);
779 if (ret
< 0 || ret
>= sizeof(bpf_wrk_dir
)) {
784 ret
= bpf_gen_hierarchy(bpf_wrk_dir
);
792 bpf_mnt_cached
= true;
796 static int bpf_obj_get(const char *pathname
, enum bpf_prog_type type
)
798 union bpf_attr attr
= {};
801 if (strlen(pathname
) > 2 && pathname
[0] == 'm' &&
802 pathname
[1] == ':' && bpf_get_work_dir(type
)) {
803 snprintf(tmp
, sizeof(tmp
), "%s/%s",
804 bpf_get_work_dir(type
), pathname
+ 2);
808 attr
.pathname
= bpf_ptr_to_u64(pathname
);
810 return bpf(BPF_OBJ_GET
, &attr
, sizeof(attr
));
813 static int bpf_obj_pinned(const char *pathname
, enum bpf_prog_type type
)
815 int prog_fd
= bpf_obj_get(pathname
, type
);
818 fprintf(stderr
, "Couldn\'t retrieve pinned program \'%s\': %s\n",
819 pathname
, strerror(errno
));
823 static int bpf_do_parse(struct bpf_cfg_in
*cfg
, const bool *opt_tbl
)
825 const char *file
, *section
, *uds_name
;
826 bool verbose
= false;
833 if (opt_tbl
[CBPF_BYTECODE
] &&
834 (matches(*argv
, "bytecode") == 0 ||
835 strcmp(*argv
, "bc") == 0)) {
836 cfg
->mode
= CBPF_BYTECODE
;
837 } else if (opt_tbl
[CBPF_FILE
] &&
838 (matches(*argv
, "bytecode-file") == 0 ||
839 strcmp(*argv
, "bcf") == 0)) {
840 cfg
->mode
= CBPF_FILE
;
841 } else if (opt_tbl
[EBPF_OBJECT
] &&
842 (matches(*argv
, "object-file") == 0 ||
843 strcmp(*argv
, "obj") == 0)) {
844 cfg
->mode
= EBPF_OBJECT
;
845 } else if (opt_tbl
[EBPF_PINNED
] &&
846 (matches(*argv
, "object-pinned") == 0 ||
847 matches(*argv
, "pinned") == 0 ||
848 matches(*argv
, "fd") == 0)) {
849 cfg
->mode
= EBPF_PINNED
;
851 fprintf(stderr
, "What mode is \"%s\"?\n", *argv
);
856 file
= section
= uds_name
= NULL
;
857 if (cfg
->mode
== EBPF_OBJECT
|| cfg
->mode
== EBPF_PINNED
) {
861 if (cfg
->type
== BPF_PROG_TYPE_UNSPEC
) {
862 if (argc
> 0 && matches(*argv
, "type") == 0) {
864 for (i
= 0; i
< ARRAY_SIZE(__bpf_prog_meta
);
866 if (!__bpf_prog_meta
[i
].type
)
869 __bpf_prog_meta
[i
].type
)) {
875 if (cfg
->type
== BPF_PROG_TYPE_UNSPEC
) {
876 fprintf(stderr
, "What type is \"%s\"?\n",
882 cfg
->type
= BPF_PROG_TYPE_SCHED_CLS
;
886 section
= bpf_prog_to_default_section(cfg
->type
);
887 if (argc
> 0 && matches(*argv
, "section") == 0) {
893 if (__bpf_prog_meta
[cfg
->type
].may_uds_export
) {
894 uds_name
= getenv(BPF_ENV_UDS
);
895 if (argc
> 0 && !uds_name
&&
896 matches(*argv
, "export") == 0) {
903 if (argc
> 0 && matches(*argv
, "verbose") == 0) {
911 if (cfg
->mode
== CBPF_BYTECODE
|| cfg
->mode
== CBPF_FILE
) {
912 ret
= bpf_ops_parse(argc
, argv
, cfg
->opcodes
,
913 cfg
->mode
== CBPF_FILE
);
914 cfg
->n_opcodes
= ret
;
915 } else if (cfg
->mode
== EBPF_OBJECT
) {
916 ret
= 0; /* program will be loaded by load stage */
917 } else if (cfg
->mode
== EBPF_PINNED
) {
918 ret
= bpf_obj_pinned(file
, cfg
->type
);
925 cfg
->section
= section
;
929 cfg
->verbose
= verbose
;
934 static int bpf_do_load(struct bpf_cfg_in
*cfg
)
936 if (cfg
->mode
== EBPF_OBJECT
) {
938 return iproute2_load_libbpf(cfg
);
940 cfg
->prog_fd
= bpf_obj_open(cfg
->object
, cfg
->type
,
941 cfg
->section
, cfg
->ifindex
,
948 int bpf_load_common(struct bpf_cfg_in
*cfg
, const struct bpf_cfg_ops
*ops
,
951 char annotation
[256];
954 ret
= bpf_do_load(cfg
);
958 if (cfg
->mode
== CBPF_BYTECODE
|| cfg
->mode
== CBPF_FILE
)
959 ops
->cbpf_cb(nl
, cfg
->opcodes
, cfg
->n_opcodes
);
960 if (cfg
->mode
== EBPF_OBJECT
|| cfg
->mode
== EBPF_PINNED
) {
961 snprintf(annotation
, sizeof(annotation
), "%s:[%s]",
962 basename(cfg
->object
), cfg
->mode
== EBPF_PINNED
?
963 "*fsobj" : cfg
->section
);
964 ops
->ebpf_cb(nl
, cfg
->prog_fd
, annotation
);
970 int bpf_parse_common(struct bpf_cfg_in
*cfg
, const struct bpf_cfg_ops
*ops
)
972 bool opt_tbl
[BPF_MODE_MAX
] = {};
975 opt_tbl
[CBPF_BYTECODE
] = true;
976 opt_tbl
[CBPF_FILE
] = true;
980 opt_tbl
[EBPF_OBJECT
] = true;
981 opt_tbl
[EBPF_PINNED
] = true;
984 return bpf_do_parse(cfg
, opt_tbl
);
/* Convenience entry point: parse arguments, then immediately load and
 * attach via the provided callbacks.
 */
int bpf_parse_and_load_common(struct bpf_cfg_in *cfg,
			      const struct bpf_cfg_ops *ops, void *nl)
{
	int ret;

	ret = bpf_parse_common(cfg, ops);
	if (ret < 0)
		return ret;

	return bpf_load_common(cfg, ops, nl);
}
999 int bpf_graft_map(const char *map_path
, uint32_t *key
, int argc
, char **argv
)
1001 const bool opt_tbl
[BPF_MODE_MAX
] = {
1002 [EBPF_OBJECT
] = true,
1003 [EBPF_PINNED
] = true,
1005 const struct bpf_elf_map test
= {
1006 .type
= BPF_MAP_TYPE_PROG_ARRAY
,
1007 .size_key
= sizeof(int),
1008 .size_value
= sizeof(int),
1010 struct bpf_cfg_in cfg
= {
1011 .type
= BPF_PROG_TYPE_UNSPEC
,
1015 struct bpf_map_ext ext
= {};
1016 int ret
, prog_fd
, map_fd
;
1019 ret
= bpf_do_parse(&cfg
, opt_tbl
);
1023 ret
= bpf_do_load(&cfg
);
1027 prog_fd
= cfg
.prog_fd
;
1032 ret
= sscanf(cfg
.section
, "%*i/%i", &map_key
);
1034 fprintf(stderr
, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
1040 map_fd
= bpf_obj_get(map_path
, cfg
.type
);
1042 fprintf(stderr
, "Couldn\'t retrieve pinned map \'%s\': %s\n",
1043 map_path
, strerror(errno
));
1048 ret
= bpf_map_selfcheck_pinned(map_fd
, &test
, &ext
,
1049 offsetof(struct bpf_elf_map
, max_elem
),
1052 fprintf(stderr
, "Map \'%s\' self-check failed!\n", map_path
);
1056 ret
= bpf_map_update(map_fd
, &map_key
, &prog_fd
, BPF_ANY
);
1058 fprintf(stderr
, "Map update failed: %s\n", strerror(errno
));
/* Attach an already-loaded program fd to a target fd (e.g. a cgroup)
 * for the given attach type.
 */
int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr = {};

	attr.target_fd = target_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = type;

	return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
/* Detach whatever program is attached to target_fd for the given
 * attach type.
 */
int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr = {};

	attr.target_fd = target_fd;
	attr.attach_type = type;

	return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
/* Load a program into the kernel, optionally offloaded to the device
 * given by ifindex. When a log buffer is supplied, the verifier log
 * is requested at level 1.
 */
int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
		      size_t size_insns, const char *license, __u32 ifindex,
		      char *log, size_t size_log)
{
	union bpf_attr attr = {};

	attr.prog_type = type;
	attr.insns = bpf_ptr_to_u64(insns);
	attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
	attr.license = bpf_ptr_to_u64(license);
	attr.prog_ifindex = ifindex;

	if (size_log > 0) {
		attr.log_buf = bpf_ptr_to_u64(log);
		attr.log_size = size_log;
		attr.log_level = 1;
	}

	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
1109 struct bpf_elf_prog
{
1110 enum bpf_prog_type type
;
1111 struct bpf_insn
*insns
;
1112 unsigned int insns_num
;
1114 const char *license
;
1117 struct bpf_hash_entry
{
1118 unsigned int pinning
;
1119 const char *subpath
;
1120 struct bpf_hash_entry
*next
;
1124 unsigned int jit_enabled
;
1128 const struct btf_header
*hdr
;
1130 const char *strings
;
1131 const struct btf_type
**types
;
1135 struct bpf_elf_ctx
{
1136 struct bpf_config cfg
;
1145 int map_fds
[ELF_MAX_MAPS
];
1146 struct bpf_elf_map maps
[ELF_MAX_MAPS
];
1147 struct bpf_map_ext maps_ext
[ELF_MAX_MAPS
];
1148 struct bpf_elf_prog prog_text
;
1157 char license
[ELF_MAX_LICENSE_LEN
];
1158 enum bpf_prog_type type
;
1162 struct bpf_elf_st stat
;
1163 struct bpf_hash_entry
*ht
[256];
1168 struct bpf_elf_sec_data
{
1171 const char *sec_name
;
1174 struct bpf_map_data
{
1177 struct bpf_elf_st
*st
;
1178 struct bpf_elf_map
*ent
;
1181 static bool bpf_log_has_data(struct bpf_elf_ctx
*ctx
)
1183 return ctx
->log
&& ctx
->log
[0];
1186 static __check_format_string(2, 3) void
1187 bpf_dump_error(struct bpf_elf_ctx
*ctx
, const char *format
, ...)
1191 va_start(vl
, format
);
1192 vfprintf(stderr
, format
, vl
);
1195 if (bpf_log_has_data(ctx
)) {
1197 fprintf(stderr
, "%s\n", ctx
->log
);
1199 unsigned int off
= 0, len
= strlen(ctx
->log
);
1201 if (len
> BPF_MAX_LOG
) {
1202 off
= len
- BPF_MAX_LOG
;
1203 fprintf(stderr
, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
1206 fprintf(stderr
, "%s\n", ctx
->log
+ off
);
1209 memset(ctx
->log
, 0, ctx
->log_size
);
1213 static int bpf_log_realloc(struct bpf_elf_ctx
*ctx
)
1215 const size_t log_max
= UINT_MAX
>> 8;
1216 size_t log_size
= ctx
->log_size
;
1221 } else if (log_size
< log_max
) {
1223 if (log_size
> log_max
)
1229 ptr
= realloc(ctx
->log
, log_size
);
1235 ctx
->log_size
= log_size
;
/* Create a map via BPF_MAP_CREATE. For map-in-map (inner_fd set), the
 * outer map's value size is forced to sizeof(int) since values are
 * inner-map fds. BTF fds/IDs are passed through when provided.
 */
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
			  uint32_t size_value, uint32_t max_elem,
			  uint32_t flags, int inner_fd, int btf_fd,
			  uint32_t ifindex, uint32_t btf_id_key,
			  uint32_t btf_id_val)
{
	union bpf_attr attr = {};

	attr.map_type = type;
	attr.key_size = size_key;
	attr.value_size = inner_fd ? sizeof(int) : size_value;
	attr.max_entries = max_elem;
	attr.map_flags = flags;
	attr.inner_map_fd = inner_fd;
	attr.map_ifindex = ifindex;
	attr.btf_fd = btf_fd;
	attr.btf_key_type_id = btf_id_key;
	attr.btf_value_type_id = btf_id_val;

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
/* Load a raw BTF blob into the kernel; request the BTF verifier log
 * at level 1 when a log buffer is supplied.
 */
static int bpf_btf_load(void *btf, size_t size_btf,
			char *log, size_t size_log)
{
	union bpf_attr attr = {};

	attr.btf = bpf_ptr_to_u64(btf);
	attr.btf_size = size_btf;

	if (size_log > 0) {
		attr.btf_log_buf = bpf_ptr_to_u64(log);
		attr.btf_log_size = size_log;
		attr.btf_log_level = 1;
	}

	return bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
}
/* Pin an object fd at the given bpffs path via BPF_OBJ_PIN. */
static int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr = {};

	attr.pathname = bpf_ptr_to_u64(pathname);
	attr.bpf_fd = fd;

	return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
}
1289 static int bpf_obj_hash(const char *object
, uint8_t *out
, size_t len
)
1291 struct sockaddr_alg alg
= {
1292 .salg_family
= AF_ALG
,
1293 .salg_type
= "hash",
1294 .salg_name
= "sha1",
1296 int ret
, cfd
, ofd
, ffd
;
1300 if (!object
|| len
!= 20)
1303 cfd
= socket(AF_ALG
, SOCK_SEQPACKET
, 0);
1307 ret
= bind(cfd
, (struct sockaddr
*)&alg
, sizeof(alg
));
1311 ofd
= accept(cfd
, NULL
, 0);
1317 ffd
= open(object
, O_RDONLY
);
1319 fprintf(stderr
, "Error opening object %s: %s\n",
1320 object
, strerror(errno
));
1325 ret
= fstat(ffd
, &stbuff
);
1327 fprintf(stderr
, "Error doing fstat: %s\n",
1332 size
= sendfile(ofd
, ffd
, NULL
, stbuff
.st_size
);
1333 if (size
!= stbuff
.st_size
) {
1334 fprintf(stderr
, "Error from sendfile (%zd vs %zu bytes): %s\n",
1335 size
, stbuff
.st_size
, strerror(errno
));
1340 size
= read(ofd
, out
, len
);
1342 fprintf(stderr
, "Error from read (%zd vs %zu bytes): %s\n",
1343 size
, len
, strerror(errno
));
1357 static void bpf_init_env(void)
1359 struct rlimit limit
= {
1360 .rlim_cur
= RLIM_INFINITY
,
1361 .rlim_max
= RLIM_INFINITY
,
1364 /* Don't bother in case we fail! */
1365 setrlimit(RLIMIT_MEMLOCK
, &limit
);
1367 if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC
))
1368 fprintf(stderr
, "Continuing without mounted eBPF fs. Too old kernel?\n");
1371 static const char *bpf_custom_pinning(const struct bpf_elf_ctx
*ctx
,
1374 struct bpf_hash_entry
*entry
;
1376 entry
= ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)];
1377 while (entry
&& entry
->pinning
!= pinning
)
1378 entry
= entry
->next
;
1380 return entry
? entry
->subpath
: NULL
;
1383 static bool bpf_no_pinning(const struct bpf_elf_ctx
*ctx
,
1393 return !bpf_custom_pinning(ctx
, pinning
);
1397 static void bpf_make_pathname(char *pathname
, size_t len
, const char *name
,
1398 const struct bpf_elf_ctx
*ctx
, uint32_t pinning
)
1402 snprintf(pathname
, len
, "%s/%s/%s",
1403 bpf_get_work_dir(ctx
->type
),
1404 ctx
->obj_uid
, name
);
1407 snprintf(pathname
, len
, "%s/%s/%s",
1408 bpf_get_work_dir(ctx
->type
),
1409 BPF_DIR_GLOBALS
, name
);
1412 snprintf(pathname
, len
, "%s/../%s/%s",
1413 bpf_get_work_dir(ctx
->type
),
1414 bpf_custom_pinning(ctx
, pinning
), name
);
1419 static int bpf_probe_pinned(const char *name
, const struct bpf_elf_ctx
*ctx
,
1422 char pathname
[PATH_MAX
];
1424 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_work_dir(ctx
->type
))
1427 bpf_make_pathname(pathname
, sizeof(pathname
), name
, ctx
, pinning
);
1428 return bpf_obj_get(pathname
, ctx
->type
);
1431 static int bpf_make_obj_path(const struct bpf_elf_ctx
*ctx
)
1436 snprintf(tmp
, sizeof(tmp
), "%s/%s", bpf_get_work_dir(ctx
->type
),
1439 ret
= mkdir(tmp
, S_IRWXU
);
1440 if (ret
&& errno
!= EEXIST
) {
1441 fprintf(stderr
, "mkdir %s failed: %s\n", tmp
, strerror(errno
));
1448 static int bpf_make_custom_path(const struct bpf_elf_ctx
*ctx
,
1451 char tmp
[PATH_MAX
], rem
[PATH_MAX
], *sub
;
1454 snprintf(tmp
, sizeof(tmp
), "%s/../", bpf_get_work_dir(ctx
->type
));
1455 snprintf(rem
, sizeof(rem
), "%s/", todo
);
1456 sub
= strtok(rem
, "/");
1459 if (strlen(tmp
) + strlen(sub
) + 2 > PATH_MAX
)
1465 ret
= mkdir(tmp
, S_IRWXU
);
1466 if (ret
&& errno
!= EEXIST
) {
1467 fprintf(stderr
, "mkdir %s failed: %s\n", tmp
,
1472 sub
= strtok(NULL
, "/");
1478 static int bpf_place_pinned(int fd
, const char *name
,
1479 const struct bpf_elf_ctx
*ctx
, uint32_t pinning
)
1481 char pathname
[PATH_MAX
];
1485 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_work_dir(ctx
->type
))
1488 if (pinning
== PIN_OBJECT_NS
)
1489 ret
= bpf_make_obj_path(ctx
);
1490 else if ((tmp
= bpf_custom_pinning(ctx
, pinning
)))
1491 ret
= bpf_make_custom_path(ctx
, tmp
);
1495 bpf_make_pathname(pathname
, sizeof(pathname
), name
, ctx
, pinning
);
1496 return bpf_obj_pin(fd
, pathname
);
1499 static void bpf_prog_report(int fd
, const char *section
,
1500 const struct bpf_elf_prog
*prog
,
1501 struct bpf_elf_ctx
*ctx
)
1503 unsigned int insns
= prog
->size
/ sizeof(struct bpf_insn
);
1505 fprintf(stderr
, "\nProg section \'%s\' %s%s (%d)!\n", section
,
1506 fd
< 0 ? "rejected: " : "loaded",
1507 fd
< 0 ? strerror(errno
) : "",
1508 fd
< 0 ? errno
: fd
);
1510 fprintf(stderr
, " - Type: %u\n", prog
->type
);
1511 fprintf(stderr
, " - Instructions: %u (%u over limit)\n",
1512 insns
, insns
> BPF_MAXINSNS
? insns
- BPF_MAXINSNS
: 0);
1513 fprintf(stderr
, " - License: %s\n\n", prog
->license
);
1515 bpf_dump_error(ctx
, "Verifier analysis:\n\n");
1518 static int bpf_prog_attach(const char *section
,
1519 const struct bpf_elf_prog
*prog
,
1520 struct bpf_elf_ctx
*ctx
)
1525 fd
= bpf_prog_load_dev(prog
->type
, prog
->insns
, prog
->size
,
1526 prog
->license
, ctx
->ifindex
,
1527 ctx
->log
, ctx
->log_size
);
1528 if (fd
< 0 || ctx
->verbose
) {
1529 /* The verifier log is pretty chatty, sometimes so chatty
1530 * on larger programs, that we could fail to dump everything
1531 * into our buffer. Still, try to give a debuggable error
1532 * log for the user, so enlarge it and re-fail.
1534 if (fd
< 0 && (errno
== ENOSPC
|| !ctx
->log_size
)) {
1535 if (tries
++ < 10 && !bpf_log_realloc(ctx
))
1538 fprintf(stderr
, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
1539 ctx
->log_size
, tries
);
1543 bpf_prog_report(fd
, section
, prog
, ctx
);
1549 static void bpf_map_report(int fd
, const char *name
,
1550 const struct bpf_elf_map
*map
,
1551 struct bpf_elf_ctx
*ctx
, int inner_fd
)
1553 fprintf(stderr
, "Map object \'%s\' %s%s (%d)!\n", name
,
1554 fd
< 0 ? "rejected: " : "loaded",
1555 fd
< 0 ? strerror(errno
) : "",
1556 fd
< 0 ? errno
: fd
);
1558 fprintf(stderr
, " - Type: %u\n", map
->type
);
1559 fprintf(stderr
, " - Identifier: %u\n", map
->id
);
1560 fprintf(stderr
, " - Pinning: %u\n", map
->pinning
);
1561 fprintf(stderr
, " - Size key: %u\n", map
->size_key
);
1562 fprintf(stderr
, " - Size value: %u\n",
1563 inner_fd
? (int)sizeof(int) : map
->size_value
);
1564 fprintf(stderr
, " - Max elems: %u\n", map
->max_elem
);
1565 fprintf(stderr
, " - Flags: %#x\n\n", map
->flags
);
1568 static int bpf_find_map_id(const struct bpf_elf_ctx
*ctx
, uint32_t id
)
1572 for (i
= 0; i
< ctx
->map_num
; i
++) {
1573 if (ctx
->maps
[i
].id
!= id
)
1575 if (ctx
->map_fds
[i
] < 0)
1578 return ctx
->map_fds
[i
];
1584 static void bpf_report_map_in_map(int outer_fd
, uint32_t idx
)
1586 struct bpf_elf_map outer_map
;
1589 fprintf(stderr
, "Cannot insert map into map! ");
1591 ret
= bpf_derive_elf_map_from_fdinfo(outer_fd
, &outer_map
, NULL
);
1593 if (idx
>= outer_map
.max_elem
&&
1594 outer_map
.type
== BPF_MAP_TYPE_ARRAY_OF_MAPS
) {
1595 fprintf(stderr
, "Outer map has %u elements, index %u is invalid!\n",
1596 outer_map
.max_elem
, idx
);
1601 fprintf(stderr
, "Different map specs used for outer and inner map?\n");
1604 static bool bpf_is_map_in_map_type(const struct bpf_elf_map
*map
)
1606 return map
->type
== BPF_MAP_TYPE_ARRAY_OF_MAPS
||
1607 map
->type
== BPF_MAP_TYPE_HASH_OF_MAPS
;
/* Maps that stay on the host even when the program is offloaded to a
 * device; currently only the perf event array.
 */
static bool bpf_map_offload_neutral(enum bpf_map_type type)
{
	return type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}
1615 static int bpf_map_attach(const char *name
, struct bpf_elf_ctx
*ctx
,
1616 const struct bpf_elf_map
*map
, struct bpf_map_ext
*ext
,
1617 int *have_map_in_map
)
1619 int fd
, ifindex
, ret
, map_inner_fd
= 0;
1620 bool retried
= false;
1623 fd
= bpf_probe_pinned(name
, ctx
, map
->pinning
);
1625 ret
= bpf_map_selfcheck_pinned(fd
, map
, ext
,
1626 offsetof(struct bpf_elf_map
,
1630 fprintf(stderr
, "Map \'%s\' self-check failed!\n",
1635 fprintf(stderr
, "Map \'%s\' loaded as pinned!\n",
1640 if (have_map_in_map
&& bpf_is_map_in_map_type(map
)) {
1641 (*have_map_in_map
)++;
1644 fprintf(stderr
, "Map \'%s\' cannot be created since no inner map ID defined!\n",
1649 if (!have_map_in_map
&& bpf_is_map_in_map_type(map
)) {
1650 map_inner_fd
= bpf_find_map_id(ctx
, map
->inner_id
);
1651 if (map_inner_fd
< 0) {
1652 fprintf(stderr
, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
1653 name
, map
->inner_id
);
1658 ifindex
= bpf_map_offload_neutral(map
->type
) ? 0 : ctx
->ifindex
;
1660 fd
= bpf_map_create(map
->type
, map
->size_key
, map
->size_value
,
1661 map
->max_elem
, map
->flags
, map_inner_fd
, ctx
->btf_fd
,
1662 ifindex
, ext
->btf_id_key
, ext
->btf_id_val
);
1664 if (fd
< 0 || ctx
->verbose
) {
1665 bpf_map_report(fd
, name
, map
, ctx
, map_inner_fd
);
1670 ret
= bpf_place_pinned(fd
, name
, ctx
, map
->pinning
);
1673 if (!retried
&& errno
== EEXIST
) {
1677 fprintf(stderr
, "Could not pin %s map: %s\n", name
,
1685 static const char *bpf_str_tab_name(const struct bpf_elf_ctx
*ctx
,
1686 const GElf_Sym
*sym
)
1688 return ctx
->str_tab
->d_buf
+ sym
->st_name
;
1691 static int bpf_btf_find(struct bpf_elf_ctx
*ctx
, const char *name
)
1693 const struct btf_type
*type
;
1697 for (id
= 1; id
< ctx
->btf
.types_num
; id
++) {
1698 type
= ctx
->btf
.types
[id
];
1699 if (type
->name_off
>= ctx
->btf
.hdr
->str_len
)
1701 res
= &ctx
->btf
.strings
[type
->name_off
];
1702 if (!strcmp(res
, name
))
1709 static int bpf_btf_find_kv(struct bpf_elf_ctx
*ctx
, const struct bpf_elf_map
*map
,
1710 const char *name
, uint32_t *id_key
, uint32_t *id_val
)
1712 const struct btf_member
*key
, *val
;
1713 const struct btf_type
*type
;
1718 snprintf(btf_name
, sizeof(btf_name
), "____btf_map_%s", name
);
1719 id
= bpf_btf_find(ctx
, btf_name
);
1723 type
= ctx
->btf
.types
[id
];
1724 if (BTF_INFO_KIND(type
->info
) != BTF_KIND_STRUCT
)
1726 if (BTF_INFO_VLEN(type
->info
) != 2)
1729 key
= ((void *) type
) + sizeof(*type
);
1731 if (!key
->type
|| key
->type
>= ctx
->btf
.types_num
||
1732 !val
->type
|| val
->type
>= ctx
->btf
.types_num
)
1735 if (key
->name_off
>= ctx
->btf
.hdr
->str_len
||
1736 val
->name_off
>= ctx
->btf
.hdr
->str_len
)
1739 res
= &ctx
->btf
.strings
[key
->name_off
];
1740 if (strcmp(res
, "key"))
1743 res
= &ctx
->btf
.strings
[val
->name_off
];
1744 if (strcmp(res
, "value"))
1747 *id_key
= key
->type
;
1748 *id_val
= val
->type
;
1752 static void bpf_btf_annotate(struct bpf_elf_ctx
*ctx
, int which
, const char *name
)
1754 uint32_t id_key
= 0, id_val
= 0;
1756 if (!bpf_btf_find_kv(ctx
, &ctx
->maps
[which
], name
, &id_key
, &id_val
)) {
1757 ctx
->maps_ext
[which
].btf_id_key
= id_key
;
1758 ctx
->maps_ext
[which
].btf_id_val
= id_val
;
1762 static const char *bpf_map_fetch_name(struct bpf_elf_ctx
*ctx
, int which
)
1768 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1771 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1774 type
= GELF_ST_TYPE(sym
.st_info
);
1775 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1776 (type
!= STT_NOTYPE
&& type
!= STT_OBJECT
) ||
1777 sym
.st_shndx
!= ctx
->sec_maps
||
1778 sym
.st_value
/ ctx
->map_len
!= which
)
1781 name
= bpf_str_tab_name(ctx
, &sym
);
1782 bpf_btf_annotate(ctx
, which
, name
);
1789 static int bpf_maps_attach_all(struct bpf_elf_ctx
*ctx
)
1791 int i
, j
, ret
, fd
, inner_fd
, inner_idx
, have_map_in_map
= 0;
1792 const char *map_name
;
1794 for (i
= 0; i
< ctx
->map_num
; i
++) {
1795 if (ctx
->maps
[i
].pinning
== PIN_OBJECT_NS
&&
1797 fprintf(stderr
, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
1801 map_name
= bpf_map_fetch_name(ctx
, i
);
1805 fd
= bpf_map_attach(map_name
, ctx
, &ctx
->maps
[i
],
1806 &ctx
->maps_ext
[i
], &have_map_in_map
);
1810 ctx
->map_fds
[i
] = !fd
? -1 : fd
;
1813 for (i
= 0; have_map_in_map
&& i
< ctx
->map_num
; i
++) {
1814 if (ctx
->map_fds
[i
] >= 0)
1817 map_name
= bpf_map_fetch_name(ctx
, i
);
1821 fd
= bpf_map_attach(map_name
, ctx
, &ctx
->maps
[i
],
1822 &ctx
->maps_ext
[i
], NULL
);
1826 ctx
->map_fds
[i
] = fd
;
1829 for (i
= 0; have_map_in_map
&& i
< ctx
->map_num
; i
++) {
1830 if (!ctx
->maps
[i
].id
||
1831 ctx
->maps
[i
].inner_id
||
1832 ctx
->maps
[i
].inner_idx
== -1)
1835 inner_fd
= ctx
->map_fds
[i
];
1836 inner_idx
= ctx
->maps
[i
].inner_idx
;
1838 for (j
= 0; j
< ctx
->map_num
; j
++) {
1839 if (!bpf_is_map_in_map_type(&ctx
->maps
[j
]))
1841 if (ctx
->maps
[j
].inner_id
!= ctx
->maps
[i
].id
)
1844 ret
= bpf_map_update(ctx
->map_fds
[j
], &inner_idx
,
1845 &inner_fd
, BPF_ANY
);
1847 bpf_report_map_in_map(ctx
->map_fds
[j
],
1857 static int bpf_map_num_sym(struct bpf_elf_ctx
*ctx
)
1862 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1865 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1868 type
= GELF_ST_TYPE(sym
.st_info
);
1869 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1870 (type
!= STT_NOTYPE
&& type
!= STT_OBJECT
) ||
1871 sym
.st_shndx
!= ctx
->sec_maps
)
1879 static int bpf_fill_section_data(struct bpf_elf_ctx
*ctx
, int section
,
1880 struct bpf_elf_sec_data
*data
)
1882 Elf_Data
*sec_edata
;
1887 memset(data
, 0, sizeof(*data
));
1889 sec_fd
= elf_getscn(ctx
->elf_fd
, section
);
1892 if (gelf_getshdr(sec_fd
, &sec_hdr
) != &sec_hdr
)
1895 sec_name
= elf_strptr(ctx
->elf_fd
, ctx
->elf_hdr
.e_shstrndx
,
1897 if (!sec_name
|| !sec_hdr
.sh_size
)
1900 sec_edata
= elf_getdata(sec_fd
, NULL
);
1901 if (!sec_edata
|| elf_getdata(sec_fd
, sec_edata
))
1904 memcpy(&data
->sec_hdr
, &sec_hdr
, sizeof(sec_hdr
));
1906 data
->sec_name
= sec_name
;
1907 data
->sec_data
= sec_edata
;
1911 struct bpf_elf_map_min
{
1918 static int bpf_fetch_maps_begin(struct bpf_elf_ctx
*ctx
, int section
,
1919 struct bpf_elf_sec_data
*data
)
1921 ctx
->map_num
= data
->sec_data
->d_size
;
1922 ctx
->sec_maps
= section
;
1923 ctx
->sec_done
[section
] = true;
1925 if (ctx
->map_num
> sizeof(ctx
->maps
)) {
1926 fprintf(stderr
, "Too many BPF maps in ELF section!\n");
1930 memcpy(ctx
->maps
, data
->sec_data
->d_buf
, ctx
->map_num
);
1934 static int bpf_map_verify_all_offs(struct bpf_elf_ctx
*ctx
, int end
)
1939 for (off
= 0; off
< end
; off
+= ctx
->map_len
) {
1940 /* Order doesn't need to be linear here, hence we walk
1943 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1946 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1949 type
= GELF_ST_TYPE(sym
.st_info
);
1950 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1951 (type
!= STT_NOTYPE
&& type
!= STT_OBJECT
) ||
1952 sym
.st_shndx
!= ctx
->sec_maps
)
1954 if (sym
.st_value
== off
)
1956 if (i
== ctx
->sym_num
- 1)
1961 return off
== end
? 0 : -1;
1964 static int bpf_fetch_maps_end(struct bpf_elf_ctx
*ctx
)
1966 struct bpf_elf_map fixup
[ARRAY_SIZE(ctx
->maps
)] = {};
1967 int i
, sym_num
= bpf_map_num_sym(ctx
);
1970 if (sym_num
== 0 || sym_num
> ARRAY_SIZE(ctx
->maps
)) {
1971 fprintf(stderr
, "%u maps not supported in current map section!\n",
1976 if (ctx
->map_num
% sym_num
!= 0 ||
1977 ctx
->map_num
% sizeof(__u32
) != 0) {
1978 fprintf(stderr
, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
1982 ctx
->map_len
= ctx
->map_num
/ sym_num
;
1983 if (bpf_map_verify_all_offs(ctx
, ctx
->map_num
)) {
1984 fprintf(stderr
, "Different struct bpf_elf_map in use!\n");
1988 if (ctx
->map_len
== sizeof(struct bpf_elf_map
)) {
1989 ctx
->map_num
= sym_num
;
1991 } else if (ctx
->map_len
> sizeof(struct bpf_elf_map
)) {
1992 fprintf(stderr
, "struct bpf_elf_map not supported, coming from future version?\n");
1994 } else if (ctx
->map_len
< sizeof(struct bpf_elf_map_min
)) {
1995 fprintf(stderr
, "struct bpf_elf_map too small, not supported!\n");
1999 ctx
->map_num
= sym_num
;
2000 for (i
= 0, buff
= (void *)ctx
->maps
; i
< ctx
->map_num
;
2001 i
++, buff
+= ctx
->map_len
) {
2002 /* The fixup leaves the rest of the members as zero, which
2003 * is fine currently, but option exist to set some other
2004 * default value as well when needed in future.
2006 memcpy(&fixup
[i
], buff
, ctx
->map_len
);
2009 memcpy(ctx
->maps
, fixup
, sizeof(fixup
));
2011 printf("%zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
2012 sizeof(struct bpf_elf_map
) - ctx
->map_len
);
2016 static int bpf_fetch_license(struct bpf_elf_ctx
*ctx
, int section
,
2017 struct bpf_elf_sec_data
*data
)
2019 if (data
->sec_data
->d_size
> sizeof(ctx
->license
))
2022 memcpy(ctx
->license
, data
->sec_data
->d_buf
, data
->sec_data
->d_size
);
2023 ctx
->sec_done
[section
] = true;
2027 static int bpf_fetch_symtab(struct bpf_elf_ctx
*ctx
, int section
,
2028 struct bpf_elf_sec_data
*data
)
2030 ctx
->sym_tab
= data
->sec_data
;
2031 ctx
->sym_num
= data
->sec_hdr
.sh_size
/ data
->sec_hdr
.sh_entsize
;
2032 ctx
->sec_done
[section
] = true;
2036 static int bpf_fetch_strtab(struct bpf_elf_ctx
*ctx
, int section
,
2037 struct bpf_elf_sec_data
*data
)
2039 ctx
->str_tab
= data
->sec_data
;
2040 ctx
->sec_done
[section
] = true;
2044 static int bpf_fetch_text(struct bpf_elf_ctx
*ctx
, int section
,
2045 struct bpf_elf_sec_data
*data
)
2047 ctx
->sec_text
= section
;
2048 ctx
->sec_done
[section
] = true;
2052 static void bpf_btf_report(int fd
, struct bpf_elf_ctx
*ctx
)
2054 fprintf(stderr
, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n",
2055 fd
< 0 ? "rejected: " : "loaded",
2056 fd
< 0 ? strerror(errno
) : "",
2057 fd
< 0 ? errno
: fd
);
2059 fprintf(stderr
, " - Length: %zu\n", ctx
->btf_data
->d_size
);
2061 bpf_dump_error(ctx
, "Verifier analysis:\n\n");
2064 static int bpf_btf_attach(struct bpf_elf_ctx
*ctx
)
2069 fd
= bpf_btf_load(ctx
->btf_data
->d_buf
, ctx
->btf_data
->d_size
,
2070 ctx
->log
, ctx
->log_size
);
2071 if (fd
< 0 || ctx
->verbose
) {
2072 if (fd
< 0 && (errno
== ENOSPC
|| !ctx
->log_size
)) {
2073 if (tries
++ < 10 && !bpf_log_realloc(ctx
))
2076 fprintf(stderr
, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
2077 ctx
->log_size
, tries
);
2081 if (bpf_log_has_data(ctx
))
2082 bpf_btf_report(fd
, ctx
);
2088 static int bpf_fetch_btf_begin(struct bpf_elf_ctx
*ctx
, int section
,
2089 struct bpf_elf_sec_data
*data
)
2091 ctx
->btf_data
= data
->sec_data
;
2092 ctx
->sec_btf
= section
;
2093 ctx
->sec_done
[section
] = true;
2097 static int bpf_btf_check_header(struct bpf_elf_ctx
*ctx
)
2099 const struct btf_header
*hdr
= ctx
->btf_data
->d_buf
;
2100 const char *str_start
, *str_end
;
2101 unsigned int data_len
;
2103 if (hdr
->magic
!= BTF_MAGIC
) {
2104 fprintf(stderr
, "Object has wrong BTF magic: %x, expected: %x!\n",
2105 hdr
->magic
, BTF_MAGIC
);
2109 if (hdr
->version
!= BTF_VERSION
) {
2110 fprintf(stderr
, "Object has wrong BTF version: %u, expected: %u!\n",
2111 hdr
->version
, BTF_VERSION
);
2116 fprintf(stderr
, "Object has unsupported BTF flags %x!\n",
2121 data_len
= ctx
->btf_data
->d_size
- sizeof(*hdr
);
2122 if (data_len
< hdr
->type_off
||
2123 data_len
< hdr
->str_off
||
2124 data_len
< hdr
->type_len
+ hdr
->str_len
||
2125 hdr
->type_off
>= hdr
->str_off
||
2126 hdr
->type_off
+ hdr
->type_len
!= hdr
->str_off
||
2127 hdr
->str_off
+ hdr
->str_len
!= data_len
||
2128 (hdr
->type_off
& (sizeof(uint32_t) - 1))) {
2129 fprintf(stderr
, "Object has malformed BTF data!\n");
2134 ctx
->btf
.raw
= hdr
+ 1;
2136 str_start
= ctx
->btf
.raw
+ hdr
->str_off
;
2137 str_end
= str_start
+ hdr
->str_len
;
2138 if (!hdr
->str_len
||
2139 hdr
->str_len
- 1 > BTF_MAX_NAME_OFFSET
||
2140 str_start
[0] || str_end
[-1]) {
2141 fprintf(stderr
, "Object has malformed BTF string data!\n");
2145 ctx
->btf
.strings
= str_start
;
2149 static int bpf_btf_register_type(struct bpf_elf_ctx
*ctx
,
2150 const struct btf_type
*type
)
2152 int cur
= ctx
->btf
.types_num
, num
= cur
+ 1;
2153 const struct btf_type
**types
;
2155 types
= realloc(ctx
->btf
.types
, num
* sizeof(type
));
2157 free(ctx
->btf
.types
);
2158 ctx
->btf
.types
= NULL
;
2159 ctx
->btf
.types_num
= 0;
2163 ctx
->btf
.types
= types
;
2164 ctx
->btf
.types
[cur
] = type
;
2165 ctx
->btf
.types_num
= num
;
2169 static struct btf_type btf_type_void
;
2171 static int bpf_btf_prep_type_data(struct bpf_elf_ctx
*ctx
)
2173 const void *type_cur
= ctx
->btf
.raw
+ ctx
->btf
.hdr
->type_off
;
2174 const void *type_end
= ctx
->btf
.raw
+ ctx
->btf
.hdr
->str_off
;
2175 const struct btf_type
*type
;
2179 ret
= bpf_btf_register_type(ctx
, &btf_type_void
);
2183 while (type_cur
< type_end
) {
2185 type_cur
+= sizeof(*type
);
2187 var_len
= BTF_INFO_VLEN(type
->info
);
2188 kind
= BTF_INFO_KIND(type
->info
);
2192 type_cur
+= sizeof(int);
2194 case BTF_KIND_ARRAY
:
2195 type_cur
+= sizeof(struct btf_array
);
2197 case BTF_KIND_STRUCT
:
2198 case BTF_KIND_UNION
:
2199 type_cur
+= var_len
* sizeof(struct btf_member
);
2202 type_cur
+= var_len
* sizeof(struct btf_enum
);
2204 case BTF_KIND_FUNC_PROTO
:
2205 type_cur
+= var_len
* sizeof(struct btf_param
);
2207 case BTF_KIND_TYPEDEF
:
2210 case BTF_KIND_VOLATILE
:
2211 case BTF_KIND_CONST
:
2212 case BTF_KIND_RESTRICT
:
2216 fprintf(stderr
, "Object has unknown BTF type: %u!\n", kind
);
2220 ret
= bpf_btf_register_type(ctx
, type
);
/* Validate the BTF header, then index its type data. */
static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx)
{
	int ret = bpf_btf_check_header(ctx);

	if (!ret)
		return bpf_btf_prep_type_data(ctx);
	return ret;
}
2237 static void bpf_fetch_btf_end(struct bpf_elf_ctx
*ctx
)
2239 int fd
= bpf_btf_attach(ctx
);
2244 if (bpf_btf_prep_data(ctx
) < 0) {
2250 static bool bpf_has_map_data(const struct bpf_elf_ctx
*ctx
)
2252 return ctx
->sym_tab
&& ctx
->str_tab
&& ctx
->sec_maps
;
2255 static bool bpf_has_btf_data(const struct bpf_elf_ctx
*ctx
)
2257 return ctx
->sec_btf
;
2260 static bool bpf_has_call_data(const struct bpf_elf_ctx
*ctx
)
2262 return ctx
->sec_text
;
2265 static int bpf_fetch_ancillary(struct bpf_elf_ctx
*ctx
, bool check_text_sec
)
2267 struct bpf_elf_sec_data data
;
2270 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
2271 ret
= bpf_fill_section_data(ctx
, i
, &data
);
2275 if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
2276 !strcmp(data
.sec_name
, ELF_SECTION_MAPS
))
2277 ret
= bpf_fetch_maps_begin(ctx
, i
, &data
);
2278 else if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
2279 !strcmp(data
.sec_name
, ELF_SECTION_LICENSE
))
2280 ret
= bpf_fetch_license(ctx
, i
, &data
);
2281 else if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
2282 (data
.sec_hdr
.sh_flags
& SHF_EXECINSTR
) &&
2283 !strcmp(data
.sec_name
, ".text") &&
2285 ret
= bpf_fetch_text(ctx
, i
, &data
);
2286 else if (data
.sec_hdr
.sh_type
== SHT_SYMTAB
&&
2287 !strcmp(data
.sec_name
, ".symtab"))
2288 ret
= bpf_fetch_symtab(ctx
, i
, &data
);
2289 else if (data
.sec_hdr
.sh_type
== SHT_STRTAB
&&
2290 !strcmp(data
.sec_name
, ".strtab"))
2291 ret
= bpf_fetch_strtab(ctx
, i
, &data
);
2292 else if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
2293 !strcmp(data
.sec_name
, ".BTF"))
2294 ret
= bpf_fetch_btf_begin(ctx
, i
, &data
);
2296 fprintf(stderr
, "Error parsing section %d! Perhaps check with readelf -a?\n",
2302 if (bpf_has_btf_data(ctx
))
2303 bpf_fetch_btf_end(ctx
);
2304 if (bpf_has_map_data(ctx
)) {
2305 ret
= bpf_fetch_maps_end(ctx
);
2307 fprintf(stderr
, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
2311 ret
= bpf_maps_attach_all(ctx
);
2313 fprintf(stderr
, "Error loading maps into kernel!\n");
2321 static int bpf_fetch_prog(struct bpf_elf_ctx
*ctx
, const char *section
,
2324 struct bpf_elf_sec_data data
;
2325 struct bpf_elf_prog prog
;
2326 int ret
, i
, fd
= -1;
2328 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
2329 if (ctx
->sec_done
[i
])
2332 ret
= bpf_fill_section_data(ctx
, i
, &data
);
2334 !(data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
2335 (data
.sec_hdr
.sh_flags
& SHF_EXECINSTR
) &&
2336 !strcmp(data
.sec_name
, section
)))
2341 memset(&prog
, 0, sizeof(prog
));
2342 prog
.type
= ctx
->type
;
2343 prog
.license
= ctx
->license
;
2344 prog
.size
= data
.sec_data
->d_size
;
2345 prog
.insns_num
= prog
.size
/ sizeof(struct bpf_insn
);
2346 prog
.insns
= data
.sec_data
->d_buf
;
2348 fd
= bpf_prog_attach(section
, &prog
, ctx
);
2352 ctx
->sec_done
[i
] = true;
2359 struct bpf_relo_props
{
2360 struct bpf_tail_call
{
2367 static int bpf_apply_relo_map(struct bpf_elf_ctx
*ctx
, struct bpf_elf_prog
*prog
,
2368 GElf_Rel
*relo
, GElf_Sym
*sym
,
2369 struct bpf_relo_props
*props
)
2371 unsigned int insn_off
= relo
->r_offset
/ sizeof(struct bpf_insn
);
2372 unsigned int map_idx
= sym
->st_value
/ ctx
->map_len
;
2374 if (insn_off
>= prog
->insns_num
)
2376 if (prog
->insns
[insn_off
].code
!= (BPF_LD
| BPF_IMM
| BPF_DW
)) {
2377 fprintf(stderr
, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
2382 if (map_idx
>= ARRAY_SIZE(ctx
->map_fds
))
2384 if (!ctx
->map_fds
[map_idx
])
2386 if (ctx
->maps
[map_idx
].type
== BPF_MAP_TYPE_PROG_ARRAY
) {
2388 if (ctx
->maps_ext
[map_idx
].owner
.jited
||
2389 (ctx
->maps_ext
[map_idx
].owner
.type
== 0 &&
2390 ctx
->cfg
.jit_enabled
))
2394 prog
->insns
[insn_off
].src_reg
= BPF_PSEUDO_MAP_FD
;
2395 prog
->insns
[insn_off
].imm
= ctx
->map_fds
[map_idx
];
2399 static int bpf_apply_relo_call(struct bpf_elf_ctx
*ctx
, struct bpf_elf_prog
*prog
,
2400 GElf_Rel
*relo
, GElf_Sym
*sym
,
2401 struct bpf_relo_props
*props
)
2403 unsigned int insn_off
= relo
->r_offset
/ sizeof(struct bpf_insn
);
2404 struct bpf_elf_prog
*prog_text
= &ctx
->prog_text
;
2406 if (insn_off
>= prog
->insns_num
)
2408 if (prog
->insns
[insn_off
].code
!= (BPF_JMP
| BPF_CALL
) &&
2409 prog
->insns
[insn_off
].src_reg
!= BPF_PSEUDO_CALL
) {
2410 fprintf(stderr
, "ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n",
2415 if (!props
->main_num
) {
2416 struct bpf_insn
*insns
= realloc(prog
->insns
,
2417 prog
->size
+ prog_text
->size
);
2421 memcpy(insns
+ prog
->insns_num
, prog_text
->insns
,
2423 props
->main_num
= prog
->insns_num
;
2424 prog
->insns
= insns
;
2425 prog
->insns_num
+= prog_text
->insns_num
;
2426 prog
->size
+= prog_text
->size
;
2429 prog
->insns
[insn_off
].imm
+= props
->main_num
- insn_off
;
2433 static int bpf_apply_relo_data(struct bpf_elf_ctx
*ctx
,
2434 struct bpf_elf_sec_data
*data_relo
,
2435 struct bpf_elf_prog
*prog
,
2436 struct bpf_relo_props
*props
)
2438 GElf_Shdr
*rhdr
= &data_relo
->sec_hdr
;
2439 int relo_ent
, relo_num
= rhdr
->sh_size
/ rhdr
->sh_entsize
;
2441 for (relo_ent
= 0; relo_ent
< relo_num
; relo_ent
++) {
2446 if (gelf_getrel(data_relo
->sec_data
, relo_ent
, &relo
) != &relo
)
2448 if (gelf_getsym(ctx
->sym_tab
, GELF_R_SYM(relo
.r_info
), &sym
) != &sym
)
2451 if (sym
.st_shndx
== ctx
->sec_maps
)
2452 ret
= bpf_apply_relo_map(ctx
, prog
, &relo
, &sym
, props
);
2453 else if (sym
.st_shndx
== ctx
->sec_text
)
2454 ret
= bpf_apply_relo_call(ctx
, prog
, &relo
, &sym
, props
);
2456 fprintf(stderr
, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! Compiler bug?!\n",
2457 relo_ent
, sym
.st_shndx
);
2465 static int bpf_fetch_prog_relo(struct bpf_elf_ctx
*ctx
, const char *section
,
2466 bool *lderr
, bool *sseen
, struct bpf_elf_prog
*prog
)
2468 struct bpf_elf_sec_data data_relo
, data_insn
;
2469 int ret
, idx
, i
, fd
= -1;
2471 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
2472 struct bpf_relo_props props
= {};
2474 ret
= bpf_fill_section_data(ctx
, i
, &data_relo
);
2475 if (ret
< 0 || data_relo
.sec_hdr
.sh_type
!= SHT_REL
)
2478 idx
= data_relo
.sec_hdr
.sh_info
;
2480 ret
= bpf_fill_section_data(ctx
, idx
, &data_insn
);
2482 !(data_insn
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
2483 (data_insn
.sec_hdr
.sh_flags
& SHF_EXECINSTR
) &&
2484 !strcmp(data_insn
.sec_name
, section
)))
2489 memset(prog
, 0, sizeof(*prog
));
2490 prog
->type
= ctx
->type
;
2491 prog
->license
= ctx
->license
;
2492 prog
->size
= data_insn
.sec_data
->d_size
;
2493 prog
->insns_num
= prog
->size
/ sizeof(struct bpf_insn
);
2494 prog
->insns
= malloc(prog
->size
);
2500 memcpy(prog
->insns
, data_insn
.sec_data
->d_buf
, prog
->size
);
2502 ret
= bpf_apply_relo_data(ctx
, &data_relo
, prog
, &props
);
2505 if (ctx
->sec_text
!= idx
)
2509 if (ctx
->sec_text
== idx
) {
2514 fd
= bpf_prog_attach(section
, prog
, ctx
);
2518 if (props
.tc
.total
) {
2519 if (ctx
->cfg
.jit_enabled
&&
2520 props
.tc
.total
!= props
.tc
.jited
)
2521 fprintf(stderr
, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
2522 props
.tc
.jited
, props
.tc
.total
);
2523 if (!ctx
->cfg
.jit_enabled
&&
2525 fprintf(stderr
, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
2526 props
.tc
.jited
, props
.tc
.total
);
2531 ctx
->sec_done
[i
] = true;
2532 ctx
->sec_done
[idx
] = true;
2539 static int bpf_fetch_prog_sec(struct bpf_elf_ctx
*ctx
, const char *section
)
2541 bool lderr
= false, sseen
= false;
2542 struct bpf_elf_prog prog
;
2545 if (bpf_has_call_data(ctx
)) {
2546 ret
= bpf_fetch_prog_relo(ctx
, ".text", &lderr
, NULL
,
2552 if (bpf_has_map_data(ctx
) || bpf_has_call_data(ctx
))
2553 ret
= bpf_fetch_prog_relo(ctx
, section
, &lderr
, &sseen
, &prog
);
2554 if (ret
< 0 && !lderr
)
2555 ret
= bpf_fetch_prog(ctx
, section
, &sseen
);
2556 if (ret
< 0 && !sseen
)
2557 fprintf(stderr
, "Program section \'%s\' not found in ELF file!\n",
2562 static int bpf_find_map_by_id(struct bpf_elf_ctx
*ctx
, uint32_t id
)
2566 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++)
2567 if (ctx
->map_fds
[i
] && ctx
->maps
[i
].id
== id
&&
2568 ctx
->maps
[i
].type
== BPF_MAP_TYPE_PROG_ARRAY
)
2573 struct bpf_jited_aux
{
2576 struct bpf_prog_data prog
;
2577 struct bpf_map_ext map
;
2580 static int bpf_derive_prog_from_fdinfo(int fd
, struct bpf_prog_data
*prog
)
2582 char file
[PATH_MAX
], buff
[4096];
2586 snprintf(file
, sizeof(file
), "/proc/%d/fdinfo/%d", getpid(), fd
);
2587 memset(prog
, 0, sizeof(*prog
));
2589 fp
= fopen(file
, "r");
2591 fprintf(stderr
, "No procfs support?!\n");
2595 while (fgets(buff
, sizeof(buff
), fp
)) {
2596 if (sscanf(buff
, "prog_type:\t%u", &val
) == 1)
2598 else if (sscanf(buff
, "prog_jited:\t%u", &val
) == 1)
2606 static int bpf_tail_call_get_aux(struct bpf_jited_aux
*aux
)
2608 struct bpf_elf_map tmp
;
2611 ret
= bpf_derive_elf_map_from_fdinfo(aux
->map_fd
, &tmp
, &aux
->map
);
2613 ret
= bpf_derive_prog_from_fdinfo(aux
->prog_fd
, &aux
->prog
);
2618 static int bpf_fill_prog_arrays(struct bpf_elf_ctx
*ctx
)
2620 struct bpf_elf_sec_data data
;
2621 uint32_t map_id
, key_id
;
2622 int fd
, i
, ret
, idx
;
2624 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
2625 if (ctx
->sec_done
[i
])
2628 ret
= bpf_fill_section_data(ctx
, i
, &data
);
2632 ret
= sscanf(data
.sec_name
, "%i/%i", &map_id
, &key_id
);
2636 idx
= bpf_find_map_by_id(ctx
, map_id
);
2640 fd
= bpf_fetch_prog_sec(ctx
, data
.sec_name
);
2644 ret
= bpf_map_update(ctx
->map_fds
[idx
], &key_id
,
2647 struct bpf_jited_aux aux
= {};
2650 if (errno
== E2BIG
) {
2651 fprintf(stderr
, "Tail call key %u for map %u out of bounds?\n",
2656 aux
.map_fd
= ctx
->map_fds
[idx
];
2659 if (bpf_tail_call_get_aux(&aux
))
2661 if (!aux
.map
.owner
.type
)
2664 if (aux
.prog
.type
!= aux
.map
.owner
.type
)
2665 fprintf(stderr
, "Tail call map owned by prog type %u, but prog type is %u!\n",
2666 aux
.map
.owner
.type
, aux
.prog
.type
);
2667 if (aux
.prog
.jited
!= aux
.map
.owner
.jited
)
2668 fprintf(stderr
, "Tail call map %s jited, but prog %s!\n",
2669 aux
.map
.owner
.jited
? "is" : "not",
2670 aux
.prog
.jited
? "is" : "not");
2674 ctx
->sec_done
[i
] = true;
2680 static void bpf_save_finfo(struct bpf_elf_ctx
*ctx
)
2685 memset(&ctx
->stat
, 0, sizeof(ctx
->stat
));
2687 ret
= fstat(ctx
->obj_fd
, &st
);
2689 fprintf(stderr
, "Stat of elf file failed: %s\n",
2694 ctx
->stat
.st_dev
= st
.st_dev
;
2695 ctx
->stat
.st_ino
= st
.st_ino
;
2698 static int bpf_read_pin_mapping(FILE *fp
, uint32_t *id
, char *path
)
2700 char buff
[PATH_MAX
];
2702 while (fgets(buff
, sizeof(buff
), fp
)) {
2705 while (*ptr
== ' ' || *ptr
== '\t')
2708 if (*ptr
== '#' || *ptr
== '\n' || *ptr
== 0)
2711 if (sscanf(ptr
, "%i %s\n", id
, path
) != 2 &&
2712 sscanf(ptr
, "%i %s #", id
, path
) != 2) {
2723 static bool bpf_pinning_reserved(uint32_t pinning
)
2735 static void bpf_hash_init(struct bpf_elf_ctx
*ctx
, const char *db_file
)
2737 struct bpf_hash_entry
*entry
;
2738 char subpath
[PATH_MAX
] = {};
2743 fp
= fopen(db_file
, "r");
2747 while ((ret
= bpf_read_pin_mapping(fp
, &pinning
, subpath
))) {
2749 fprintf(stderr
, "Database %s is corrupted at: %s\n",
2755 if (bpf_pinning_reserved(pinning
)) {
2756 fprintf(stderr
, "Database %s, id %u is reserved - ignoring!\n",
2761 entry
= malloc(sizeof(*entry
));
2763 fprintf(stderr
, "No memory left for db entry!\n");
2767 entry
->pinning
= pinning
;
2768 entry
->subpath
= strdup(subpath
);
2769 if (!entry
->subpath
) {
2770 fprintf(stderr
, "No memory left for db entry!\n");
2775 entry
->next
= ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)];
2776 ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)] = entry
;
2782 static void bpf_hash_destroy(struct bpf_elf_ctx
*ctx
)
2784 struct bpf_hash_entry
*entry
;
2787 for (i
= 0; i
< ARRAY_SIZE(ctx
->ht
); i
++) {
2788 while ((entry
= ctx
->ht
[i
]) != NULL
) {
2789 ctx
->ht
[i
] = entry
->next
;
2790 free((char *)entry
->subpath
);
2796 static int bpf_elf_check_ehdr(const struct bpf_elf_ctx
*ctx
)
2798 if (ctx
->elf_hdr
.e_type
!= ET_REL
||
2799 (ctx
->elf_hdr
.e_machine
!= EM_NONE
&&
2800 ctx
->elf_hdr
.e_machine
!= EM_BPF
) ||
2801 ctx
->elf_hdr
.e_version
!= EV_CURRENT
) {
2802 fprintf(stderr
, "ELF format error, ELF file not for eBPF?\n");
2806 switch (ctx
->elf_hdr
.e_ident
[EI_DATA
]) {
2808 fprintf(stderr
, "ELF format error, wrong endianness info?\n");
2811 if (htons(1) == 1) {
2813 "We are big endian, eBPF object is little endian!\n");
2818 if (htons(1) != 1) {
2820 "We are little endian, eBPF object is big endian!\n");
2829 static void bpf_get_cfg(struct bpf_elf_ctx
*ctx
)
2831 static const char *path_jit
= "/proc/sys/net/core/bpf_jit_enable";
2834 fd
= open(path_jit
, O_RDONLY
);
2838 if (read(fd
, tmp
, sizeof(tmp
)) > 0)
2839 ctx
->cfg
.jit_enabled
= atoi(tmp
);
2844 static int bpf_elf_ctx_init(struct bpf_elf_ctx
*ctx
, const char *pathname
,
2845 enum bpf_prog_type type
, __u32 ifindex
,
2851 if (elf_version(EV_CURRENT
) == EV_NONE
)
2856 memset(ctx
, 0, sizeof(*ctx
));
2859 ret
= bpf_obj_hash(pathname
, tmp
, sizeof(tmp
));
2861 ctx
->noafalg
= true;
2863 hexstring_n2a(tmp
, sizeof(tmp
), ctx
->obj_uid
,
2864 sizeof(ctx
->obj_uid
));
2866 ctx
->verbose
= verbose
;
2868 ctx
->ifindex
= ifindex
;
2870 ctx
->obj_fd
= open(pathname
, O_RDONLY
);
2871 if (ctx
->obj_fd
< 0)
2874 ctx
->elf_fd
= elf_begin(ctx
->obj_fd
, ELF_C_READ
, NULL
);
2880 if (elf_kind(ctx
->elf_fd
) != ELF_K_ELF
) {
2885 if (gelf_getehdr(ctx
->elf_fd
, &ctx
->elf_hdr
) !=
2891 ret
= bpf_elf_check_ehdr(ctx
);
2895 ctx
->sec_done
= calloc(ctx
->elf_hdr
.e_shnum
,
2896 sizeof(*(ctx
->sec_done
)));
2897 if (!ctx
->sec_done
) {
2902 if (ctx
->verbose
&& bpf_log_realloc(ctx
)) {
2907 bpf_save_finfo(ctx
);
2908 bpf_hash_init(ctx
, CONFDIR
"/bpf_pinning");
2912 free(ctx
->sec_done
);
2914 elf_end(ctx
->elf_fd
);
2920 static int bpf_maps_count(struct bpf_elf_ctx
*ctx
)
2924 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++) {
2925 if (!ctx
->map_fds
[i
])
2933 static void bpf_maps_teardown(struct bpf_elf_ctx
*ctx
)
2937 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++) {
2938 if (ctx
->map_fds
[i
])
2939 close(ctx
->map_fds
[i
]);
2944 free(ctx
->btf
.types
);
2947 static void bpf_elf_ctx_destroy(struct bpf_elf_ctx
*ctx
, bool failure
)
2950 bpf_maps_teardown(ctx
);
2952 bpf_hash_destroy(ctx
);
2954 free(ctx
->prog_text
.insns
);
2955 free(ctx
->sec_done
);
2958 elf_end(ctx
->elf_fd
);
2962 static struct bpf_elf_ctx __ctx
;
2964 static int bpf_obj_open(const char *pathname
, enum bpf_prog_type type
,
2965 const char *section
, __u32 ifindex
, bool verbose
)
2967 struct bpf_elf_ctx
*ctx
= &__ctx
;
2970 ret
= bpf_elf_ctx_init(ctx
, pathname
, type
, ifindex
, verbose
);
2972 fprintf(stderr
, "Cannot initialize ELF context!\n");
2976 ret
= bpf_fetch_ancillary(ctx
, strcmp(section
, ".text"));
2978 fprintf(stderr
, "Error fetching ELF ancillary data!\n");
2982 fd
= bpf_fetch_prog_sec(ctx
, section
);
2984 fprintf(stderr
, "Error fetching program/map!\n");
2989 ret
= bpf_fill_prog_arrays(ctx
);
2991 fprintf(stderr
, "Error filling program arrays!\n");
2993 bpf_elf_ctx_destroy(ctx
, ret
< 0);
3004 bpf_map_set_send(int fd
, struct sockaddr_un
*addr
, unsigned int addr_len
,
3005 const struct bpf_map_data
*aux
, unsigned int entries
)
3007 struct bpf_map_set_msg msg
= {
3008 .aux
.uds_ver
= BPF_SCM_AUX_VER
,
3009 .aux
.num_ent
= entries
,
3011 int *cmsg_buf
, min_fd
;
3015 strlcpy(msg
.aux
.obj_name
, aux
->obj
, sizeof(msg
.aux
.obj_name
));
3016 memcpy(&msg
.aux
.obj_st
, aux
->st
, sizeof(msg
.aux
.obj_st
));
3018 cmsg_buf
= bpf_map_set_init(&msg
, addr
, addr_len
);
3019 amsg_buf
= (char *)msg
.aux
.ent
;
3021 for (i
= 0; i
< entries
; i
+= min_fd
) {
3024 min_fd
= min(BPF_SCM_MAX_FDS
* 1U, entries
- i
);
3025 bpf_map_set_init_single(&msg
, min_fd
);
3027 memcpy(cmsg_buf
, &aux
->fds
[i
], sizeof(aux
->fds
[0]) * min_fd
);
3028 memcpy(amsg_buf
, &aux
->ent
[i
], sizeof(aux
->ent
[0]) * min_fd
);
3030 ret
= sendmsg(fd
, &msg
.hdr
, 0);
3039 bpf_map_set_recv(int fd
, int *fds
, struct bpf_map_aux
*aux
,
3040 unsigned int entries
)
3042 struct bpf_map_set_msg msg
;
3043 int *cmsg_buf
, min_fd
;
3044 char *amsg_buf
, *mmsg_buf
;
3045 unsigned int needed
= 1;
3048 cmsg_buf
= bpf_map_set_init(&msg
, NULL
, 0);
3049 amsg_buf
= (char *)msg
.aux
.ent
;
3050 mmsg_buf
= (char *)&msg
.aux
;
3052 for (i
= 0; i
< min(entries
, needed
); i
+= min_fd
) {
3053 struct cmsghdr
*cmsg
;
3056 min_fd
= min(entries
, entries
- i
);
3057 bpf_map_set_init_single(&msg
, min_fd
);
3059 ret
= recvmsg(fd
, &msg
.hdr
, 0);
3063 cmsg
= CMSG_FIRSTHDR(&msg
.hdr
);
3064 if (!cmsg
|| cmsg
->cmsg_type
!= SCM_RIGHTS
)
3066 if (msg
.hdr
.msg_flags
& MSG_CTRUNC
)
3068 if (msg
.aux
.uds_ver
!= BPF_SCM_AUX_VER
)
3071 min_fd
= (cmsg
->cmsg_len
- sizeof(*cmsg
)) / sizeof(fd
);
3072 if (min_fd
> entries
|| min_fd
<= 0)
3075 memcpy(&fds
[i
], cmsg_buf
, sizeof(fds
[0]) * min_fd
);
3076 memcpy(&aux
->ent
[i
], amsg_buf
, sizeof(aux
->ent
[0]) * min_fd
);
3077 memcpy(aux
, mmsg_buf
, offsetof(struct bpf_map_aux
, ent
));
3079 needed
= aux
->num_ent
;
3085 int bpf_send_map_fds(const char *path
, const char *obj
)
3087 struct bpf_elf_ctx
*ctx
= &__ctx
;
3088 struct sockaddr_un addr
= { .sun_family
= AF_UNIX
};
3089 struct bpf_map_data bpf_aux
= {
3090 .fds
= ctx
->map_fds
,
3097 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
3099 fprintf(stderr
, "Cannot open socket: %s\n",
3104 strlcpy(addr
.sun_path
, path
, sizeof(addr
.sun_path
));
3106 ret
= connect(fd
, (struct sockaddr
*)&addr
, sizeof(addr
));
3108 fprintf(stderr
, "Cannot connect to %s: %s\n",
3109 path
, strerror(errno
));
3113 ret
= bpf_map_set_send(fd
, &addr
, sizeof(addr
), &bpf_aux
,
3114 bpf_maps_count(ctx
));
3116 fprintf(stderr
, "Cannot send fds to %s: %s\n",
3117 path
, strerror(errno
));
3119 bpf_maps_teardown(ctx
);
3124 int bpf_recv_map_fds(const char *path
, int *fds
, struct bpf_map_aux
*aux
,
3125 unsigned int entries
)
3127 struct sockaddr_un addr
= { .sun_family
= AF_UNIX
};
3130 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
3132 fprintf(stderr
, "Cannot open socket: %s\n",
3137 strlcpy(addr
.sun_path
, path
, sizeof(addr
.sun_path
));
3139 ret
= bind(fd
, (struct sockaddr
*)&addr
, sizeof(addr
));
3141 fprintf(stderr
, "Cannot bind to socket: %s\n",
3146 ret
= bpf_map_set_recv(fd
, fds
, aux
, entries
);
3148 fprintf(stderr
, "Cannot recv fds from %s: %s\n",
3149 path
, strerror(errno
));
3151 unlink(addr
.sun_path
);
3157 /* The following functions are wrapper functions for libbpf code to be
3158 * compatible with the legacy format. So all the functions have prefix
3161 int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in
*cfg
)
3163 struct bpf_elf_ctx
*ctx
= &__ctx
;
3165 return bpf_elf_ctx_init(ctx
, cfg
->object
, cfg
->type
, cfg
->ifindex
, cfg
->verbose
);
3168 int iproute2_bpf_fetch_ancillary(void)
3170 struct bpf_elf_ctx
*ctx
= &__ctx
;
3171 struct bpf_elf_sec_data data
;
3174 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
3175 ret
= bpf_fill_section_data(ctx
, i
, &data
);
3179 if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
3180 !strcmp(data
.sec_name
, ELF_SECTION_MAPS
))
3181 ret
= bpf_fetch_maps_begin(ctx
, i
, &data
);
3182 else if (data
.sec_hdr
.sh_type
== SHT_SYMTAB
&&
3183 !strcmp(data
.sec_name
, ".symtab"))
3184 ret
= bpf_fetch_symtab(ctx
, i
, &data
);
3185 else if (data
.sec_hdr
.sh_type
== SHT_STRTAB
&&
3186 !strcmp(data
.sec_name
, ".strtab"))
3187 ret
= bpf_fetch_strtab(ctx
, i
, &data
);
3189 fprintf(stderr
, "Error parsing section %d! Perhaps check with readelf -a?\n",
3195 if (bpf_has_map_data(ctx
)) {
3196 ret
= bpf_fetch_maps_end(ctx
);
3198 fprintf(stderr
, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
3206 int iproute2_get_root_path(char *root_path
, size_t len
)
3208 struct bpf_elf_ctx
*ctx
= &__ctx
;
3211 snprintf(root_path
, len
, "%s/%s",
3212 bpf_get_work_dir(ctx
->type
), BPF_DIR_GLOBALS
);
3214 ret
= mkdir(root_path
, S_IRWXU
);
3215 if (ret
&& errno
!= EEXIST
) {
3216 fprintf(stderr
, "mkdir %s failed: %s\n", root_path
, strerror(errno
));
3223 bool iproute2_is_pin_map(const char *libbpf_map_name
, char *pathname
)
3225 struct bpf_elf_ctx
*ctx
= &__ctx
;
3226 const char *map_name
, *tmp
;
3227 unsigned int pinning
;
3230 for (i
= 0; i
< ctx
->map_num
; i
++) {
3231 if (ctx
->maps
[i
].pinning
== PIN_OBJECT_NS
&&
3233 fprintf(stderr
, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
3237 map_name
= bpf_map_fetch_name(ctx
, i
);
3242 if (strcmp(libbpf_map_name
, map_name
))
3245 pinning
= ctx
->maps
[i
].pinning
;
3247 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_work_dir(ctx
->type
))
3250 if (pinning
== PIN_OBJECT_NS
)
3251 ret
= bpf_make_obj_path(ctx
);
3252 else if ((tmp
= bpf_custom_pinning(ctx
, pinning
)))
3253 ret
= bpf_make_custom_path(ctx
, tmp
);
3257 bpf_make_pathname(pathname
, PATH_MAX
, map_name
, ctx
, pinning
);
3265 bool iproute2_is_map_in_map(const char *libbpf_map_name
, struct bpf_elf_map
*imap
,
3266 struct bpf_elf_map
*omap
, char *omap_name
)
3268 struct bpf_elf_ctx
*ctx
= &__ctx
;
3269 const char *inner_map_name
, *outer_map_name
;
3272 for (i
= 0; i
< ctx
->map_num
; i
++) {
3273 inner_map_name
= bpf_map_fetch_name(ctx
, i
);
3274 if (!inner_map_name
) {
3278 if (strcmp(libbpf_map_name
, inner_map_name
))
3281 if (!ctx
->maps
[i
].id
||
3282 ctx
->maps
[i
].inner_id
||
3283 ctx
->maps
[i
].inner_idx
== -1)
3286 *imap
= ctx
->maps
[i
];
3288 for (j
= 0; j
< ctx
->map_num
; j
++) {
3289 if (!bpf_is_map_in_map_type(&ctx
->maps
[j
]))
3291 if (ctx
->maps
[j
].inner_id
!= ctx
->maps
[i
].id
)
3294 *omap
= ctx
->maps
[j
];
3295 outer_map_name
= bpf_map_fetch_name(ctx
, j
);
3296 memcpy(omap_name
, outer_map_name
, strlen(outer_map_name
) + 1);
3305 int iproute2_find_map_name_by_id(unsigned int map_id
, char *name
)
3307 struct bpf_elf_ctx
*ctx
= &__ctx
;
3308 const char *map_name
;
3311 for (i
= 0; i
< ctx
->map_num
; i
++) {
3312 if (ctx
->maps
[i
].id
== map_id
&&
3313 ctx
->maps
[i
].type
== BPF_MAP_TYPE_PROG_ARRAY
) {
3322 map_name
= bpf_map_fetch_name(ctx
, idx
);
3326 memcpy(name
, map_name
, strlen(map_name
) + 1);
3329 #endif /* HAVE_LIBBPF */
3330 #endif /* HAVE_ELF */