/*
 * tc_bpf.c	BPF common code
 *
 * This program is free software; you can distribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Daniel Borkmann <dborkman@redhat.com>
 *		Jiri Pirko <jiri@resnulli.us>
 *		Alexei Starovoitov <ast@plumgrid.com>
 */
29 #include <sys/types.h>
33 #include <sys/mount.h>
34 #include <sys/syscall.h>
35 #include <sys/sendfile.h>
36 #include <sys/resource.h>
38 #include <linux/bpf.h>
39 #include <linux/filter.h>
40 #include <linux/if_alg.h>
42 #include <arpa/inet.h>
57 static int bpf_obj_open(const char *path
, enum bpf_prog_type type
,
58 const char *sec
, bool verbose
);
60 static int bpf_obj_open(const char *path
, enum bpf_prog_type type
,
61 const char *sec
, bool verbose
)
63 fprintf(stderr
, "No ELF library support compiled in.\n");
/* Convert a user-space pointer into the __u64 form the bpf(2) syscall
 * ABI uses for pointer-carrying attribute fields (key, value, insns, ...).
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
74 static int bpf(int cmd
, union bpf_attr
*attr
, unsigned int size
)
77 return syscall(__NR_bpf
, cmd
, attr
, size
);
79 fprintf(stderr
, "No bpf syscall, kernel headers too old?\n");
85 static int bpf_map_update(int fd
, const void *key
, const void *value
,
90 memset(&attr
, 0, sizeof(attr
));
92 attr
.key
= bpf_ptr_to_u64(key
);
93 attr
.value
= bpf_ptr_to_u64(value
);
96 return bpf(BPF_MAP_UPDATE_ELEM
, &attr
, sizeof(attr
));
99 static int bpf_parse_string(char *arg
, bool from_file
, __u16
*bpf_len
,
100 char **bpf_string
, bool *need_release
,
101 const char separator
)
106 size_t tmp_len
, op_len
= sizeof("65535 255 255 4294967295,");
110 tmp_len
= sizeof("4096,") + BPF_MAXINSNS
* op_len
;
111 tmp_string
= malloc(tmp_len
);
112 if (tmp_string
== NULL
)
115 memset(tmp_string
, 0, tmp_len
);
117 fp
= fopen(arg
, "r");
119 perror("Cannot fopen");
124 if (!fgets(tmp_string
, tmp_len
, fp
)) {
132 *need_release
= true;
133 *bpf_string
= tmp_string
;
135 *need_release
= false;
139 if (sscanf(*bpf_string
, "%hu%c", bpf_len
, &sp
) != 2 ||
149 static int bpf_ops_parse(int argc
, char **argv
, struct sock_filter
*bpf_ops
,
152 char *bpf_string
, *token
, separator
= ',';
159 if (bpf_parse_string(argv
[0], from_file
, &bpf_len
, &bpf_string
,
160 &need_release
, separator
))
162 if (bpf_len
== 0 || bpf_len
> BPF_MAXINSNS
) {
168 while ((token
= strchr(token
, separator
)) && (++token
)[0]) {
170 fprintf(stderr
, "Real program length exceeds encoded "
171 "length parameter!\n");
176 if (sscanf(token
, "%hu %hhu %hhu %u,",
177 &bpf_ops
[i
].code
, &bpf_ops
[i
].jt
,
178 &bpf_ops
[i
].jf
, &bpf_ops
[i
].k
) != 4) {
179 fprintf(stderr
, "Error at instruction %d!\n", i
);
188 fprintf(stderr
, "Parsed program length is less than encoded"
189 "length parameter!\n");
201 void bpf_print_ops(FILE *f
, struct rtattr
*bpf_ops
, __u16 len
)
203 struct sock_filter
*ops
= (struct sock_filter
*) RTA_DATA(bpf_ops
);
209 fprintf(f
, "bytecode \'%u,", len
);
211 for (i
= 0; i
< len
- 1; i
++)
212 fprintf(f
, "%hu %hhu %hhu %u,", ops
[i
].code
, ops
[i
].jt
,
213 ops
[i
].jf
, ops
[i
].k
);
215 fprintf(f
, "%hu %hhu %hhu %u\'", ops
[i
].code
, ops
[i
].jt
,
216 ops
[i
].jf
, ops
[i
].k
);
219 static int bpf_map_selfcheck_pinned(int fd
, const struct bpf_elf_map
*map
,
222 char file
[PATH_MAX
], buff
[4096];
223 struct bpf_elf_map tmp
, zero
;
227 snprintf(file
, sizeof(file
), "/proc/%d/fdinfo/%d", getpid(), fd
);
229 fp
= fopen(file
, "r");
231 fprintf(stderr
, "No procfs support?!\n");
235 memset(&tmp
, 0, sizeof(tmp
));
236 while (fgets(buff
, sizeof(buff
), fp
)) {
237 if (sscanf(buff
, "map_type:\t%u", &val
) == 1)
239 else if (sscanf(buff
, "key_size:\t%u", &val
) == 1)
241 else if (sscanf(buff
, "value_size:\t%u", &val
) == 1)
242 tmp
.size_value
= val
;
243 else if (sscanf(buff
, "max_entries:\t%u", &val
) == 1)
249 if (!memcmp(&tmp
, map
, length
)) {
252 memset(&zero
, 0, sizeof(zero
));
253 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
254 * so just accept it. We know we do have an eBPF fd and in this
255 * case, everything is 0. It is guaranteed that no such map exists
256 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
258 if (!memcmp(&tmp
, &zero
, length
))
261 fprintf(stderr
, "Map specs from pinned file differ!\n");
266 static int bpf_mnt_fs(const char *target
)
268 bool bind_done
= false;
270 while (mount("", target
, "none", MS_PRIVATE
| MS_REC
, NULL
)) {
271 if (errno
!= EINVAL
|| bind_done
) {
272 fprintf(stderr
, "mount --make-private %s failed: %s\n",
273 target
, strerror(errno
));
277 if (mount(target
, target
, "none", MS_BIND
, NULL
)) {
278 fprintf(stderr
, "mount --bind %s %s failed: %s\n",
279 target
, target
, strerror(errno
));
286 if (mount("bpf", target
, "bpf", 0, NULL
)) {
287 fprintf(stderr
, "mount -t bpf bpf %s failed: %s\n",
288 target
, strerror(errno
));
295 static int bpf_valid_mntpt(const char *mnt
, unsigned long magic
)
299 if (statfs(mnt
, &st_fs
) < 0)
301 if ((unsigned long)st_fs
.f_type
!= magic
)
307 static const char *bpf_find_mntpt(const char *fstype
, unsigned long magic
,
309 const char * const *known_mnts
)
311 const char * const *ptr
;
318 if (bpf_valid_mntpt(*ptr
, magic
) == 0) {
319 strncpy(mnt
, *ptr
, len
- 1);
327 fp
= fopen("/proc/mounts", "r");
328 if (fp
== NULL
|| len
!= PATH_MAX
)
331 while (fscanf(fp
, "%*s %" textify(PATH_MAX
) "s %99s %*s %*d %*d\n",
333 if (strcmp(type
, fstype
) == 0)
338 if (strcmp(type
, fstype
) != 0)
344 int bpf_trace_pipe(void)
346 char tracefs_mnt
[PATH_MAX
] = TRACE_DIR_MNT
;
347 static const char * const tracefs_known_mnts
[] = {
349 "/sys/kernel/debug/tracing",
354 char tpipe
[PATH_MAX
];
358 mnt
= bpf_find_mntpt("tracefs", TRACEFS_MAGIC
, tracefs_mnt
,
359 sizeof(tracefs_mnt
), tracefs_known_mnts
);
361 fprintf(stderr
, "tracefs not mounted?\n");
365 snprintf(tpipe
, sizeof(tpipe
), "%s/trace_pipe", mnt
);
367 fd
= open(tpipe
, O_RDONLY
);
371 fprintf(stderr
, "Running! Hang up with ^C!\n\n");
373 static char buff
[4096];
376 ret
= read(fd
, buff
, sizeof(buff
) - 1);
386 static const char *bpf_get_tc_dir(void)
388 static bool bpf_mnt_cached
= false;
389 static char bpf_tc_dir
[PATH_MAX
];
390 static const char *mnt
;
391 static const char * const bpf_known_mnts
[] = {
395 char bpf_mnt
[PATH_MAX
] = BPF_DIR_MNT
;
396 char bpf_glo_dir
[PATH_MAX
];
402 mnt
= bpf_find_mntpt("bpf", BPF_FS_MAGIC
, bpf_mnt
, sizeof(bpf_mnt
),
405 mnt
= getenv(BPF_ENV_MNT
);
408 ret
= bpf_mnt_fs(mnt
);
415 snprintf(bpf_tc_dir
, sizeof(bpf_tc_dir
), "%s/%s", mnt
, BPF_DIR_TC
);
416 ret
= mkdir(bpf_tc_dir
, S_IRWXU
);
417 if (ret
&& errno
!= EEXIST
) {
418 fprintf(stderr
, "mkdir %s failed: %s\n", bpf_tc_dir
,
424 snprintf(bpf_glo_dir
, sizeof(bpf_glo_dir
), "%s/%s",
425 bpf_tc_dir
, BPF_DIR_GLOBALS
);
426 ret
= mkdir(bpf_glo_dir
, S_IRWXU
);
427 if (ret
&& errno
!= EEXIST
) {
428 fprintf(stderr
, "mkdir %s failed: %s\n", bpf_glo_dir
,
436 bpf_mnt_cached
= true;
441 static int bpf_obj_get(const char *pathname
)
446 if (strlen(pathname
) > 2 && pathname
[0] == 'm' &&
447 pathname
[1] == ':' && bpf_get_tc_dir()) {
448 snprintf(tmp
, sizeof(tmp
), "%s/%s",
449 bpf_get_tc_dir(), pathname
+ 2);
453 memset(&attr
, 0, sizeof(attr
));
454 attr
.pathname
= bpf_ptr_to_u64(pathname
);
456 return bpf(BPF_OBJ_GET
, &attr
, sizeof(attr
));
459 const char *bpf_default_section(const enum bpf_prog_type type
)
462 case BPF_PROG_TYPE_SCHED_CLS
:
463 return ELF_SECTION_CLASSIFIER
;
464 case BPF_PROG_TYPE_SCHED_ACT
:
465 return ELF_SECTION_ACTION
;
477 #define BPF_MODE_MAX __BPF_MODE_MAX
480 static int bpf_parse(int *ptr_argc
, char ***ptr_argv
, const bool *opt_tbl
,
481 enum bpf_prog_type
*type
, enum bpf_mode
*mode
,
482 const char **ptr_object
, const char **ptr_section
,
483 const char **ptr_uds_name
, struct sock_filter
*opcodes
)
485 const char *file
, *section
, *uds_name
;
486 bool verbose
= false;
493 if (opt_tbl
[CBPF_BYTECODE
] &&
494 (matches(*argv
, "bytecode") == 0 ||
495 strcmp(*argv
, "bc") == 0)) {
496 *mode
= CBPF_BYTECODE
;
497 } else if (opt_tbl
[CBPF_FILE
] &&
498 (matches(*argv
, "bytecode-file") == 0 ||
499 strcmp(*argv
, "bcf") == 0)) {
501 } else if (opt_tbl
[EBPF_OBJECT
] &&
502 (matches(*argv
, "object-file") == 0 ||
503 strcmp(*argv
, "obj") == 0)) {
505 } else if (opt_tbl
[EBPF_PINNED
] &&
506 (matches(*argv
, "object-pinned") == 0 ||
507 matches(*argv
, "pinned") == 0 ||
508 matches(*argv
, "fd") == 0)) {
511 fprintf(stderr
, "What mode is \"%s\"?\n", *argv
);
516 file
= section
= uds_name
= NULL
;
517 if (*mode
== EBPF_OBJECT
|| *mode
== EBPF_PINNED
) {
521 if (*type
== BPF_PROG_TYPE_UNSPEC
) {
522 if (argc
> 0 && matches(*argv
, "type") == 0) {
524 if (matches(*argv
, "cls") == 0) {
525 *type
= BPF_PROG_TYPE_SCHED_CLS
;
526 } else if (matches(*argv
, "act") == 0) {
527 *type
= BPF_PROG_TYPE_SCHED_ACT
;
529 fprintf(stderr
, "What type is \"%s\"?\n",
535 *type
= BPF_PROG_TYPE_SCHED_CLS
;
539 section
= bpf_default_section(*type
);
540 if (argc
> 0 && matches(*argv
, "section") == 0) {
546 uds_name
= getenv(BPF_ENV_UDS
);
547 if (argc
> 0 && !uds_name
&&
548 matches(*argv
, "export") == 0) {
554 if (argc
> 0 && matches(*argv
, "verbose") == 0) {
562 if (*mode
== CBPF_BYTECODE
|| *mode
== CBPF_FILE
)
563 ret
= bpf_ops_parse(argc
, argv
, opcodes
, *mode
== CBPF_FILE
);
564 else if (*mode
== EBPF_OBJECT
)
565 ret
= bpf_obj_open(file
, *type
, section
, verbose
);
566 else if (*mode
== EBPF_PINNED
)
567 ret
= bpf_obj_get(file
);
574 *ptr_section
= section
;
576 *ptr_uds_name
= uds_name
;
584 int bpf_parse_common(int *ptr_argc
, char ***ptr_argv
, const int *nla_tbl
,
585 enum bpf_prog_type type
, const char **ptr_object
,
586 const char **ptr_uds_name
, struct nlmsghdr
*n
)
588 struct sock_filter opcodes
[BPF_MAXINSNS
];
589 const bool opt_tbl
[BPF_MODE_MAX
] = {
590 [CBPF_BYTECODE
] = true,
592 [EBPF_OBJECT
] = true,
593 [EBPF_PINNED
] = true,
595 char annotation
[256];
600 ret
= bpf_parse(ptr_argc
, ptr_argv
, opt_tbl
, &type
, &mode
,
601 ptr_object
, §ion
, ptr_uds_name
, opcodes
);
605 if (mode
== CBPF_BYTECODE
|| mode
== CBPF_FILE
) {
606 addattr16(n
, MAX_MSG
, nla_tbl
[BPF_NLA_OPS_LEN
], ret
);
607 addattr_l(n
, MAX_MSG
, nla_tbl
[BPF_NLA_OPS
], opcodes
,
608 ret
* sizeof(struct sock_filter
));
611 if (mode
== EBPF_OBJECT
|| mode
== EBPF_PINNED
) {
612 snprintf(annotation
, sizeof(annotation
), "%s:[%s]",
613 basename(*ptr_object
), mode
== EBPF_PINNED
?
616 addattr32(n
, MAX_MSG
, nla_tbl
[BPF_NLA_FD
], ret
);
617 addattrstrz(n
, MAX_MSG
, nla_tbl
[BPF_NLA_NAME
], annotation
);
623 int bpf_graft_map(const char *map_path
, uint32_t *key
, int argc
, char **argv
)
625 enum bpf_prog_type type
= BPF_PROG_TYPE_UNSPEC
;
626 const bool opt_tbl
[BPF_MODE_MAX
] = {
627 [CBPF_BYTECODE
] = false,
629 [EBPF_OBJECT
] = true,
630 [EBPF_PINNED
] = true,
632 const struct bpf_elf_map test
= {
633 .type
= BPF_MAP_TYPE_PROG_ARRAY
,
634 .size_key
= sizeof(int),
635 .size_value
= sizeof(int),
637 int ret
, prog_fd
, map_fd
;
642 prog_fd
= bpf_parse(&argc
, &argv
, opt_tbl
, &type
, &mode
,
643 NULL
, §ion
, NULL
, NULL
);
649 ret
= sscanf(section
, "%*i/%i", &map_key
);
651 fprintf(stderr
, "Couldn\'t infer map key from section "
652 "name! Please provide \'key\' argument!\n");
658 map_fd
= bpf_obj_get(map_path
);
660 fprintf(stderr
, "Couldn\'t retrieve pinned map \'%s\': %s\n",
661 map_path
, strerror(errno
));
666 ret
= bpf_map_selfcheck_pinned(map_fd
, &test
,
667 offsetof(struct bpf_elf_map
, max_elem
));
669 fprintf(stderr
, "Map \'%s\' self-check failed!\n", map_path
);
673 ret
= bpf_map_update(map_fd
, &map_key
, &prog_fd
, BPF_ANY
);
675 fprintf(stderr
, "Map update failed: %s\n", strerror(errno
));
684 struct bpf_elf_prog
{
685 enum bpf_prog_type type
;
686 const struct bpf_insn
*insns
;
691 struct bpf_hash_entry
{
692 unsigned int pinning
;
694 struct bpf_hash_entry
*next
;
703 int map_fds
[ELF_MAX_MAPS
];
704 struct bpf_elf_map maps
[ELF_MAX_MAPS
];
709 char license
[ELF_MAX_LICENSE_LEN
];
710 enum bpf_prog_type type
;
712 struct bpf_elf_st stat
;
713 struct bpf_hash_entry
*ht
[256];
718 struct bpf_elf_sec_data
{
721 const char *sec_name
;
724 struct bpf_map_data
{
727 struct bpf_elf_st
*st
;
728 struct bpf_elf_map
*ent
;
731 static __check_format_string(2, 3) void
732 bpf_dump_error(struct bpf_elf_ctx
*ctx
, const char *format
, ...)
736 va_start(vl
, format
);
737 vfprintf(stderr
, format
, vl
);
740 if (ctx
->log
&& ctx
->log
[0]) {
741 fprintf(stderr
, "%s\n", ctx
->log
);
742 memset(ctx
->log
, 0, ctx
->log_size
);
746 static int bpf_log_realloc(struct bpf_elf_ctx
*ctx
)
748 size_t log_size
= ctx
->log_size
;
755 if (log_size
> (UINT_MAX
>> 8))
759 ptr
= realloc(ctx
->log
, log_size
);
764 ctx
->log_size
= log_size
;
/* Thin wrapper around the BPF_MAP_CREATE command: zero the attribute
 * union, fill in the map parameters and invoke the bpf(2) syscall.
 *
 * Returns the new map fd on success, negative with errno set on failure.
 */
static int bpf_map_create(enum bpf_map_type type, unsigned int size_key,
			  unsigned int size_value, unsigned int max_elem)
{
	union bpf_attr attr;

	/* The kernel rejects attrs with stray non-zero bytes, so clear all. */
	memset(&attr, 0, sizeof(attr));

	attr.map_type = type;
	attr.key_size = size_key;
	attr.value_size = size_value;
	attr.max_entries = max_elem;

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
783 static int bpf_prog_load(enum bpf_prog_type type
, const struct bpf_insn
*insns
,
784 size_t size_insns
, const char *license
, char *log
,
789 memset(&attr
, 0, sizeof(attr
));
790 attr
.prog_type
= type
;
791 attr
.insns
= bpf_ptr_to_u64(insns
);
792 attr
.insn_cnt
= size_insns
/ sizeof(struct bpf_insn
);
793 attr
.license
= bpf_ptr_to_u64(license
);
796 attr
.log_buf
= bpf_ptr_to_u64(log
);
797 attr
.log_size
= size_log
;
801 return bpf(BPF_PROG_LOAD
, &attr
, sizeof(attr
));
804 static int bpf_obj_pin(int fd
, const char *pathname
)
808 memset(&attr
, 0, sizeof(attr
));
809 attr
.pathname
= bpf_ptr_to_u64(pathname
);
812 return bpf(BPF_OBJ_PIN
, &attr
, sizeof(attr
));
815 static int bpf_obj_hash(const char *object
, uint8_t *out
, size_t len
)
817 struct sockaddr_alg alg
= {
818 .salg_family
= AF_ALG
,
822 int ret
, cfd
, ofd
, ffd
;
826 if (!object
|| len
!= 20)
829 cfd
= socket(AF_ALG
, SOCK_SEQPACKET
, 0);
831 fprintf(stderr
, "Cannot get AF_ALG socket: %s\n",
836 ret
= bind(cfd
, (struct sockaddr
*)&alg
, sizeof(alg
));
838 fprintf(stderr
, "Error binding socket: %s\n", strerror(errno
));
842 ofd
= accept(cfd
, NULL
, 0);
844 fprintf(stderr
, "Error accepting socket: %s\n",
850 ffd
= open(object
, O_RDONLY
);
852 fprintf(stderr
, "Error opening object %s: %s\n",
853 object
, strerror(errno
));
858 ret
= fstat(ffd
, &stbuff
);
860 fprintf(stderr
, "Error doing fstat: %s\n",
865 size
= sendfile(ofd
, ffd
, NULL
, stbuff
.st_size
);
866 if (size
!= stbuff
.st_size
) {
867 fprintf(stderr
, "Error from sendfile (%zd vs %zu bytes): %s\n",
868 size
, stbuff
.st_size
, strerror(errno
));
873 size
= read(ofd
, out
, len
);
875 fprintf(stderr
, "Error from read (%zd vs %zu bytes): %s\n",
876 size
, len
, strerror(errno
));
890 static const char *bpf_get_obj_uid(const char *pathname
)
892 static bool bpf_uid_cached
= false;
893 static char bpf_uid
[64];
900 ret
= bpf_obj_hash(pathname
, tmp
, sizeof(tmp
));
902 fprintf(stderr
, "Object hashing failed!\n");
906 hexstring_n2a(tmp
, sizeof(tmp
), bpf_uid
, sizeof(bpf_uid
));
907 bpf_uid_cached
= true;
912 static int bpf_init_env(const char *pathname
)
914 struct rlimit limit
= {
915 .rlim_cur
= RLIM_INFINITY
,
916 .rlim_max
= RLIM_INFINITY
,
919 /* Don't bother in case we fail! */
920 setrlimit(RLIMIT_MEMLOCK
, &limit
);
922 if (!bpf_get_tc_dir()) {
923 fprintf(stderr
, "Continuing without mounted eBPF fs. "
924 "Too old kernel?\n");
928 if (!bpf_get_obj_uid(pathname
))
934 static const char *bpf_custom_pinning(const struct bpf_elf_ctx
*ctx
,
937 struct bpf_hash_entry
*entry
;
939 entry
= ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)];
940 while (entry
&& entry
->pinning
!= pinning
)
943 return entry
? entry
->subpath
: NULL
;
946 static bool bpf_no_pinning(const struct bpf_elf_ctx
*ctx
,
956 return !bpf_custom_pinning(ctx
, pinning
);
960 static void bpf_make_pathname(char *pathname
, size_t len
, const char *name
,
961 const struct bpf_elf_ctx
*ctx
, uint32_t pinning
)
965 snprintf(pathname
, len
, "%s/%s/%s", bpf_get_tc_dir(),
966 bpf_get_obj_uid(NULL
), name
);
969 snprintf(pathname
, len
, "%s/%s/%s", bpf_get_tc_dir(),
970 BPF_DIR_GLOBALS
, name
);
973 snprintf(pathname
, len
, "%s/../%s/%s", bpf_get_tc_dir(),
974 bpf_custom_pinning(ctx
, pinning
), name
);
979 static int bpf_probe_pinned(const char *name
, const struct bpf_elf_ctx
*ctx
,
982 char pathname
[PATH_MAX
];
984 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_tc_dir())
987 bpf_make_pathname(pathname
, sizeof(pathname
), name
, ctx
, pinning
);
988 return bpf_obj_get(pathname
);
991 static int bpf_make_obj_path(void)
996 snprintf(tmp
, sizeof(tmp
), "%s/%s", bpf_get_tc_dir(),
997 bpf_get_obj_uid(NULL
));
999 ret
= mkdir(tmp
, S_IRWXU
);
1000 if (ret
&& errno
!= EEXIST
) {
1001 fprintf(stderr
, "mkdir %s failed: %s\n", tmp
, strerror(errno
));
1008 static int bpf_make_custom_path(const char *todo
)
1010 char tmp
[PATH_MAX
], rem
[PATH_MAX
], *sub
;
1013 snprintf(tmp
, sizeof(tmp
), "%s/../", bpf_get_tc_dir());
1014 snprintf(rem
, sizeof(rem
), "%s/", todo
);
1015 sub
= strtok(rem
, "/");
1018 if (strlen(tmp
) + strlen(sub
) + 2 > PATH_MAX
)
1024 ret
= mkdir(tmp
, S_IRWXU
);
1025 if (ret
&& errno
!= EEXIST
) {
1026 fprintf(stderr
, "mkdir %s failed: %s\n", tmp
,
1031 sub
= strtok(NULL
, "/");
1037 static int bpf_place_pinned(int fd
, const char *name
,
1038 const struct bpf_elf_ctx
*ctx
, uint32_t pinning
)
1040 char pathname
[PATH_MAX
];
1044 if (bpf_no_pinning(ctx
, pinning
) || !bpf_get_tc_dir())
1047 if (pinning
== PIN_OBJECT_NS
)
1048 ret
= bpf_make_obj_path();
1049 else if ((tmp
= bpf_custom_pinning(ctx
, pinning
)))
1050 ret
= bpf_make_custom_path(tmp
);
1054 bpf_make_pathname(pathname
, sizeof(pathname
), name
, ctx
, pinning
);
1055 return bpf_obj_pin(fd
, pathname
);
1058 static void bpf_prog_report(int fd
, const char *section
,
1059 const struct bpf_elf_prog
*prog
,
1060 struct bpf_elf_ctx
*ctx
)
1062 fprintf(stderr
, "Prog section \'%s\' %s%s (%d)!\n", section
,
1063 fd
< 0 ? "rejected: " : "loaded",
1064 fd
< 0 ? strerror(errno
) : "",
1065 fd
< 0 ? errno
: fd
);
1067 fprintf(stderr
, " - Type: %u\n", prog
->type
);
1068 fprintf(stderr
, " - Instructions: %zu\n",
1069 prog
->size
/ sizeof(struct bpf_insn
));
1070 fprintf(stderr
, " - License: %s\n\n", prog
->license
);
1072 bpf_dump_error(ctx
, "Verifier analysis:\n\n");
1075 static int bpf_prog_attach(const char *section
,
1076 const struct bpf_elf_prog
*prog
,
1077 struct bpf_elf_ctx
*ctx
)
1082 fd
= bpf_prog_load(prog
->type
, prog
->insns
, prog
->size
,
1083 prog
->license
, ctx
->log
, ctx
->log_size
);
1084 if (fd
< 0 || ctx
->verbose
) {
1085 /* The verifier log is pretty chatty, sometimes so chatty
1086 * on larger programs, that we could fail to dump everything
1087 * into our buffer. Still, try to give a debuggable error
1088 * log for the user, so enlarge it and re-fail.
1090 if (fd
< 0 && (errno
== ENOSPC
|| !ctx
->log_size
)) {
1091 if (tries
++ < 6 && !bpf_log_realloc(ctx
))
1094 fprintf(stderr
, "Log buffer too small to dump "
1095 "verifier log %zu bytes (%d tries)!\n",
1096 ctx
->log_size
, tries
);
1100 bpf_prog_report(fd
, section
, prog
, ctx
);
1106 static void bpf_map_report(int fd
, const char *name
,
1107 const struct bpf_elf_map
*map
,
1108 struct bpf_elf_ctx
*ctx
)
1110 fprintf(stderr
, "Map object \'%s\' %s%s (%d)!\n", name
,
1111 fd
< 0 ? "rejected: " : "loaded",
1112 fd
< 0 ? strerror(errno
) : "",
1113 fd
< 0 ? errno
: fd
);
1115 fprintf(stderr
, " - Type: %u\n", map
->type
);
1116 fprintf(stderr
, " - Identifier: %u\n", map
->id
);
1117 fprintf(stderr
, " - Pinning: %u\n", map
->pinning
);
1118 fprintf(stderr
, " - Size key: %u\n", map
->size_key
);
1119 fprintf(stderr
, " - Size value: %u\n", map
->size_value
);
1120 fprintf(stderr
, " - Max elems: %u\n\n", map
->max_elem
);
1123 static int bpf_map_attach(const char *name
, const struct bpf_elf_map
*map
,
1124 struct bpf_elf_ctx
*ctx
)
1128 fd
= bpf_probe_pinned(name
, ctx
, map
->pinning
);
1130 ret
= bpf_map_selfcheck_pinned(fd
, map
,
1131 offsetof(struct bpf_elf_map
,
1135 fprintf(stderr
, "Map \'%s\' self-check failed!\n",
1140 fprintf(stderr
, "Map \'%s\' loaded as pinned!\n",
1146 fd
= bpf_map_create(map
->type
, map
->size_key
, map
->size_value
,
1148 if (fd
< 0 || ctx
->verbose
) {
1149 bpf_map_report(fd
, name
, map
, ctx
);
1154 ret
= bpf_place_pinned(fd
, name
, ctx
, map
->pinning
);
1155 if (ret
< 0 && errno
!= EEXIST
) {
1156 fprintf(stderr
, "Could not pin %s map: %s\n", name
,
1165 static const char *bpf_str_tab_name(const struct bpf_elf_ctx
*ctx
,
1166 const GElf_Sym
*sym
)
1168 return ctx
->str_tab
->d_buf
+ sym
->st_name
;
1171 static const char *bpf_map_fetch_name(struct bpf_elf_ctx
*ctx
, int which
)
1176 for (i
= 0; i
< ctx
->sym_num
; i
++) {
1177 if (gelf_getsym(ctx
->sym_tab
, i
, &sym
) != &sym
)
1180 if (GELF_ST_BIND(sym
.st_info
) != STB_GLOBAL
||
1181 GELF_ST_TYPE(sym
.st_info
) != STT_NOTYPE
||
1182 sym
.st_shndx
!= ctx
->sec_maps
||
1183 sym
.st_value
/ sizeof(struct bpf_elf_map
) != which
)
1186 return bpf_str_tab_name(ctx
, &sym
);
1192 static int bpf_maps_attach_all(struct bpf_elf_ctx
*ctx
)
1194 const char *map_name
;
1197 for (i
= 0; i
< ctx
->map_num
; i
++) {
1198 map_name
= bpf_map_fetch_name(ctx
, i
);
1202 fd
= bpf_map_attach(map_name
, &ctx
->maps
[i
], ctx
);
1206 ctx
->map_fds
[i
] = fd
;
1212 static int bpf_fill_section_data(struct bpf_elf_ctx
*ctx
, int section
,
1213 struct bpf_elf_sec_data
*data
)
1215 Elf_Data
*sec_edata
;
1220 memset(data
, 0, sizeof(*data
));
1222 sec_fd
= elf_getscn(ctx
->elf_fd
, section
);
1225 if (gelf_getshdr(sec_fd
, &sec_hdr
) != &sec_hdr
)
1228 sec_name
= elf_strptr(ctx
->elf_fd
, ctx
->elf_hdr
.e_shstrndx
,
1230 if (!sec_name
|| !sec_hdr
.sh_size
)
1233 sec_edata
= elf_getdata(sec_fd
, NULL
);
1234 if (!sec_edata
|| elf_getdata(sec_fd
, sec_edata
))
1237 memcpy(&data
->sec_hdr
, &sec_hdr
, sizeof(sec_hdr
));
1239 data
->sec_name
= sec_name
;
1240 data
->sec_data
= sec_edata
;
1244 static int bpf_fetch_maps(struct bpf_elf_ctx
*ctx
, int section
,
1245 struct bpf_elf_sec_data
*data
)
1247 if (data
->sec_data
->d_size
% sizeof(struct bpf_elf_map
) != 0)
1250 ctx
->map_num
= data
->sec_data
->d_size
/ sizeof(struct bpf_elf_map
);
1251 ctx
->sec_maps
= section
;
1252 ctx
->sec_done
[section
] = true;
1254 if (ctx
->map_num
> ARRAY_SIZE(ctx
->map_fds
)) {
1255 fprintf(stderr
, "Too many BPF maps in ELF section!\n");
1259 memcpy(ctx
->maps
, data
->sec_data
->d_buf
, data
->sec_data
->d_size
);
1263 static int bpf_fetch_license(struct bpf_elf_ctx
*ctx
, int section
,
1264 struct bpf_elf_sec_data
*data
)
1266 if (data
->sec_data
->d_size
> sizeof(ctx
->license
))
1269 memcpy(ctx
->license
, data
->sec_data
->d_buf
, data
->sec_data
->d_size
);
1270 ctx
->sec_done
[section
] = true;
1274 static int bpf_fetch_symtab(struct bpf_elf_ctx
*ctx
, int section
,
1275 struct bpf_elf_sec_data
*data
)
1277 ctx
->sym_tab
= data
->sec_data
;
1278 ctx
->sym_num
= data
->sec_hdr
.sh_size
/ data
->sec_hdr
.sh_entsize
;
1279 ctx
->sec_done
[section
] = true;
1283 static int bpf_fetch_strtab(struct bpf_elf_ctx
*ctx
, int section
,
1284 struct bpf_elf_sec_data
*data
)
1286 ctx
->str_tab
= data
->sec_data
;
1287 ctx
->sec_done
[section
] = true;
1291 static int bpf_fetch_ancillary(struct bpf_elf_ctx
*ctx
)
1293 struct bpf_elf_sec_data data
;
1296 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1297 ret
= bpf_fill_section_data(ctx
, i
, &data
);
1301 if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1302 !strcmp(data
.sec_name
, ELF_SECTION_MAPS
))
1303 ret
= bpf_fetch_maps(ctx
, i
, &data
);
1304 else if (data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1305 !strcmp(data
.sec_name
, ELF_SECTION_LICENSE
))
1306 ret
= bpf_fetch_license(ctx
, i
, &data
);
1307 else if (data
.sec_hdr
.sh_type
== SHT_SYMTAB
&&
1308 !strcmp(data
.sec_name
, ".symtab"))
1309 ret
= bpf_fetch_symtab(ctx
, i
, &data
);
1310 else if (data
.sec_hdr
.sh_type
== SHT_STRTAB
&&
1311 !strcmp(data
.sec_name
, ".strtab"))
1312 ret
= bpf_fetch_strtab(ctx
, i
, &data
);
1314 fprintf(stderr
, "Error parsing section %d! Perhaps"
1315 "check with readelf -a?\n", i
);
1320 if (ctx
->sym_tab
&& ctx
->str_tab
&& ctx
->sec_maps
) {
1321 ret
= bpf_maps_attach_all(ctx
);
1323 fprintf(stderr
, "Error loading maps into kernel!\n");
1331 static int bpf_fetch_prog(struct bpf_elf_ctx
*ctx
, const char *section
)
1333 struct bpf_elf_sec_data data
;
1334 struct bpf_elf_prog prog
;
1335 int ret
, i
, fd
= -1;
1337 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1338 if (ctx
->sec_done
[i
])
1341 ret
= bpf_fill_section_data(ctx
, i
, &data
);
1343 !(data
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1344 data
.sec_hdr
.sh_flags
& SHF_EXECINSTR
&&
1345 !strcmp(data
.sec_name
, section
)))
1348 memset(&prog
, 0, sizeof(prog
));
1349 prog
.type
= ctx
->type
;
1350 prog
.insns
= data
.sec_data
->d_buf
;
1351 prog
.size
= data
.sec_data
->d_size
;
1352 prog
.license
= ctx
->license
;
1354 fd
= bpf_prog_attach(section
, &prog
, ctx
);
1358 ctx
->sec_done
[i
] = true;
1365 static int bpf_apply_relo_data(struct bpf_elf_ctx
*ctx
,
1366 struct bpf_elf_sec_data
*data_relo
,
1367 struct bpf_elf_sec_data
*data_insn
)
1369 Elf_Data
*idata
= data_insn
->sec_data
;
1370 GElf_Shdr
*rhdr
= &data_relo
->sec_hdr
;
1371 int relo_ent
, relo_num
= rhdr
->sh_size
/ rhdr
->sh_entsize
;
1372 struct bpf_insn
*insns
= idata
->d_buf
;
1373 unsigned int num_insns
= idata
->d_size
/ sizeof(*insns
);
1375 for (relo_ent
= 0; relo_ent
< relo_num
; relo_ent
++) {
1376 unsigned int ioff
, rmap
;
1380 if (gelf_getrel(data_relo
->sec_data
, relo_ent
, &relo
) != &relo
)
1383 ioff
= relo
.r_offset
/ sizeof(struct bpf_insn
);
1384 if (ioff
>= num_insns
||
1385 insns
[ioff
].code
!= (BPF_LD
| BPF_IMM
| BPF_DW
)) {
1386 fprintf(stderr
, "ELF contains relo data for non ld64 "
1387 "instruction at offset %u! Compiler bug?!\n",
1389 if (ioff
< num_insns
&&
1390 insns
[ioff
].code
== (BPF_JMP
| BPF_CALL
))
1391 fprintf(stderr
, " - Try to annotate functions "
1392 "with always_inline attribute!\n");
1396 if (gelf_getsym(ctx
->sym_tab
, GELF_R_SYM(relo
.r_info
), &sym
) != &sym
)
1398 if (sym
.st_shndx
!= ctx
->sec_maps
) {
1399 fprintf(stderr
, "ELF contains non-map related relo data in "
1400 "entry %u pointing to section %u! Compiler bug?!\n",
1401 relo_ent
, sym
.st_shndx
);
1405 rmap
= sym
.st_value
/ sizeof(struct bpf_elf_map
);
1406 if (rmap
>= ARRAY_SIZE(ctx
->map_fds
))
1408 if (!ctx
->map_fds
[rmap
])
1412 fprintf(stderr
, "Map \'%s\' (%d) injected into prog "
1413 "section \'%s\' at offset %u!\n",
1414 bpf_str_tab_name(ctx
, &sym
), ctx
->map_fds
[rmap
],
1415 data_insn
->sec_name
, ioff
);
1417 insns
[ioff
].src_reg
= BPF_PSEUDO_MAP_FD
;
1418 insns
[ioff
].imm
= ctx
->map_fds
[rmap
];
1424 static int bpf_fetch_prog_relo(struct bpf_elf_ctx
*ctx
, const char *section
)
1426 struct bpf_elf_sec_data data_relo
, data_insn
;
1427 struct bpf_elf_prog prog
;
1428 int ret
, idx
, i
, fd
= -1;
1430 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1431 ret
= bpf_fill_section_data(ctx
, i
, &data_relo
);
1432 if (ret
< 0 || data_relo
.sec_hdr
.sh_type
!= SHT_REL
)
1435 idx
= data_relo
.sec_hdr
.sh_info
;
1436 ret
= bpf_fill_section_data(ctx
, idx
, &data_insn
);
1438 !(data_insn
.sec_hdr
.sh_type
== SHT_PROGBITS
&&
1439 data_insn
.sec_hdr
.sh_flags
& SHF_EXECINSTR
&&
1440 !strcmp(data_insn
.sec_name
, section
)))
1443 ret
= bpf_apply_relo_data(ctx
, &data_relo
, &data_insn
);
1447 memset(&prog
, 0, sizeof(prog
));
1448 prog
.type
= ctx
->type
;
1449 prog
.insns
= data_insn
.sec_data
->d_buf
;
1450 prog
.size
= data_insn
.sec_data
->d_size
;
1451 prog
.license
= ctx
->license
;
1453 fd
= bpf_prog_attach(section
, &prog
, ctx
);
1457 ctx
->sec_done
[i
] = true;
1458 ctx
->sec_done
[idx
] = true;
1465 static int bpf_fetch_prog_sec(struct bpf_elf_ctx
*ctx
, const char *section
)
1470 ret
= bpf_fetch_prog_relo(ctx
, section
);
1472 ret
= bpf_fetch_prog(ctx
, section
);
1477 static int bpf_find_map_by_id(struct bpf_elf_ctx
*ctx
, uint32_t id
)
1481 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++)
1482 if (ctx
->map_fds
[i
] && ctx
->maps
[i
].id
== id
&&
1483 ctx
->maps
[i
].type
== BPF_MAP_TYPE_PROG_ARRAY
)
1488 static int bpf_fill_prog_arrays(struct bpf_elf_ctx
*ctx
)
1490 struct bpf_elf_sec_data data
;
1491 uint32_t map_id
, key_id
;
1492 int fd
, i
, ret
, idx
;
1494 for (i
= 1; i
< ctx
->elf_hdr
.e_shnum
; i
++) {
1495 if (ctx
->sec_done
[i
])
1498 ret
= bpf_fill_section_data(ctx
, i
, &data
);
1502 ret
= sscanf(data
.sec_name
, "%i/%i", &map_id
, &key_id
);
1506 idx
= bpf_find_map_by_id(ctx
, map_id
);
1510 fd
= bpf_fetch_prog_sec(ctx
, data
.sec_name
);
1514 ret
= bpf_map_update(ctx
->map_fds
[idx
], &key_id
,
1519 ctx
->sec_done
[i
] = true;
1525 static void bpf_save_finfo(struct bpf_elf_ctx
*ctx
)
1530 memset(&ctx
->stat
, 0, sizeof(ctx
->stat
));
1532 ret
= fstat(ctx
->obj_fd
, &st
);
1534 fprintf(stderr
, "Stat of elf file failed: %s\n",
1539 ctx
->stat
.st_dev
= st
.st_dev
;
1540 ctx
->stat
.st_ino
= st
.st_ino
;
1543 static int bpf_read_pin_mapping(FILE *fp
, uint32_t *id
, char *path
)
1545 char buff
[PATH_MAX
];
1547 while (fgets(buff
, sizeof(buff
), fp
)) {
1550 while (*ptr
== ' ' || *ptr
== '\t')
1553 if (*ptr
== '#' || *ptr
== '\n' || *ptr
== 0)
1556 if (sscanf(ptr
, "%i %s\n", id
, path
) != 2 &&
1557 sscanf(ptr
, "%i %s #", id
, path
) != 2) {
1568 static bool bpf_pinning_reserved(uint32_t pinning
)
1580 static void bpf_hash_init(struct bpf_elf_ctx
*ctx
, const char *db_file
)
1582 struct bpf_hash_entry
*entry
;
1583 char subpath
[PATH_MAX
];
1588 fp
= fopen(db_file
, "r");
1592 memset(subpath
, 0, sizeof(subpath
));
1593 while ((ret
= bpf_read_pin_mapping(fp
, &pinning
, subpath
))) {
1595 fprintf(stderr
, "Database %s is corrupted at: %s\n",
1601 if (bpf_pinning_reserved(pinning
)) {
1602 fprintf(stderr
, "Database %s, id %u is reserved - "
1603 "ignoring!\n", db_file
, pinning
);
1607 entry
= malloc(sizeof(*entry
));
1609 fprintf(stderr
, "No memory left for db entry!\n");
1613 entry
->pinning
= pinning
;
1614 entry
->subpath
= strdup(subpath
);
1615 if (!entry
->subpath
) {
1616 fprintf(stderr
, "No memory left for db entry!\n");
1621 entry
->next
= ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)];
1622 ctx
->ht
[pinning
& (ARRAY_SIZE(ctx
->ht
) - 1)] = entry
;
1628 static void bpf_hash_destroy(struct bpf_elf_ctx
*ctx
)
1630 struct bpf_hash_entry
*entry
;
1633 for (i
= 0; i
< ARRAY_SIZE(ctx
->ht
); i
++) {
1634 while ((entry
= ctx
->ht
[i
]) != NULL
) {
1635 ctx
->ht
[i
] = entry
->next
;
1636 free((char *)entry
->subpath
);
1642 static int bpf_elf_check_ehdr(const struct bpf_elf_ctx
*ctx
)
1644 if (ctx
->elf_hdr
.e_type
!= ET_REL
||
1645 ctx
->elf_hdr
.e_machine
!= 0 ||
1646 ctx
->elf_hdr
.e_version
!= EV_CURRENT
) {
1647 fprintf(stderr
, "ELF format error, ELF file not for eBPF?\n");
1651 switch (ctx
->elf_hdr
.e_ident
[EI_DATA
]) {
1653 fprintf(stderr
, "ELF format error, wrong endianness info?\n");
1656 if (htons(1) == 1) {
1658 "We are big endian, eBPF object is little endian!\n");
1663 if (htons(1) != 1) {
1665 "We are little endian, eBPF object is big endian!\n");
1674 static int bpf_elf_ctx_init(struct bpf_elf_ctx
*ctx
, const char *pathname
,
1675 enum bpf_prog_type type
, bool verbose
)
1679 if (elf_version(EV_CURRENT
) == EV_NONE
||
1680 bpf_init_env(pathname
))
1683 memset(ctx
, 0, sizeof(*ctx
));
1684 ctx
->verbose
= verbose
;
1687 ctx
->obj_fd
= open(pathname
, O_RDONLY
);
1688 if (ctx
->obj_fd
< 0)
1691 ctx
->elf_fd
= elf_begin(ctx
->obj_fd
, ELF_C_READ
, NULL
);
1697 if (elf_kind(ctx
->elf_fd
) != ELF_K_ELF
) {
1702 if (gelf_getehdr(ctx
->elf_fd
, &ctx
->elf_hdr
) !=
1708 ret
= bpf_elf_check_ehdr(ctx
);
1712 ctx
->sec_done
= calloc(ctx
->elf_hdr
.e_shnum
,
1713 sizeof(*(ctx
->sec_done
)));
1714 if (!ctx
->sec_done
) {
1719 if (ctx
->verbose
&& bpf_log_realloc(ctx
)) {
1724 bpf_save_finfo(ctx
);
1725 bpf_hash_init(ctx
, CONFDIR
"/bpf_pinning");
1729 free(ctx
->sec_done
);
1731 elf_end(ctx
->elf_fd
);
1737 static int bpf_maps_count(struct bpf_elf_ctx
*ctx
)
1741 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++) {
1742 if (!ctx
->map_fds
[i
])
1750 static void bpf_maps_teardown(struct bpf_elf_ctx
*ctx
)
1754 for (i
= 0; i
< ARRAY_SIZE(ctx
->map_fds
); i
++) {
1755 if (ctx
->map_fds
[i
])
1756 close(ctx
->map_fds
[i
]);
1760 static void bpf_elf_ctx_destroy(struct bpf_elf_ctx
*ctx
, bool failure
)
1763 bpf_maps_teardown(ctx
);
1765 bpf_hash_destroy(ctx
);
1767 free(ctx
->sec_done
);
1770 elf_end(ctx
->elf_fd
);
/* Single, file-scope ELF context shared by bpf_obj_open() and
 * bpf_send_map_fds(); object loading is therefore not re-entrant.
 */
static struct bpf_elf_ctx __ctx;
1776 static int bpf_obj_open(const char *pathname
, enum bpf_prog_type type
,
1777 const char *section
, bool verbose
)
1779 struct bpf_elf_ctx
*ctx
= &__ctx
;
1782 ret
= bpf_elf_ctx_init(ctx
, pathname
, type
, verbose
);
1784 fprintf(stderr
, "Cannot initialize ELF context!\n");
1788 ret
= bpf_fetch_ancillary(ctx
);
1790 fprintf(stderr
, "Error fetching ELF ancillary data!\n");
1794 fd
= bpf_fetch_prog_sec(ctx
, section
);
1796 fprintf(stderr
, "Error fetching program/map!\n");
1801 ret
= bpf_fill_prog_arrays(ctx
);
1803 fprintf(stderr
, "Error filling program arrays!\n");
1805 bpf_elf_ctx_destroy(ctx
, ret
< 0);
1816 bpf_map_set_send(int fd
, struct sockaddr_un
*addr
, unsigned int addr_len
,
1817 const struct bpf_map_data
*aux
, unsigned int entries
)
1819 struct bpf_map_set_msg msg
;
1820 int *cmsg_buf
, min_fd
;
1824 memset(&msg
, 0, sizeof(msg
));
1826 msg
.aux
.uds_ver
= BPF_SCM_AUX_VER
;
1827 msg
.aux
.num_ent
= entries
;
1829 strncpy(msg
.aux
.obj_name
, aux
->obj
, sizeof(msg
.aux
.obj_name
));
1830 memcpy(&msg
.aux
.obj_st
, aux
->st
, sizeof(msg
.aux
.obj_st
));
1832 cmsg_buf
= bpf_map_set_init(&msg
, addr
, addr_len
);
1833 amsg_buf
= (char *)msg
.aux
.ent
;
1835 for (i
= 0; i
< entries
; i
+= min_fd
) {
1838 min_fd
= min(BPF_SCM_MAX_FDS
* 1U, entries
- i
);
1839 bpf_map_set_init_single(&msg
, min_fd
);
1841 memcpy(cmsg_buf
, &aux
->fds
[i
], sizeof(aux
->fds
[0]) * min_fd
);
1842 memcpy(amsg_buf
, &aux
->ent
[i
], sizeof(aux
->ent
[0]) * min_fd
);
1844 ret
= sendmsg(fd
, &msg
.hdr
, 0);
1853 bpf_map_set_recv(int fd
, int *fds
, struct bpf_map_aux
*aux
,
1854 unsigned int entries
)
1856 struct bpf_map_set_msg msg
;
1857 int *cmsg_buf
, min_fd
;
1858 char *amsg_buf
, *mmsg_buf
;
1859 unsigned int needed
= 1;
1862 cmsg_buf
= bpf_map_set_init(&msg
, NULL
, 0);
1863 amsg_buf
= (char *)msg
.aux
.ent
;
1864 mmsg_buf
= (char *)&msg
.aux
;
1866 for (i
= 0; i
< min(entries
, needed
); i
+= min_fd
) {
1867 struct cmsghdr
*cmsg
;
1870 min_fd
= min(entries
, entries
- i
);
1871 bpf_map_set_init_single(&msg
, min_fd
);
1873 ret
= recvmsg(fd
, &msg
.hdr
, 0);
1877 cmsg
= CMSG_FIRSTHDR(&msg
.hdr
);
1878 if (!cmsg
|| cmsg
->cmsg_type
!= SCM_RIGHTS
)
1880 if (msg
.hdr
.msg_flags
& MSG_CTRUNC
)
1882 if (msg
.aux
.uds_ver
!= BPF_SCM_AUX_VER
)
1885 min_fd
= (cmsg
->cmsg_len
- sizeof(*cmsg
)) / sizeof(fd
);
1886 if (min_fd
> entries
|| min_fd
<= 0)
1889 memcpy(&fds
[i
], cmsg_buf
, sizeof(fds
[0]) * min_fd
);
1890 memcpy(&aux
->ent
[i
], amsg_buf
, sizeof(aux
->ent
[0]) * min_fd
);
1891 memcpy(aux
, mmsg_buf
, offsetof(struct bpf_map_aux
, ent
));
1893 needed
= aux
->num_ent
;
1899 int bpf_send_map_fds(const char *path
, const char *obj
)
1901 struct bpf_elf_ctx
*ctx
= &__ctx
;
1902 struct sockaddr_un addr
;
1903 struct bpf_map_data bpf_aux
;
1906 fd
= socket(AF_UNIX
, SOCK_DGRAM
, 0);
1908 fprintf(stderr
, "Cannot open socket: %s\n",
1913 memset(&addr
, 0, sizeof(addr
));
1914 addr
.sun_family
= AF_UNIX
;
1915 strncpy(addr
.sun_path
, path
, sizeof(addr
.sun_path
));
1917 ret
= connect(fd
, (struct sockaddr
*)&addr
, sizeof(addr
));
1919 fprintf(stderr
, "Cannot connect to %s: %s\n",
1920 path
, strerror(errno
));
1924 memset(&bpf_aux
, 0, sizeof(bpf_aux
));
1926 bpf_aux
.fds
= ctx
->map_fds
;
1927 bpf_aux
.ent
= ctx
->maps
;
1928 bpf_aux
.st
= &ctx
->stat
;
1931 ret
= bpf_map_set_send(fd
, &addr
, sizeof(addr
), &bpf_aux
,
1932 bpf_maps_count(ctx
));
1934 fprintf(stderr
, "Cannot send fds to %s: %s\n",
1935 path
, strerror(errno
));
1937 bpf_maps_teardown(ctx
);
/* Receive map fds from a sender bound to the given AF_UNIX path, the
 * agent-side counterpart of bpf_send_map_fds().
 *
 * Fixes: the socket fd was leaked on the bind() failure path, and
 * strncpy() with the full sun_path size could leave the path
 * unterminated; addr is zeroed first, so copying size-1 bytes
 * guarantees termination.
 *
 * Returns 0 on success, -1 or a negative error code otherwise.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	/* Remove the filesystem socket node we bound. */
	unlink(addr.sun_path);
	close(fd);
	return ret;
}
1975 #endif /* HAVE_ELF */