[systemd.git] / src / shared / bpf-program.c

/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "alloc-util.h"
#include "bpf-program.h"
#include "escape.h"
#include "fd-util.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "path-util.h"
#include "serialize.h"
#include "string-table.h"

/* Maps each cgroup BPF attach type (used as the array index) to the short name that appears in serialized
 * attachment records. Attach types without a designated entry are left NULL. */
static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
        [BPF_CGROUP_INET_INGRESS] =     "ingress",
        [BPF_CGROUP_INET_EGRESS] =      "egress",
        [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
        [BPF_CGROUP_SOCK_OPS] =         "sock_ops",
        [BPF_CGROUP_DEVICE] =           "device",
        [BPF_CGROUP_INET4_BIND] =       "bind4",
        [BPF_CGROUP_INET6_BIND] =       "bind6",
        [BPF_CGROUP_INET4_CONNECT] =    "connect4",
        [BPF_CGROUP_INET6_CONNECT] =    "connect6",
        [BPF_CGROUP_INET4_POST_BIND] =  "post_bind4",
        [BPF_CGROUP_INET6_POST_BIND] =  "post_bind6",
        [BPF_CGROUP_UDP4_SENDMSG] =     "sendmsg4",
        [BPF_CGROUP_UDP6_SENDMSG] =     "sendmsg6",
        [BPF_CGROUP_SYSCTL] =           "sysctl",
        [BPF_CGROUP_UDP4_RECVMSG] =     "recvmsg4",
        [BPF_CGROUP_UDP6_RECVMSG] =     "recvmsg6",
        [BPF_CGROUP_GETSOCKOPT] =       "getsockopt",
        [BPF_CGROUP_SETSOCKOPT] =       "setsockopt",
};

/* Provides bpf_cgroup_attach_type_to_string() and bpf_cgroup_attach_type_from_string(), which the
 * attachment (de)serialization code further down relies on; both operate on the table above. */
DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);

/* Hash operations for sets holding BPFProgram objects: keys are hashed and compared with the trivial_*
 * functions, and bpf_program_unref() is registered as the key destructor. */
DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_unref);

/* The struct bpf_prog_info passed in must be initialized by the caller, since the kernel treats its
 * contents as both input and output of the BPF_OBJ_GET_INFO_BY_FD syscall. */
static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
        union bpf_attr attr;
        int r;

        /* Clear the whole union with an explicit memset rather than a structured initializer: some
         * compilers leave padding bytes non-zero in the latter case.
         * See https://github.com/systemd/systemd/issues/18164
         */
        zero(attr);
        attr.info.info = PTR_TO_UINT64(info);
        attr.info.info_len = info_len;
        attr.info.bpf_fd = prog_fd;

        /* Returns 0 on success, or a negative errno-style error if the syscall fails. */
        r = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
        return r < 0 ? -errno : 0;
}

/* Allocates a new BPFProgram object of the given program type, with a single reference and no kernel
 * object yet (kernel_fd is -1).
 *
 * On success stores the new object in *ret and returns 0; returns -ENOMEM if the allocation fails. */
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;

        /* *ret is written unconditionally below, so insist on a valid output pointer, just like
         * bpf_program_new_from_bpffs_path() does. */
        assert(ret);

        p = new(BPFProgram, 1);
        if (!p)
                return -ENOMEM;

        *p = (BPFProgram) {
                .n_ref = 1,
                .prog_type = prog_type,
                .kernel_fd = -1,
        };

        *ret = TAKE_PTR(p);

        return 0;
}

/* Creates a BPFProgram object for a program located at the given bpffs path: the program is opened via
 * bpf_program_load_from_bpf_fs() and its program type is then queried from the kernel.
 *
 * On success stores the new object in *ret and returns 0; otherwise returns a negative errno-style error
 * and leaves *ret untouched. */
int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret) {
        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        struct bpf_prog_info info = {};
        int r;

        assert(path);
        assert(ret);

        prog = new(BPFProgram, 1);
        if (!prog)
                return -ENOMEM;

        *prog = (BPFProgram) {
                .n_ref = 1,
                .kernel_fd = -1,
                .prog_type = BPF_PROG_TYPE_UNSPEC,
        };

        /* Get an fd for the program first, then ask the kernel what kind of program it is. */
        r = bpf_program_load_from_bpf_fs(prog, path);
        if (r >= 0)
                r = bpf_program_get_info_by_fd(prog->kernel_fd, &info, sizeof(info));
        if (r < 0)
                return r;

        prog->prog_type = info.type;

        *ret = TAKE_PTR(prog);
        return 0;
}

/* Releases a BPFProgram object: detaches it from its cgroup (best effort), closes the kernel fd and frees
 * all memory. Returns the result of mfree(), so callers can assign it back to their pointer. */
static BPFProgram *bpf_program_free(BPFProgram *p) {
        assert(p);

        /* The kernel currently does not detach a BPF program from its cgroup when the last fd referring to
         * the program is closed. As a consequence, a process that attached a program to a cgroup and then
         * terminated abnormally leaves that program pinned until the cgroup itself is removed. This hurts
         * most for the root cgroup, or for cgroups of services that cannot be restarted at runtime (dbus,
         * for example), where only a reboot would reclaim the program's memory. To counter this we keep
         * track of the cgroup a program was attached to and detach it ourselves whenever we are about to
         * close the BPF fd. Failure to detach is deliberately ignored here. */
        (void) bpf_program_cgroup_detach(p);

        free(p->attached_path);
        free(p->instructions);
        safe_close(p->kernel_fd);

        return mfree(p);
}

/* Generates the reference counting helpers for BPFProgram under the bpf_program prefix (bpf_program_unref()
 * is used in this file), with bpf_program_free() passed as the function that releases the object —
 * presumably invoked once the last reference is dropped; confirm against the macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);

/* Appends 'count' instructions from 'instructions' to the program's not-yet-uploaded instruction buffer.
 *
 * Returns 0 on success, -EBUSY if the program has already been uploaded to the kernel (kernel_fd is
 * valid), and -ENOMEM if the buffer cannot be grown. Appending zero instructions is a no-op, in which
 * case 'instructions' may be NULL. */
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {

        assert(p);
        assert(instructions || count == 0);

        if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
                return -EBUSY;

        /* Nothing to append. Return early so that memcpy() below is never called with a NULL source,
         * which is undefined behaviour even for a zero length. */
        if (count == 0)
                return 0;

        /* Refuse requests whose total would wrap around, since a wrapped (too small) size would be
         * handed to the allocator while memcpy() still copies 'count' elements. */
        if (count > SIZE_MAX - p->n_instructions)
                return -ENOMEM;

        if (!GREEDY_REALLOC(p->instructions, p->n_instructions + count))
                return -ENOMEM;

        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
        p->n_instructions += count;

        return 0;
}

/* Uploads the assembled instructions to the kernel via BPF_PROG_LOAD and stores the resulting fd in
 * p->kernel_fd. If log_buf is non-NULL it is handed to the kernel as the log buffer (log_size bytes) and
 * log_level is set to 1.
 *
 * Returns 0 on success (including when the program was loaded before), or a negative errno-style error. */
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
        union bpf_attr attr;
        int fd;

        assert(p);

        /* Already loaded: keep the call idempotent and hand back an empty log. */
        if (p->kernel_fd >= 0) {
                memzero(log_buf, log_size);
                return 0;
        }

        /* FIXME: Clang doesn't 0-pad with structured initialization, causing the kernel to reject the
         * bpf_attr as invalid. See:
         * https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
         * Ideally it should behave like GCC, so that we can remove these workarounds. */
        zero(attr);
        attr.prog_type = p->prog_type;
        attr.insn_cnt = p->n_instructions;
        attr.insns = PTR_TO_UINT64(p->instructions);
        attr.license = PTR_TO_UINT64("GPL");
        attr.log_level = !!log_buf;
        attr.log_size = log_size;
        attr.log_buf = PTR_TO_UINT64(log_buf);

        /* The syscall result is stored in kernel_fd whether or not the call succeeded. */
        fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        p->kernel_fd = fd;
        if (fd < 0)
                return -errno;

        return 0;
}

/* Obtains an fd for the BPF object at the given path via BPF_OBJ_GET and stores it in p->kernel_fd.
 *
 * Returns 0 on success, -EBUSY if p already has a kernel fd, or a negative errno-style error if the
 * syscall fails. */
int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) {
        union bpf_attr attr;
        int fd;

        assert(p);

        /* Never replace a program that has already been assembled and uploaded, or loaded before. */
        if (p->kernel_fd >= 0)
                return -EBUSY;

        zero(attr);
        attr.pathname = PTR_TO_UINT64(path);

        /* The syscall result is stored in kernel_fd whether or not the call succeeded. */
        fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
        p->kernel_fd = fd;
        if (fd < 0)
                return -errno;

        return 0;
}

/* Attaches the program to the cgroup directory at 'path' with the given attach type and flags, loading
 * the program into the kernel first if necessary. On success the path, type and flags of the attachment
 * are recorded in the object.
 *
 * Returns 0 on success, -EINVAL for unsupported flags, -EBUSY if the program is already attached with a
 * different path, type or flags, or another negative errno-style error. */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_free_ char *path_copy = NULL;
        _cleanup_close_ int cgroup_fd = -1;
        union bpf_attr attr;
        int r;

        assert(p);
        assert(type >= 0);
        assert(path);

        if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
                return -EINVAL;

        if (p->attached_path) {
                /* Only a single attachment can be tracked per program, hence refuse early anything that
                 * does not match the attachment we already know about. */
                if (!path_equal(p->attached_path, path) ||
                    p->attached_type != type ||
                    p->attached_flags != flags)
                        return -EBUSY;

                /* Shortcut: a program we attached before usually does not need attaching again. The one
                 * exception is BPF_F_ALLOW_OVERRIDE mode, where somebody else might have replaced our
                 * program in the meantime, so we reattach to be safe. With flags == 0 nobody else can
                 * replace our program, and with BPF_F_ALLOW_MULTI other programs are installed in addition
                 * to ours, so ours stays in effect in both cases. */
                if (flags != BPF_F_ALLOW_OVERRIDE)
                        return 0;
        }

        /* Make sure a kernel object exists for this program. */
        r = bpf_program_load_kernel(p, NULL, 0);
        if (r < 0)
                return r;

        path_copy = strdup(path);
        if (!path_copy)
                return -ENOMEM;

        cgroup_fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (cgroup_fd < 0)
                return -errno;

        zero(attr);
        attr.attach_bpf_fd = p->kernel_fd;
        attr.target_fd = cgroup_fd;
        attr.attach_type = type;
        attr.attach_flags = flags;

        r = bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
        if (r < 0)
                return -errno;

        /* Remember the attachment, so that it can be matched and detached later. */
        free_and_replace(p->attached_path, path_copy);
        p->attached_flags = flags;
        p->attached_type = type;

        return 0;
}

/* Detaches the program from the cgroup recorded in p->attached_path and forgets the attachment.
 *
 * Returns 0 on success (also if the cgroup directory no longer exists), -EUNATCH if no attachment is
 * recorded, or a negative errno-style error if opening the cgroup or detaching fails. */
int bpf_program_cgroup_detach(BPFProgram *p) {
        _cleanup_close_ int cgroup_fd = -1;

        assert(p);

        if (!p->attached_path)
                return -EUNATCH;

        cgroup_fd = open(p->attached_path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (cgroup_fd >= 0) {
                union bpf_attr attr;

                zero(attr);
                attr.attach_bpf_fd = p->kernel_fd;
                attr.target_fd = cgroup_fd;
                attr.attach_type = p->attached_type;

                if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
                        return -errno;

        } else if (errno != ENOENT) {
                return -errno;
        }

        /* A cgroup that is gone (ENOENT) needs no explicit detaching: its removal detached the program
         * implicitly, hence that case is not treated as an error. */

        p->attached_path = mfree(p->attached_path);

        return 0;
}

/* Creates a BPF map with the given type, key/value sizes, capacity and flags via BPF_MAP_CREATE.
 *
 * Returns the new map fd on success, or a negative errno-style error. */
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
        union bpf_attr attr;
        int map_fd;

        zero(attr);
        attr.map_type = type;
        attr.map_flags = flags;
        attr.max_entries = max_entries;
        attr.key_size = key_size;
        attr.value_size = value_size;

        map_fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));

        return map_fd < 0 ? -errno : map_fd;
}

/* Issues BPF_MAP_UPDATE_ELEM on the map referred to by fd, passing the given key and value buffers.
 *
 * Returns 0 on success, or a negative errno-style error. */
int bpf_map_update_element(int fd, const void *key, void *value) {
        union bpf_attr attr;
        int r;

        zero(attr);
        attr.map_fd = fd;
        attr.value = PTR_TO_UINT64(value);
        attr.key = PTR_TO_UINT64(key);

        r = bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

        return r < 0 ? -errno : 0;
}

/* Issues BPF_MAP_LOOKUP_ELEM on the map referred to by fd, passing the given key and value buffers.
 *
 * Returns 0 on success, or a negative errno-style error. */
int bpf_map_lookup_element(int fd, const void *key, void *value) {
        union bpf_attr attr;
        int r;

        zero(attr);
        attr.map_fd = fd;
        attr.value = PTR_TO_UINT64(value);
        attr.key = PTR_TO_UINT64(key);

        r = bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));

        return r < 0 ? -errno : 0;
}

/* Pins the BPF object referred to by prog_fd at the given bpffs path via BPF_OBJ_PIN.
 *
 * Returns 0 on success, or a negative errno-style error. */
int bpf_program_pin(int prog_fd, const char *bpffs_path) {
        union bpf_attr attr;
        int r;

        zero(attr);
        attr.bpf_fd = prog_fd;
        attr.pathname = PTR_TO_UINT64((void *) bpffs_path);

        r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr));

        return r < 0 ? -errno : 0;
}

/* Queries the kernel for the ID of the BPF program referred to by prog_fd.
 *
 * On success stores the ID in *ret_id and returns 0; otherwise returns the negative errno-style error
 * reported by bpf_program_get_info_by_fd() and leaves *ret_id untouched. */
int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
        /* Must be zero-initialized: the kernel reads this structure as well as writing to it. */
        struct bpf_prog_info info = {};
        int r;

        assert(ret_id);

        r = bpf_program_get_info_by_fd(prog_fd, &info, sizeof(info));
        if (r < 0)
                return r;

        *ret_id = info.id;

        return 0;
}

/* Serializes the cgroup attachment of p under the given key as "<fd> <attach-type> <escaped-path>", where
 * <fd> is a duplicate of the program's kernel fd that has been added to fds. Afterwards the object forgets
 * that it is attached.
 *
 * Returns 0 on success or if there is nothing to serialize (p is NULL or not attached), otherwise a
 * negative errno-style error. */
int bpf_program_serialize_attachment(
                FILE *f,
                FDSet *fds,
                const char *key,
                BPFProgram *p) {

        _cleanup_free_ char *path_escaped = NULL;
        int fd_copy, r;

        if (!p)
                return 0;
        if (!p->attached_path)
                return 0;

        assert(p->kernel_fd >= 0);

        path_escaped = cescape(p->attached_path);
        if (!path_escaped)
                return -ENOMEM;

        fd_copy = fdset_put_dup(fds, p->kernel_fd);
        if (fd_copy < 0)
                return log_error_errno(fd_copy, "Failed to add BPF kernel fd to serialize: %m");

        r = serialize_item_format(
                        f,
                        key,
                        "%i %s %s",
                        fd_copy,
                        bpf_cgroup_attach_type_to_string(p->attached_type),
                        path_escaped);
        if (r < 0)
                return r;

        /* From here on the attachment is 'owned' by the serialization rather than by this object, so we
         * forget about it. This matters because of how BPF handles lifecycles: a program is not detached
         * from its cgroup implicitly on close(), it has to be detached explicitly, which
         * bpf_program_free() does whenever attached_path is non-NULL. Since the attachment is supposed to
         * survive until it is deserialized again, freeing this object must not detach it, hence
         * attached_path is reset to NULL here. */
        p->attached_path = mfree(p->attached_path);

        return 0;
}

/* Serializes the attachment of every program in 'set' under the same key, by calling
 * bpf_program_serialize_attachment() for each entry.
 *
 * Stops at the first failure and returns its negative errno-style error; returns 0 otherwise. */
int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set) {
        BPFProgram *prog;

        SET_FOREACH(prog, set) {
                int r;

                r = bpf_program_serialize_attachment(f, fds, key, prog);
                if (r < 0)
                        return r;
        }

        return 0;
}

/* Parses a serialized attachment of the form "<fd> <attach-type> <escaped-path>" (the format written by
 * bpf_program_serialize_attachment()) and turns it into a new BPFProgram object that owns the fd taken
 * out of fds and records the attach type and path.
 *
 * On success any program previously stored in *bpfp is unreferenced, *bpfp is set to the new object and
 * 0 is returned. Returns -EINVAL if the fd, the attach type or the path is missing, -EBADF for a negative
 * fd number, -ENOMEM on allocation failure, or the error of the failing helper. */
int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp) {
        _cleanup_free_ char *sfd = NULL, *sat = NULL, *unescaped = NULL;
        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
        _cleanup_close_ int fd = -1;
        int ifd, at, r;

        assert(v);
        assert(bpfp);

        /* Extract first word: the fd number */
        r = extract_first_word(&v, &sfd, NULL, 0);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        r = safe_atoi(sfd, &ifd);
        if (r < 0)
                return r;
        if (ifd < 0)
                return -EBADF;

        /* Extract second word: the attach type */
        r = extract_first_word(&v, &sat, NULL, 0);
        if (r < 0)
                return r;
        if (r == 0)
                return -EINVAL;

        at = bpf_cgroup_attach_type_from_string(sat);
        if (at < 0)
                return at;

        /* The rest is the path. Refuse records that carry no path at all: once the words above have been
         * consumed, v may be empty or even NULL (extract_first_word() resets the cursor when it reaches
         * the end of the input — NOTE(review): confirm against its implementation), and neither must be
         * passed on to cunescape() nor recorded as the attachment path. */
        if (!v || !*v)
                return -EINVAL;

        r = cunescape(v, 0, &unescaped);
        if (r < 0)
                return r;

        /* Take the fd out of the set only after all parsing succeeded; from here on we own it, and the
         * cleanup handler closes it should we fail below. */
        fd = fdset_remove(fds, ifd);
        if (fd < 0)
                return fd;

        p = new(BPFProgram, 1);
        if (!p)
                return -ENOMEM;

        *p = (BPFProgram) {
                .n_ref = 1,
                .kernel_fd = TAKE_FD(fd),
                .prog_type = BPF_PROG_TYPE_UNSPEC,
                .attached_path = TAKE_PTR(unescaped),
                .attached_type = at,
        };

        /* Replace whatever program the caller had stored before. */
        if (*bpfp)
                bpf_program_unref(*bpfp);

        *bpfp = TAKE_PTR(p);
        return 0;
}

/* Deserializes a single attachment record via bpf_program_deserialize_attachment() and hands the
 * resulting program over to the set at *bpfsetp using set_ensure_consume() with bpf_program_hash_ops.
 *
 * Returns 0 on success, or a negative errno-style error from either step. */
int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp) {
        BPFProgram *prog = NULL;
        int r;

        assert(v);
        assert(bpfsetp);

        r = bpf_program_deserialize_attachment(v, fds, &prog);
        if (r < 0)
                return r;

        /* Non-negative results of set_ensure_consume() are all reported as plain success. */
        r = set_ensure_consume(bpfsetp, &bpf_program_hash_ops, prog);

        return r < 0 ? r : 0;
}
Commit	Line	Data
a032b68d	1	/* SPDX-License-Identifier: LGPL-2.1-or-later */
f5e65279 MB	2
	3	#include <fcntl.h>
	4	#include <sys/stat.h>
	5	#include <sys/types.h>
	6	#include <unistd.h>
	7
	8	#include "alloc-util.h"
	9	#include "bpf-program.h"
8b3d4ff0	10	#include "escape.h"
f5e65279	11	#include "fd-util.h"
bb4f798a	12	#include "memory-util.h"
e1f67bc7	13	#include "missing_syscall.h"
98393f85	14	#include "path-util.h"
8b3d4ff0 MB	15	#include "serialize.h"
	16	#include "string-table.h"
	17
	18	static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
	19	[BPF_CGROUP_INET_INGRESS] = "ingress",
	20	[BPF_CGROUP_INET_EGRESS] = "egress",
	21	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
	22	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
	23	[BPF_CGROUP_DEVICE] = "device",
	24	[BPF_CGROUP_INET4_BIND] = "bind4",
	25	[BPF_CGROUP_INET6_BIND] = "bind6",
	26	[BPF_CGROUP_INET4_CONNECT] = "connect4",
	27	[BPF_CGROUP_INET6_CONNECT] = "connect6",
	28	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
	29	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
	30	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
	31	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
	32	[BPF_CGROUP_SYSCTL] = "sysctl",
	33	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
	34	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
	35	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
	36	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
	37	};
	38
	39	DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
	40
	41	DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_unref);
	42
	43	/* struct bpf_prog_info info must be initialized since its value is both input and output
	44	* for BPF_OBJ_GET_INFO_BY_FD syscall. */
	45	static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
	46	union bpf_attr attr;
	47
	48	/* Explicitly memset to zero since some compilers may produce non-zero-initialized padding when
	49	* structured initialization is used.
	50	* Refer to https://github.com/systemd/systemd/issues/18164
	51	*/
	52	zero(attr);
	53	attr.info.bpf_fd = prog_fd;
	54	attr.info.info_len = info_len;
	55	attr.info.info = PTR_TO_UINT64(info);
	56
	57	if (bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)) < 0)
	58	return -errno;
	59
	60	return 0;
	61	}
f5e65279 MB	62
	63	int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
	64	_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
	65
8b3d4ff0 MB	66	p = new(BPFProgram, 1);
	67	if (!p)
	68	return -ENOMEM;
	69
	70	*p = (BPFProgram) {
	71	.n_ref = 1,
	72	.prog_type = prog_type,
	73	.kernel_fd = -1,
	74	};
	75
	76	*ret = TAKE_PTR(p);
	77
	78	return 0;
	79	}
	80
	81	int bpf_program_new_from_bpffs_path(const char path, BPFProgram *ret) {
	82	_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
	83	struct bpf_prog_info info = {};
	84	int r;
	85
	86	assert(path);
	87	assert(ret);
	88
	89	p = new(BPFProgram, 1);
f5e65279	90	if (!p)
9e294e28	91	return -ENOMEM;
f5e65279	92
8b3d4ff0 MB	93	*p = (BPFProgram) {
	94	.prog_type = BPF_PROG_TYPE_UNSPEC,
	95	.n_ref = 1,
	96	.kernel_fd = -1,
	97	};
	98
	99	r = bpf_program_load_from_bpf_fs(p, path);
	100	if (r < 0)
	101	return r;
	102
	103	r = bpf_program_get_info_by_fd(p->kernel_fd, &info, sizeof(info));
	104	if (r < 0)
	105	return r;
f5e65279	106
8b3d4ff0	107	p->prog_type = info.type;
b012e921 MB	108	*ret = TAKE_PTR(p);
b012e921 MB	109
f5e65279 MB	110	return 0;
	111	}
	112
6e866b33 MB	113	static BPFProgram bpf_program_free(BPFProgram p) {
6e866b33 MB	114	assert(p);
98393f85 MB	115
	116	/* Unfortunately, the kernel currently doesn't implicitly detach BPF programs from their cgroups when the last
	117	* fd to the BPF program is closed. This has nasty side-effects since this means that abnormally terminated
	118	* programs that attached one of their BPF programs to a cgroup will leave this programs pinned for good with
	119	* zero chance of recovery, until the cgroup is removed. This is particularly problematic if the cgroup in
	120	* question is the root cgroup (or any other cgroup belonging to a service that cannot be restarted during
	121	* operation, such as dbus), as the memory for the BPF program can only be reclaimed through a reboot. To
	122	* counter this, we track closely to which cgroup a program was attached to and will detach it on our own
	123	* whenever we close the BPF fd. */
	124	(void) bpf_program_cgroup_detach(p);
	125
f5e65279 MB	126	safe_close(p->kernel_fd);
f5e65279 MB	127	free(p->instructions);
98393f85	128	free(p->attached_path);
f5e65279 MB	129
	130	return mfree(p);
	131	}
	132
6e866b33 MB	133	DEFINE_TRIVIAL_REF_UNREF_FUNC(BPFProgram, bpf_program, bpf_program_free);
6e866b33 MB	134
f5e65279 MB	135	int bpf_program_add_instructions(BPFProgram p, const struct bpf_insn instructions, size_t count) {
	136
	137	assert(p);
	138
98393f85 MB	139	if (p->kernel_fd >= 0) /* don't allow modification after we uploaded things to the kernel */
	140	return -EBUSY;
	141
8b3d4ff0	142	if (!GREEDY_REALLOC(p->instructions, p->n_instructions + count))
f5e65279 MB	143	return -ENOMEM;
	144
	145	memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
	146	p->n_instructions += count;
	147
	148	return 0;
	149	}
	150
	151	int bpf_program_load_kernel(BPFProgram p, char log_buf, size_t log_size) {
	152	union bpf_attr attr;
	153
	154	assert(p);
	155
98393f85 MB	156	if (p->kernel_fd >= 0) { /* make this idempotent */
	157	memzero(log_buf, log_size);
	158	return 0;
	159	}
f5e65279	160
1ce460ce MB	161	// FIXME: Clang doesn't 0-pad with structured initialization, causing
	162	// the kernel to reject the bpf_attr as invalid. See:
	163	// https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
	164	// Ideally it should behave like GCC, so that we can remove these workarounds.
	165	zero(attr);
	166	attr.prog_type = p->prog_type;
	167	attr.insns = PTR_TO_UINT64(p->instructions);
	168	attr.insn_cnt = p->n_instructions;
	169	attr.license = PTR_TO_UINT64("GPL");
	170	attr.log_buf = PTR_TO_UINT64(log_buf);
	171	attr.log_level = !!log_buf;
	172	attr.log_size = log_size;
f5e65279 MB	173
	174	p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
	175	if (p->kernel_fd < 0)
	176	return -errno;
	177
	178	return 0;
	179	}
	180
f2dec872 BR	181	int bpf_program_load_from_bpf_fs(BPFProgram p, const char path) {
	182	union bpf_attr attr;
	183
	184	assert(p);
	185
	186	if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */
	187	return -EBUSY;
	188
1ce460ce MB	189	zero(attr);
1ce460ce MB	190	attr.pathname = PTR_TO_UINT64(path);
f2dec872 BR	191
	192	p->kernel_fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));
	193	if (p->kernel_fd < 0)
	194	return -errno;
	195
	196	return 0;
	197	}
	198
f5e65279	199	int bpf_program_cgroup_attach(BPFProgram p, int type, const char path, uint32_t flags) {
98393f85	200	_cleanup_free_ char *copy = NULL;
f5e65279 MB	201	_cleanup_close_ int fd = -1;
f5e65279 MB	202	union bpf_attr attr;
98393f85	203	int r;
f5e65279 MB	204
	205	assert(p);
	206	assert(type >= 0);
	207	assert(path);
	208
98393f85 MB	209	if (!IN_SET(flags, 0, BPF_F_ALLOW_OVERRIDE, BPF_F_ALLOW_MULTI))
	210	return -EINVAL;
	211
	212	/* We need to track which cgroup the program is attached to, and we can only track one attachment, hence let's
	213	* refuse this early. */
	214	if (p->attached_path) {
	215	if (!path_equal(p->attached_path, path))
	216	return -EBUSY;
	217	if (p->attached_type != type)
	218	return -EBUSY;
	219	if (p->attached_flags != flags)
	220	return -EBUSY;
	221
	222	/* Here's a shortcut: if we previously attached this program already, then we don't have to do so
	223	* again. Well, with one exception: if we are in BPF_F_ALLOW_OVERRIDE mode then someone else might have
	224	* replaced our program since the last time, hence let's reattach it again, just to be safe. In flags
	225	* == 0 mode this is not an issue since nobody else can replace our program in that case, and in flags
	226	* == BPF_F_ALLOW_MULTI mode any other's program would be installed in addition to ours hence ours
	227	* would remain in effect. */
	228	if (flags != BPF_F_ALLOW_OVERRIDE)
	229	return 0;
	230	}
	231
	232	/* Ensure we have a kernel object for this. */
	233	r = bpf_program_load_kernel(p, NULL, 0);
	234	if (r < 0)
	235	return r;
	236
	237	copy = strdup(path);
	238	if (!copy)
	239	return -ENOMEM;
	240
f5e65279 MB	241	fd = open(path, O_DIRECTORY\|O_RDONLY\|O_CLOEXEC);
	242	if (fd < 0)
	243	return -errno;
	244
1ce460ce MB	245	zero(attr);
	246	attr.attach_type = type;
	247	attr.target_fd = fd;
	248	attr.attach_bpf_fd = p->kernel_fd;
	249	attr.attach_flags = flags;
f5e65279 MB	250
	251	if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
	252	return -errno;
	253
98393f85 MB	254	free_and_replace(p->attached_path, copy);
	255	p->attached_type = type;
	256	p->attached_flags = flags;
	257
f5e65279 MB	258	return 0;
	259	}
	260
98393f85	261	int bpf_program_cgroup_detach(BPFProgram *p) {
f5e65279	262	_cleanup_close_ int fd = -1;
f5e65279	263
98393f85	264	assert(p);
f5e65279	265
98393f85 MB	266	if (!p->attached_path)
98393f85 MB	267	return -EUNATCH;
f5e65279	268
98393f85 MB	269	fd = open(p->attached_path, O_DIRECTORY\|O_RDONLY\|O_CLOEXEC);
	270	if (fd < 0) {
	271	if (errno != ENOENT)
	272	return -errno;
f5e65279	273
98393f85 MB	274	/* If the cgroup does not exist anymore, then we don't have to explicitly detach, it got detached
	275	* implicitly by the removal, hence don't complain */
	276
	277	} else {
	278	union bpf_attr attr;
	279
1ce460ce MB	280	zero(attr);
	281	attr.attach_type = p->attached_type;
	282	attr.target_fd = fd;
	283	attr.attach_bpf_fd = p->kernel_fd;
98393f85 MB	284
	285	if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
	286	return -errno;
	287	}
	288
	289	p->attached_path = mfree(p->attached_path);
f5e65279 MB	290
	291	return 0;
	292	}
	293
	294	int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
1ce460ce	295	union bpf_attr attr;
f5e65279 MB	296	int fd;
f5e65279 MB	297
1ce460ce MB	298	zero(attr);
	299	attr.map_type = type;
	300	attr.key_size = key_size;
	301	attr.value_size = value_size;
	302	attr.max_entries = max_entries;
	303	attr.map_flags = flags;
	304
f5e65279 MB	305	fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	306	if (fd < 0)
	307	return -errno;
	308
	309	return fd;
	310	}
	311
	312	int bpf_map_update_element(int fd, const void key, void value) {
1ce460ce	313	union bpf_attr attr;
f5e65279	314
1ce460ce MB	315	zero(attr);
	316	attr.map_fd = fd;
	317	attr.key = PTR_TO_UINT64(key);
	318	attr.value = PTR_TO_UINT64(value);
f5e65279 MB	319
	320	if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
	321	return -errno;
	322
	323	return 0;
	324	}
	325
	326	int bpf_map_lookup_element(int fd, const void key, void value) {
1ce460ce	327	union bpf_attr attr;
f5e65279	328
1ce460ce MB	329	zero(attr);
	330	attr.map_fd = fd;
	331	attr.key = PTR_TO_UINT64(key);
	332	attr.value = PTR_TO_UINT64(value);
f5e65279 MB	333
	334	if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
	335	return -errno;
	336
	337	return 0;
	338	}
8b3d4ff0 MB	339
	340	int bpf_program_pin(int prog_fd, const char *bpffs_path) {
	341	union bpf_attr attr;
	342
	343	zero(attr);
	344	attr.pathname = PTR_TO_UINT64((void *) bpffs_path);
	345	attr.bpf_fd = prog_fd;
	346
	347	if (bpf(BPF_OBJ_PIN, &attr, sizeof(attr)) < 0)
	348	return -errno;
	349
	350	return 0;
	351	}
	352
	353	int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
	354	struct bpf_prog_info info = {};
	355	int r;
	356
	357	assert(ret_id);
	358
	359	r = bpf_program_get_info_by_fd(prog_fd, &info, sizeof(info));
	360	if (r < 0)
	361	return r;
	362
	363	*ret_id = info.id;
	364
	365	return 0;
	366	};
	367
	368	int bpf_program_serialize_attachment(
	369	FILE *f,
	370	FDSet *fds,
	371	const char *key,
	372	BPFProgram *p) {
	373
	374	_cleanup_free_ char *escaped = NULL;
	375	int copy, r;
	376
	377	if (!p \|\| !p->attached_path)
	378	return 0;
	379
	380	assert(p->kernel_fd >= 0);
	381
	382	escaped = cescape(p->attached_path);
	383	if (!escaped)
	384	return -ENOMEM;
	385
	386	copy = fdset_put_dup(fds, p->kernel_fd);
	387	if (copy < 0)
	388	return log_error_errno(copy, "Failed to add BPF kernel fd to serialize: %m");
	389
	390	r = serialize_item_format(
	391	f,
	392	key,
	393	"%i %s %s",
	394	copy,
	395	bpf_cgroup_attach_type_to_string(p->attached_type),
	396	escaped);
	397	if (r < 0)
	398	return r;
	399
	400	/* After serialization, let's forget the fact that this program is attached. The attachment — if you
	401	* so will — is now 'owned' by the serialization, and not us anymore. Why does that matter? Because
	402	* of BPF's less-than-ideal lifecycle handling: to detach a program from a cgroup we have to
403	* explicitly do so, it's not done implicitly on close(). Now, since we are serializing here we don't
404	* want the program to be detached while freeing things, so that the attachment can be retained after
405	* deserializing again. bpf_program_free() implicitly detaches things, if attached_path is non-NULL,
406	* hence we set it to NULL here. */
407
408	p->attached_path = mfree(p->attached_path);
409	return 0;
410	}
411
412	int bpf_program_serialize_attachment_set(FILE f, FDSet fds, const char key, Set set) {
413	BPFProgram *p;
414	int r;
415
416	SET_FOREACH(p, set) {
417	r = bpf_program_serialize_attachment(f, fds, key, p);
418	if (r < 0)
419	return r;
420	}
421
422	return 0;
423	}
424
425	int bpf_program_deserialize_attachment(const char v, FDSet fds, BPFProgram **bpfp) {
426	_cleanup_free_ char sfd = NULL, sat = NULL, *unescaped = NULL;
427	_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
428	_cleanup_close_ int fd = -1;
429	int ifd, at, r;
430
431	assert(v);
432	assert(bpfp);
433
434	/* Extract first word: the fd number */
435	r = extract_first_word(&v, &sfd, NULL, 0);
436	if (r < 0)
437	return r;
438	if (r == 0)
439	return -EINVAL;
440
441	r = safe_atoi(sfd, &ifd);
442	if (r < 0)
443	return r;
444	if (ifd < 0)
445	return -EBADF;
446
447	/* Extract second word: the attach type */
448	r = extract_first_word(&v, &sat, NULL, 0);
449	if (r < 0)
450	return r;
451	if (r == 0)
452	return -EINVAL;
453
454	at = bpf_cgroup_attach_type_from_string(sat);
455	if (at < 0)
456	return at;
457
458	/* The rest is the path */
459	r = cunescape(v, 0, &unescaped);
460	if (r < 0)
461	return r;
462
463	fd = fdset_remove(fds, ifd);
464	if (fd < 0)
465	return fd;
466
467	p = new(BPFProgram, 1);
468	if (!p)
469	return -ENOMEM;
470
471	*p = (BPFProgram) {
472	.n_ref = 1,
473	.kernel_fd = TAKE_FD(fd),
474	.prog_type = BPF_PROG_TYPE_UNSPEC,
475	.attached_path = TAKE_PTR(unescaped),
476	.attached_type = at,
477	};
478
479	if (*bpfp)
480	bpf_program_unref(*bpfp);
481
482	*bpfp = TAKE_PTR(p);
483	return 0;
484	}
485
486	int bpf_program_deserialize_attachment_set(const char v, FDSet fds, Set **bpfsetp) {
487	BPFProgram *p = NULL;
488	int r;
489
490	assert(v);
491	assert(bpfsetp);
492
493	r = bpf_program_deserialize_attachment(v, fds, &p);
494	if (r < 0)
495	return r;
496
497	r = set_ensure_consume(bpfsetp, &bpf_program_hash_ops, p);
498	if (r < 0)
499	return r;
500
501	return 0;
502	}