#ifndef __LXC_CONF_H
#define __LXC_CONF_H
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
+#include "config.h"
+
#include <linux/magic.h>
#include <net/if.h>
#include <netinet/in.h>
#include "attach_options.h"
#include "caps.h"
#include "compiler.h"
-#include "config.h"
#include "hlist.h"
#include "list.h"
#include "lxcseccomp.h"
#include "memory_utils.h"
+#include "mount_utils.h"
#include "namespace.h"
#include "ringbuf.h"
#include "start.h"
char *container_dir;
char *namespace_dir;
bool relative;
+ /* If an unprivileged user starts a container on a pure
+ * unified-only hierarchy, we ask systemd to create a scope
+ * for us and then create the monitor and container cgroups
+ * under that scope.
+ * In that case the settings above (such as monitor_dir) are ignored.
+ */
+ char *systemd_scope;
};
};
+
+ struct list_head head;
};
static void free_lxc_cgroup(struct lxc_cgroup *ptr)
struct rlimit { /* plain current/maximum limit pair; list linkage lives in struct lxc_limit, which has its own head member */
unsigned long rlim_cur;
unsigned long rlim_max;
};
#endif
struct lxc_limit { /* one resource-limit entry: a resource name string plus its struct rlimit value */
char *resource;
struct rlimit limit;
+ struct list_head head; /* list linkage; presumably chains entries on lxc_conf.limits - confirm */
};
static void free_lxc_limit(struct lxc_limit *ptr)
struct lxc_sysctl { /* one sysctl entry stored as a key string and a value string */
char *key;
char *value;
+ struct list_head head; /* list linkage; presumably chains entries on lxc_conf.sysctls - confirm */
};
static void free_lxc_sysctl(struct lxc_sysctl *ptr)
struct lxc_proc { /* one proc entry stored as a filename string and a value string */
char *filename;
char *value;
+ struct list_head head; /* list linkage; presumably chains entries on lxc_conf.procs - confirm */
};
static void free_lxc_proc(struct lxc_proc *ptr)
struct id_map { /* one id mapping entry: hostid, nsid and range for the given idtype */
enum idtype idtype;
unsigned long hostid, nsid, range;
+ struct list_head head; /* list linkage; presumably chains entries on lxc_conf.id_map - confirm */
};
/* Defines the number of tty configured and contains the
unsigned int create_file : 1;
unsigned int optional : 1;
unsigned int relative : 1;
- unsigned int recursive : 1;
+ unsigned int bind_recursively : 1;
+ unsigned int propagate_recursively : 1;
unsigned int bind : 1;
char userns_path[PATH_MAX];
unsigned long mnt_flags;
unsigned long prop_flags;
char *data;
- struct lxc_mount_attr attr;
+ struct mount_attr attr;
+ char *raw_options;
};
/* Defines a structure to store the rootfs location, the
* @path : the rootfs source (directory or device)
* @mount : where it is mounted
* @buf : static buffer to construct paths
- * @bev_type : optional backing store type
- * @options : mount options
+ * @bdev_type : optional backing store type
* @managed : whether it is managed by LXC
* @dfd_mnt : fd for @mount
* @dfd_dev : fd for /dev of the container
char buf[PATH_MAX];
char *bdev_type;
- char *options;
- unsigned long mountflags;
bool managed;
struct lxc_mount_options mnt_opts;
struct lxc_storage *storage;
* Automatic mounts for LXC to perform inside the container
*/
enum {
- LXC_AUTO_PROC_RW = 0x001, /* /proc read-write */
- LXC_AUTO_PROC_MIXED = 0x002, /* /proc/sys and /proc/sysrq-trigger read-only */
- LXC_AUTO_PROC_MASK = 0x003,
-
- LXC_AUTO_SYS_RW = 0x004, /* /sys */
- LXC_AUTO_SYS_RO = 0x008, /* /sys read-only */
- LXC_AUTO_SYS_MIXED = 0x00C, /* /sys read-only and /sys/class/net read-write */
- LXC_AUTO_SYS_MASK = 0x00C,
-
- LXC_AUTO_CGROUP_RO = 0x010, /* /sys/fs/cgroup (partial mount, read-only) */
- LXC_AUTO_CGROUP_RW = 0x020, /* /sys/fs/cgroup (partial mount, read-write) */
- LXC_AUTO_CGROUP_MIXED = 0x030, /* /sys/fs/cgroup (partial mount, paths r/o, cgroup r/w) */
- LXC_AUTO_CGROUP_FULL_RO = 0x040, /* /sys/fs/cgroup (full mount, read-only) */
- LXC_AUTO_CGROUP_FULL_RW = 0x050, /* /sys/fs/cgroup (full mount, read-write) */
- LXC_AUTO_CGROUP_FULL_MIXED = 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
+ /* /proc read-write */
+ LXC_AUTO_PROC_RW = BIT(0),
+ /* /proc/sys and /proc/sysrq-trigger read-only */
+ LXC_AUTO_PROC_MIXED = BIT(1),
+ LXC_AUTO_PROC_MASK = LXC_AUTO_PROC_RW |
+ LXC_AUTO_PROC_MIXED,
+ /* /sys read-write */
+ LXC_AUTO_SYS_RW = BIT(2),
+ /* /sys read-only */
+ LXC_AUTO_SYS_RO = BIT(3),
+ /* /sys read-only and /sys/class/net read-write */
+ LXC_AUTO_SYS_MIXED = LXC_AUTO_SYS_RW |
+ LXC_AUTO_SYS_RO,
+ LXC_AUTO_SYS_MASK = LXC_AUTO_SYS_MIXED,
+
+ /* /sys/fs/cgroup (partial mount, read-only) */
+ LXC_AUTO_CGROUP_RO = BIT(4),
+ /* /sys/fs/cgroup (partial mount, read-write) */
+ LXC_AUTO_CGROUP_RW = BIT(5),
+ /* /sys/fs/cgroup (partial mount, paths r/o, cgroup r/w) */
+ LXC_AUTO_CGROUP_MIXED = LXC_AUTO_CGROUP_RO |
+ LXC_AUTO_CGROUP_RW,
+ /* /sys/fs/cgroup (full mount, read-only) */
+ LXC_AUTO_CGROUP_FULL_RO = BIT(6),
+ /* /sys/fs/cgroup (full mount, read-write) */
+ LXC_AUTO_CGROUP_FULL_RW = BIT(7),
+ /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
+ LXC_AUTO_CGROUP_FULL_MIXED = LXC_AUTO_CGROUP_FULL_RO |
+ LXC_AUTO_CGROUP_FULL_RW,
+
+ /*
+ * Mount a pure read-write cgroup2 layout in the container independent
+ * of the cgroup layout used on the host.
+ */
+ LXC_AUTO_CGROUP2_RW = BIT(8),
+ /*
+ * Mount a pure read-only cgroup2 layout in the container independent
+ * of the cgroup layout used on the host.
+ */
+ LXC_AUTO_CGROUP2_RO = BIT(9),
+
/*
* These are defined in such a way as to retain binary compatibility
* with earlier versions of this code. If the previous mask is applied,
* both of these will default back to the _MIXED variants, which is
* safe.
*/
- LXC_AUTO_CGROUP_NOSPEC = 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
- LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
- LXC_AUTO_CGROUP_FORCE = 0x100, /* mount cgroups even when cgroup namespaces are supported */
- LXC_AUTO_CGROUP_MASK = 0x1F0, /* all known cgroup options */
-
- LXC_AUTO_SHMOUNTS = 0x200, /* shared mount point */
- LXC_AUTO_SHMOUNTS_MASK = 0x200, /* shared mount point mask */
- LXC_AUTO_ALL_MASK = 0x1FF, /* all known settings */
+ /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
+ LXC_AUTO_CGROUP_NOSPEC = 0x0B0, /* literal value: bits 4, 5 and 7 set */
+ /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
+ LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* literal value: bits 5, 6 and 7 set */
+ /* mount cgroups even when cgroup namespaces are supported */
+ LXC_AUTO_CGROUP_FORCE = BIT(10),
+ /* all known cgroup options */
+ LXC_AUTO_CGROUP_MASK = LXC_AUTO_CGROUP_MIXED |
+ LXC_AUTO_CGROUP_FULL_MIXED |
+ LXC_AUTO_CGROUP_NOSPEC |
+ LXC_AUTO_CGROUP_FULL_NOSPEC |
+ LXC_AUTO_CGROUP_FORCE |
+ LXC_AUTO_CGROUP2_RW |
+ LXC_AUTO_CGROUP2_RO,
+
+ /* shared mount point */
+ LXC_AUTO_SHMOUNTS = BIT(11),
+ /* shared mount point mask */
+ LXC_AUTO_SHMOUNTS_MASK = LXC_AUTO_SHMOUNTS,
+
+ /* all known settings */
+ LXC_AUTO_ALL_MASK = LXC_AUTO_PROC_MASK |
+ LXC_AUTO_SYS_MASK |
+ LXC_AUTO_CGROUP_MASK, /* LXC_AUTO_SHMOUNTS_MASK is not part of this mask */
};
enum lxchooks {
struct lxc_state_client { /* one client record: a file descriptor plus a per-state array of MAX_STATE entries */
int clientfd;
lxc_state_t states[MAX_STATE];
+ struct list_head head; /* list linkage; presumably chains entries on lxc_conf.state_clients - confirm */
};
typedef enum lxc_bpf_devices_rule_t {
int minor;
char access[4];
int allow;
+ struct list_head head;
};
struct bpf_devices { /* device rule set: a rule-type selector plus the list of device entries */
lxc_bpf_devices_rule_t list_type;
- struct lxc_list device_item;
+ struct list_head devices; /* list head; lxc_clear_cgroup2_devices() walks it as struct device_item entries linked through their head member */
};
struct timens_offsets {
int64_t ns_monotonic;
};
+struct environment_entry { /* key/value string pair; presumably one element of lxc_conf.environment - confirm */
+ char *key;
+ char *val;
+ struct list_head head; /* list linkage for this entry */
+};
+
+struct cap_entry { /* one capability stored both by name and as a numeric value */
+ char *cap_name;
+ __u32 cap; /* NOTE(review): presumably the number parse_cap() yields for cap_name - confirm */
+ struct list_head head; /* list linkage; presumably chains entries on struct caps.list - confirm */
+};
+
+struct caps { /* capability configuration held in lxc_conf.caps, where it replaces the separate caps and keepcaps lists */
+ int keep; /* NOTE(review): looks like a flag selecting keep-list versus drop-list meaning for the entries - confirm */
+ struct list_head list; /* list head; presumably of struct cap_entry - confirm */
+};
+
+struct string_entry { /* a single string value that can be linked into a list */
+ char *val;
+ struct list_head head; /* list linkage for this entry */
+};
+
struct lxc_conf {
/* Pointer to the name of the container. Do not free! */
const char *name;
struct utsname *utsname;
struct {
- struct lxc_list cgroup;
- struct lxc_list cgroup2;
+ struct list_head cgroup;
+ struct list_head cgroup2;
struct bpf_devices bpf_devices;
};
struct {
- struct lxc_list id_map;
+ struct list_head id_map;
/*
* Pointer to the idmap entry for the container's root uid in
struct {
char *fstab;
int auto_mounts;
- struct lxc_list mount_list;
+ struct list_head mount_entries;
};
- struct lxc_list caps;
- struct lxc_list keepcaps;
+ struct caps caps;
/* /dev/tty<idx> devices */
struct lxc_tty_info ttys;
struct {
unsigned int hooks_version;
- struct lxc_list hooks[NUM_LXC_HOOKS];
+ struct list_head hooks[NUM_LXC_HOOKS];
};
char *lsm_aa_profile;
bool lsm_aa_profile_created;
unsigned int lsm_aa_allow_nesting;
unsigned int lsm_aa_allow_incomplete;
- struct lxc_list lsm_aa_raw;
+ struct list_head lsm_aa_raw;
char *lsm_se_context;
char *lsm_se_keyring_context;
bool keyring_disable_session;
unsigned int start_auto;
unsigned int start_delay;
int start_order;
- struct lxc_list groups;
+ struct list_head groups;
int nbd_idx;
/* unshare the mount namespace in the monitor */
unsigned int monitor_unshare;
unsigned int monitor_signal_pdeath;
- /* list of included files */
- struct lxc_list includes;
- /* config entries which are not "lxc.*" are aliens */
- struct lxc_list aliens;
-
/* list of environment variables we'll add to the container when
* started */
- struct lxc_list environment;
+ struct list_head environment;
/* text representation of the config file */
char *unexpanded_config;
bool no_new_privs;
/* RLIMIT_* limits */
- struct lxc_list limits;
+ struct list_head limits;
/* Contains generic info about the cgroup configuration for this
* container. Note that struct lxc_cgroup contains a union. It is only
char *init_cwd;
/* A list of clients registered to be informed about a container state. */
- struct lxc_list state_clients;
+ struct list_head state_clients;
/* sysctls */
- struct lxc_list sysctls;
+ struct list_head sysctls;
/* procs */
- struct lxc_list procs;
+ struct list_head procs;
struct shmount {
/* Absolute path to the shared mount point on the host */
} shmount;
struct timens_offsets timens;
+
+ bool sched_core;
+ __u64 sched_core_cookie;
};
__hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
__hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
__hidden extern int lxc_idmapped_mounts_parent(struct lxc_handler *handler);
-__hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
+__hidden extern int lxc_map_ids(struct list_head *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
__hidden extern int lxc_clear_config_caps(struct lxc_conf *c);
-__hidden extern int lxc_clear_config_keepcaps(struct lxc_conf *c);
__hidden extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version);
__hidden extern int lxc_clear_mount_entries(struct lxc_conf *c);
__hidden extern int lxc_clear_automounts(struct lxc_conf *c);
__hidden extern int lxc_clear_limits(struct lxc_conf *c, const char *key);
__hidden extern int lxc_delete_autodev(struct lxc_handler *handler);
__hidden extern int lxc_clear_autodev_tmpfs_size(struct lxc_conf *c);
-__hidden extern void lxc_clear_includes(struct lxc_conf *conf);
__hidden extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name,
const char *lxcpath);
__hidden extern int lxc_setup(struct lxc_handler *handler);
__hidden extern int lxc_setup_parent(struct lxc_handler *handler);
-__hidden extern int setup_resource_limits(struct lxc_list *limits, pid_t pid);
+__hidden extern int setup_resource_limits(struct lxc_conf *conf, pid_t pid);
__hidden extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype);
__hidden extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype);
__hidden extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data,
__hidden extern int parse_mount_attrs(struct lxc_mount_options *opts, const char *mntopts);
__hidden extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
__hidden extern void suggest_default_idmap(void);
-__hidden extern FILE *make_anonymous_mount_file(struct lxc_list *mount, bool include_nesting_helpers);
-__hidden extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings);
+__hidden extern FILE *make_anonymous_mount_file(const struct list_head *mount,
+ bool include_nesting_helpers);
__hidden extern int run_script(const char *name, const char *section, const char *script, ...);
__hidden extern int run_script_argv(const char *name, unsigned int hook_version, const char *section,
const char *script, const char *hookname, char **argsin);
-__hidden extern int in_caplist(int cap, struct lxc_list *caps);
-static inline bool lxc_wants_cap(int cap, struct lxc_conf *conf)
+__hidden extern bool has_cap(__u32 cap, struct lxc_conf *conf);
+static inline bool lxc_wants_cap(__u32 cap, struct lxc_conf *conf) /* false when the last-cap lookup fails or @cap exceeds it; otherwise the has_cap() result for @conf */
{
- if (lxc_caps_last_cap() < cap)
+ __u32 last_cap;
+ int ret;
+
+ ret = lxc_caps_last_cap(&last_cap);
+ if (ret) /* non-zero return from lxc_caps_last_cap(): answer false */
return false;
- if (!lxc_list_empty(&conf->keepcaps))
- return in_caplist(cap, &conf->keepcaps);
+ if (last_cap < cap) /* @cap lies above the value stored in last_cap */
+ return false;
- return !in_caplist(cap, &conf->caps);
+ return has_cap(cap, conf);
}
-__hidden extern int setup_sysctl_parameters(struct lxc_list *sysctls);
+__hidden extern int setup_sysctl_parameters(struct lxc_conf *conf);
__hidden extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key);
-__hidden extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
+__hidden extern int setup_proc_filesystem(struct lxc_conf *conf, pid_t pid);
__hidden extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
__hidden extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
__hidden extern int lxc_clear_namespace(struct lxc_conf *c);
mnt_opts->prop_flags = 0;
free_disarm(mnt_opts->data);
+ free_disarm(mnt_opts->raw_options);
}
static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
static inline void lxc_clear_cgroup2_devices(struct bpf_devices *bpf_devices) /* unlink and release every entry on bpf_devices->devices, then reset the list head to empty */
{
- struct lxc_list *list = &bpf_devices->device_item;
- struct lxc_list *it, *next;
+ struct device_item *device, *n;
- lxc_list_for_each_safe (it, list, next) {
- lxc_list_del(it);
- free(it);
- }
+ list_for_each_entry_safe(device, n, &bpf_devices->devices, head) { /* safe variant: the current entry is removed while iterating */
+ list_del(&device->head);
+ free(device); /* the removed loop freed each node; without this every entry leaks. Assumes entries are heap-allocated - confirm at the insertion site */
+ }
- lxc_list_init(&bpf_devices->device_item);
+ INIT_LIST_HEAD(&bpf_devices->devices);
}
static inline int lxc_personality(personality_t persona)
return personality(persona);
}
+__hidden extern int lxc_set_environment(const struct lxc_conf *conf);
+__hidden extern int parse_cap(const char *cap_name, __u32 *cap);
+
#endif /* __LXC_CONF_H */