1 /* SPDX-License-Identifier: LGPL-2.1+ */
8 #include <linux/magic.h>
10 #include <netinet/in.h>
13 #include <sys/param.h>
14 #include <sys/types.h>
17 #include "attach_options.h"
22 #include "lxcseccomp.h"
23 #include "memory_utils.h"
24 #include "namespace.h"
28 #include "storage/storage.h"
29 #include "string_utils.h"
30 #include "syscall_wrappers.h"
33 #if HAVE_SYS_RESOURCE_H
34 #include <sys/resource.h>
37 #if HAVE_SCMP_FILTER_CTX
38 typedef void * scmp_filter_ctx
;
/*
 * Integer type for personality values: it is the type of the `personality`
 * member of the container configuration and of the argument taken by
 * lxc_personality().
 */
41 typedef signed long personality_t
;
43 /* worth moving to configure.ac? */
44 #define subuidfile "/etc/subuid"
45 #define subgidfile "/etc/subgid"
48 * Defines a generic struct to configure the control group. It is up to the
49 * programmer to specify the right subsystem.
50 * @subsystem : the targeted subsystem
51 * @value : the value to set
52 * @version : The version of the cgroup filesystem on which the controller
55 * @controllers : The controllers to use for this container.
56 * @dir : The name of the directory containing the container's cgroup.
57 * Note that this is a per-container setting.
61 /* information about a specific controller */
62 struct /* controller */ {
68 /* meta information about cgroup configuration */
73 char *monitor_pivot_dir
;
80 struct list_head head
;
/*
 * Release helper for a struct lxc_cgroup pointer. The helper is passed to
 * define_cleanup_function() together with the pointer type it handles.
 */
83 static void free_lxc_cgroup(struct lxc_cgroup
*ptr
)
91 define_cleanup_function(struct lxc_cgroup
*, free_lxc_cgroup
);
93 #if !HAVE_SYS_RESOURCE_H
94 #define RLIM_INFINITY ((unsigned long)-1)
96 unsigned long rlim_cur
;
97 unsigned long rlim_max
;
98 struct list_head head
;
103 * Defines a structure to configure resource limits to set via setrlimit().
104 * @resource : the resource name in lowercase without the RLIMIT_ prefix
105 * @limit : the limit to set
110 struct list_head head
;
/*
 * Release helper for a struct lxc_limit pointer: ptr->resource is handed to
 * free_disarm(). The helper is passed to define_cleanup_function() together
 * with the pointer type it handles.
 */
113 static void free_lxc_limit(struct lxc_limit
*ptr
)
116 free_disarm(ptr
->resource
);
120 define_cleanup_function(struct lxc_limit
*, free_lxc_limit
);
128 * Defines a structure to configure kernel parameters at runtime.
129 * @key : the kernel parameters will be configured without the "lxc.sysctl" prefix
130 * @value : the value to set
135 struct list_head head
;
/*
 * Release helper for a struct lxc_sysctl pointer. The helper is passed to
 * define_cleanup_function() together with the pointer type it handles.
 */
138 static void free_lxc_sysctl(struct lxc_sysctl
*ptr
)
146 define_cleanup_function(struct lxc_sysctl
*, free_lxc_sysctl
);
149 * Defines a structure to configure proc filesystem at runtime.
150 * @filename : the proc filesystem will be configured without the "lxc.proc" prefix
151 * @value : the value to set
156 struct list_head head
;
/*
 * Release helper for a struct lxc_proc pointer. The helper is passed to
 * define_cleanup_function() together with the pointer type it handles.
 */
159 static void free_lxc_proc(struct lxc_proc
*ptr
)
167 define_cleanup_function(struct lxc_proc
*, free_lxc_proc
);
170 * id_map is an id map entry. Form in confile is:
171 * lxc.idmap = u 0 9800 100
172 * lxc.idmap = u 1000 9900 100
173 * lxc.idmap = g 0 9800 100
174 * lxc.idmap = g 1000 9900 100
175 * meaning the container can use uids and gids 0-99 and 1000-1099,
176 * with [ug]id 0 mapping to [ug]id 9800 on the host, and [ug]id 1000 to
177 * [ug]id 9900 on the host.
181 unsigned long hostid
, nsid
, range
;
182 struct list_head head
;
185 /* Defines the number of tty configured and contains the
187 * @max = number of configured ttys
189 struct lxc_tty_info
{
/*
 * Pointer to the lxc_terminal_info data for the configured ttys.
 * NOTE(review): presumably an array with @max elements - confirm.
 */
193 struct lxc_terminal_info
*tty
;
/*
 * Numeric identifiers for the LXC-specific mount options, numbered from 0.
 * The names mirror the one-bit flags of struct lxc_mount_options
 * (create_dir, create_file, optional, relative).
 * NOTE(review): presumably these values index lxc_mount_options_info[],
 * which is sized by LXC_MOUNT_MAX - confirm.
 */
196 typedef enum lxc_mount_options_t
{
197 LXC_MOUNT_CREATE_DIR
= 0,
198 LXC_MOUNT_CREATE_FILE
= 1,
199 LXC_MOUNT_OPTIONAL
= 2,
200 LXC_MOUNT_RELATIVE
= 3,
203 } lxc_mount_options_t
;
/*
 * Array of LXC_MOUNT_MAX constant strings; only declared here (extern).
 * NOTE(review): presumably indexed by lxc_mount_options_t - confirm.
 */
205 __hidden
extern const char *lxc_mount_options_info
[LXC_MOUNT_MAX
];
/*
 * Parsed mount options. The first seven members are one-bit flags; the
 * remaining members carry a path buffer, two flag words and a
 * struct lxc_mount_attr. An instance is embedded in the rootfs description
 * as `mnt_opts` and is reset by put_lxc_mount_options().
 */
207 struct lxc_mount_options
{
/* One-bit flags whose names match the LXC_MOUNT_* enumerators. */
208 unsigned int create_dir
: 1;
209 unsigned int create_file
: 1;
210 unsigned int optional
: 1;
211 unsigned int relative
: 1;
/* Further one-bit flags; put_lxc_mount_options() does not touch these. */
212 unsigned int bind_recursively
: 1;
213 unsigned int propagate_recursively
: 1;
214 unsigned int bind
: 1;
/* Fixed-size path buffer of PATH_MAX bytes; cleared by writing '\0' to [0]. */
215 char userns_path
[PATH_MAX
];
/* NOTE(review): presumably mount(2) flags and propagation flags - confirm. */
216 unsigned long mnt_flags
;
217 unsigned long prop_flags
;
219 struct lxc_mount_attr attr
;
223 /* Defines a structure to store the rootfs location, the
224 * optional pivot_root, rootfs mount paths
225 * @path : the rootfs source (directory or device)
226 * @mount : where it is mounted
227 * @buf : static buffer to construct paths
228 * @bdev_type : optional backing store type
229 * @managed : whether it is managed by LXC
230 * @dfd_mnt : fd for @mount
231 * @dfd_dev : fd for /dev of the container
248 struct lxc_mount_options mnt_opts
;
249 struct lxc_storage
*storage
;
253 * Automatic mounts for LXC to perform inside the container
256 LXC_AUTO_PROC_RW
= 0x001, /* /proc read-write */
257 LXC_AUTO_PROC_MIXED
= 0x002, /* /proc/sys and /proc/sysrq-trigger read-only */
258 LXC_AUTO_PROC_MASK
= 0x003,
260 LXC_AUTO_SYS_RW
= 0x004, /* /sys */
261 LXC_AUTO_SYS_RO
= 0x008, /* /sys read-only */
262 LXC_AUTO_SYS_MIXED
= 0x00C, /* /sys read-only and /sys/class/net read-write */
263 LXC_AUTO_SYS_MASK
= 0x00C,
265 LXC_AUTO_CGROUP_RO
= 0x010, /* /sys/fs/cgroup (partial mount, read-only) */
266 LXC_AUTO_CGROUP_RW
= 0x020, /* /sys/fs/cgroup (partial mount, read-write) */
267 LXC_AUTO_CGROUP_MIXED
= 0x030, /* /sys/fs/cgroup (partial mount, paths r/o, cgroup r/w) */
268 LXC_AUTO_CGROUP_FULL_RO
= 0x040, /* /sys/fs/cgroup (full mount, read-only) */
269 LXC_AUTO_CGROUP_FULL_RW
= 0x050, /* /sys/fs/cgroup (full mount, read-write) */
270 LXC_AUTO_CGROUP_FULL_MIXED
= 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
272 * These are defined in such a way as to retain binary compatibility
273 * with earlier versions of this code. If the previous mask is applied,
274 * both of these will default back to the _MIXED variants, which is
277 LXC_AUTO_CGROUP_NOSPEC
= 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
278 LXC_AUTO_CGROUP_FULL_NOSPEC
= 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
279 LXC_AUTO_CGROUP_FORCE
= 0x100, /* mount cgroups even when cgroup namespaces are supported */
280 LXC_AUTO_CGROUP_MASK
= 0x1F0, /* all known cgroup options */
282 LXC_AUTO_SHMOUNTS
= 0x200, /* shared mount point */
283 LXC_AUTO_SHMOUNTS_MASK
= 0x200, /* shared mount point mask */
284 LXC_AUTO_ALL_MASK
= 0x1FF, /* all known settings */
/*
 * Array of NUM_LXC_HOOKS strings; only declared here (extern). The same
 * bound sizes the `hooks` list-head array in the container configuration.
 */
301 __hidden
extern char *lxchook_names
[NUM_LXC_HOOKS
];
/*
 * Record for one state client. It holds an array of MAX_STATE lxc_state_t
 * values and a list node.
 * NOTE(review): presumably linked into the configuration's `state_clients`
 * list through `head` - confirm.
 */
303 struct lxc_state_client
{
305 lxc_state_t states
[MAX_STATE
];
306 struct list_head head
;
/*
 * Kind of a bpf device cgroup list: allowlist (0) or denylist (1).
 * A value of this type is stored in the `list_type` member declared below
 * this enum.
 */
309 typedef enum lxc_bpf_devices_rule_t
{
310 LXC_BPF_DEVICE_CGROUP_ALLOWLIST
= 0,
311 LXC_BPF_DEVICE_CGROUP_DENYLIST
= 1,
312 } lxc_bpf_devices_rule_t
;
320 struct list_head head
;
324 lxc_bpf_devices_rule_t list_type
;
325 struct list_head devices
;
/*
 * Clock offsets kept in the container configuration as `timens`.
 * NOTE(review): the s_ and ns_ prefixes presumably mean seconds and
 * nanoseconds - confirm.
 */
328 struct timens_offsets
{
329 /* Currently, either s_boot or ns_boot is set, but not both. */
333 /* Currently, either s_monotonic or ns_monotonic is set, but not both. */
/* Signed 64-bit offset for the monotonic clock (ns_ variant). */
335 int64_t ns_monotonic
;
/*
 * List element type carrying a `head` list node.
 * NOTE(review): presumably an element of the configuration's `environment`
 * list - confirm.
 */
338 struct environment_entry
{
341 struct list_head head
;
347 struct list_head head
;
352 struct list_head list
;
/* List element type carrying a `head` list node. */
355 struct string_entry
{
357 struct list_head head
;
361 /* Pointer to the name of the container. Do not free! */
365 personality_t personality
;
366 struct utsname
*utsname
;
369 struct list_head cgroup
;
370 struct list_head cgroup2
;
371 struct bpf_devices bpf_devices
;
375 struct list_head id_map
;
378 * Pointer to the idmap entry for the container's root uid in
379 * the id_map list. Do not free!
381 const struct id_map
*root_nsuid_map
;
384 * Pointer to the idmap entry for the container's root gid in
385 * the id_map list. Do not free!
387 const struct id_map
*root_nsgid_map
;
390 struct list_head netdevs
;
395 struct list_head mount_entries
;
400 /* /dev/tty<idx> devices */
401 struct lxc_tty_info ttys
;
402 /* /dev/console device */
403 struct lxc_terminal console
;
404 /* maximum pty devices allowed by devpts mount */
406 /* file descriptor for the container's /dev/pts mount */
409 /* set to true when rootfs has been setup */
411 struct lxc_rootfs rootfs
;
416 unsigned int hooks_version
;
417 struct list_head hooks
[NUM_LXC_HOOKS
];
420 char *lsm_aa_profile
;
421 char *lsm_aa_profile_computed
;
422 bool lsm_aa_profile_created
;
423 unsigned int lsm_aa_allow_nesting
;
424 unsigned int lsm_aa_allow_incomplete
;
425 struct list_head lsm_aa_raw
;
426 char *lsm_se_context
;
427 char *lsm_se_keyring_context
;
428 bool keyring_disable_session
;
429 bool transient_procfs_mnt
;
430 struct lxc_seccomp seccomp
;
432 unsigned int autodev
; /* if 1, mount and fill a /dev at start */
433 int autodevtmpfssize
; /* size of the /dev tmpfs */
434 int haltsignal
; /* signal used to halt container */
435 int rebootsignal
; /* signal used to reboot container */
436 int stopsignal
; /* signal used to hard stop container */
437 char *rcfile
; /* Copy of the top level rcfile we read */
439 /* Logfile and loglevel can be set in a container config file. Those
440 * function as defaults. The defaults can be overridden by command line.
441 * However we don't want the command line specified values to be saved
442 * on c->save_config(). So we store the config file specified values
444 char *logfile
; /* the logfile as specified in config */
445 int loglevel
; /* loglevel as specified in config (if any) */
448 unsigned int start_auto
;
449 unsigned int start_delay
;
451 struct list_head groups
;
454 /* unshare the mount namespace in the monitor */
455 unsigned int monitor_unshare
;
456 unsigned int monitor_signal_pdeath
;
458 /* list of environment variables we'll add to the container when
460 struct list_head environment
;
462 /* text representation of the config file */
463 char *unexpanded_config
;
464 size_t unexpanded_len
;
465 size_t unexpanded_alloced
;
467 /* default command for lxc-execute */
473 /* The uid to use for the container. */
475 /* The gid to use for the container. */
477 /* The groups to use for the container. */
478 lxc_groups_t init_groups
;
480 /* indicator if the container will be destroyed on shutdown */
481 unsigned int ephemeral
;
483 /* The facility to pass to syslog. Lets users establish as what type of
484 * program liblxc is supposed to write to the syslog. */
487 /* Whether PR_SET_NO_NEW_PRIVS will be set for the container. */
490 /* RLIMIT_* limits */
491 struct list_head limits
;
493 /* Contains generic info about the cgroup configuration for this
494 * container. Note that struct lxc_cgroup contains a union. It is only
495 * valid to access the members of the anonymous "meta" struct within
498 struct lxc_cgroup cgroup_meta
;
503 char *ns_share
[LXC_NS_MAX
];
506 /* init working directory */
509 /* A list of clients registered to be informed about a container state. */
510 struct list_head state_clients
;
513 struct list_head sysctls
;
516 struct list_head procs
;
519 /* Absolute path to the shared mount point on the host */
521 /* Absolute path (in the container) to the shared mount point */
525 struct timens_offsets timens
;
528 __u64 sched_core_cookie
;
/*
 * write_id_mapping() takes the id type, a target pid and a buffer with its
 * size, and returns an int.
 * NOTE(review): presumably @buf holds the mapping text written for @pid -
 * confirm against the implementation.
 */
531 __hidden
extern int write_id_mapping(enum idtype idtype
, pid_t pid
, const char *buf
, size_t buf_size
)
/* Thread-local pointer to a struct lxc_conf; defined in another file. */
534 extern thread_local
struct lxc_conf
*current_config
;
/*
 * Prototypes operating on a container configuration (struct lxc_conf) or on
 * a struct lxc_handler. All are declared __hidden and defined elsewhere.
 */
/* Takes the container name, a hook name, the config and an argv array. */
536 __hidden
extern int run_lxc_hooks(const char *name
, char *hook
, struct lxc_conf
*conf
, char *argv
[]);
/*
 * lxc_conf_init() takes no arguments and returns a struct lxc_conf pointer;
 * lxc_conf_free() accepts such a pointer and returns nothing.
 */
537 __hidden
extern struct lxc_conf
*lxc_conf_init(void);
538 __hidden
extern void lxc_conf_free(struct lxc_conf
*conf
);
/* Storage and rootfs helpers; the int-returning ones report a status code. */
539 __hidden
extern int lxc_storage_prepare(struct lxc_conf
*conf
);
540 __hidden
extern int lxc_rootfs_prepare(struct lxc_conf
*conf
, bool userns
);
541 __hidden
extern void lxc_storage_put(struct lxc_conf
*conf
);
542 __hidden
extern int lxc_rootfs_init(struct lxc_conf
*conf
, bool userns
);
543 __hidden
extern int lxc_rootfs_prepare_parent(struct lxc_handler
*handler
);
544 __hidden
extern int lxc_idmapped_mounts_parent(struct lxc_handler
*handler
);
/* Takes a list head of id-map entries and the pid they are applied to. */
545 __hidden
extern int lxc_map_ids(struct list_head
*idmap
, pid_t pid
);
/* tty helpers: creation takes the container name and config, deletion the tty info. */
546 __hidden
extern int lxc_create_tty(const char *name
, struct lxc_conf
*conf
);
547 __hidden
extern void lxc_delete_tty(struct lxc_tty_info
*ttys
);
/*
 * lxc_clear_*() family: each takes the container configuration and returns
 * an int. Variants with a `key` parameter also receive a configuration key
 * string; lxc_clear_cgroups() additionally takes a cgroup version number.
 */
548 __hidden
extern int lxc_clear_config_caps(struct lxc_conf
*c
);
549 __hidden
extern int lxc_clear_cgroups(struct lxc_conf
*c
, const char *key
, int version
);
550 __hidden
extern int lxc_clear_mount_entries(struct lxc_conf
*c
);
551 __hidden
extern int lxc_clear_automounts(struct lxc_conf
*c
);
552 __hidden
extern int lxc_clear_hooks(struct lxc_conf
*c
, const char *key
);
553 __hidden
extern int lxc_clear_idmaps(struct lxc_conf
*c
);
554 __hidden
extern int lxc_clear_groups(struct lxc_conf
*c
);
555 __hidden
extern int lxc_clear_environment(struct lxc_conf
*c
);
556 __hidden
extern int lxc_clear_limits(struct lxc_conf
*c
, const char *key
);
/* Unlike the lxc_clear_*() helpers above, this one takes the handler. */
557 __hidden
extern int lxc_delete_autodev(struct lxc_handler
*handler
);
558 __hidden
extern int lxc_clear_autodev_tmpfs_size(struct lxc_conf
*c
);
/* Setup entry points; all return an int status. */
559 __hidden
extern int lxc_setup_rootfs_prepare_root(struct lxc_conf
*conf
, const char *name
,
560 const char *lxcpath
);
561 __hidden
extern int lxc_setup(struct lxc_handler
*handler
);
562 __hidden
extern int lxc_setup_parent(struct lxc_handler
*handler
);
/* Takes the configuration and the pid the limits are applied to. */
563 __hidden
extern int setup_resource_limits(struct lxc_conf
*conf
, pid_t pid
);
/* Id-map queries: both take a read-only configuration and the id type. */
564 __hidden
extern int find_unmapped_nsid(const struct lxc_conf
*conf
, enum idtype idtype
);
565 __hidden
extern int mapped_hostid(unsigned id
, const struct lxc_conf
*conf
, enum idtype idtype
);
/*
 * Both userns_exec_*() variants take a callback `fn`, an opaque `data`
 * pointer handed to it, and a name string for the callback.
 * userns_exec_1() takes the configuration as const, userns_exec_full() does
 * not.
 */
566 __hidden
extern int userns_exec_1(const struct lxc_conf
*conf
, int (*fn
)(void *), void *data
,
567 const char *fn_name
);
568 __hidden
extern int userns_exec_full(struct lxc_conf
*conf
, int (*fn
)(void *), void *data
,
569 const char *fn_name
);
/*
 * Mount-option parsers. Each takes an option string and reports results
 * through its output pointer(s); all return an int status.
 */
570 __hidden
extern int parse_mntopts_legacy(const char *mntopts
, unsigned long *mntflags
, char **mntdata
);
571 __hidden
extern int parse_propagationopts(const char *mntopts
, unsigned long *pflags
);
/* Note: this variant takes a non-const option string. */
572 __hidden
extern int parse_lxc_mount_attrs(struct lxc_mount_options
*opts
, char *mnt_opts
);
573 __hidden
extern int parse_mount_attrs(struct lxc_mount_options
*opts
, const char *mntopts
);
574 __hidden
extern void tmp_proc_unmount(struct lxc_conf
*lxc_conf
);
575 __hidden
extern void suggest_default_idmap(void);
/* Returns a FILE pointer built from a list of mount entries. */
576 __hidden
extern FILE *make_anonymous_mount_file(const struct list_head
*mount
,
577 bool include_nesting_helpers
);
/* Script runners: run_script() is variadic, run_script_argv() takes an argument vector. */
578 __hidden
extern int run_script(const char *name
, const char *section
, const char *script
, ...);
579 __hidden
extern int run_script_argv(const char *name
, unsigned int hook_version
, const char *section
,
580 const char *script
, const char *hookname
, char **argsin
);
/* Capability query used by lxc_wants_cap(). */
582 __hidden
extern bool has_cap(int cap
, struct lxc_conf
*conf
);
/*
 * Capability check for @cap against the configuration @conf.
 * @cap is first compared with lxc_caps_last_cap(); otherwise the result is
 * whatever has_cap(cap, conf) returns.
 * NOTE(review): the branch taken when @cap exceeds lxc_caps_last_cap()
 * presumably returns false - confirm.
 */
583 static inline bool lxc_wants_cap(int cap
, struct lxc_conf
*conf
)
585 if (lxc_caps_last_cap() < cap
)
588 return has_cap(cap
, conf
);
/*
 * sysctl and proc configuration helpers, each paired with a clear function
 * that takes the configuration and a key string.
 */
591 __hidden
extern int setup_sysctl_parameters(struct lxc_conf
*conf
);
592 __hidden
extern int lxc_clear_sysctls(struct lxc_conf
*c
, const char *key
);
593 __hidden
extern int setup_proc_filesystem(struct lxc_conf
*conf
, pid_t pid
);
594 __hidden
extern int lxc_clear_procs(struct lxc_conf
*c
, const char *key
);
595 __hidden
extern int lxc_clear_apparmor_raw(struct lxc_conf
*c
);
596 __hidden
extern int lxc_clear_namespace(struct lxc_conf
*c
);
/*
 * Takes two callbacks, fn_parent and fn_child, each with its own opaque
 * data pointer.
 */
597 __hidden
extern int userns_exec_minimal(const struct lxc_conf
*conf
, int (*fn_parent
)(void *),
598 void *fn_parent_data
, int (*fn_child
)(void *),
599 void *fn_child_data
);
/* Takes a path and a file descriptor for it; chown_mapped_root() passes -EBADF as the fd. */
600 __hidden
extern int userns_exec_mapped_root(const char *path
, int path_fd
,
601 const struct lxc_conf
*conf
);
/*
 * Path-only wrapper around userns_exec_mapped_root(): forwards @path and
 * @conf and passes -EBADF as the path_fd argument. Returns that call's
 * result unchanged.
 */
602 static inline int chown_mapped_root(const char *path
, const struct lxc_conf
*conf
)
604 return userns_exec_mapped_root(path
, -EBADF
, conf
);
/* Parent-side and child-side counterparts; both take the handler and return an int. */
607 __hidden
extern int lxc_sync_fds_parent(struct lxc_handler
*handler
);
608 __hidden
extern int lxc_sync_fds_child(struct lxc_handler
*handler
);
/*
 * Select the rootfs mount path to use.
 * Returns rootfs->mount when is_empty_string(rootfs->path) is false, and
 * the constant string "/" otherwise. @rootfs is dereferenced without a
 * NULL check in the lines shown.
 */
610 static inline const char *get_rootfs_mnt(const struct lxc_rootfs
*rootfs
)
/* Fallback returned when no rootfs path is configured. */
612 static const char *s
= "/";
614 return !is_empty_string(rootfs
->path
) ? rootfs
->mount
: s
;
/*
 * Reset a struct lxc_mount_options in place.
 * Clears the create_dir, create_file, optional and relative flags, empties
 * userns_path, zeroes mnt_flags and prop_flags, and hands the `data` and
 * `raw_options` members to free_disarm().
 * NOTE(review): bind_recursively, propagate_recursively, bind and attr are
 * not reset in the lines shown - confirm that this is intentional.
 */
617 static inline void put_lxc_mount_options(struct lxc_mount_options
*mnt_opts
)
619 mnt_opts
->create_dir
= 0;
620 mnt_opts
->create_file
= 0;
621 mnt_opts
->optional
= 0;
622 mnt_opts
->relative
= 0;
/* Truncate the path buffer to the empty string rather than wiping it. */
623 mnt_opts
->userns_path
[0] = '\0';
624 mnt_opts
->mnt_flags
= 0;
625 mnt_opts
->prop_flags
= 0;
627 free_disarm(mnt_opts
->data
);
628 free_disarm(mnt_opts
->raw_options
);
/*
 * Release the resources referenced by @rootfs.
 * Passes dfd_host, dfd_mnt, dfd_dev, fd_path_pin and dfd_idmapped to
 * close_prot_errno_disarm(), resets the embedded mount options with
 * put_lxc_mount_options(), calls storage_put() on the storage pointer and
 * then sets that pointer to NULL.
 * NOTE(review): @unpin presumably controls whether fd_path_pin is closed -
 * confirm; its use is not visible in the lines shown.
 */
631 static inline void put_lxc_rootfs(struct lxc_rootfs
*rootfs
, bool unpin
)
634 close_prot_errno_disarm(rootfs
->dfd_host
);
635 close_prot_errno_disarm(rootfs
->dfd_mnt
);
636 close_prot_errno_disarm(rootfs
->dfd_dev
);
638 close_prot_errno_disarm(rootfs
->fd_path_pin
);
639 close_prot_errno_disarm(rootfs
->dfd_idmapped
);
640 put_lxc_mount_options(&rootfs
->mnt_opts
);
641 storage_put(rootfs
->storage
);
/* Drop the reference so a later call does not hand it to storage_put() again. */
642 rootfs
->storage
= NULL
;
/*
 * Empty the device list of @bpf_devices.
 * Walks bpf_devices->devices with the removal-safe iterator, unlinks each
 * device_item with list_del(), and finally re-initialises the list head.
 * NOTE(review): confirm that each unlinked entry is also freed; no free is
 * visible in the lines shown.
 */
646 static inline void lxc_clear_cgroup2_devices(struct bpf_devices
*bpf_devices
)
/* `n` holds the next entry so the current one can be unlinked mid-walk. */
648 struct device_item
*device
, *n
;
650 list_for_each_entry_safe(device
, n
, &bpf_devices
->devices
, head
)
651 list_del(&device
->head
);
653 INIT_LIST_HEAD(&bpf_devices
->devices
);
/*
 * Apply @persona through personality() and return its result.
 * One path returns ret_errno(EINVAL) instead.
 * NOTE(review): the guard for that path is not visible here; presumably it
 * rejects an invalid @persona - confirm.
 */
656 static inline int lxc_personality(personality_t persona
)
659 return ret_errno(EINVAL
);
661 return personality(persona
);
/* Takes a read-only container configuration and returns an int status. */
664 __hidden
extern int lxc_set_environment(const struct lxc_conf
*conf
);
/* Takes a capability string and returns an int. */
665 __hidden
extern int parse_cap(const char *cap
);
667 #endif /* __LXC_CONF_H */