1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #include <linux/magic.h>
11 #include <netinet/in.h>
14 #include <sys/param.h>
15 #include <sys/types.h>
21 #include "lxcseccomp.h"
22 #include "memory_utils.h"
27 #if HAVE_SYS_RESOURCE_H
28 #include <sys/resource.h>
31 #if HAVE_SCMP_FILTER_CTX
32 typedef void * scmp_filter_ctx
;
35 /* worth moving to configure.ac? */
36 #define subuidfile "/etc/subuid"
37 #define subgidfile "/etc/subgid"
40 * Defines a generic struct to configure the control group. It is up to the
41 * programmer to specify the right subsystem.
42 * @subsystem : the targeted subsystem
43 * @value : the value to set
44 * @version : The version of the cgroup filesystem on which the controller
47 * @controllers : The controllers to use for this container.
48 * @dir : The name of the directory containing the container's cgroup.
49 * Note that this is a per-container setting.
53 /* information about a specific controller */
54 struct /* controller */ {
60 /* meta information about cgroup configuration */
65 char *monitor_pivot_dir
;
73 static void free_lxc_cgroup(struct lxc_cgroup
*ptr
)
81 define_cleanup_function(struct lxc_cgroup
*, free_lxc_cgroup
);
83 #if !HAVE_SYS_RESOURCE_H
84 #define RLIM_INFINITY ((unsigned long)-1)
86 unsigned long rlim_cur
;
87 unsigned long rlim_max
;
92 * Defines a structure to configure resource limits to set via setrlimit().
93 * @resource : the resource name in lowercase without the RLIMIT_ prefix
94 * @limit : the limit to set
101 static void free_lxc_limit(struct lxc_limit
*ptr
)
108 define_cleanup_function(struct lxc_limit
*, free_lxc_limit
);
116 * Defines a structure to configure kernel parameters at runtime.
117 * @key : the kernel parameters will be configured without the "lxc.sysctl" prefix
118 * @value : the value to set
126 * Defines a structure to configure proc filesystem at runtime.
127 * @filename : the proc filesystem will be configured without the "lxc.proc" prefix
128 * @value : the value to set
136 * id_map is an id map entry. Form in confile is:
137 * lxc.idmap = u 0 9800 100
138 * lxc.idmap = u 1000 9900 100
139 * lxc.idmap = g 0 9800 100
140 * lxc.idmap = g 1000 9900 100
141 * meaning the container can use uids and gids 0-99 and 1000-1099,
142 * with [ug]id 0 mapping to [ug]id 9800 on the host, and [ug]id 1000 to
143 * [ug]id 9900 on the host.
147 unsigned long hostid
, nsid
, range
;
150 /* Defines the number of tty configured and contains the
152 * @max = number of configured ttys
154 struct lxc_tty_info
{
158 struct lxc_terminal_info
*tty
;
161 /* Defines a structure to store the rootfs location, the
162 * optionals pivot_root, rootfs mount paths
163 * @path : the rootfs source (directory or device)
164 * @mount : where it is mounted
165 * @bdev_type : optional backing store type
166 * @options : mount options
167 * @mountflags : the portion of @options that are flags
168 * @data : the portion of @options that are not flags
169 * @managed : whether it is managed by LXC
170 * @mntpt_fd : fd for @mount
171 * @dev_mntpt_fd : fd for /dev of the container
180 unsigned long mountflags
;
186 * Automatic mounts for LXC to perform inside the container
189 LXC_AUTO_PROC_RW
= 0x001, /* /proc read-write */
190 LXC_AUTO_PROC_MIXED
= 0x002, /* /proc/sys and /proc/sysrq-trigger read-only */
191 LXC_AUTO_PROC_MASK
= 0x003, /* both /proc bits (LXC_AUTO_PROC_RW | LXC_AUTO_PROC_MIXED) */
193 LXC_AUTO_SYS_RW
= 0x004, /* /sys */
194 LXC_AUTO_SYS_RO
= 0x008, /* /sys read-only */
195 LXC_AUTO_SYS_MIXED
= 0x00C, /* /sys read-only and /sys/class/net read-write */
196 LXC_AUTO_SYS_MASK
= 0x00C, /* both /sys bits (LXC_AUTO_SYS_RW | LXC_AUTO_SYS_RO); same value as LXC_AUTO_SYS_MIXED */
198 LXC_AUTO_CGROUP_RO
= 0x010, /* /sys/fs/cgroup (partial mount, read-only) */
199 LXC_AUTO_CGROUP_RW
= 0x020, /* /sys/fs/cgroup (partial mount, read-write) */
200 LXC_AUTO_CGROUP_MIXED
= 0x030, /* /sys/fs/cgroup (partial mount, paths r/o, cgroup r/w) */
201 LXC_AUTO_CGROUP_FULL_RO
= 0x040, /* /sys/fs/cgroup (full mount, read-only) */
202 LXC_AUTO_CGROUP_FULL_RW
= 0x050, /* /sys/fs/cgroup (full mount, read-write) */
203 LXC_AUTO_CGROUP_FULL_MIXED
= 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
205 * These are defined in such a way as to retain binary compatibility
206 * with earlier versions of this code. If the previous mask is applied,
207 * both of these will default back to the _MIXED variants, which is
210 LXC_AUTO_CGROUP_NOSPEC
= 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
211 LXC_AUTO_CGROUP_FULL_NOSPEC
= 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
212 LXC_AUTO_CGROUP_FORCE
= 0x100, /* mount cgroups even when cgroup namespaces are supported */
213 LXC_AUTO_CGROUP_MASK
= 0x1F0, /* all known cgroup options; previously documented as not containing LXC_AUTO_CGROUP_FORCE, but 0x1F0 does include bit 0x100 - NOTE(review): confirm which is intended */
215 LXC_AUTO_SHMOUNTS
= 0x200, /* shared mount point */
216 LXC_AUTO_SHMOUNTS_MASK
= 0x200, /* shared mount point mask */
217 LXC_AUTO_ALL_MASK
= 0x1FF, /* all known settings; NOTE(review): 0x1FF excludes LXC_AUTO_SHMOUNTS (0x200) - confirm this is intended */
234 __hidden
extern char *lxchook_names
[NUM_LXC_HOOKS
];
236 struct lxc_state_client
{
238 lxc_state_t states
[MAX_STATE
];
242 LXC_BPF_DEVICE_CGROUP_LOCAL_RULE
= -1,
243 LXC_BPF_DEVICE_CGROUP_ALLOWLIST
= 0,
244 LXC_BPF_DEVICE_CGROUP_DENYLIST
= 1,
254 * LXC_BPF_DEVICE_CGROUP_LOCAL_RULE -> no global rule
255 * LXC_BPF_DEVICE_CGROUP_ALLOWLIST -> allowlist (deny all)
256 * LXC_BPF_DEVICE_CGROUP_DENYLIST -> denylist (allow all)
261 struct timens_offsets
{
262 /* Currently, either s_boot or ns_boot is set, but not both. */
266 /* Currently, either s_monotonic or ns_monotonic is set, but not both. */
268 int64_t ns_monotonic
;
272 /* Pointer to the name of the container. Do not free! */
276 signed long personality
;
277 struct utsname
*utsname
;
280 struct lxc_list cgroup
;
281 struct lxc_list cgroup2
;
282 struct bpf_program
*cgroup2_devices
;
283 /* This should be reimplemented as a hashmap. */
284 struct lxc_list devices
;
288 struct lxc_list id_map
;
291 * Pointer to the idmap entry for the container's root uid in
292 * the id_map list. Do not free!
294 const struct id_map
*root_nsuid_map
;
297 * Pointer to the idmap entry for the container's root gid in
298 * the id_map list. Do not free!
300 const struct id_map
*root_nsgid_map
;
303 struct lxc_list network
;
308 struct lxc_list mount_list
;
311 struct lxc_list caps
;
312 struct lxc_list keepcaps
;
314 /* /dev/tty<idx> devices */
315 struct lxc_tty_info ttys
;
316 /* /dev/console device */
317 struct lxc_terminal console
;
318 /* maximum pty devices allowed by devpts mount */
320 /* file descriptor for the container's /dev/pts mount */
323 /* set to true when rootfs has been setup */
325 struct lxc_rootfs rootfs
;
330 unsigned int hooks_version
;
331 struct lxc_list hooks
[NUM_LXC_HOOKS
];
334 char *lsm_aa_profile
;
335 char *lsm_aa_profile_computed
;
336 bool lsm_aa_profile_created
;
337 unsigned int lsm_aa_allow_nesting
;
338 unsigned int lsm_aa_allow_incomplete
;
339 struct lxc_list lsm_aa_raw
;
340 char *lsm_se_context
;
341 char *lsm_se_keyring_context
;
342 bool keyring_disable_session
;
343 bool tmp_umount_proc
;
344 struct lxc_seccomp seccomp
;
346 unsigned int autodev
; /* if 1, mount and fill a /dev at start */
347 int autodevtmpfssize
; /* size of the /dev tmpfs */
348 int haltsignal
; /* signal used to halt container */
349 int rebootsignal
; /* signal used to reboot container */
350 int stopsignal
; /* signal used to hard stop container */
351 char *rcfile
; /* Copy of the top level rcfile we read */
353 /* Logfile and loglevel can be set in a container config file. Those
354 * function as defaults. The defaults can be overridden by command line.
355 * However we don't want the command line specified values to be saved
356 * on c->save_config(). So we store the config file specified values
358 char *logfile
; /* the logfile as specified in config */
359 int loglevel
; /* loglevel as specified in config (if any) */
362 unsigned int start_auto
;
363 unsigned int start_delay
;
365 struct lxc_list groups
;
368 /* unshare the mount namespace in the monitor */
369 unsigned int monitor_unshare
;
370 unsigned int monitor_signal_pdeath
;
372 /* list of included files */
373 struct lxc_list includes
;
374 /* config entries which are not "lxc.*" are aliens */
375 struct lxc_list aliens
;
377 /* list of environment variables we'll add to the container when
379 struct lxc_list environment
;
381 /* text representation of the config file */
382 char *unexpanded_config
;
383 size_t unexpanded_len
;
384 size_t unexpanded_alloced
;
386 /* default command for lxc-execute */
392 /* if running in a new user namespace, the UID/GID that init and COMMAND
393 * should run under when using lxc-execute */
397 /* indicator if the container will be destroyed on shutdown */
398 unsigned int ephemeral
;
400 /* The facility to pass to syslog. Lets users establish as what type of
401 * program liblxc is supposed to write to the syslog. */
404 /* Whether PR_SET_NO_NEW_PRIVS will be set for the container. */
407 /* RLIMIT_* limits */
408 struct lxc_list limits
;
410 /* Contains generic info about the cgroup configuration for this
411 * container. Note that struct lxc_cgroup contains a union. It is only
412 * valid to access the members of the anonymous "meta" struct within
415 struct lxc_cgroup cgroup_meta
;
420 char *ns_share
[LXC_NS_MAX
];
423 /* init working directory */
426 /* A list of clients registered to be informed about a container state. */
427 struct lxc_list state_clients
;
430 struct lxc_list sysctls
;
433 struct lxc_list procs
;
436 /* Absolute path to the shared mount point on the host */
438 /* Absolute path (in the container) to the shared mount point */
442 struct timens_offsets timens
;
445 __hidden
extern int write_id_mapping(enum idtype idtype
, pid_t pid
, const char *buf
, size_t buf_size
)
449 extern thread_local
struct lxc_conf
*current_config
;
451 extern struct lxc_conf
*current_config
;
454 __hidden
extern int run_lxc_hooks(const char *name
, char *hook
, struct lxc_conf
*conf
, char *argv
[]);
455 __hidden
extern struct lxc_conf
*lxc_conf_init(void);
456 __hidden
extern void lxc_conf_free(struct lxc_conf
*conf
);
457 __hidden
extern int pin_rootfs(const char *rootfs
);
458 __hidden
extern int lxc_map_ids(struct lxc_list
*idmap
, pid_t pid
);
459 __hidden
extern int lxc_create_tty(const char *name
, struct lxc_conf
*conf
);
460 __hidden
extern void lxc_delete_tty(struct lxc_tty_info
*ttys
);
461 __hidden
extern int lxc_clear_config_caps(struct lxc_conf
*c
);
462 __hidden
extern int lxc_clear_config_keepcaps(struct lxc_conf
*c
);
463 __hidden
extern int lxc_clear_cgroups(struct lxc_conf
*c
, const char *key
, int version
);
464 __hidden
extern int lxc_clear_mount_entries(struct lxc_conf
*c
);
465 __hidden
extern int lxc_clear_automounts(struct lxc_conf
*c
);
466 __hidden
extern int lxc_clear_hooks(struct lxc_conf
*c
, const char *key
);
467 __hidden
extern int lxc_clear_idmaps(struct lxc_conf
*c
);
468 __hidden
extern int lxc_clear_groups(struct lxc_conf
*c
);
469 __hidden
extern int lxc_clear_environment(struct lxc_conf
*c
);
470 __hidden
extern int lxc_clear_limits(struct lxc_conf
*c
, const char *key
);
471 __hidden
extern int lxc_delete_autodev(struct lxc_handler
*handler
);
472 __hidden
extern int lxc_clear_autodev_tmpfs_size(struct lxc_conf
*c
);
473 __hidden
extern void lxc_clear_includes(struct lxc_conf
*conf
);
474 __hidden
extern int lxc_setup_rootfs_prepare_root(struct lxc_conf
*conf
, const char *name
,
475 const char *lxcpath
);
476 __hidden
extern int lxc_setup(struct lxc_handler
*handler
);
477 __hidden
extern int lxc_setup_parent(struct lxc_handler
*handler
);
478 __hidden
extern int setup_resource_limits(struct lxc_list
*limits
, pid_t pid
);
479 __hidden
extern int find_unmapped_nsid(const struct lxc_conf
*conf
, enum idtype idtype
);
480 __hidden
extern int mapped_hostid(unsigned id
, const struct lxc_conf
*conf
, enum idtype idtype
);
481 __hidden
extern int userns_exec_1(const struct lxc_conf
*conf
, int (*fn
)(void *), void *data
,
482 const char *fn_name
);
483 __hidden
extern int userns_exec_full(struct lxc_conf
*conf
, int (*fn
)(void *), void *data
,
484 const char *fn_name
);
485 __hidden
extern int parse_mntopts(const char *mntopts
, unsigned long *mntflags
, char **mntdata
);
486 __hidden
extern int parse_propagationopts(const char *mntopts
, unsigned long *pflags
);
487 __hidden
extern void tmp_proc_unmount(struct lxc_conf
*lxc_conf
);
488 __hidden
extern void turn_into_dependent_mounts(void);
489 __hidden
extern void suggest_default_idmap(void);
490 __hidden
extern FILE *make_anonymous_mount_file(struct lxc_list
*mount
, bool include_nesting_helpers
);
491 __hidden
extern struct lxc_list
*sort_cgroup_settings(struct lxc_list
*cgroup_settings
);
492 __hidden
extern unsigned long add_required_remount_flags(const char *s
, const char *d
,
493 unsigned long flags
);
494 __hidden
extern int run_script(const char *name
, const char *section
, const char *script
, ...);
495 __hidden
extern int run_script_argv(const char *name
, unsigned int hook_version
, const char *section
,
496 const char *script
, const char *hookname
, char **argsin
);
497 __hidden
extern int in_caplist(int cap
, struct lxc_list
*caps
);
498 __hidden
extern int setup_sysctl_parameters(struct lxc_list
*sysctls
);
499 __hidden
extern int lxc_clear_sysctls(struct lxc_conf
*c
, const char *key
);
500 __hidden
extern int setup_proc_filesystem(struct lxc_list
*procs
, pid_t pid
);
501 __hidden
extern int lxc_clear_procs(struct lxc_conf
*c
, const char *key
);
502 __hidden
extern int lxc_clear_apparmor_raw(struct lxc_conf
*c
);
503 __hidden
extern int lxc_clear_namespace(struct lxc_conf
*c
);
504 __hidden
extern int userns_exec_minimal(const struct lxc_conf
*conf
, int (*fn_parent
)(void *),
505 void *fn_parent_data
, int (*fn_child
)(void *),
506 void *fn_child_data
);
507 __hidden
extern int userns_exec_mapped_root(const char *path
, int path_fd
,
508 const struct lxc_conf
*conf
);
/*
 * Convenience wrapper: forwards @path and @conf to userns_exec_mapped_root(),
 * passing -EBADF as the path_fd argument, and returns that call's result.
 */
509 static inline int chown_mapped_root(const char *path
, const struct lxc_conf
*conf
)
511 return userns_exec_mapped_root(path
, -EBADF
, conf
);
514 __hidden
int lxc_setup_devpts_parent(struct lxc_handler
*handler
);
516 #endif /* __LXC_CONF_H */