/*
 * git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.h
 * conf: rework recursive mount option handling
 * [mirror_lxc.git] / src / lxc / conf.h
 */
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef __LXC_CONF_H
4 #define __LXC_CONF_H
5
6 #ifndef _GNU_SOURCE
7 #define _GNU_SOURCE 1
8 #endif
9 #include <linux/magic.h>
10 #include <net/if.h>
11 #include <netinet/in.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <sys/param.h>
15 #include <sys/types.h>
16 #include <sys/vfs.h>
17
18 #include "attach_options.h"
19 #include "caps.h"
20 #include "compiler.h"
21 #include "config.h"
22 #include "hlist.h"
23 #include "list.h"
24 #include "lxcseccomp.h"
25 #include "memory_utils.h"
26 #include "namespace.h"
27 #include "ringbuf.h"
28 #include "start.h"
29 #include "state.h"
30 #include "storage/storage.h"
31 #include "string_utils.h"
32 #include "syscall_wrappers.h"
33 #include "terminal.h"
34
35 #if HAVE_SYS_RESOURCE_H
36 #include <sys/resource.h>
37 #endif
38
#if HAVE_SCMP_FILTER_CTX
/*
 * Opaque pointer type named after the seccomp filter context.
 * NOTE(review): the typedef is emitted when HAVE_SCMP_FILTER_CTX is true;
 * confirm that this is the intended polarity of the guard.
 */
typedef void * scmp_filter_ctx;
#endif

/*
 * Integer type of the personality value stored in struct lxc_conf and
 * accepted by lxc_personality(); negative values are rejected there.
 */
typedef signed long personality_t;

/* worth moving to configure.ac? */
/* Paths of the subuid and subgid files. */
#define subuidfile "/etc/subuid"
#define subgidfile "/etc/subgid"
48
/*
 * Defines a generic struct to configure the control group. It is up to the
 * programmer to specify the right subsystem.
 *
 * The two anonymous structs live in a union and therefore share storage: an
 * instance carries either the controller members or the meta members.
 *
 * Controller members:
 * @subsystem   : the targeted subsystem
 * @value       : the value to set
 * @version     : The version of the cgroup filesystem on which the controller
 *                resides.
 *
 * Meta members:
 * @controllers : The controllers to use for this container.
 * @dir         : The name of the directory containing the container's cgroup.
 *                Note that this is a per-container setting.
 *
 * @head        : list node used to chain entries together
 */
struct lxc_cgroup {
	union {
		/* information about a specific controller */
		struct /* controller */ {
			int version;
			char *subsystem;
			char *value;
		};

		/* meta information about cgroup configuration */
		struct /* meta */ {
			char *controllers;
			char *dir;
			char *monitor_dir;
			char *monitor_pivot_dir;
			char *container_dir;
			char *namespace_dir;
			bool relative;
		};
	};

	struct list_head head;
};
84
85 static void free_lxc_cgroup(struct lxc_cgroup *ptr)
86 {
87 if (ptr) {
88 free(ptr->subsystem);
89 free(ptr->value);
90 free_disarm(ptr);
91 }
92 }
93 define_cleanup_function(struct lxc_cgroup *, free_lxc_cgroup);
94
#if !HAVE_SYS_RESOURCE_H
/*
 * Fallback definitions for builds without <sys/resource.h>.
 *
 * The struct mirrors the two members of the system struct rlimit that callers
 * rely on (soft and hard limit). It intentionally carries no list node:
 * struct lxc_limit embeds a struct rlimit and provides its own list head, and
 * the system struct rlimit has no such member either.
 */
#define RLIM_INFINITY ((unsigned long)-1)
struct rlimit {
	unsigned long rlim_cur; /* soft limit */
	unsigned long rlim_max; /* hard limit */
};
#endif
103
/*
 * Defines a structure to configure resource limits to set via setrlimit().
 * @resource : the resource name in lowercase without the RLIMIT_ prefix
 * @limit    : the limit to set
 * @head     : list node used to chain entries together
 */
struct lxc_limit {
	char *resource;
	struct rlimit limit;
	struct list_head head;
};
114
115 static void free_lxc_limit(struct lxc_limit *ptr)
116 {
117 if (ptr) {
118 free_disarm(ptr->resource);
119 free_disarm(ptr);
120 }
121 }
122 define_cleanup_function(struct lxc_limit *, free_lxc_limit);
123
/* Selects whether an id mapping entry describes user ids or group ids. */
enum idtype {
	ID_TYPE_UID,
	ID_TYPE_GID
};
128
/*
 * Defines a structure to configure kernel parameters at runtime.
 * @key   : the kernel parameter name, stored without the "lxc.sysctl" prefix
 * @value : the value to set
 * @head  : list node used to chain entries together
 */
struct lxc_sysctl {
	char *key;
	char *value;
	struct list_head head;
};
139
140 static void free_lxc_sysctl(struct lxc_sysctl *ptr)
141 {
142 if (ptr) {
143 free(ptr->key);
144 free(ptr->value);
145 free_disarm(ptr);
146 }
147 }
148 define_cleanup_function(struct lxc_sysctl *, free_lxc_sysctl);
149
/*
 * Defines a structure to configure the proc filesystem at runtime.
 * @filename : the proc file name, stored without the "lxc.proc" prefix
 * @value    : the value to set
 * @head     : list node used to chain entries together
 */
struct lxc_proc {
	char *filename;
	char *value;
	struct list_head head;
};
160
161 static void free_lxc_proc(struct lxc_proc *ptr)
162 {
163 if (ptr) {
164 free(ptr->filename);
165 free(ptr->value);
166 free_disarm(ptr);
167 }
168 }
169 define_cleanup_function(struct lxc_proc *, free_lxc_proc);
170
/*
 * id_map is an id map entry. Form in confile is:
 * lxc.idmap = u 0 9800 100
 * lxc.idmap = u 1000 9900 100
 * lxc.idmap = g 0 9800 100
 * lxc.idmap = g 1000 9900 100
 * meaning the container can use uids and gids 0-99 and 1000-1099,
 * with [ug]id 0 mapping to [ug]id 9800 on the host, and [ug]id 1000 to
 * [ug]id 9900 on the host.
 *
 * @idtype : whether the entry maps uids or gids
 * @hostid : first id of the range on the host
 * @nsid   : first id of the range inside the container
 * @range  : number of consecutive ids covered by the entry
 * @head   : list node used to chain entries together
 */
struct id_map {
	enum idtype idtype;
	unsigned long hostid, nsid, range;
	struct list_head head;
};
186
/* Defines the number of tty configured and contains the
 * instantiated ptys
 * @max = number of configured ttys
 * @tty = terminal info for the instantiated ptys
 *        (presumably @max entries - verify against the allocation site)
 */
struct lxc_tty_info {
	size_t max;
	char *dir;
	char *tty_names;
	struct lxc_terminal_info *tty;
};
197
/*
 * Indices of the LXC-specific mount options. LXC_MOUNT_MAX is not an option
 * itself; it is the number of options and sizes lxc_mount_options_info[].
 */
typedef enum lxc_mount_options_t {
	LXC_MOUNT_CREATE_DIR = 0,
	LXC_MOUNT_CREATE_FILE = 1,
	LXC_MOUNT_OPTIONAL = 2,
	LXC_MOUNT_RELATIVE = 3,
	LXC_MOUNT_IDMAP = 4,
	LXC_MOUNT_MAX = 5,
} lxc_mount_options_t;

/* One string per lxc_mount_options_t value; defined outside this header. */
__hidden extern const char *lxc_mount_options_info[LXC_MOUNT_MAX];
208
/*
 * Parsed mount options for a single mount.
 *
 * The first seven members are one-bit flags. @data and @raw_options are
 * released with free_disarm() by put_lxc_mount_options(), which also zeroes
 * @mnt_flags and @prop_flags and empties @userns_path.
 */
struct lxc_mount_options {
	unsigned int create_dir : 1;
	unsigned int create_file : 1;
	unsigned int optional : 1;
	unsigned int relative : 1;
	unsigned int bind_recursively : 1;
	unsigned int propagate_recursively : 1;
	unsigned int bind : 1;
	/* fixed-size, NUL-terminated path buffer */
	char userns_path[PATH_MAX];
	unsigned long mnt_flags;
	unsigned long prop_flags;
	char *data;
	struct lxc_mount_attr attr;
	char *raw_options;
};
224
/* Defines a structure to store the rootfs location, the
 * optional pivot_root, rootfs mount paths
 * @path         : the rootfs source (directory or device)
 * @mount        : where it is mounted
 * @buf          : static buffer to construct paths
 * @bdev_type    : optional backing store type
 * @managed      : whether it is managed by LXC
 * @dfd_mnt      : fd for @mount
 * @dfd_dev      : fd for /dev of the container
 * @mnt_opts     : mount options for the rootfs
 * @storage      : storage handle, released via storage_put() in
 *                 put_lxc_rootfs()
 *
 * @dfd_host, @dfd_mnt, @dfd_dev and @dfd_idmapped are closed by
 * put_lxc_rootfs(); @fd_path_pin is closed there only on request.
 */
struct lxc_rootfs {
	int dfd_host;

	char *path;
	int fd_path_pin;
	int dfd_idmapped;

	int dfd_mnt;
	char *mount;

	int dfd_dev;

	char buf[PATH_MAX];
	char *bdev_type;
	bool managed;
	struct lxc_mount_options mnt_opts;
	struct lxc_storage *storage;
};
253
/*
 * Automatic mounts for LXC to perform inside the container.
 *
 * The values are bit flags grouped per filesystem; each group has a *_MASK
 * constant covering the bits of that group.
 */
enum {
	LXC_AUTO_PROC_RW = 0x001, /* /proc read-write */
	LXC_AUTO_PROC_MIXED = 0x002, /* /proc/sys and /proc/sysrq-trigger read-only */
	LXC_AUTO_PROC_MASK = 0x003,

	LXC_AUTO_SYS_RW = 0x004, /* /sys */
	LXC_AUTO_SYS_RO = 0x008, /* /sys read-only */
	LXC_AUTO_SYS_MIXED = 0x00C, /* /sys read-only and /sys/class/net read-write */
	LXC_AUTO_SYS_MASK = 0x00C,

	LXC_AUTO_CGROUP_RO = 0x010, /* /sys/fs/cgroup (partial mount, read-only) */
	LXC_AUTO_CGROUP_RW = 0x020, /* /sys/fs/cgroup (partial mount, read-write) */
	LXC_AUTO_CGROUP_MIXED = 0x030, /* /sys/fs/cgroup (partial mount, paths r/o, cgroup r/w) */
	LXC_AUTO_CGROUP_FULL_RO = 0x040, /* /sys/fs/cgroup (full mount, read-only) */
	LXC_AUTO_CGROUP_FULL_RW = 0x050, /* /sys/fs/cgroup (full mount, read-write) */
	LXC_AUTO_CGROUP_FULL_MIXED = 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
	/*
	 * These are defined in such a way as to retain binary compatibility
	 * with earlier versions of this code. If the previous mask is applied,
	 * both of these will default back to the _MIXED variants, which is
	 * safe.
	 */
	LXC_AUTO_CGROUP_NOSPEC = 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
	LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
	LXC_AUTO_CGROUP_FORCE = 0x100, /* mount cgroups even when cgroup namespaces are supported */
	LXC_AUTO_CGROUP_MASK = 0x1F0, /* all known cgroup options */

	LXC_AUTO_SHMOUNTS = 0x200, /* shared mount point */
	LXC_AUTO_SHMOUNTS_MASK = 0x200, /* shared mount point mask */
	/*
	 * NOTE(review): 0x1FF does not include the LXC_AUTO_SHMOUNTS bit
	 * (0x200); confirm whether that exclusion is intentional.
	 */
	LXC_AUTO_ALL_MASK = 0x1FF, /* all known settings */
};
288
/*
 * Hook types. The enumerators are used as array indices: NUM_LXC_HOOKS is the
 * number of hook types and sizes both lxchook_names[] and the hooks[] array
 * in struct lxc_conf.
 */
enum lxchooks {
	LXCHOOK_PRESTART,
	LXCHOOK_PREMOUNT,
	LXCHOOK_MOUNT,
	LXCHOOK_AUTODEV,
	LXCHOOK_START,
	LXCHOOK_STOP,
	LXCHOOK_POSTSTOP,
	LXCHOOK_CLONE,
	LXCHOOK_DESTROY,
	LXCHOOK_START_HOST,
	NUM_LXC_HOOKS
};

/* One name per enum lxchooks value; defined outside this header. */
__hidden extern char *lxchook_names[NUM_LXC_HOOKS];
304
/*
 * A client registered to be informed about a container state (see
 * state_clients in struct lxc_conf).
 * @clientfd : file descriptor of the client
 * @states   : one slot per container state, MAX_STATE in total
 *             (presumably marks the states the client waits for - verify)
 * @head     : list node used to chain entries together
 */
struct lxc_state_client {
	int clientfd;
	lxc_state_t states[MAX_STATE];
	struct list_head head;
};
310
/* Kind of device list held by struct bpf_devices: allowlist or denylist. */
typedef enum lxc_bpf_devices_rule_t {
	LXC_BPF_DEVICE_CGROUP_ALLOWLIST = 0,
	LXC_BPF_DEVICE_CGROUP_DENYLIST = 1,
} lxc_bpf_devices_rule_t;
315
/*
 * A single device rule, chained into struct bpf_devices through @head.
 * @type         : device type character
 * @major/@minor : device numbers
 * @access       : access string, at most 3 characters plus the terminator
 *                 (presumably a combination of r/w/m - verify)
 * @allow        : whether the rule allows or denies access
 */
struct device_item {
	char type;
	int major;
	int minor;
	char access[4];
	int allow;
	struct list_head head;
};
324
/*
 * Device rules of a container.
 * @list_type : whether @devices is interpreted as allowlist or denylist
 * @devices   : list of struct device_item entries (linked via their head)
 */
struct bpf_devices {
	lxc_bpf_devices_rule_t list_type;
	struct list_head devices;
};
329
/*
 * Clock offsets stored in struct lxc_conf as "timens". Each clock has a
 * seconds (s_*) and a nanoseconds (ns_*) member.
 */
struct timens_offsets {
	/* Currently, either s_boot or ns_boot is set, but not both. */
	int64_t s_boot;
	int64_t ns_boot;

	/* Currently, either s_monotonic or ns_monotonic is set, but not both. */
	int64_t s_monotonic;
	int64_t ns_monotonic;
};
339
/*
 * Complete configuration of a container. Related settings are grouped into
 * anonymous structs; their members are accessed directly on struct lxc_conf.
 */
struct lxc_conf {
	/* Pointer to the name of the container. Do not free! */
	const char *name;
	bool is_execute;
	int reboot;
	personality_t personality;
	struct utsname *utsname;

	/* cgroup settings and device rules */
	struct {
		struct list_head cgroup;
		struct list_head cgroup2;
		struct bpf_devices bpf_devices;
	};

	/* id mappings */
	struct {
		struct list_head id_map;

		/*
		 * Pointer to the idmap entry for the container's root uid in
		 * the id_map list. Do not free!
		 */
		const struct id_map *root_nsuid_map;

		/*
		 * Pointer to the idmap entry for the container's root gid in
		 * the id_map list. Do not free!
		 */
		const struct id_map *root_nsgid_map;
	};

	struct list_head netdevs;

	/* mount settings; auto_mounts holds LXC_AUTO_* flags */
	struct {
		char *fstab;
		int auto_mounts;
		struct lxc_list mount_list;
	};

	/* capability lists consulted by lxc_wants_cap() */
	struct lxc_list caps;
	struct lxc_list keepcaps;

	/* /dev/tty<idx> devices */
	struct lxc_tty_info ttys;
	/* /dev/console device */
	struct lxc_terminal console;
	/* maximum pty devices allowed by devpts mount */
	size_t pty_max;
	/* file descriptor for the container's /dev/pts mount */
	int devpts_fd;

	/* set to true when rootfs has been setup */
	bool rootfs_setup;
	struct lxc_rootfs rootfs;

	bool close_all_fds;

	/* hooks, indexed by enum lxchooks */
	struct {
		unsigned int hooks_version;
		struct lxc_list hooks[NUM_LXC_HOOKS];
	};

	char *lsm_aa_profile;
	char *lsm_aa_profile_computed;
	bool lsm_aa_profile_created;
	unsigned int lsm_aa_allow_nesting;
	unsigned int lsm_aa_allow_incomplete;
	struct lxc_list lsm_aa_raw;
	char *lsm_se_context;
	char *lsm_se_keyring_context;
	bool keyring_disable_session;
	bool transient_procfs_mnt;
	struct lxc_seccomp seccomp;
	int maincmd_fd;
	unsigned int autodev; /* if 1, mount and fill a /dev at start */
	int autodevtmpfssize; /* size of the /dev tmpfs */
	int haltsignal; /* signal used to halt container */
	int rebootsignal; /* signal used to reboot container */
	int stopsignal; /* signal used to hard stop container */
	char *rcfile; /* Copy of the top level rcfile we read */

	/* Logfile and loglevel can be set in a container config file. Those
	 * function as defaults. The defaults can be overridden by command line.
	 * However we don't want the command line specified values to be saved
	 * on c->save_config(). So we store the config file specified values
	 * here. */
	char *logfile; /* the logfile as specified in config */
	int loglevel; /* loglevel as specified in config (if any) */
	int logfd;

	unsigned int start_auto;
	unsigned int start_delay;
	int start_order;
	struct lxc_list groups;
	int nbd_idx;

	/* unshare the mount namespace in the monitor */
	unsigned int monitor_unshare;
	unsigned int monitor_signal_pdeath;

	/* list of included files */
	struct lxc_list includes;
	/* config entries which are not "lxc.*" are aliens */
	struct lxc_list aliens;

	/* list of environment variables we'll add to the container when
	 * started */
	struct lxc_list environment;

	/* text representation of the config file */
	char *unexpanded_config;
	size_t unexpanded_len;
	size_t unexpanded_alloced;

	/* default command for lxc-execute */
	char *execute_cmd;

	/* init command */
	char *init_cmd;

	/* The uid to use for the container. */
	uid_t init_uid;
	/* The gid to use for the container. */
	gid_t init_gid;
	/* The groups to use for the container. */
	lxc_groups_t init_groups;

	/* indicator if the container will be destroyed on shutdown */
	unsigned int ephemeral;

	/* The facility to pass to syslog. Lets users establish as what type of
	 * program liblxc is supposed to write to the syslog. */
	char *syslog;

	/* Whether PR_SET_NO_NEW_PRIVS will be set for the container. */
	bool no_new_privs;

	/* RLIMIT_* limits */
	struct list_head limits;

	/* Contains generic info about the cgroup configuration for this
	 * container. Note that struct lxc_cgroup contains a union. It is only
	 * valid to access the members of the anonymous "meta" struct within
	 * that union.
	 */
	struct lxc_cgroup cgroup_meta;

	/* namespace settings; ns_share has one slot per namespace type */
	struct {
		int ns_clone;
		int ns_keep;
		char *ns_share[LXC_NS_MAX];
	};

	/* init working directory */
	char *init_cwd;

	/* A list of clients registered to be informed about a container state. */
	struct list_head state_clients;

	/* sysctls */
	struct list_head sysctls;

	/* procs */
	struct list_head procs;

	struct shmount {
		/* Absolute path to the shared mount point on the host */
		char *path_host;
		/* Absolute path (in the container) to the shared mount point */
		char *path_cont;
	} shmount;

	/* clock offsets, see struct timens_offsets */
	struct timens_offsets timens;
};
513
/*
 * Declarations of functions and objects that are defined outside this header.
 * Everything marked __hidden is not part of the exported library interface.
 */

/* @buf is annotated as a read-only buffer of @buf_size bytes. */
__hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
	__access_r(3, 4);

/* Per-thread pointer to a container configuration. */
extern thread_local struct lxc_conf *current_config;

__hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
__hidden extern struct lxc_conf *lxc_conf_init(void);
__hidden extern void lxc_conf_free(struct lxc_conf *conf);
__hidden extern int lxc_storage_prepare(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
__hidden extern void lxc_storage_put(struct lxc_conf *conf);
__hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
__hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
__hidden extern int lxc_idmapped_mounts_parent(struct lxc_handler *handler);
__hidden extern int lxc_map_ids(struct list_head *idmap, pid_t pid);
__hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
__hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);

/* Helpers that clear individual parts of a configuration. */
__hidden extern int lxc_clear_config_caps(struct lxc_conf *c);
__hidden extern int lxc_clear_config_keepcaps(struct lxc_conf *c);
__hidden extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version);
__hidden extern int lxc_clear_mount_entries(struct lxc_conf *c);
__hidden extern int lxc_clear_automounts(struct lxc_conf *c);
__hidden extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
__hidden extern int lxc_clear_idmaps(struct lxc_conf *c);
__hidden extern int lxc_clear_groups(struct lxc_conf *c);
__hidden extern int lxc_clear_environment(struct lxc_conf *c);
__hidden extern int lxc_clear_limits(struct lxc_conf *c, const char *key);
__hidden extern int lxc_delete_autodev(struct lxc_handler *handler);
__hidden extern int lxc_clear_autodev_tmpfs_size(struct lxc_conf *c);
__hidden extern void lxc_clear_includes(struct lxc_conf *conf);

__hidden extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name,
						  const char *lxcpath);
__hidden extern int lxc_setup(struct lxc_handler *handler);
__hidden extern int lxc_setup_parent(struct lxc_handler *handler);
__hidden extern int setup_resource_limits(struct lxc_conf *conf, pid_t pid);
__hidden extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype);
__hidden extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype);
__hidden extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data,
				  const char *fn_name);
__hidden extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data,
				     const char *fn_name);

/* Mount option parsers. */
__hidden extern int parse_mntopts_legacy(const char *mntopts, unsigned long *mntflags, char **mntdata);
__hidden extern int parse_propagationopts(const char *mntopts, unsigned long *pflags);
__hidden extern int parse_lxc_mount_attrs(struct lxc_mount_options *opts, char *mnt_opts);
__hidden extern int parse_mount_attrs(struct lxc_mount_options *opts, const char *mntopts);

__hidden extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
__hidden extern void suggest_default_idmap(void);
__hidden extern FILE *make_anonymous_mount_file(struct lxc_list *mount, bool include_nesting_helpers);
__hidden extern void sort_cgroup_settings(struct lxc_conf *conf);
__hidden extern int run_script(const char *name, const char *section, const char *script, ...);
__hidden extern int run_script_argv(const char *name, unsigned int hook_version, const char *section,
				    const char *script, const char *hookname, char **argsin);
/* Used by lxc_wants_cap() to look up @cap in a capability list. */
__hidden extern int in_caplist(int cap, struct lxc_list *caps);
567
568 static inline bool lxc_wants_cap(int cap, struct lxc_conf *conf)
569 {
570 if (lxc_caps_last_cap() < cap)
571 return false;
572
573 if (!lxc_list_empty(&conf->keepcaps))
574 return in_caplist(cap, &conf->keepcaps);
575
576 return !in_caplist(cap, &conf->caps);
577 }
578
/*
 * Further declarations of functions defined outside this header: sysctl and
 * proc setup, additional clear helpers and user namespace exec helpers.
 */
__hidden extern int setup_sysctl_parameters(struct lxc_conf *conf);
__hidden extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key);
__hidden extern int setup_proc_filesystem(struct lxc_conf *conf, pid_t pid);
__hidden extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
__hidden extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
__hidden extern int lxc_clear_namespace(struct lxc_conf *c);
__hidden extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn_parent)(void *),
					void *fn_parent_data, int (*fn_child)(void *),
					void *fn_child_data);
/* chown_mapped_root() calls this with -EBADF as @path_fd. */
__hidden extern int userns_exec_mapped_root(const char *path, int path_fd,
					    const struct lxc_conf *conf);
/*
 * Path-only convenience wrapper around userns_exec_mapped_root(): forwards
 * @path and @conf and passes -EBADF as the path file descriptor argument
 * (presumably meaning "no descriptor available" - verify against the callee).
 *
 * Returns whatever userns_exec_mapped_root() returns.
 */
static inline int chown_mapped_root(const char *path, const struct lxc_conf *conf)
{
	const int path_fd = -EBADF;

	return userns_exec_mapped_root(path, path_fd, conf);
}
594
/* Parent and child side fd synchronisation; defined outside this header. */
__hidden extern int lxc_sync_fds_parent(struct lxc_handler *handler);
__hidden extern int lxc_sync_fds_child(struct lxc_handler *handler);
597
598 static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
599 {
600 static const char *s = "/";
601
602 return !is_empty_string(rootfs->path) ? rootfs->mount : s;
603 }
604
605 static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
606 {
607 mnt_opts->create_dir = 0;
608 mnt_opts->create_file = 0;
609 mnt_opts->optional = 0;
610 mnt_opts->relative = 0;
611 mnt_opts->userns_path[0] = '\0';
612 mnt_opts->mnt_flags = 0;
613 mnt_opts->prop_flags = 0;
614
615 free_disarm(mnt_opts->data);
616 free_disarm(mnt_opts->raw_options);
617 }
618
619 static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
620 {
621 if (rootfs) {
622 close_prot_errno_disarm(rootfs->dfd_host);
623 close_prot_errno_disarm(rootfs->dfd_mnt);
624 close_prot_errno_disarm(rootfs->dfd_dev);
625 if (unpin)
626 close_prot_errno_disarm(rootfs->fd_path_pin);
627 close_prot_errno_disarm(rootfs->dfd_idmapped);
628 put_lxc_mount_options(&rootfs->mnt_opts);
629 storage_put(rootfs->storage);
630 rootfs->storage = NULL;
631 }
632 }
633
634 static inline void lxc_clear_cgroup2_devices(struct bpf_devices *bpf_devices)
635 {
636 struct device_item *device, *n;
637
638 list_for_each_entry_safe(device, n, &bpf_devices->devices, head)
639 list_del(&device->head);
640
641 INIT_LIST_HEAD(&bpf_devices->devices);
642 }
643
644 static inline int lxc_personality(personality_t persona)
645 {
646 if (persona < 0)
647 return ret_errno(EINVAL);
648
649 return personality(persona);
650 }
651
652 #endif /* __LXC_CONF_H */