]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.h
storage/dir: cleanup mount code
[mirror_lxc.git] / src / lxc / conf.h
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef __LXC_CONF_H
4 #define __LXC_CONF_H
5
6 #ifndef _GNU_SOURCE
7 #define _GNU_SOURCE 1
8 #endif
9 #include <linux/magic.h>
10 #include <net/if.h>
11 #include <netinet/in.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <sys/param.h>
15 #include <sys/types.h>
16 #include <sys/vfs.h>
17
18 #include "attach_options.h"
19 #include "caps.h"
20 #include "compiler.h"
21 #include "config.h"
22 #include "list.h"
23 #include "lxcseccomp.h"
24 #include "memory_utils.h"
25 #include "ringbuf.h"
26 #include "start.h"
27 #include "storage/storage.h"
28 #include "string_utils.h"
29 #include "terminal.h"
30
31 #if HAVE_SYS_RESOURCE_H
32 #include <sys/resource.h>
33 #endif
34
35 #if HAVE_SCMP_FILTER_CTX
36 typedef void * scmp_filter_ctx;
37 #endif
38
39 /* worth moving to configure.ac? */
40 #define subuidfile "/etc/subuid"
41 #define subgidfile "/etc/subgid"
42
/*
 * Generic holder for one piece of control group configuration. It is up to
 * the programmer to name the right subsystem.
 *
 * The two anonymous structs below live in a union and therefore share
 * storage: an instance is read either through the controller members or
 * through the meta members.
 *
 * Controller members:
 * @version     : version of the cgroup filesystem on which the controller
 *                resides
 * @subsystem   : the targeted subsystem
 * @value       : the value to set
 *
 * Meta members:
 * @controllers : the controllers to use for this container
 * @dir         : name of the directory containing the container's cgroup.
 *                Note that this is a per-container setting.
 *
 * The remaining meta members (@monitor_dir, @monitor_pivot_dir,
 * @container_dir, @namespace_dir, @relative) carry no description in this
 * header.
 */
struct lxc_cgroup {
	union {
		/* settings that apply to one specific controller */
		struct /* controller */ {
			int version;
			char *subsystem;
			char *value;
		};

		/* meta information about the cgroup configuration */
		struct /* meta */ {
			char *controllers;
			char *dir;
			char *monitor_dir;
			char *monitor_pivot_dir;
			char *container_dir;
			char *namespace_dir;
			bool relative;
		};
	};
};
76
77 static void free_lxc_cgroup(struct lxc_cgroup *ptr)
78 {
79 if (ptr) {
80 free(ptr->subsystem);
81 free(ptr->value);
82 free_disarm(ptr);
83 }
84 }
85 define_cleanup_function(struct lxc_cgroup *, free_lxc_cgroup);
86
/*
 * Fallback used when HAVE_SYS_RESOURCE_H is undefined or zero: provide
 * RLIM_INFINITY (all bits set in an unsigned long) and a two-member
 * struct rlimit.
 */
#if !HAVE_SYS_RESOURCE_H
#define RLIM_INFINITY (~0UL)
struct rlimit {
	unsigned long rlim_cur;
	unsigned long rlim_max;
};
#endif
94
/*
 * One resource limit to be applied via setrlimit().
 * @resource : the resource name in lowercase, without the RLIMIT_ prefix
 * @limit    : the limit to set
 */
struct lxc_limit {
	char *resource;
	struct rlimit limit;
};
104
105 static void free_lxc_limit(struct lxc_limit *ptr)
106 {
107 if (ptr) {
108 free_disarm(ptr->resource);
109 free_disarm(ptr);
110 }
111 }
112 define_cleanup_function(struct lxc_limit *, free_lxc_limit);
113
/* Selects which kind of id an id-related entry or call refers to. */
enum idtype {
	ID_TYPE_UID = 0,
	ID_TYPE_GID = 1,
};
118
/*
 * One kernel parameter to configure at runtime.
 * @key   : name of the kernel parameter, without the "lxc.sysctl" prefix
 * @value : the value to set
 */
struct lxc_sysctl {
	char *key;
	char *value;
};
128
129 static void free_lxc_sysctl(struct lxc_sysctl *ptr)
130 {
131 if (ptr) {
132 free(ptr->key);
133 free(ptr->value);
134 free_disarm(ptr);
135 }
136 }
137 define_cleanup_function(struct lxc_sysctl *, free_lxc_sysctl);
138
/*
 * One proc filesystem entry to configure at runtime.
 * @filename : name of the proc entry, without the "lxc.proc" prefix
 * @value    : the value to set
 */
struct lxc_proc {
	char *filename;
	char *value;
};
148
149 static void free_lxc_proc(struct lxc_proc *ptr)
150 {
151 if (ptr) {
152 free(ptr->filename);
153 free(ptr->value);
154 free_disarm(ptr);
155 }
156 }
157 define_cleanup_function(struct lxc_proc *, free_lxc_proc);
158
159 /*
160 * id_map is an id map entry. Form in confile is:
161 * lxc.idmap = u 0 9800 100
162 * lxc.idmap = u 1000 9900 100
163 * lxc.idmap = g 0 9800 100
164 * lxc.idmap = g 1000 9900 100
165 * meaning the container can use uids and gids 0-99 and 1000-1099,
166 * with [ug]id 0 mapping to [ug]id 9800 on the host, and [ug]id 1000 to
167 * [ug]id 9900 on the host.
168 */
169 struct id_map {
170 enum idtype idtype;
171 unsigned long hostid, nsid, range;
172 };
173
/*
 * Records how many ttys are configured and holds the instantiated ptys.
 * @max : number of configured ttys
 *
 * @dir, @tty_names and @tty carry no description in this header.
 */
struct lxc_tty_info {
	size_t max;
	char *dir;
	char *tty_names;
	struct lxc_terminal_info *tty;
};
184
/*
 * Identifiers for the LXC-specific mount options. The values are consecutive
 * starting at zero; LXC_MOUNT_MAX is the number of options and is used as the
 * length of the lxc_mount_options_info array declared in this header.
 */
typedef enum lxc_mount_options_t {
	LXC_MOUNT_CREATE_DIR = 0,
	LXC_MOUNT_CREATE_FILE,	/* 1 */
	LXC_MOUNT_OPTIONAL,	/* 2 */
	LXC_MOUNT_RELATIVE,	/* 3 */
	LXC_MOUNT_IDMAP,	/* 4 */
	LXC_MOUNT_MAX,		/* 5 */
} lxc_mount_options_t;
193
194 __hidden extern const char *lxc_mount_options_info[LXC_MOUNT_MAX];
195
/*
 * LXC-specific options attached to a mount.
 *
 * The five flags are one-bit bit-fields and are deliberately declared
 * "unsigned int". Whether a plain "int" bit-field is signed is
 * implementation-defined; where it is signed, a one-bit field can only hold
 * 0 and -1, so storing 1 reads back as -1 and "flag == 1" is false. With an
 * unsigned type the flags hold exactly 0 or 1. Size and layout of the struct
 * are unaffected.
 *
 * @create_dir, @create_file, @optional, @relative, @userns_self
 *               : boolean flags (0 or 1)
 * @userns_path  : NUL-terminated string buffer of PATH_MAX bytes
 * @userns_fd    : file descriptor; put_lxc_mount_options() hands it to
 *                 close_prot_errno_disarm()
 * @mnt_flags    : mount flags
 * @prop_flags   : propagation flags
 * @data         : string that put_lxc_mount_options() hands to free_disarm()
 */
struct lxc_mount_options {
	unsigned int create_dir : 1;
	unsigned int create_file : 1;
	unsigned int optional : 1;
	unsigned int relative : 1;
	unsigned int userns_self : 1;
	char userns_path[PATH_MAX];
	int userns_fd;
	unsigned long mnt_flags;
	unsigned long prop_flags;
	char *data;
};
208
209 /* Defines a structure to store the rootfs location, the
210 * optionals pivot_root, rootfs mount paths
211 * @path : the rootfs source (directory or device)
212 * @mount : where it is mounted
213 * @buf : static buffer to construct paths
214 * @bev_type : optional backing store type
215 * @options : mount options
216 * @managed : whether it is managed by LXC
217 * @dfd_mnt : fd for @mount
218 * @dfd_dev : fd for /dev of the container
219 */
220 struct lxc_rootfs {
221 int dfd_host;
222
223 char *path;
224 int fd_path_pin;
225 int dfd_idmapped;
226
227 int dfd_mnt;
228 char *mount;
229
230 int dfd_dev;
231
232 char buf[PATH_MAX];
233 char *bdev_type;
234 char *options;
235 unsigned long mountflags;
236 bool managed;
237 struct lxc_mount_options mnt_opts;
238 struct lxc_storage *storage;
239 };
240
/*
 * Flags describing the automatic mounts LXC performs inside the container.
 */
enum {
	/* /proc */
	LXC_AUTO_PROC_RW            = 0x001, /* /proc read-write */
	LXC_AUTO_PROC_MIXED         = 0x002, /* /proc/sys and /proc/sysrq-trigger read-only */
	LXC_AUTO_PROC_MASK          = LXC_AUTO_PROC_RW | LXC_AUTO_PROC_MIXED, /* 0x003 */

	/* /sys */
	LXC_AUTO_SYS_RW             = 0x004, /* /sys */
	LXC_AUTO_SYS_RO             = 0x008, /* /sys read-only */
	LXC_AUTO_SYS_MIXED          = LXC_AUTO_SYS_RW | LXC_AUTO_SYS_RO, /* 0x00C: /sys read-only and /sys/class/net read-write */
	LXC_AUTO_SYS_MASK           = 0x00C,

	/* /sys/fs/cgroup */
	LXC_AUTO_CGROUP_RO          = 0x010, /* partial mount, read-only */
	LXC_AUTO_CGROUP_RW          = 0x020, /* partial mount, read-write */
	LXC_AUTO_CGROUP_MIXED       = LXC_AUTO_CGROUP_RO | LXC_AUTO_CGROUP_RW, /* 0x030: partial mount, paths r/o, cgroup r/w */
	LXC_AUTO_CGROUP_FULL_RO     = 0x040, /* full mount, read-only */
	LXC_AUTO_CGROUP_FULL_RW     = 0x050, /* full mount, read-write */
	LXC_AUTO_CGROUP_FULL_MIXED  = 0x060, /* full mount, parent r/o, own r/w */
	/*
	 * The two NOSPEC values are chosen to stay binary compatible with
	 * earlier versions of this code: when the previous mask is applied
	 * to them they fall back to the corresponding _MIXED variant, which
	 * is safe.
	 */
	LXC_AUTO_CGROUP_NOSPEC      = 0x0B0, /* partial mount, r/w or mixed, depending on caps */
	LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* full mount, r/w or mixed, depending on caps */
	LXC_AUTO_CGROUP_FORCE       = 0x100, /* mount cgroups even when cgroup namespaces are supported */
	LXC_AUTO_CGROUP_MASK        = 0x1F0, /* all known cgroup options */

	/* shared mount point */
	LXC_AUTO_SHMOUNTS           = 0x200, /* shared mount point */
	LXC_AUTO_SHMOUNTS_MASK      = LXC_AUTO_SHMOUNTS, /* 0x200: shared mount point mask */

	/*
	 * NOTE(review): this mask (0x1FF) does not contain the
	 * LXC_AUTO_SHMOUNTS bit (0x200) - confirm that this is intentional.
	 */
	LXC_AUTO_ALL_MASK           = 0x1FF, /* all known settings */
};
275
/*
 * Hook identifiers, numbered consecutively from zero. NUM_LXC_HOOKS is the
 * number of hooks and is used in this header as an array length (for
 * lxchook_names and for the hooks member of struct lxc_conf).
 */
enum lxchooks {
	LXCHOOK_PRESTART = 0,
	LXCHOOK_PREMOUNT,	/* 1 */
	LXCHOOK_MOUNT,		/* 2 */
	LXCHOOK_AUTODEV,	/* 3 */
	LXCHOOK_START,		/* 4 */
	LXCHOOK_STOP,		/* 5 */
	LXCHOOK_POSTSTOP,	/* 6 */
	LXCHOOK_CLONE,		/* 7 */
	LXCHOOK_DESTROY,	/* 8 */
	LXCHOOK_START_HOST,	/* 9 */
	NUM_LXC_HOOKS		/* 10 */
};
289
290 __hidden extern char *lxchook_names[NUM_LXC_HOOKS];
291
292 struct lxc_state_client {
293 int clientfd;
294 lxc_state_t states[MAX_STATE];
295 };
296
/* Kind of device list kept in struct bpf_devices: allowlist or denylist. */
typedef enum lxc_bpf_devices_rule_t {
	LXC_BPF_DEVICE_CGROUP_ALLOWLIST = 0,
	LXC_BPF_DEVICE_CGROUP_DENYLIST,	/* 1 */
} lxc_bpf_devices_rule_t;
301
/*
 * One device rule: a type character, major and minor numbers, an access
 * string held in a four-byte buffer, and an allow value.
 * NOTE(review): the accepted values of each member are not visible in this
 * header - confirm against the code that fills this struct.
 */
struct device_item {
	char type;
	int major, minor;
	char access[4];
	int allow;
};
309
310 struct bpf_devices {
311 lxc_bpf_devices_rule_t list_type;
312 struct lxc_list device_item;
313 };
314
/*
 * Offsets for the boot and monotonic clocks. Each clock has a member with an
 * s_ prefix and one with an ns_ prefix; currently only one of the two is set
 * per clock.
 */
struct timens_offsets {
	/* boot clock: either s_boot or ns_boot is set, but not both */
	int64_t s_boot;
	int64_t ns_boot;

	/* monotonic clock: either s_monotonic or ns_monotonic is set, but not both */
	int64_t s_monotonic;
	int64_t ns_monotonic;
};
324
325 struct lxc_conf {
326 /* Pointer to the name of the container. Do not free! */
327 const char *name;
328 bool is_execute;
329 int reboot;
330 signed long personality;
331 struct utsname *utsname;
332
333 struct {
334 struct lxc_list cgroup;
335 struct lxc_list cgroup2;
336 struct bpf_devices bpf_devices;
337 };
338
339 struct {
340 struct lxc_list id_map;
341
342 /*
343 * Pointer to the idmap entry for the container's root uid in
344 * the id_map list. Do not free!
345 */
346 const struct id_map *root_nsuid_map;
347
348 /*
349 * Pointer to the idmap entry for the container's root gid in
350 * the id_map list. Do not free!
351 */
352 const struct id_map *root_nsgid_map;
353 };
354
355 struct lxc_list network;
356
357 struct {
358 char *fstab;
359 int auto_mounts;
360 struct lxc_list mount_list;
361 };
362
363 struct lxc_list caps;
364 struct lxc_list keepcaps;
365
366 /* /dev/tty<idx> devices */
367 struct lxc_tty_info ttys;
368 /* /dev/console device */
369 struct lxc_terminal console;
370 /* maximum pty devices allowed by devpts mount */
371 size_t pty_max;
372 /* file descriptor for the container's /dev/pts mount */
373 int devpts_fd;
374
375 /* set to true when rootfs has been setup */
376 bool rootfs_setup;
377 struct lxc_rootfs rootfs;
378
379 bool close_all_fds;
380
381 struct {
382 unsigned int hooks_version;
383 struct lxc_list hooks[NUM_LXC_HOOKS];
384 };
385
386 char *lsm_aa_profile;
387 char *lsm_aa_profile_computed;
388 bool lsm_aa_profile_created;
389 unsigned int lsm_aa_allow_nesting;
390 unsigned int lsm_aa_allow_incomplete;
391 struct lxc_list lsm_aa_raw;
392 char *lsm_se_context;
393 char *lsm_se_keyring_context;
394 bool keyring_disable_session;
395 bool transient_procfs_mnt;
396 struct lxc_seccomp seccomp;
397 int maincmd_fd;
398 unsigned int autodev; /* if 1, mount and fill a /dev at start */
399 int autodevtmpfssize; /* size of the /dev tmpfs */
400 int haltsignal; /* signal used to halt container */
401 int rebootsignal; /* signal used to reboot container */
402 int stopsignal; /* signal used to hard stop container */
403 char *rcfile; /* Copy of the top level rcfile we read */
404
405 /* Logfile and loglevel can be set in a container config file. Those
406 * function as defaults. The defaults can be overridden by command line.
407 * However we don't want the command line specified values to be saved
408 * on c->save_config(). So we store the config file specified values
409 * here. */
410 char *logfile; /* the logfile as specified in config */
411 int loglevel; /* loglevel as specified in config (if any) */
412 int logfd;
413
414 unsigned int start_auto;
415 unsigned int start_delay;
416 int start_order;
417 struct lxc_list groups;
418 int nbd_idx;
419
420 /* unshare the mount namespace in the monitor */
421 unsigned int monitor_unshare;
422 unsigned int monitor_signal_pdeath;
423
424 /* list of included files */
425 struct lxc_list includes;
426 /* config entries which are not "lxc.*" are aliens */
427 struct lxc_list aliens;
428
429 /* list of environment variables we'll add to the container when
430 * started */
431 struct lxc_list environment;
432
433 /* text representation of the config file */
434 char *unexpanded_config;
435 size_t unexpanded_len;
436 size_t unexpanded_alloced;
437
438 /* default command for lxc-execute */
439 char *execute_cmd;
440
441 /* init command */
442 char *init_cmd;
443
444 /* The uid to use for the container. */
445 uid_t init_uid;
446 /* The gid to use for the container. */
447 gid_t init_gid;
448 /* The groups to use for the container. */
449 lxc_groups_t init_groups;
450
451 /* indicator if the container will be destroyed on shutdown */
452 unsigned int ephemeral;
453
454 /* The facility to pass to syslog. Let's users establish as what type of
455 * program liblxc is supposed to write to the syslog. */
456 char *syslog;
457
458 /* Whether PR_SET_NO_NEW_PRIVS will be set for the container. */
459 bool no_new_privs;
460
461 /* RLIMIT_* limits */
462 struct lxc_list limits;
463
464 /* Contains generic info about the cgroup configuration for this
465 * container. Note that struct lxc_cgroup contains a union. It is only
466 * valid to access the members of the anonymous "meta" struct within
467 * that union.
468 */
469 struct lxc_cgroup cgroup_meta;
470
471 struct {
472 int ns_clone;
473 int ns_keep;
474 char *ns_share[LXC_NS_MAX];
475 };
476
477 /* init working directory */
478 char *init_cwd;
479
480 /* A list of clients registered to be informed about a container state. */
481 struct lxc_list state_clients;
482
483 /* sysctls */
484 struct lxc_list sysctls;
485
486 /* procs */
487 struct lxc_list procs;
488
489 struct shmount {
490 /* Absolute path to the shared mount point on the host */
491 char *path_host;
492 /* Absolute path (in the container) to the shared mount point */
493 char *path_cont;
494 } shmount;
495
496 struct timens_offsets timens;
497 };
498
499 __hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
500 __access_r(3, 4);
501
502 extern thread_local struct lxc_conf *current_config;
503
504 __hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
505 __hidden extern struct lxc_conf *lxc_conf_init(void);
506 __hidden extern void lxc_conf_free(struct lxc_conf *conf);
507 __hidden extern int lxc_storage_prepare(struct lxc_conf *conf);
508 __hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
509 __hidden extern void lxc_storage_put(struct lxc_conf *conf);
510 __hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
511 __hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
512 __hidden extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid);
513 __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
514 __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
515 __hidden extern int lxc_clear_config_caps(struct lxc_conf *c);
516 __hidden extern int lxc_clear_config_keepcaps(struct lxc_conf *c);
517 __hidden extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version);
518 __hidden extern int lxc_clear_mount_entries(struct lxc_conf *c);
519 __hidden extern int lxc_clear_automounts(struct lxc_conf *c);
520 __hidden extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
521 __hidden extern int lxc_clear_idmaps(struct lxc_conf *c);
522 __hidden extern int lxc_clear_groups(struct lxc_conf *c);
523 __hidden extern int lxc_clear_environment(struct lxc_conf *c);
524 __hidden extern int lxc_clear_limits(struct lxc_conf *c, const char *key);
525 __hidden extern int lxc_delete_autodev(struct lxc_handler *handler);
526 __hidden extern int lxc_clear_autodev_tmpfs_size(struct lxc_conf *c);
527 __hidden extern void lxc_clear_includes(struct lxc_conf *conf);
528 __hidden extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name,
529 const char *lxcpath);
530 __hidden extern int lxc_setup(struct lxc_handler *handler);
531 __hidden extern int lxc_setup_parent(struct lxc_handler *handler);
532 __hidden extern int setup_resource_limits(struct lxc_list *limits, pid_t pid);
533 __hidden extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype);
534 __hidden extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype);
535 __hidden extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data,
536 const char *fn_name);
537 __hidden extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data,
538 const char *fn_name);
539 __hidden extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata);
540 __hidden extern int parse_propagationopts(const char *mntopts, unsigned long *pflags);
541 __hidden extern int parse_lxc_mntopts(struct lxc_mount_options *opts, char *mnt_opts);
542 __hidden extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
543 __hidden extern void suggest_default_idmap(void);
544 __hidden extern FILE *make_anonymous_mount_file(struct lxc_list *mount, bool include_nesting_helpers);
545 __hidden extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings);
546 __hidden extern int run_script(const char *name, const char *section, const char *script, ...);
547 __hidden extern int run_script_argv(const char *name, unsigned int hook_version, const char *section,
548 const char *script, const char *hookname, char **argsin);
549 __hidden extern int in_caplist(int cap, struct lxc_list *caps);
550
551 static inline bool lxc_wants_cap(int cap, struct lxc_conf *conf)
552 {
553 if (lxc_caps_last_cap() < cap)
554 return false;
555
556 if (!lxc_list_empty(&conf->keepcaps))
557 return in_caplist(cap, &conf->keepcaps);
558
559 return !in_caplist(cap, &conf->caps);
560 }
561
562 __hidden extern int setup_sysctl_parameters(struct lxc_list *sysctls);
563 __hidden extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key);
564 __hidden extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid);
565 __hidden extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
566 __hidden extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
567 __hidden extern int lxc_clear_namespace(struct lxc_conf *c);
568 __hidden extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn_parent)(void *),
569 void *fn_parent_data, int (*fn_child)(void *),
570 void *fn_child_data);
571 __hidden extern int userns_exec_mapped_root(const char *path, int path_fd,
572 const struct lxc_conf *conf);
/*
 * Convenience wrapper: calls userns_exec_mapped_root() for @path and @conf,
 * passing -EBADF as the path_fd argument, and returns its result.
 */
static inline int chown_mapped_root(const char *path, const struct lxc_conf *conf)
{
	const int no_path_fd = -EBADF;

	return userns_exec_mapped_root(path, no_path_fd, conf);
}
577
578 __hidden int lxc_setup_devpts_parent(struct lxc_handler *handler);
579
580 static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
581 {
582 static const char *s = "/";
583
584 return !is_empty_string(rootfs->path) ? rootfs->mount : s;
585 }
586
587 static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
588 {
589 mnt_opts->create_dir = 0;
590 mnt_opts->create_file = 0;
591 mnt_opts->optional = 0;
592 mnt_opts->relative = 0;
593 mnt_opts->userns_self = 0;
594 mnt_opts->userns_path[0] = '\0';
595 mnt_opts->mnt_flags = 0;
596 mnt_opts->prop_flags = 0;
597
598 close_prot_errno_disarm(mnt_opts->userns_fd);
599 free_disarm(mnt_opts->data);
600 }
601
602 static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
603 {
604 if (rootfs) {
605 close_prot_errno_disarm(rootfs->dfd_host);
606 close_prot_errno_disarm(rootfs->dfd_mnt);
607 close_prot_errno_disarm(rootfs->dfd_dev);
608 close_prot_errno_disarm(rootfs->mnt_opts.userns_fd);
609 if (unpin)
610 close_prot_errno_disarm(rootfs->fd_path_pin);
611 close_prot_errno_disarm(rootfs->dfd_idmapped);
612 put_lxc_mount_options(&rootfs->mnt_opts);
613 storage_put(rootfs->storage);
614 rootfs->storage = NULL;
615 }
616 }
617
618 static inline void lxc_clear_cgroup2_devices(struct bpf_devices *bpf_devices)
619 {
620 struct lxc_list *list = &bpf_devices->device_item;
621 struct lxc_list *it, *next;
622
623 lxc_list_for_each_safe (it, list, next) {
624 lxc_list_del(it);
625 free(it);
626 }
627
628 lxc_list_init(&bpf_devices->device_item);
629 }
630
631 #endif /* __LXC_CONF_H */