]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.h
Merge pull request #3984 from brauner/2021-09-29.core_scheduling
[mirror_lxc.git] / src / lxc / conf.h
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef __LXC_CONF_H
4 #define __LXC_CONF_H
5
6 #include "config.h"
7
8 #include <linux/magic.h>
9 #include <net/if.h>
10 #include <netinet/in.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <sys/param.h>
14 #include <sys/types.h>
15 #include <sys/vfs.h>
16
17 #include "attach_options.h"
18 #include "caps.h"
19 #include "compiler.h"
20 #include "hlist.h"
21 #include "list.h"
22 #include "lxcseccomp.h"
23 #include "memory_utils.h"
24 #include "namespace.h"
25 #include "ringbuf.h"
26 #include "start.h"
27 #include "state.h"
28 #include "storage/storage.h"
29 #include "string_utils.h"
30 #include "syscall_wrappers.h"
31 #include "terminal.h"
32
33 #if HAVE_SYS_RESOURCE_H
34 #include <sys/resource.h>
35 #endif
36
#if HAVE_SCMP_FILTER_CTX
typedef void * scmp_filter_ctx;
#endif

/* Integer type used to carry a personality value (see lxc_conf.personality). */
typedef signed long personality_t;

/*
 * Locations of the subordinate uid/gid files.
 * TODO: consider moving these to configure.ac.
 */
#define subuidfile "/etc/subuid"
#define subgidfile "/etc/subgid"
46
47 /*
48 * Defines a generic struct to configure the control group. It is up to the
49 * programmer to specify the right subsystem.
50 * @subsystem : the targeted subsystem
51 * @value : the value to set
52 * @version : The version of the cgroup filesystem on which the controller
53 * resides.
54 *
55 * @controllers : The controllers to use for this container.
56 * @dir : The name of the directory containing the container's cgroup.
57 * Not that this is a per-container setting.
58 */
59 struct lxc_cgroup {
60 union {
61 /* information about a specific controller */
62 struct /* controller */ {
63 int version;
64 char *subsystem;
65 char *value;
66 };
67
68 /* meta information about cgroup configuration */
69 struct /* meta */ {
70 char *controllers;
71 char *dir;
72 char *monitor_dir;
73 char *monitor_pivot_dir;
74 char *container_dir;
75 char *namespace_dir;
76 bool relative;
77 };
78 };
79
80 struct list_head head;
81 };
82
83 static void free_lxc_cgroup(struct lxc_cgroup *ptr)
84 {
85 if (ptr) {
86 free(ptr->subsystem);
87 free(ptr->value);
88 free_disarm(ptr);
89 }
90 }
91 define_cleanup_function(struct lxc_cgroup *, free_lxc_cgroup);
92
93 #if !HAVE_SYS_RESOURCE_H
94 #define RLIM_INFINITY ((unsigned long)-1)
95 struct rlimit {
96 unsigned long rlim_cur;
97 unsigned long rlim_max;
98 struct list_head head;
99 };
100 #endif
101
102 /*
103 * Defines a structure to configure resource limits to set via setrlimit().
104 * @resource : the resource name in lowercase without the RLIMIT_ prefix
105 * @limit : the limit to set
106 */
107 struct lxc_limit {
108 char *resource;
109 struct rlimit limit;
110 struct list_head head;
111 };
112
113 static void free_lxc_limit(struct lxc_limit *ptr)
114 {
115 if (ptr) {
116 free_disarm(ptr->resource);
117 free_disarm(ptr);
118 }
119 }
120 define_cleanup_function(struct lxc_limit *, free_lxc_limit);
121
/* Distinguishes uid entries from gid entries (see struct id_map). */
enum idtype {
	ID_TYPE_UID,
	ID_TYPE_GID
};
126
127 /*
128 * Defines a structure to configure kernel parameters at runtime.
129 * @key : the kernel parameters will be configured without the "lxc.sysctl" prefix
130 * @value : the value to set
131 */
132 struct lxc_sysctl {
133 char *key;
134 char *value;
135 struct list_head head;
136 };
137
138 static void free_lxc_sysctl(struct lxc_sysctl *ptr)
139 {
140 if (ptr) {
141 free(ptr->key);
142 free(ptr->value);
143 free_disarm(ptr);
144 }
145 }
146 define_cleanup_function(struct lxc_sysctl *, free_lxc_sysctl);
147
148 /*
149 * Defines a structure to configure proc filesystem at runtime.
150 * @filename : the proc filesystem will be configured without the "lxc.proc" prefix
151 * @value : the value to set
152 */
153 struct lxc_proc {
154 char *filename;
155 char *value;
156 struct list_head head;
157 };
158
159 static void free_lxc_proc(struct lxc_proc *ptr)
160 {
161 if (ptr) {
162 free(ptr->filename);
163 free(ptr->value);
164 free_disarm(ptr);
165 }
166 }
167 define_cleanup_function(struct lxc_proc *, free_lxc_proc);
168
169 /*
170 * id_map is an id map entry. Form in confile is:
171 * lxc.idmap = u 0 9800 100
172 * lxc.idmap = u 1000 9900 100
173 * lxc.idmap = g 0 9800 100
174 * lxc.idmap = g 1000 9900 100
175 * meaning the container can use uids and gids 0-99 and 1000-1099,
176 * with [ug]id 0 mapping to [ug]id 9800 on the host, and [ug]id 1000 to
177 * [ug]id 9900 on the host.
178 */
179 struct id_map {
180 enum idtype idtype;
181 unsigned long hostid, nsid, range;
182 struct list_head head;
183 };
184
/*
 * Holds the number of configured ttys and the instantiated ptys.
 * @max : number of configured ttys
 */
struct lxc_tty_info {
	size_t max;
	char *dir;
	char *tty_names;
	struct lxc_terminal_info *tty;
};
195
/*
 * Indices of the LXC-specific mount options. LXC_MOUNT_MAX is the number of
 * options and sizes the lxc_mount_options_info array.
 */
typedef enum lxc_mount_options_t {
	LXC_MOUNT_CREATE_DIR	= 0,
	LXC_MOUNT_CREATE_FILE	= 1,
	LXC_MOUNT_OPTIONAL	= 2,
	LXC_MOUNT_RELATIVE	= 3,
	LXC_MOUNT_IDMAP		= 4,
	LXC_MOUNT_MAX		= 5,
} lxc_mount_options_t;
204
205 __hidden extern const char *lxc_mount_options_info[LXC_MOUNT_MAX];
206
207 struct lxc_mount_options {
208 unsigned int create_dir : 1;
209 unsigned int create_file : 1;
210 unsigned int optional : 1;
211 unsigned int relative : 1;
212 unsigned int bind_recursively : 1;
213 unsigned int propagate_recursively : 1;
214 unsigned int bind : 1;
215 char userns_path[PATH_MAX];
216 unsigned long mnt_flags;
217 unsigned long prop_flags;
218 char *data;
219 struct lxc_mount_attr attr;
220 char *raw_options;
221 };
222
223 /* Defines a structure to store the rootfs location, the
224 * optionals pivot_root, rootfs mount paths
225 * @path : the rootfs source (directory or device)
226 * @mount : where it is mounted
227 * @buf : static buffer to construct paths
228 * @bev_type : optional backing store type
229 * @managed : whether it is managed by LXC
230 * @dfd_mnt : fd for @mount
231 * @dfd_dev : fd for /dev of the container
232 */
233 struct lxc_rootfs {
234 int dfd_host;
235
236 char *path;
237 int fd_path_pin;
238 int dfd_idmapped;
239
240 int dfd_mnt;
241 char *mount;
242
243 int dfd_dev;
244
245 char buf[PATH_MAX];
246 char *bdev_type;
247 bool managed;
248 struct lxc_mount_options mnt_opts;
249 struct lxc_storage *storage;
250 };
251
/*
 * Automatic mounts for LXC to perform inside the container.
 *
 * The values are bit patterns grouped by target (/proc, /sys, cgroups,
 * shared mounts); each group has a matching *_MASK constant.
 */
enum {
	/* /proc */
	LXC_AUTO_PROC_RW		= 0x001, /* /proc read-write */
	LXC_AUTO_PROC_MIXED		= 0x002, /* /proc/sys and /proc/sysrq-trigger read-only */
	LXC_AUTO_PROC_MASK		= 0x003,

	/* /sys */
	LXC_AUTO_SYS_RW			= 0x004, /* /sys */
	LXC_AUTO_SYS_RO			= 0x008, /* /sys read-only */
	LXC_AUTO_SYS_MIXED		= 0x00C, /* /sys read-only and /sys/class/net read-write */
	LXC_AUTO_SYS_MASK		= 0x00C,

	/* /sys/fs/cgroup */
	LXC_AUTO_CGROUP_RO		= 0x010, /* /sys/fs/cgroup (partial mount, read-only) */
	LXC_AUTO_CGROUP_RW		= 0x020, /* /sys/fs/cgroup (partial mount, read-write) */
	LXC_AUTO_CGROUP_MIXED		= 0x030, /* /sys/fs/cgroup (partial mount, paths r/o, cgroup r/w) */
	LXC_AUTO_CGROUP_FULL_RO		= 0x040, /* /sys/fs/cgroup (full mount, read-only) */
	LXC_AUTO_CGROUP_FULL_RW		= 0x050, /* /sys/fs/cgroup (full mount, read-write) */
	LXC_AUTO_CGROUP_FULL_MIXED	= 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
	/*
	 * The two NOSPEC values are chosen to retain binary compatibility
	 * with earlier versions of this code. If the previous mask is
	 * applied, both of them fall back to the _MIXED variants, which is
	 * safe.
	 */
	LXC_AUTO_CGROUP_NOSPEC		= 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
	LXC_AUTO_CGROUP_FULL_NOSPEC	= 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
	LXC_AUTO_CGROUP_FORCE		= 0x100, /* mount cgroups even when cgroup namespaces are supported */
	LXC_AUTO_CGROUP_MASK		= 0x1F0, /* all known cgroup options */

	/* shared mount point */
	LXC_AUTO_SHMOUNTS		= 0x200, /* shared mount point */
	LXC_AUTO_SHMOUNTS_MASK		= 0x200, /* shared mount point mask */

	/*
	 * NOTE(review): 0x1FF does not cover LXC_AUTO_SHMOUNTS (0x200);
	 * confirm that excluding it from "all known settings" is intended.
	 */
	LXC_AUTO_ALL_MASK		= 0x1FF, /* all known settings */
};
286
/*
 * Hook identifiers. NUM_LXC_HOOKS is the number of hook kinds and sizes the
 * per-hook arrays (lxchook_names, lxc_conf.hooks).
 */
enum lxchooks {
	LXCHOOK_PRESTART,
	LXCHOOK_PREMOUNT,
	LXCHOOK_MOUNT,
	LXCHOOK_AUTODEV,
	LXCHOOK_START,
	LXCHOOK_STOP,
	LXCHOOK_POSTSTOP,
	LXCHOOK_CLONE,
	LXCHOOK_DESTROY,
	LXCHOOK_START_HOST,
	NUM_LXC_HOOKS
};
300
301 __hidden extern char *lxchook_names[NUM_LXC_HOOKS];
302
303 struct lxc_state_client {
304 int clientfd;
305 lxc_state_t states[MAX_STATE];
306 struct list_head head;
307 };
308
/* Kind of device list kept in struct bpf_devices. */
typedef enum lxc_bpf_devices_rule_t {
	LXC_BPF_DEVICE_CGROUP_ALLOWLIST	= 0,
	LXC_BPF_DEVICE_CGROUP_DENYLIST	= 1,
} lxc_bpf_devices_rule_t;
313
314 struct device_item {
315 char type;
316 int major;
317 int minor;
318 char access[4];
319 int allow;
320 struct list_head head;
321 };
322
323 struct bpf_devices {
324 lxc_bpf_devices_rule_t list_type;
325 struct list_head devices;
326 };
327
/* Boot and monotonic clock offsets, each available in two units. */
struct timens_offsets {
	/* Currently, either s_boot or ns_boot is set, but not both. */
	int64_t s_boot;
	int64_t ns_boot;

	/* Currently, either s_monotonic or ns_monotonic is set, but not both. */
	int64_t s_monotonic;
	int64_t ns_monotonic;
};
337
338 struct environment_entry {
339 char *key;
340 char *val;
341 struct list_head head;
342 };
343
344 struct cap_entry {
345 char *cap_name;
346 int cap;
347 struct list_head head;
348 };
349
350 struct caps {
351 int keep;
352 struct list_head list;
353 };
354
355 struct string_entry {
356 char *val;
357 struct list_head head;
358 };
359
360 struct lxc_conf {
361 /* Pointer to the name of the container. Do not free! */
362 const char *name;
363 bool is_execute;
364 int reboot;
365 personality_t personality;
366 struct utsname *utsname;
367
368 struct {
369 struct list_head cgroup;
370 struct list_head cgroup2;
371 struct bpf_devices bpf_devices;
372 };
373
374 struct {
375 struct list_head id_map;
376
377 /*
378 * Pointer to the idmap entry for the container's root uid in
379 * the id_map list. Do not free!
380 */
381 const struct id_map *root_nsuid_map;
382
383 /*
384 * Pointer to the idmap entry for the container's root gid in
385 * the id_map list. Do not free!
386 */
387 const struct id_map *root_nsgid_map;
388 };
389
390 struct list_head netdevs;
391
392 struct {
393 char *fstab;
394 int auto_mounts;
395 struct list_head mount_entries;
396 };
397
398 struct caps caps;
399
400 /* /dev/tty<idx> devices */
401 struct lxc_tty_info ttys;
402 /* /dev/console device */
403 struct lxc_terminal console;
404 /* maximum pty devices allowed by devpts mount */
405 size_t pty_max;
406 /* file descriptor for the container's /dev/pts mount */
407 int devpts_fd;
408
409 /* set to true when rootfs has been setup */
410 bool rootfs_setup;
411 struct lxc_rootfs rootfs;
412
413 bool close_all_fds;
414
415 struct {
416 unsigned int hooks_version;
417 struct list_head hooks[NUM_LXC_HOOKS];
418 };
419
420 char *lsm_aa_profile;
421 char *lsm_aa_profile_computed;
422 bool lsm_aa_profile_created;
423 unsigned int lsm_aa_allow_nesting;
424 unsigned int lsm_aa_allow_incomplete;
425 struct list_head lsm_aa_raw;
426 char *lsm_se_context;
427 char *lsm_se_keyring_context;
428 bool keyring_disable_session;
429 bool transient_procfs_mnt;
430 struct lxc_seccomp seccomp;
431 int maincmd_fd;
432 unsigned int autodev; /* if 1, mount and fill a /dev at start */
433 int autodevtmpfssize; /* size of the /dev tmpfs */
434 int haltsignal; /* signal used to halt container */
435 int rebootsignal; /* signal used to reboot container */
436 int stopsignal; /* signal used to hard stop container */
437 char *rcfile; /* Copy of the top level rcfile we read */
438
439 /* Logfile and loglevel can be set in a container config file. Those
440 * function as defaults. The defaults can be overridden by command line.
441 * However we don't want the command line specified values to be saved
442 * on c->save_config(). So we store the config file specified values
443 * here. */
444 char *logfile; /* the logfile as specified in config */
445 int loglevel; /* loglevel as specified in config (if any) */
446 int logfd;
447
448 unsigned int start_auto;
449 unsigned int start_delay;
450 int start_order;
451 struct list_head groups;
452 int nbd_idx;
453
454 /* unshare the mount namespace in the monitor */
455 unsigned int monitor_unshare;
456 unsigned int monitor_signal_pdeath;
457
458 /* list of environment variables we'll add to the container when
459 * started */
460 struct list_head environment;
461
462 /* text representation of the config file */
463 char *unexpanded_config;
464 size_t unexpanded_len;
465 size_t unexpanded_alloced;
466
467 /* default command for lxc-execute */
468 char *execute_cmd;
469
470 /* init command */
471 char *init_cmd;
472
473 /* The uid to use for the container. */
474 uid_t init_uid;
475 /* The gid to use for the container. */
476 gid_t init_gid;
477 /* The groups to use for the container. */
478 lxc_groups_t init_groups;
479
480 /* indicator if the container will be destroyed on shutdown */
481 unsigned int ephemeral;
482
483 /* The facility to pass to syslog. Let's users establish as what type of
484 * program liblxc is supposed to write to the syslog. */
485 char *syslog;
486
487 /* Whether PR_SET_NO_NEW_PRIVS will be set for the container. */
488 bool no_new_privs;
489
490 /* RLIMIT_* limits */
491 struct list_head limits;
492
493 /* Contains generic info about the cgroup configuration for this
494 * container. Note that struct lxc_cgroup contains a union. It is only
495 * valid to access the members of the anonymous "meta" struct within
496 * that union.
497 */
498 struct lxc_cgroup cgroup_meta;
499
500 struct {
501 int ns_clone;
502 int ns_keep;
503 char *ns_share[LXC_NS_MAX];
504 };
505
506 /* init working directory */
507 char *init_cwd;
508
509 /* A list of clients registered to be informed about a container state. */
510 struct list_head state_clients;
511
512 /* sysctls */
513 struct list_head sysctls;
514
515 /* procs */
516 struct list_head procs;
517
518 struct shmount {
519 /* Absolute path to the shared mount point on the host */
520 char *path_host;
521 /* Absolute path (in the container) to the shared mount point */
522 char *path_cont;
523 } shmount;
524
525 struct timens_offsets timens;
526
527 bool sched_core;
528 __u64 sched_core_cookie;
529 };
530
531 __hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
532 __access_r(3, 4);
533
534 extern thread_local struct lxc_conf *current_config;
535
536 __hidden extern int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, char *argv[]);
537 __hidden extern struct lxc_conf *lxc_conf_init(void);
538 __hidden extern void lxc_conf_free(struct lxc_conf *conf);
539 __hidden extern int lxc_storage_prepare(struct lxc_conf *conf);
540 __hidden extern int lxc_rootfs_prepare(struct lxc_conf *conf, bool userns);
541 __hidden extern void lxc_storage_put(struct lxc_conf *conf);
542 __hidden extern int lxc_rootfs_init(struct lxc_conf *conf, bool userns);
543 __hidden extern int lxc_rootfs_prepare_parent(struct lxc_handler *handler);
544 __hidden extern int lxc_idmapped_mounts_parent(struct lxc_handler *handler);
545 __hidden extern int lxc_map_ids(struct list_head *idmap, pid_t pid);
546 __hidden extern int lxc_create_tty(const char *name, struct lxc_conf *conf);
547 __hidden extern void lxc_delete_tty(struct lxc_tty_info *ttys);
548 __hidden extern int lxc_clear_config_caps(struct lxc_conf *c);
549 __hidden extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version);
550 __hidden extern int lxc_clear_mount_entries(struct lxc_conf *c);
551 __hidden extern int lxc_clear_automounts(struct lxc_conf *c);
552 __hidden extern int lxc_clear_hooks(struct lxc_conf *c, const char *key);
553 __hidden extern int lxc_clear_idmaps(struct lxc_conf *c);
554 __hidden extern int lxc_clear_groups(struct lxc_conf *c);
555 __hidden extern int lxc_clear_environment(struct lxc_conf *c);
556 __hidden extern int lxc_clear_limits(struct lxc_conf *c, const char *key);
557 __hidden extern int lxc_delete_autodev(struct lxc_handler *handler);
558 __hidden extern int lxc_clear_autodev_tmpfs_size(struct lxc_conf *c);
559 __hidden extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name,
560 const char *lxcpath);
561 __hidden extern int lxc_setup(struct lxc_handler *handler);
562 __hidden extern int lxc_setup_parent(struct lxc_handler *handler);
563 __hidden extern int setup_resource_limits(struct lxc_conf *conf, pid_t pid);
564 __hidden extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype);
565 __hidden extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype);
566 __hidden extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data,
567 const char *fn_name);
568 __hidden extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data,
569 const char *fn_name);
570 __hidden extern int parse_mntopts_legacy(const char *mntopts, unsigned long *mntflags, char **mntdata);
571 __hidden extern int parse_propagationopts(const char *mntopts, unsigned long *pflags);
572 __hidden extern int parse_lxc_mount_attrs(struct lxc_mount_options *opts, char *mnt_opts);
573 __hidden extern int parse_mount_attrs(struct lxc_mount_options *opts, const char *mntopts);
574 __hidden extern void tmp_proc_unmount(struct lxc_conf *lxc_conf);
575 __hidden extern void suggest_default_idmap(void);
576 __hidden extern FILE *make_anonymous_mount_file(const struct list_head *mount,
577 bool include_nesting_helpers);
578 __hidden extern int run_script(const char *name, const char *section, const char *script, ...);
579 __hidden extern int run_script_argv(const char *name, unsigned int hook_version, const char *section,
580 const char *script, const char *hookname, char **argsin);
581
582 __hidden extern bool has_cap(int cap, struct lxc_conf *conf);
583 static inline bool lxc_wants_cap(int cap, struct lxc_conf *conf)
584 {
585 if (lxc_caps_last_cap() < cap)
586 return false;
587
588 return has_cap(cap, conf);
589 }
590
591 __hidden extern int setup_sysctl_parameters(struct lxc_conf *conf);
592 __hidden extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key);
593 __hidden extern int setup_proc_filesystem(struct lxc_conf *conf, pid_t pid);
594 __hidden extern int lxc_clear_procs(struct lxc_conf *c, const char *key);
595 __hidden extern int lxc_clear_apparmor_raw(struct lxc_conf *c);
596 __hidden extern int lxc_clear_namespace(struct lxc_conf *c);
597 __hidden extern int userns_exec_minimal(const struct lxc_conf *conf, int (*fn_parent)(void *),
598 void *fn_parent_data, int (*fn_child)(void *),
599 void *fn_child_data);
600 __hidden extern int userns_exec_mapped_root(const char *path, int path_fd,
601 const struct lxc_conf *conf);
/*
 * Convenience wrapper around userns_exec_mapped_root() for callers that only
 * have a path: -EBADF is passed as the file descriptor argument.
 */
static inline int chown_mapped_root(const char *path, const struct lxc_conf *conf)
{
	const int no_path_fd = -EBADF;

	return userns_exec_mapped_root(path, no_path_fd, conf);
}
606
607 __hidden extern int lxc_sync_fds_parent(struct lxc_handler *handler);
608 __hidden extern int lxc_sync_fds_child(struct lxc_handler *handler);
609
610 static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
611 {
612 static const char *s = "/";
613
614 return !is_empty_string(rootfs->path) ? rootfs->mount : s;
615 }
616
617 static inline void put_lxc_mount_options(struct lxc_mount_options *mnt_opts)
618 {
619 mnt_opts->create_dir = 0;
620 mnt_opts->create_file = 0;
621 mnt_opts->optional = 0;
622 mnt_opts->relative = 0;
623 mnt_opts->userns_path[0] = '\0';
624 mnt_opts->mnt_flags = 0;
625 mnt_opts->prop_flags = 0;
626
627 free_disarm(mnt_opts->data);
628 free_disarm(mnt_opts->raw_options);
629 }
630
631 static inline void put_lxc_rootfs(struct lxc_rootfs *rootfs, bool unpin)
632 {
633 if (rootfs) {
634 close_prot_errno_disarm(rootfs->dfd_host);
635 close_prot_errno_disarm(rootfs->dfd_mnt);
636 close_prot_errno_disarm(rootfs->dfd_dev);
637 if (unpin)
638 close_prot_errno_disarm(rootfs->fd_path_pin);
639 close_prot_errno_disarm(rootfs->dfd_idmapped);
640 put_lxc_mount_options(&rootfs->mnt_opts);
641 storage_put(rootfs->storage);
642 rootfs->storage = NULL;
643 }
644 }
645
646 static inline void lxc_clear_cgroup2_devices(struct bpf_devices *bpf_devices)
647 {
648 struct device_item *device, *n;
649
650 list_for_each_entry_safe(device, n, &bpf_devices->devices, head)
651 list_del(&device->head);
652
653 INIT_LIST_HEAD(&bpf_devices->devices);
654 }
655
656 static inline int lxc_personality(personality_t persona)
657 {
658 if (persona < 0)
659 return ret_errno(EINVAL);
660
661 return personality(persona);
662 }
663
664 __hidden extern int lxc_set_environment(const struct lxc_conf *conf);
665 __hidden extern int parse_cap(const char *cap);
666
667 #endif /* __LXC_CONF_H */