From 834bb4d7051b18899e3c7d0697021849679bca44 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Wed, 2 Jan 2019 09:40:29 +0100 Subject: [PATCH] update to lxc-3.1.0 The default cgroup pattern was switched from lxc/%n to lxc.payload/%n, so add a ./configure option to revert this change as PVE expects containers in lxc/%n. Signed-off-by: Wolfgang Bumiller --- Makefile | 4 +- debian/changelog | 6 + ...onfile-add-lxc.monitor.signal.pdeath.patch | 170 -- ...-tests-add-lxc.monitor.signal.pdeath.patch | 31 - ...c.monitor.signal.pdeath-into-Japanes.patch | 45 - .../0004-apparmor-profile-generation.patch | 1605 ----------------- ...test-for-generated-apparmor-profiles.patch | 126 -- ...fix-path-lxcpath-mixups-in-tty-setup.patch | 43 - ...r-allow-various-remount-bind-options.patch | 99 - ...lxcnetaddbr-when-instantiating-veths.patch | 4 +- ...fig-deny-rw-mounting-of-sys-and-proc.patch | 4 +- ...the-limiting-from-the-namespaced-cgr.patch | 288 +-- ...tutils-make-cgroupns-separation-leve.patch | 18 +- ...0006-PVE-Config-namespace-separation.patch | 6 +- ...ty-to-run-lxc-monitord-as-a-regular-.patch | 30 +- ...VE-Config-Disable-lxc.monitor-cgroup.patch | 46 + ...Deprecated-Make-lxc-.service-forking.patch | 42 - debian/patches/series | 9 +- debian/rules | 3 +- lxc | 2 +- 20 files changed, 242 insertions(+), 2339 deletions(-) delete mode 100644 debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch delete mode 100644 debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch delete mode 100644 debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch delete mode 100644 debian/patches/extra/0004-apparmor-profile-generation.patch delete mode 100644 debian/patches/extra/0005-tests-add-test-for-generated-apparmor-profiles.patch delete mode 100644 debian/patches/extra/0006-conf-fix-path-lxcpath-mixups-in-tty-setup.patch delete mode 100644 debian/patches/extra/0007-apparmor-allow-various-remount-bind-options.patch create mode 100644 debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch delete mode 100644 debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch diff --git a/Makefile b/Makefile index 597e0b8..365479b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ PACKAGE=lxc-pve -LXCVER=3.0.2+pve1 -DEBREL=5 +LXCVER=3.1.0 +DEBREL=1 SRCDIR=lxc BUILDSRC := $(PACKAGE)-$(LXCVER) diff --git a/debian/changelog b/debian/changelog index a4793d5..362156c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +lxc-pve (3.1.0-1) stable; urgency=medium + + * update to lxc-3.1.0 + + -- Proxmox Support Team Wed, 02 Jan 2019 09:40:59 +0100 + lxc-pve (3.0.2+pve1-5) stable; urgency=medium * packaging fixup diff --git a/debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch b/debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch deleted file mode 100644 index 8a6c407..0000000 --- a/debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 16 Jul 2018 11:07:58 +0200 -Subject: [PATCH] confile: add lxc.monitor.signal.pdeath - -Set the signal to be sent to the container's init when the lxc monitor exits. -By default it is set to SIGKILL which will cause all container processes to be -killed when the lxc monitor process dies. -To ensure that containers stay alive even if lxc monitor dies set this to 0. - -Signed-off-by: Christian Brauner -(cherry picked from commit 258f80519f3bb0a41c26083020154e9a61df8468) ---- - doc/lxc.container.conf.sgml.in | 15 +++++++++++++++ - src/lxc/conf.c | 1 + - src/lxc/conf.h | 1 + - src/lxc/confile.c | 38 ++++++++++++++++++++++++++++++++++++++ - src/lxc/start.c | 9 +++++++++ - 5 files changed, 64 insertions(+) - -diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in -index 8caee9ee..2d85ab7f 100644 ---- a/doc/lxc.container.conf.sgml.in -+++ b/doc/lxc.container.conf.sgml.in -@@ -2382,6 +2382,21 @@ dev/null proc/kcore none bind,relative 0 0 - - - -+ -+ -+ -+ -+ Set the signal to be sent to the container's init when the lxc -+ monitor exits. By default it is set to SIGKILL which will cause -+ all container processes to be killed when the lxc monitor process -+ dies. -+ To ensure that containers stay alive even if lxc monitor dies set -+ this to 0. -+ -+ -+ -+ -+ - - - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 85ae6cd2..4dd063e6 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -2707,6 +2707,7 @@ struct lxc_conf *lxc_conf_init(void) - new->console.name[0] = '\0'; - memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf)); - new->maincmd_fd = -1; -+ new->monitor_signal_pdeath = SIGKILL; - new->nbd_idx = -1; - new->rootfs.mount = strdup(default_rootfs_mount); - if (!new->rootfs.mount) { -diff --git a/src/lxc/conf.h b/src/lxc/conf.h -index d87b3347..1c029fea 100644 ---- a/src/lxc/conf.h -+++ b/src/lxc/conf.h -@@ -307,6 +307,7 @@ struct lxc_conf { - - /* unshare the mount namespace in the monitor */ - unsigned int monitor_unshare; -+ unsigned int monitor_signal_pdeath; - - /* list of included files */ - struct lxc_list includes; -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index f4a01b04..3bc86790 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -111,6 +111,7 @@ lxc_config_define(log_file); - lxc_config_define(log_level); - lxc_config_define(log_syslog); - lxc_config_define(monitor); -+lxc_config_define(monitor_signal_pdeath); - lxc_config_define(mount); - lxc_config_define(mount_auto); - lxc_config_define(mount_fstab); -@@ -194,6 +195,7 @@ static struct lxc_config_t config[] = { - { "lxc.log.level", set_config_log_level, get_config_log_level, clr_config_log_level, }, - { "lxc.log.syslog", set_config_log_syslog, get_config_log_syslog, clr_config_log_syslog, }, - { "lxc.monitor.unshare", set_config_monitor, get_config_monitor, clr_config_monitor, }, -+ { "lxc.monitor.signal.pdeath", set_config_monitor_signal_pdeath, get_config_monitor_signal_pdeath, clr_config_monitor_signal_pdeath, }, - { "lxc.mount.auto", set_config_mount_auto, get_config_mount_auto, clr_config_mount_auto, }, - { "lxc.mount.entry", set_config_mount, get_config_mount, clr_config_mount, }, - { "lxc.mount.fstab", set_config_mount_fstab, get_config_mount_fstab, clr_config_mount_fstab, }, -@@ -976,6 +978,28 @@ static int set_config_monitor(const char *key, const char *value, - return -1; - } - -+static int set_config_monitor_signal_pdeath(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ if (lxc_config_value_empty(value)) { -+ lxc_conf->monitor_signal_pdeath = 0; -+ return 0; -+ } -+ -+ if (strcmp(key + 12, "signal.pdeath") == 0) { -+ int sig_n; -+ -+ sig_n = sig_parse(value); -+ if (sig_n < 0) -+ return -1; -+ -+ lxc_conf->monitor_signal_pdeath = sig_n; -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ - static int set_config_group(const char *key, const char *value, - struct lxc_conf *lxc_conf, void *data) - { -@@ -3413,6 +3437,13 @@ static int get_config_monitor(const char *key, char *retv, int inlen, - return lxc_get_conf_int(c, retv, inlen, c->monitor_unshare); - } - -+static int get_config_monitor_signal_pdeath(const char *key, char *retv, -+ int inlen, struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_get_conf_int(c, retv, inlen, c->monitor_signal_pdeath); -+} -+ - static int get_config_group(const char *key, char *retv, int inlen, - struct lxc_conf *c, void *data) - { -@@ -3968,6 +3999,13 @@ static inline int clr_config_monitor(const char *key, struct lxc_conf *c, - return 0; - } - -+static inline int clr_config_monitor_signal_pdeath(const char *key, -+ struct lxc_conf *c, void *data) -+{ -+ c->monitor_signal_pdeath = 0; -+ return 0; -+} -+ - static inline int clr_config_group(const char *key, struct lxc_conf *c, - void *data) - { -diff --git a/src/lxc/start.c b/src/lxc/start.c -index 827a9ee9..f67e3f66 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -1385,6 +1385,15 @@ static int do_start(void *data) - goto out_warn_father; - } - -+ if (handler->conf->monitor_signal_pdeath != SIGKILL) { -+ ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath); -+ if (ret < 0) { -+ SYSERROR("Failed to set PR_SET_PDEATHSIG to %d", -+ handler->conf->monitor_signal_pdeath); -+ goto out_warn_father; -+ } -+ } -+ - /* After this call, we are in error because this ops should not return - * as it execs. - */ --- -2.11.0 - diff --git a/debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch b/debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch deleted file mode 100644 index d49070a..0000000 --- a/debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 16 Jul 2018 15:22:13 +0200 -Subject: [PATCH] tests: add lxc.monitor.signal.pdeath - -Signed-off-by: Christian Brauner -(cherry picked from commit e53cd6d899bd50e07c8bb760371bfa2d5d4c56ef) ---- - src/tests/parse_config_file.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c -index 73b8fc3c..b6034ef2 100644 ---- a/src/tests/parse_config_file.c -+++ b/src/tests/parse_config_file.c -@@ -911,6 +911,12 @@ int main(int argc, char *argv[]) - goto non_test_error; - } - -+ ret = set_get_compare_clear_save_load(c, "lxc.monitor.signal.pdeath", "SIGKILL", tmpf, true); -+ if (ret == 0) { -+ lxc_error("%s\n", "lxc.hook.version"); -+ goto non_test_error; -+ } -+ - fret = EXIT_SUCCESS; - - non_test_error: --- -2.11.0 - diff --git a/debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch b/debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch deleted file mode 100644 index 68adff6..0000000 --- a/debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: KATOH Yasufumi -Date: Tue, 17 Jul 2018 01:14:06 +0900 -Subject: [PATCH] doc: Translate lxc.monitor.signal.pdeath into Japanese in - lxc.container.conf(5) - -Signed-off-by: KATOH Yasufumi -(cherry picked from commit fd5de0292195a2a6ba9dc6c3727de202b015c02c) ---- - doc/ja/lxc.container.conf.sgml.in | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/doc/ja/lxc.container.conf.sgml.in b/doc/ja/lxc.container.conf.sgml.in -index 8834cc64..e35005c6 100644 ---- a/doc/ja/lxc.container.conf.sgml.in -+++ b/doc/ja/lxc.container.conf.sgml.in -@@ -3161,6 +3161,25 @@ by KATOH Yasufumi - - - -+ -+ -+ -+ -+ -+ lxc のモニタプロセスが終了した際に、コンテナの init プロセスに送出するシグナルを指定します。デフォルトでは、lxc のモニタプロセスが終了した場合には、すべてのコンテナ内のプロセスが停止するように SIGKILL が設定されています。 -+ lxc のモニタプロセスが終了しても、コンテナがすべて確実に動作しつづけるようにするには、この値を 0 に設定します。 -+ -+ -+ -+ -+ - - - --- -2.11.0 - diff --git a/debian/patches/extra/0004-apparmor-profile-generation.patch b/debian/patches/extra/0004-apparmor-profile-generation.patch deleted file mode 100644 index 8f833dc..0000000 --- a/debian/patches/extra/0004-apparmor-profile-generation.patch +++ /dev/null @@ -1,1605 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Wed, 25 Jul 2018 12:11:31 +0200 -Subject: [PATCH] apparmor: profile generation - -This copies lxd's apparmor profile generation. This tries to -detect features such as cgroup namespaces, apparmor -namespaces and stacking support, and has profile parts -conditionally for unprivileged containers. - -This introduces the following changes to the configuration: - lxc.apparmor.profile = generated - The fixed value 'generated' will cause this - functionality to be used, otherwise there should be no - functional changes happening unless specifically - requested with the next key: - lxc.apparmor.allow_nesting - This is a boolean which, if enabled, causes the - following changes: When generated apparmor profiles are - used, they will contain the necessary changes to allow - creating a nested container. In addition to the usual - mount points, /dev/.lxc/proc and /dev/.lxc/sys will - contain procfs and sysfs mount points without the lxcfs - overlays, which, if generated apparmor profiles are - being used, will not be read/writable directly. - lxc.apparmor.raw - A list of raw apparmor profile lines to append to the - profile. Only valid when using generated profiles. - -The following apparmor profile lines have not been copied -from lxd: - - mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/, - mount none -> /var/lib/lxd/shmounts/, - mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**, - -They should be added via lxc.apparmor.raw entries by lxd. - -In order for apparmor_parser's cache to be of use, this adds -a --with-apparmor-cache-dir ./configure option. - -Signed-off-by: Wolfgang Bumiller -(cherry picked from commit 1800f9247357948fd11c9da73b1943a8a7b6882b) ---- - configure.ac | 8 + - src/lxc/Makefile.am | 1 + - src/lxc/conf.c | 43 ++- - src/lxc/conf.h | 8 +- - src/lxc/confile.c | 95 +++++ - src/lxc/criu.c | 3 +- - src/lxc/lsm/apparmor.c | 974 ++++++++++++++++++++++++++++++++++++++++++++++--- - src/lxc/lsm/lsm.c | 30 +- - src/lxc/lsm/lsm.h | 8 +- - src/lxc/lsm/nop.c | 2 +- - src/lxc/lsm/selinux.c | 4 +- - src/lxc/start.c | 14 +- - 12 files changed, 1134 insertions(+), 56 deletions(-) - -diff --git a/configure.ac b/configure.ac -index bcf7ab64..71fe63b0 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -469,6 +469,13 @@ AC_ARG_WITH([cgroup-pattern], - [pattern for container cgroups] - )], [], [with_cgroup_pattern=['lxc/%n']]) - -+# The path for the apparmor_parser's cache for generated apparmor profiles -+AC_ARG_WITH([apparmor-cache-dir], -+ [AC_HELP_STRING( -+ [--with-apparmor-cache-dir=dir], -+ [path for apparmor_parser cache] -+ )], [], [with_apparmor_cache_dir=['${localstatedir}/cache/lxc/apparmor']]) -+ - # Container log path. By default, use $lxcpath. - AC_MSG_CHECKING([Whether to place logfiles in container config path]) - AC_ARG_ENABLE([configpath-log], -@@ -515,6 +522,7 @@ AS_AC_EXPAND(LXCBINHOOKDIR, "$libexecdir/lxc/hooks") - AS_AC_EXPAND(LXCINITDIR, "$libexecdir") - AS_AC_EXPAND(LOGPATH, "$with_log_path") - AS_AC_EXPAND(RUNTIME_PATH, "$with_runtime_path") -+AS_AC_EXPAND(APPARMOR_CACHE_DIR, "$with_apparmor_cache_dir") - AC_SUBST(DEFAULT_CGROUP_PATTERN, ["$with_cgroup_pattern"]) - - # We need the install path so criu knows where to reference the hook scripts. -diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am -index c3f11176..14db7cb4 100644 ---- a/src/lxc/Makefile.am -+++ b/src/lxc/Makefile.am -@@ -176,6 +176,7 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ - -DDEFAULT_CGROUP_PATTERN=\"$(DEFAULT_CGROUP_PATTERN)\" \ - -DRUNTIME_PATH=\"$(RUNTIME_PATH)\" \ - -DSBINDIR=\"$(SBINDIR)\" \ -+ -DAPPARMOR_CACHE_DIR=\"$(APPARMOR_CACHE_DIR)\" \ - -I $(top_srcdir)/src \ - -I $(top_srcdir)/src/lxc \ - -I $(top_srcdir)/src/lxc/storage \ -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 4dd063e6..8c9dce36 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -2356,7 +2356,23 @@ static int setup_mount(const struct lxc_conf *conf, - return ret; - } - --FILE *make_anonymous_mount_file(struct lxc_list *mount) -+/* -+ * In order for nested containers to be able to mount /proc and /sys they need -+ * to see a "pure" proc and sysfs mount points with nothing mounted on top -+ * (like lxcfs). -+ * For this we provide proc and sysfs in /dev/.lxc/{proc,sys} while using an -+ * apparmor rule to deny access to them. This is mostly for convenience: The -+ * container's root user can mount them anyway and thus has access to the two -+ * file systems. But a non-root user in the container should not be allowed to -+ * access them as a side effect without explicitly allowing it. -+ */ -+static const char nesting_helpers[] = -+"proc dev/.lxc/proc proc create=dir,optional\n" -+"sys dev/.lxc/sys sysfs create=dir,optional\n" -+; -+ -+FILE *make_anonymous_mount_file(struct lxc_list *mount, -+ bool include_nesting_helpers) - { - int ret; - char *mount_entry; -@@ -2398,6 +2414,13 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount) - goto on_error; - } - -+ if (include_nesting_helpers) { -+ ret = lxc_write_nointr(fd, nesting_helpers, -+ sizeof(nesting_helpers) - 1); -+ if (ret != sizeof(nesting_helpers) - 1) -+ goto on_error; -+ } -+ - ret = lseek(fd, 0, SEEK_SET); - if (ret < 0) - goto on_error; -@@ -2418,7 +2441,7 @@ static int setup_mount_entries(const struct lxc_conf *conf, - int ret; - FILE *f; - -- f = make_anonymous_mount_file(mount); -+ f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting); - if (!f) - return -1; - -@@ -2736,6 +2759,7 @@ struct lxc_conf *lxc_conf_init(void) - lxc_list_init(&new->groups); - lxc_list_init(&new->state_clients); - new->lsm_aa_profile = NULL; -+ lxc_list_init(&new->lsm_aa_raw); - new->lsm_se_context = NULL; - new->tmp_umount_proc = false; - -@@ -4021,6 +4045,19 @@ void lxc_clear_includes(struct lxc_conf *conf) - } - } - -+int lxc_clear_apparmor_raw(struct lxc_conf *c) -+{ -+ struct lxc_list *it, *next; -+ -+ lxc_list_for_each_safe (it, &c->lsm_aa_raw, next) { -+ lxc_list_del(it); -+ free(it->elem); -+ free(it); -+ } -+ -+ return 0; -+} -+ - void lxc_conf_free(struct lxc_conf *conf) - { - if (!conf) -@@ -4048,6 +4085,7 @@ void lxc_conf_free(struct lxc_conf *conf) - free(conf->syslog); - lxc_free_networks(&conf->network); - free(conf->lsm_aa_profile); -+ free(conf->lsm_aa_profile_computed); - free(conf->lsm_se_context); - lxc_seccomp_free(conf); - lxc_clear_config_caps(conf); -@@ -4064,6 +4102,7 @@ void lxc_conf_free(struct lxc_conf *conf) - lxc_clear_limits(conf, "lxc.prlimit"); - lxc_clear_sysctls(conf, "lxc.sysctl"); - lxc_clear_procs(conf, "lxc.proc"); -+ lxc_clear_apparmor_raw(conf); - free(conf->cgroup_meta.dir); - free(conf->cgroup_meta.controllers); - free(conf); -diff --git a/src/lxc/conf.h b/src/lxc/conf.h -index 1c029fea..f9864b35 100644 ---- a/src/lxc/conf.h -+++ b/src/lxc/conf.h -@@ -276,7 +276,11 @@ struct lxc_conf { - }; - - char *lsm_aa_profile; -+ char *lsm_aa_profile_computed; -+ bool lsm_aa_profile_created; -+ unsigned int lsm_aa_allow_nesting; - unsigned int lsm_aa_allow_incomplete; -+ struct lxc_list lsm_aa_raw; - char *lsm_se_context; - bool tmp_umount_proc; - char *seccomp; /* filename with the seccomp rules */ -@@ -422,7 +426,8 @@ extern int parse_propagationopts(const char *mntopts, unsigned long *pflags); - extern void tmp_proc_unmount(struct lxc_conf *lxc_conf); - extern void remount_all_slave(void); - extern void suggest_default_idmap(void); --extern FILE *make_anonymous_mount_file(struct lxc_list *mount); -+extern FILE *make_anonymous_mount_file(struct lxc_list *mount, -+ bool include_nesting_helpers); - extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); - extern unsigned long add_required_remount_flags(const char *s, const char *d, - unsigned long flags); -@@ -436,5 +441,6 @@ extern int setup_sysctl_parameters(struct lxc_list *sysctls); - extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key); - extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid); - extern int lxc_clear_procs(struct lxc_conf *c, const char *key); -+extern int lxc_clear_apparmor_raw(struct lxc_conf *c); - - #endif /* __LXC_CONF_H */ -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index 3bc86790..ec795aa4 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -84,7 +84,9 @@ lxc_log_define(confile, lxc); - - lxc_config_define(autodev); - lxc_config_define(apparmor_allow_incomplete); -+lxc_config_define(apparmor_allow_nesting); - lxc_config_define(apparmor_profile); -+lxc_config_define(apparmor_raw); - lxc_config_define(cap_drop); - lxc_config_define(cap_keep); - lxc_config_define(cgroup_controller); -@@ -158,6 +160,8 @@ static struct lxc_config_t config[] = { - { "lxc.arch", set_config_personality, get_config_personality, clr_config_personality, }, - { "lxc.apparmor.profile", set_config_apparmor_profile, get_config_apparmor_profile, clr_config_apparmor_profile, }, - { "lxc.apparmor.allow_incomplete", set_config_apparmor_allow_incomplete, get_config_apparmor_allow_incomplete, clr_config_apparmor_allow_incomplete, }, -+ { "lxc.apparmor.allow_nesting", set_config_apparmor_allow_nesting, get_config_apparmor_allow_nesting, clr_config_apparmor_allow_nesting, }, -+ { "lxc.apparmor.raw", set_config_apparmor_raw, get_config_apparmor_raw, clr_config_apparmor_raw, }, - { "lxc.autodev", set_config_autodev, get_config_autodev, clr_config_autodev, }, - { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, }, - { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, }, -@@ -1115,6 +1119,52 @@ static int set_config_apparmor_allow_incomplete(const char *key, - return 0; - } - -+static int set_config_apparmor_allow_nesting(const char *key, -+ const char *value, -+ struct lxc_conf *lxc_conf, -+ void *data) -+{ -+ if (lxc_config_value_empty(value)) -+ return clr_config_apparmor_allow_nesting(key, lxc_conf, NULL); -+ -+ if (lxc_safe_uint(value, &lxc_conf->lsm_aa_allow_nesting) < 0) -+ return -1; -+ -+ if (lxc_conf->lsm_aa_allow_nesting > 1) -+ return -1; -+ -+ return 0; -+} -+ -+static int set_config_apparmor_raw(const char *key, -+ const char *value, -+ struct lxc_conf *lxc_conf, -+ void *data) -+{ -+ char *elem; -+ struct lxc_list *list; -+ -+ if (lxc_config_value_empty(value)) -+ return lxc_clear_apparmor_raw(lxc_conf); -+ -+ list = malloc(sizeof(*list)); -+ if (!list) { -+ errno = ENOMEM; -+ return -1; -+ } -+ -+ elem = strdup(value); -+ if (!elem) { -+ free(list); -+ return -1; -+ } -+ list->elem = elem; -+ -+ lxc_list_add_tail(&lxc_conf->lsm_aa_raw, list); -+ -+ return 0; -+} -+ - static int set_config_selinux_context(const char *key, const char *value, - struct lxc_conf *lxc_conf, void *data) - { -@@ -2966,6 +3016,34 @@ static int get_config_apparmor_allow_incomplete(const char *key, char *retv, - c->lsm_aa_allow_incomplete); - } - -+static int get_config_apparmor_allow_nesting(const char *key, char *retv, -+ int inlen, struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_get_conf_int(c, retv, inlen, -+ c->lsm_aa_allow_nesting); -+} -+ -+static int get_config_apparmor_raw(const char *key, char *retv, -+ int inlen, struct lxc_conf *c, -+ void *data) -+{ -+ int len; -+ struct lxc_list *it; -+ int fulllen = 0; -+ -+ if (!retv) -+ inlen = 0; -+ else -+ memset(retv, 0, inlen); -+ -+ lxc_list_for_each(it, &c->lsm_aa_raw) { -+ strprint(retv, inlen, "%s\n", (char *)it->elem); -+ } -+ -+ return fulllen; -+} -+ - static int get_config_selinux_context(const char *key, char *retv, int inlen, - struct lxc_conf *c, void *data) - { -@@ -3756,6 +3834,21 @@ static inline int clr_config_apparmor_allow_incomplete(const char *key, - return 0; - } - -+static inline int clr_config_apparmor_allow_nesting(const char *key, -+ struct lxc_conf *c, -+ void *data) -+{ -+ c->lsm_aa_allow_nesting = 0; -+ return 0; -+} -+ -+static inline int clr_config_apparmor_raw(const char *key, -+ struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_clear_apparmor_raw(c); -+} -+ - static inline int clr_config_selinux_context(const char *key, - struct lxc_conf *c, void *data) - { -@@ -4952,7 +5045,9 @@ int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv, - - if (!strcmp(key, "lxc.apparmor")) { - strprint(retv, inlen, "allow_incomplete\n"); -+ strprint(retv, inlen, "allow_nesting\n"); - strprint(retv, inlen, "profile\n"); -+ strprint(retv, inlen, "raw\n"); - } else if (!strcmp(key, "lxc.cgroup")) { - strprint(retv, inlen, "dir\n"); - } else if (!strcmp(key, "lxc.selinux")) { -diff --git a/src/lxc/criu.c b/src/lxc/criu.c -index 398e8e94..72ba0a95 100644 ---- a/src/lxc/criu.c -+++ b/src/lxc/criu.c -@@ -378,7 +378,8 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts) - DECLARE_ARG(opts->user->action_script); - } - -- mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list); -+ mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list, -+ opts->c->lxc_conf->lsm_aa_allow_nesting); - if (!mnts) - goto err; - -diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c -index 5fe6d525..ec3f805d 100644 ---- a/src/lxc/lsm/apparmor.c -+++ b/src/lxc/lsm/apparmor.c -@@ -33,11 +33,18 @@ - #include "conf.h" - #include "utils.h" - #include "initutils.h" -+#include "caps.h" -+#include "parse.h" - - lxc_log_define(apparmor, lsm); - - /* set by lsm_apparmor_drv_init if true */ - static int aa_enabled = 0; -+static bool aa_parser_available = false; -+static bool aa_supports_unix = false; -+static bool aa_can_stack = false; -+static bool aa_is_stacked = false; -+static bool aa_admin = false; - - static int mount_features_enabled = 0; - -@@ -46,6 +53,332 @@ static int mount_features_enabled = 0; - #define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask" - #define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled" - #define AA_UNCHANGED "unchanged" -+#define AA_GENERATED "generated" -+ -+#define AA_CMD_LOAD 'r' -+#define AA_CMD_UNLOAD 'R' -+#define AA_CMD_PARSE 'Q' -+ -+static const char AA_PROFILE_BASE[] = -+" ### Base profile\n" -+" capability,\n" -+" dbus,\n" -+" file,\n" -+" network,\n" -+" umount,\n" -+"\n" -+" # Allow us to receive signals from anywhere.\n" -+" signal (receive),\n" -+"\n" -+" # Allow us to send signals to ourselves\n" -+" signal peer=@{profile_name},\n" -+"\n" -+" # Allow other processes to read our /proc entries, futexes, perf tracing and\n" -+" # kcmp for now (they will need 'read' in the first place). Administrators can\n" -+" # override with:\n" -+" # deny ptrace (readby) ...\n" -+" ptrace (readby),\n" -+"\n" -+" # Allow other processes to trace us by default (they will need 'trace' in\n" -+" # the first place). Administrators can override with:\n" -+" # deny ptrace (tracedby) ...\n" -+" ptrace (tracedby),\n" -+"\n" -+" # Allow us to ptrace ourselves\n" -+" ptrace peer=@{profile_name},\n" -+"\n" -+" # ignore DENIED message on / remount\n" -+" deny mount options=(ro, remount) -> /,\n" -+" deny mount options=(ro, remount, silent) -> /,\n" -+"\n" -+" # allow tmpfs mounts everywhere\n" -+" mount fstype=tmpfs,\n" -+"\n" -+" # allow hugetlbfs mounts everywhere\n" -+" mount fstype=hugetlbfs,\n" -+"\n" -+" # allow mqueue mounts everywhere\n" -+" mount fstype=mqueue,\n" -+"\n" -+" # allow fuse mounts everywhere\n" -+" mount fstype=fuse,\n" -+" mount fstype=fuse.*,\n" -+"\n" -+" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n" -+" deny @{PROC}/bus/** wklx,\n" -+"\n" -+" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n" -+" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n" -+" deny @{PROC}/sys/fs/** wklx,\n" -+"\n" -+" # allow efivars to be mounted, writing to it will be blocked though\n" -+" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n" -+"\n" -+" # block some other dangerous paths\n" -+" deny @{PROC}/kcore rwklx,\n" -+" deny @{PROC}/sysrq-trigger rwklx,\n" -+"\n" -+" # deny writes in /sys except for /sys/fs/cgroup, also allow\n" -+" # fusectl, securityfs and debugfs to be mounted there (read-only)\n" -+" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n" -+" mount fstype=securityfs -> /sys/kernel/security/,\n" -+" mount fstype=debugfs -> /sys/kernel/debug/,\n" -+" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n" -+" mount fstype=proc -> /proc/,\n" -+" mount fstype=sysfs -> /sys/,\n" -+" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n" -+" deny /sys/firmware/efi/efivars/** rwklx,\n" -+" # note, /sys/kernel/security/** handled below\n" -+" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n" -+"\n" -+" # deny reads from debugfs\n" -+" deny /sys/kernel/debug/{,**} rwklx,\n" -+"\n" -+" # allow paths to be made slave, shared, private or unbindable\n" -+" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" -+"# mount options=(rw,make-slave) -> **,\n" -+"# mount options=(rw,make-rslave) -> **,\n" -+"# mount options=(rw,make-shared) -> **,\n" -+"# mount options=(rw,make-rshared) -> **,\n" -+"# mount options=(rw,make-private) -> **,\n" -+"# mount options=(rw,make-rprivate) -> **,\n" -+"# mount options=(rw,make-unbindable) -> **,\n" -+"# mount options=(rw,make-runbindable) -> **,\n" -+"\n" -+" # allow bind-mounts of anything except /proc, /sys and /dev\n" -+" mount options=(rw,bind) /[^spd]*{,/**},\n" -+" mount options=(rw,bind) /d[^e]*{,/**},\n" -+" mount options=(rw,bind) /de[^v]*{,/**},\n" -+" mount options=(rw,bind) /dev/.[^l]*{,/**},\n" -+" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n" -+" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n" -+" mount options=(rw,bind) /dev/.lxc?*{,/**},\n" -+" mount options=(rw,bind) /dev/[^.]*{,/**},\n" -+" mount options=(rw,bind) /dev?*{,/**},\n" -+" mount options=(rw,bind) /p[^r]*{,/**},\n" -+" mount options=(rw,bind) /pr[^o]*{,/**},\n" -+" mount options=(rw,bind) /pro[^c]*{,/**},\n" -+" mount options=(rw,bind) /proc?*{,/**},\n" -+" mount options=(rw,bind) /s[^y]*{,/**},\n" -+" mount options=(rw,bind) /sy[^s]*{,/**},\n" -+" mount options=(rw,bind) /sys?*{,/**},\n" -+"\n" -+" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n" -+" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n" -+" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /dev?*{,/**},\n" -+" mount options=(ro,remount,bind) -> /p[^r]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /proc?*{,/**},\n" -+" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n" -+" mount options=(ro,remount,bind) -> /sys?*{,/**},\n" -+"\n" -+" # allow moving mounts except for /proc, /sys and /dev\n" -+" mount options=(rw,move) /[^spd]*{,/**},\n" -+" mount options=(rw,move) /d[^e]*{,/**},\n" -+" mount options=(rw,move) /de[^v]*{,/**},\n" -+" mount options=(rw,move) /dev/.[^l]*{,/**},\n" -+" mount options=(rw,move) /dev/.l[^x]*{,/**},\n" -+" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n" -+" mount options=(rw,move) /dev/.lxc?*{,/**},\n" -+" mount options=(rw,move) /dev/[^.]*{,/**},\n" -+" mount options=(rw,move) /dev?*{,/**},\n" -+" mount options=(rw,move) /p[^r]*{,/**},\n" -+" mount options=(rw,move) /pr[^o]*{,/**},\n" -+" mount options=(rw,move) /pro[^c]*{,/**},\n" -+" mount options=(rw,move) /proc?*{,/**},\n" -+" mount options=(rw,move) /s[^y]*{,/**},\n" -+" mount options=(rw,move) /sy[^s]*{,/**},\n" -+" mount options=(rw,move) /sys?*{,/**},\n" -+"\n" -+" # generated by: lxc-generate-aa-rules.py container-rules.base\n" -+" deny /proc/sys/[^kn]*{,/**} wklx,\n" -+" deny /proc/sys/k[^e]*{,/**} wklx,\n" -+" deny /proc/sys/ke[^r]*{,/**} wklx,\n" -+" deny /proc/sys/ker[^n]*{,/**} wklx,\n" -+" deny /proc/sys/kern[^e]*{,/**} wklx,\n" -+" deny /proc/sys/kerne[^l]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/dom[^a]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n" -+" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n" -+" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/msg*/** wklx,\n" -+" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/sem*/** wklx,\n" -+" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n" -+" deny /proc/sys/kernel/shm*/** wklx,\n" -+" deny /proc/sys/kernel?*{,/**} wklx,\n" -+" deny /proc/sys/n[^e]*{,/**} wklx,\n" -+" deny /proc/sys/ne[^t]*{,/**} wklx,\n" -+" deny /proc/sys/net?*{,/**} wklx,\n" -+" deny /sys/[^fdck]*{,/**} wklx,\n" -+" deny /sys/c[^l]*{,/**} wklx,\n" -+" deny /sys/cl[^a]*{,/**} wklx,\n" -+" deny /sys/cla[^s]*{,/**} wklx,\n" -+" deny /sys/clas[^s]*{,/**} wklx,\n" -+" deny /sys/class/[^n]*{,/**} wklx,\n" -+" deny /sys/class/n[^e]*{,/**} wklx,\n" -+" deny /sys/class/ne[^t]*{,/**} wklx,\n" -+" deny /sys/class/net?*{,/**} wklx,\n" -+" deny /sys/class?*{,/**} wklx,\n" -+" deny /sys/d[^e]*{,/**} wklx,\n" -+" deny /sys/de[^v]*{,/**} wklx,\n" -+" deny /sys/dev[^i]*{,/**} wklx,\n" -+" deny /sys/devi[^c]*{,/**} wklx,\n" -+" deny /sys/devic[^e]*{,/**} wklx,\n" -+" deny /sys/device[^s]*{,/**} wklx,\n" -+" deny /sys/devices/[^v]*{,/**} wklx,\n" -+" deny /sys/devices/v[^i]*{,/**} wklx,\n" -+" deny /sys/devices/vi[^r]*{,/**} wklx,\n" -+" deny /sys/devices/vir[^t]*{,/**} wklx,\n" -+" deny /sys/devices/virt[^u]*{,/**} wklx,\n" -+" deny /sys/devices/virtu[^a]*{,/**} wklx,\n" -+" deny /sys/devices/virtua[^l]*{,/**} wklx,\n" -+" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n" -+" deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n" -+" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n" -+" deny /sys/devices/virtual/net?*{,/**} wklx,\n" -+" deny /sys/devices/virtual?*{,/**} wklx,\n" -+" deny /sys/devices?*{,/**} wklx,\n" -+" deny /sys/f[^s]*{,/**} wklx,\n" -+" deny /sys/fs/[^c]*{,/**} wklx,\n" -+" deny /sys/fs/c[^g]*{,/**} wklx,\n" -+" deny /sys/fs/cg[^r]*{,/**} wklx,\n" -+" deny /sys/fs/cgr[^o]*{,/**} wklx,\n" -+" deny /sys/fs/cgro[^u]*{,/**} wklx,\n" -+" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n" -+" deny /sys/fs/cgroup?*{,/**} wklx,\n" -+" deny /sys/fs?*{,/**} wklx,\n" -+; -+ -+static const char AA_PROFILE_UNIX_SOCKETS[] = -+"\n" -+" ### Feature: unix\n" -+" # Allow receive via unix sockets from anywhere\n" -+" unix (receive),\n" -+"\n" -+" # Allow all unix sockets in the container\n" -+" unix peer=(label=@{profile_name}),\n" -+; -+ -+static const char AA_PROFILE_CGROUP_NAMESPACES[] = -+"\n" -+" ### Feature: cgroup namespace\n" -+" mount fstype=cgroup -> /sys/fs/cgroup/**,\n" -+" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n" -+; -+ -+/* '_BASE' because we still need to append generated change_profile rules */ -+static const char AA_PROFILE_STACKING_BASE[] = -+"\n" -+" ### Feature: apparmor stacking\n" -+" ### Configuration: apparmor profile loading (in namespace)\n" -+" deny /sys/k[^e]*{,/**} wklx,\n" -+" deny /sys/ke[^r]*{,/**} wklx,\n" -+" deny /sys/ker[^n]*{,/**} wklx,\n" -+" deny /sys/kern[^e]*{,/**} wklx,\n" -+" deny /sys/kerne[^l]*{,/**} wklx,\n" -+" deny /sys/kernel/[^s]*{,/**} wklx,\n" -+" deny /sys/kernel/s[^e]*{,/**} wklx,\n" -+" deny /sys/kernel/se[^c]*{,/**} wklx,\n" -+" deny /sys/kernel/sec[^u]*{,/**} wklx,\n" -+" deny /sys/kernel/secu[^r]*{,/**} wklx,\n" -+" deny /sys/kernel/secur[^i]*{,/**} wklx,\n" -+" deny /sys/kernel/securi[^t]*{,/**} wklx,\n" -+" deny /sys/kernel/securit[^y]*{,/**} wklx,\n" -+" deny /sys/kernel/security/[^a]*{,/**} wklx,\n" -+" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n" -+" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n" -+" deny /sys/kernel/security/app[^a]*{,/**} wklx,\n" -+" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n" -+" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n" -+" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n" -+" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n" -+" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n" -+" deny /sys/kernel/security?*{,/**} wklx,\n" -+" deny /sys/kernel?*{,/**} wklx,\n" -+; -+ -+static const char AA_PROFILE_NO_STACKING[] = -+"\n" -+" ### Feature: apparmor stacking (not present)\n" -+" deny /sys/k*{,/**} rwklx,\n" -+; -+ -+/* '_BASE' because we need to append change_profile for stacking */ -+static const char AA_PROFILE_NESTING_BASE[] = -+"\n" -+" ### Configuration: nesting\n" -+" pivot_root,\n" -+" ptrace,\n" -+" signal,\n" -+"\n" -+ /* NOTE: See conf.c's "nesting_helpers" for details. */ -+" deny /dev/.lxc/proc/** rw,\n" -+" deny /dev/.lxc/sys/** rw,\n" -+"\n" -+" mount fstype=proc -> /usr/lib/*/lxc/**,\n" -+" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n" -+" mount options=(rw,bind),\n" -+" mount options=(rw,rbind),\n" -+" mount options=(rw,make-rshared),\n" -+"\n" -+ /* FIXME: What's the state here on apparmor's side? */ -+" # there doesn't seem to be a way to ask for:\n" -+" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n" -+" # as we always get mount to $cdir/proc/sys with those flags denied\n" -+" # So allow all mounts until that is straightened out:\n" -+" mount,\n" -+; -+ -+static const char AA_PROFILE_UNPRIVILEGED[] = -+"\n" -+" ### Configuration: unprivileged container\n" -+" pivot_root,\n" -+"\n" -+" # Allow modifying mount propagation\n" -+" mount options=(rw,make-slave) -> **,\n" -+" mount options=(rw,make-rslave) -> **,\n" -+" mount options=(rw,make-shared) -> **,\n" -+" mount options=(rw,make-rshared) -> **,\n" -+" mount options=(rw,make-private) -> **,\n" -+" mount options=(rw,make-rprivate) -> **,\n" -+" mount options=(rw,make-unbindable) -> **,\n" -+" mount options=(rw,make-runbindable) -> **,\n" -+"\n" -+" # Allow all bind-mounts\n" -+" mount options=(rw,bind),\n" -+" mount options=(rw,rbind),\n" -+"\n" -+" # Allow remounting things read-only\n" -+" mount options=(ro,remount),\n" -+; - - static bool check_mount_feature_enabled(void) - { -@@ -144,11 +477,6 @@ static bool apparmor_am_unconfined(void) - return ret; - } - --/* aa stacking is not yet supported */ --static bool aa_stacking_supported(void) { -- return false; --} -- - static bool aa_needs_transition(char *curlabel) - { - if (!curlabel) -@@ -160,61 +488,546 @@ static bool aa_needs_transition(char *curlabel) - return true; - } - -+static inline void uint64hex(char *buf, uint64_t num) -+{ -+ size_t i; -+ -+ buf[16] = 0; -+ for (i = 16; i--;) { -+ char c = (char)(num & 0xf); -+ buf[i] = c + (c < 0xa ? '0' : 'a' - 0xa); -+ num >>= 4; -+ } -+} -+ -+static inline char *shorten_apparmor_name(char *name) -+{ -+ size_t len = strlen(name); -+ if (len + 7 > 253) { -+ uint64_t hash; -+ hash = fnv_64a_buf(name, len, FNV1A_64_INIT); -+ name = must_realloc(name, 16 + 1); -+ uint64hex(name, hash); -+ } -+ -+ return name; -+} -+ -+/* Replace slashes with hyphens */ -+static inline void sanitize_path(char *path) -+{ -+ size_t i; -+ -+ for (i = 0; path[i]; i++) -+ if (path[i] == '/') -+ path[i] = '-'; -+} -+ -+static inline char *apparmor_dir(const char *ctname, const char *lxcpath) -+{ -+ return must_make_path(lxcpath, ctname, "apparmor", NULL); -+} -+ -+ -+static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath) -+{ -+ return shorten_apparmor_name(must_concat("lxc-", ctname, "_<", lxcpath, ">", NULL)); -+} -+ -+/* Like apparmor_profile_full() but with slashes replaced by hyphens */ -+static inline char *apparmor_namespace(const char *ctname, const char *lxcpath) -+{ -+ char *full; -+ -+ full = apparmor_profile_full(ctname, lxcpath); -+ sanitize_path(full); -+ -+ return full; -+} -+ -+/* FIXME: This is currently run only in the context of a constructor (via the -+ * initial lsm_init() called due to its __attribute__((constructor)), so we -+ * do not have ERROR/... macros available, so there are some fprintf(stderr)s -+ * in there. -+ */ -+static bool check_apparmor_parser_version() -+{ -+ struct lxc_popen_FILE *parserpipe; -+ int rc; -+ int major = 0, minor = 0, micro = 0; -+ -+ parserpipe = lxc_popen("apparmor_parser --version"); -+ if (!parserpipe) { -+ fprintf(stderr, "Failed to run check for apparmor_parser\n"); -+ return false; -+ } -+ -+ rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, µ); -+ if (rc < 1) { -+ lxc_pclose(parserpipe); -+ /* We stay silent for now as this most likely means the shell -+ * lxc_popen executed failed to find the apparmor_parser binary. -+ * See the FIXME comment above for details. -+ */ -+ return false; -+ } -+ -+ rc = lxc_pclose(parserpipe); -+ if (rc < 0) { -+ fprintf(stderr, "Error waiting for child process\n"); -+ return false; -+ } -+ if (rc != 0) { -+ fprintf(stderr, "'apparmor_parser --version' executed with an error status\n"); -+ return false; -+ } -+ -+ aa_supports_unix = (major > 2) || -+ (major == 2 && minor > 10) || -+ (major == 2 && minor == 10 && micro >= 95); -+ -+ return true; -+} -+ -+static bool file_is_yes(const char *path) -+{ -+ ssize_t rd; -+ int fd; -+ char buf[8]; /* we actually just expect "yes" or "no" */ -+ -+ fd = open(path, O_RDONLY | O_CLOEXEC); -+ if (fd < 0) -+ return false; -+ -+ rd = read(fd, buf, sizeof(buf)); -+ close(fd); -+ -+ return rd >= 4 && strncmp(buf, "yes\n", 4) == 0; -+} -+ -+static bool apparmor_can_stack() -+{ -+ int major, minor, scanned; -+ FILE *f; -+ -+ if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack")) -+ return false; -+ -+ f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r"); -+ if (!f) -+ return false; -+ -+ scanned = fscanf(f, "%d.%d", &major, &minor); -+ fclose(f); -+ if (scanned != 2) -+ return false; -+ -+ return major > 1 || (major == 1 && minor >= 2); -+} -+ -+static void must_append_sized_full(char **buf, size_t *bufsz, const char *data, -+ size_t size, bool append_newline) -+{ -+ size_t newsize = *bufsz + size; -+ -+ if (append_newline) -+ ++newsize; -+ -+ *buf = must_realloc(*buf, newsize); -+ memcpy(*buf + *bufsz, data, size); -+ -+ if (append_newline) -+ (*buf)[newsize - 1] = '\n'; -+ -+ *bufsz = newsize; -+} -+ -+static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size) -+{ -+ return must_append_sized_full(buf, bufsz, data, size, false); -+} -+ -+static bool is_privileged(struct lxc_conf *conf) -+{ -+ return lxc_list_empty(&conf->id_map); -+} -+ -+static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath) -+{ -+ char *profile, *profile_name_full; -+ size_t size; -+ struct lxc_list *it; -+ -+ profile_name_full = apparmor_profile_full(conf->name, lxcpath); -+ -+ profile = must_concat( -+"#include \n" -+"profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n", -+ NULL); -+ size = strlen(profile); -+ -+ must_append_sized(&profile, &size, AA_PROFILE_BASE, -+ sizeof(AA_PROFILE_BASE) - 1); -+ -+ if (aa_supports_unix) -+ must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS, -+ sizeof(AA_PROFILE_UNIX_SOCKETS) - 1); -+ -+ if (file_exists("/proc/self/ns/cgroup")) -+ must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES, -+ sizeof(AA_PROFILE_CGROUP_NAMESPACES) - 1); -+ -+ if (aa_can_stack && !aa_is_stacked) { -+ char *namespace, *temp; -+ -+ must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE, -+ sizeof(AA_PROFILE_STACKING_BASE) - 1); -+ -+ namespace = apparmor_namespace(conf->name, lxcpath); -+ temp = must_concat(" change_profile -> \":", namespace, ":*\",\n" -+ " change_profile -> \":", namespace, "://*\",\n", -+ NULL); -+ free(namespace); -+ -+ must_append_sized(&profile, &size, temp, strlen(temp)); -+ free(temp); -+ } else { -+ must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING, -+ sizeof(AA_PROFILE_NO_STACKING) - 1); -+ } -+ -+ if (conf->lsm_aa_allow_nesting) { -+ must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE, -+ sizeof(AA_PROFILE_NESTING_BASE) - 1); -+ -+ if (!aa_can_stack || aa_is_stacked) { -+ char *temp; -+ -+ temp = must_concat(" change_profile -> \"", -+ profile_name_full, "\",\n", NULL); -+ must_append_sized(&profile, &size, temp, strlen(temp)); -+ free(temp); -+ } -+ } -+ -+ if (!is_privileged(conf) || am_host_unpriv()) -+ must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED, -+ sizeof(AA_PROFILE_UNPRIVILEGED) - 1); -+ -+ lxc_list_for_each(it, &conf->lsm_aa_raw) { -+ const char *line = it->elem; -+ -+ must_append_sized_full(&profile, &size, line, strlen(line), true); -+ } -+ -+ /* include terminating \0 byte */ -+ must_append_sized(&profile, &size, "}\n", 3); -+ -+ free(profile_name_full); -+ -+ return profile; -+} -+ - /* -- * apparmor_process_label_set: Set AppArmor process profile -- * -- * @label : the profile to set -- * @conf : the container configuration to use if @label is NULL -- * @default : use the default profile if @label is NULL -- * @on_exec : this is ignored. Apparmor profile will be changed immediately -- * -- * Returns 0 on success, < 0 on failure -- * -- * Notes: This relies on /proc being available. -+ * apparmor_parser creates a cache file using the parsed file's name as a name. -+ * This means there may be multiple containers with the same name but different -+ * lxcpaths. Therefore we need a sanitized version of the complete profile name -+ * as profile file-name. -+ * We already get this exactly from apparmor_namespace(). - */ --static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf, -- bool use_default, bool on_exec) -+static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath) - { -- int label_fd, ret; -- pid_t tid; -- const char *label = inlabel ? inlabel : conf->lsm_aa_profile; -- char *curlabel; -+ char *ret, *filename; - -- if (!aa_enabled) -- return 0; -+ filename = apparmor_namespace(ctname, lxcpath); -+ ret = must_make_path(lxcpath, ctname, "apparmor", filename, NULL); -+ free(filename); -+ -+ return ret; -+} -+ -+static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath) -+{ -+ char *ret, *namespace; -+ -+ namespace = apparmor_namespace(ctname, lxcpath); -+ ret = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL); -+ free(namespace); -+ -+ return ret; -+} -+ -+static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath) -+{ -+ char *path; -+ -+ if (!aa_can_stack || aa_is_stacked) -+ return true; -+ -+ path = make_apparmor_namespace_path(conf->name, lxcpath); -+ errno = 0; -+ if (mkdir(path, 0755) < 0 && errno != EEXIST) { -+ SYSERROR("Error creating AppArmor namespace: %s", path); -+ free(path); -+ return false; -+ } -+ free(path); -+ -+ return true; -+} -+ -+static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath) -+{ -+ char *path; -+ -+ path = make_apparmor_namespace_path(conf->name, lxcpath); -+ if (rmdir(path) != 0) -+ SYSERROR("Error removing AppArmor namespace"); -+ free(path); -+} -+ -+struct apparmor_parser_args { -+ char cmd; -+ char *file; -+}; -+ -+static int apparmor_parser_exec(void *data) -+{ -+ struct apparmor_parser_args *args = data; -+ char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 }; -+ -+ execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL); -+ -+ return -1; -+} -+ -+static int run_apparmor_parser(char command, -+ struct lxc_conf *conf, -+ const char *lxcpath) -+{ -+ char output[MAXPATHLEN]; -+ int ret; -+ struct apparmor_parser_args args = { -+ .cmd = command, -+ .file = make_apparmor_profile_path(conf->name, lxcpath), -+ }; -+ -+ ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args); -+ if (ret < 0) { -+ ERROR("Failed to run apparmor_parser on \"%s\": %s", args.file, output); -+ ret = -1; -+ } -+ -+ -+ free(args.file); -+ return ret; -+} -+ -+static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath) -+{ -+ char *path; -+ -+ /* It's ok if these deletes fail: if the container was never started, -+ * we'll have never written a profile or cached it. -+ */ -+ -+ path = make_apparmor_profile_path(conf->name, lxcpath); -+ (void)unlink(path); -+ free(path); -+ -+ /* Also remove the apparmor/ subdirectory */ -+ path = apparmor_dir(conf->name, lxcpath); -+ (void)rmdir(path); -+ free(path); -+} -+ -+static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath) -+{ -+ struct stat profile_sb; -+ size_t content_len; -+ int ret = -1; -+ size_t old_len = 0; -+ char *profile_path = NULL, *old_content = NULL, *new_content = NULL; -+ int profile_fd = -1; -+ -+ if (!make_apparmor_namespace(conf, lxcpath)) -+ return -1; -+ -+ /* In order to avoid forcing a profile parse (potentially slow) on -+ * every container start, let's use apparmor's binary policy cache, -+ * which checks mtime of the files to figure out if the policy needs to -+ * be regenerated. -+ * -+ * Since it uses mtimes, we shouldn't just always write out our local -+ * apparmor template; instead we should check to see whether the -+ * template is the same as ours. If it isn't we should write our -+ * version out so that the new changes are reflected and we definitely -+ * force a recompile. -+ */ -+ -+ profile_path = make_apparmor_profile_path(conf->name, lxcpath); -+ profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC); -+ if (profile_fd >= 0) { -+ if (fstat(profile_fd, &profile_sb) < 0) { -+ SYSERROR("Error accessing old profile from %s", -+ profile_path); -+ goto out; -+ } -+ old_len = profile_sb.st_size; -+ old_content = lxc_strmmap(NULL, old_len, PROT_READ, -+ MAP_PRIVATE, profile_fd, 0); -+ if (!old_content) { -+ SYSERROR("Failed to mmap old profile from %s", -+ profile_path); -+ goto out; -+ } -+ } else if (errno != ENOENT) { -+ SYSERROR("Error reading old profile from %s", profile_path); -+ goto out; -+ } -+ -+ new_content = get_apparmor_profile_content(conf, lxcpath); -+ if (!new_content) -+ goto out; -+ -+ content_len = strlen(new_content); -+ -+ if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) { -+ char *path; -+ -+ ret = mkdir_p(APPARMOR_CACHE_DIR, 0755); -+ if (ret < 0) { -+ SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR); -+ goto out; -+ } -+ -+ path = apparmor_dir(conf->name, lxcpath); -+ ret = mkdir_p(path, 0755); -+ if (ret < 0) { -+ SYSERROR("Error creating AppArmor profile directory: %s", path); -+ free(path); -+ goto out; -+ } -+ free(path); -+ -+ ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600); -+ if (ret < 0) { -+ SYSERROR("Error writing profile to %s", profile_path); -+ goto out; -+ } -+ } -+ -+ ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath); -+ if (ret != 0) -+ goto out_remove_profile; -+ -+ conf->lsm_aa_profile_created = true; -+ -+ goto out_ok; -+ -+out_remove_profile: -+ remove_apparmor_profile(conf, lxcpath); -+out: -+ remove_apparmor_namespace(conf, lxcpath); -+out_ok: -+ if (profile_fd >= 0) { -+ if (old_content) -+ lxc_strmunmap(old_content, old_len); -+ close(profile_fd); -+ } -+ free(profile_path); -+ free(new_content); -+ return ret; -+} -+ -+/* -+ * Ensure that the container's policy namespace is unloaded to free kernel -+ * memory. This does not delete the policy from disk or cache. -+ */ -+static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath) -+{ -+ if (!aa_admin) -+ return; -+ -+ if (!conf->lsm_aa_profile_created) -+ return; -+ -+ remove_apparmor_namespace(conf, lxcpath); -+ (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath); -+ -+ remove_apparmor_profile(conf, lxcpath); -+} -+ -+static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath) -+{ -+ int ret = -1; -+ const char *label; -+ char *curlabel = NULL, *genlabel = NULL; -+ -+ if (!aa_enabled) { -+ ERROR("AppArmor not enabled"); -+ return -1; -+ } -+ -+ label = conf->lsm_aa_profile; - - /* user may request that we just ignore apparmor */ - if (label && strcmp(label, AA_UNCHANGED) == 0) { -- INFO("apparmor profile unchanged per user request"); -+ INFO("AppArmor profile unchanged per user request"); -+ conf->lsm_aa_profile_computed = must_copy_string(label); - return 0; - } - -+ if (label && strcmp(label, AA_GENERATED) == 0) { -+ if (!aa_parser_available) { -+ ERROR("Cannot use generated profile: apparmor_parser not available"); -+ goto out; -+ } -+ -+ /* auto-generate profile based on available/requested security features */ -+ if (load_apparmor_profile(conf, lxcpath) != 0) { -+ ERROR("Failed to load generated AppArmor profile"); -+ goto out; -+ } -+ -+ genlabel = apparmor_profile_full(conf->name, lxcpath); -+ if (!genlabel) { -+ ERROR("Failed to build AppArmor profile name"); -+ goto out; -+ } -+ -+ if (aa_can_stack && !aa_is_stacked) { -+ char *namespace = apparmor_namespace(conf->name, lxcpath); -+ size_t llen = strlen(genlabel); -+ must_append_sized(&genlabel, &llen, "//&:", sizeof("//&:") - 1); -+ must_append_sized(&genlabel, &llen, namespace, strlen(namespace)); -+ must_append_sized(&genlabel, &llen, ":", sizeof(":")); /* with the nul byte */ -+ free(namespace); -+ } -+ -+ label = genlabel; -+ } -+ - curlabel = apparmor_process_label_get(lxc_raw_getpid()); - -- if (!aa_stacking_supported() && aa_needs_transition(curlabel)) { -+ if (!aa_can_stack && aa_needs_transition(curlabel)) { - /* we're already confined, and stacking isn't supported */ - - if (!label || strcmp(curlabel, label) == 0) { - /* no change requested */ -- free(curlabel); -- return 0; -+ ret = 0; -+ goto out; - } - -- ERROR("already apparmor confined, but new label requested."); -- free(curlabel); -- return -1; -+ ERROR("Already AppArmor confined, but new label requested."); -+ goto out; - } -- free(curlabel); - - if (!label) { -- if (use_default) { -- if (cgns_supported()) -- label = AA_DEF_PROFILE_CGNS; -- else -- label = AA_DEF_PROFILE; -- } -+ if (cgns_supported()) -+ label = AA_DEF_PROFILE_CGNS; - else -- label = "unconfined"; -+ label = AA_DEF_PROFILE; - } - - if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) { -@@ -223,30 +1036,78 @@ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf - ERROR("If you really want to start this container, set"); - ERROR("lxc.apparmor.allow_incomplete = 1"); - ERROR("in your container configuration file"); -- return -1; -+ goto out; - } - } - -+ conf->lsm_aa_profile_computed = must_copy_string(label); -+ ret = 0; -+ -+out: -+ if (genlabel) { -+ free(genlabel); -+ if (ret != 0) -+ apparmor_cleanup(conf, lxcpath); -+ } -+ free(curlabel); -+ return ret; -+} -+ -+/* -+ * apparmor_process_label_set: Set AppArmor process profile -+ * -+ * @label : the profile to set -+ * @conf : the container configuration to use if @label is NULL -+ * @default : use the default profile if @label is NULL -+ * @on_exec : this is ignored. Apparmor profile will be changed immediately -+ * -+ * Returns 0 on success, < 0 on failure -+ * -+ * Notes: This relies on /proc being available. -+ */ -+static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf, -+ bool on_exec) -+{ -+ int label_fd, ret; -+ pid_t tid; -+ const char *label; -+ -+ if (!aa_enabled) { -+ ERROR("AppArmor not enabled"); -+ return -1; -+ } -+ -+ label = inlabel ? inlabel : conf->lsm_aa_profile_computed; -+ if (!label) { -+ ERROR("LSM wasn't prepared"); -+ return -1; -+ } -+ -+ /* user may request that we just ignore apparmor */ -+ if (strcmp(label, AA_UNCHANGED) == 0) { -+ INFO("AppArmor profile unchanged per user request"); -+ return 0; -+ } - - if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) { -- INFO("apparmor profile unchanged"); -+ INFO("AppArmor profile unchanged"); - return 0; - } - tid = lxc_raw_gettid(); - label_fd = lsm_process_label_fd_get(tid, on_exec); - if (label_fd < 0) { -- SYSERROR("Failed to change apparmor profile to %s", label); -+ SYSERROR("Failed to change AppArmor profile to %s", label); - return -1; - } - - ret = lsm_process_label_set_at(label_fd, label, on_exec); - close(label_fd); - if (ret < 0) { -- ERROR("Failed to change apparmor profile to %s", label); -+ ERROR("Failed to change AppArmor profile to %s", label); - return -1; - } - -- INFO("Changed apparmor profile to %s", label); -+ INFO("Changed AppArmor profile to %s", label); - return 0; - } - -@@ -255,12 +1116,39 @@ static struct lsm_drv apparmor_drv = { - .enabled = apparmor_enabled, - .process_label_get = apparmor_process_label_get, - .process_label_set = apparmor_process_label_set, -+ .prepare = apparmor_prepare, -+ .cleanup = apparmor_cleanup, - }; - - struct lsm_drv *lsm_apparmor_drv_init(void) - { -+ bool have_mac_admin = false; -+ - if (!apparmor_enabled()) - return NULL; -+ -+ /* We only support generated profiles when apparmor_parser is usable */ -+ if (!check_apparmor_parser_version()) -+ goto out; -+ -+ aa_parser_available = true; -+ -+ aa_can_stack = apparmor_can_stack(); -+ if (aa_can_stack) -+ aa_is_stacked = file_is_yes("/sys/kernel/security/apparmor/.ns_stacked"); -+ -+ #if HAVE_LIBCAP -+ have_mac_admin = lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE); -+ #endif -+ -+ if (!have_mac_admin) -+ WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing"); -+ else if (am_host_unpriv() && !aa_is_stacked) -+ WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking"); -+ else -+ aa_admin = true; -+ -+out: - aa_enabled = 1; - return &apparmor_drv; - } -diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c -index 8d7de2db..46e21206 100644 ---- a/src/lxc/lsm/lsm.c -+++ b/src/lxc/lsm/lsm.c -@@ -177,11 +177,37 @@ on_error: - } - - int lsm_process_label_set(const char *label, struct lxc_conf *conf, -- bool use_default, bool on_exec) -+ bool on_exec) - { - if (!drv) { - ERROR("LSM driver not inited"); - return -1; - } -- return drv->process_label_set(label, conf, use_default, on_exec); -+ return drv->process_label_set(label, conf, on_exec); -+} -+ -+int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath) -+{ -+ if (!drv) { -+ ERROR("LSM driver not inited"); -+ return 0; -+ } -+ -+ if (!drv->prepare) -+ return 0; -+ -+ return drv->prepare(conf, lxcpath); -+} -+ -+void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath) -+{ -+ if (!drv) { -+ ERROR("LSM driver not inited"); -+ return; -+ } -+ -+ if (!drv->cleanup) -+ return; -+ -+ drv->cleanup(conf, lxcpath); - } -diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h -index cafb2ac7..52e656d6 100644 ---- a/src/lxc/lsm/lsm.h -+++ b/src/lxc/lsm/lsm.h -@@ -38,17 +38,21 @@ struct lsm_drv { - int (*enabled)(void); - char *(*process_label_get)(pid_t pid); - int (*process_label_set)(const char *label, struct lxc_conf *conf, -- bool use_default, bool on_exec); -+ bool on_exec); -+ int (*prepare)(struct lxc_conf *conf, const char *lxcpath); -+ void (*cleanup)(struct lxc_conf *conf, const char *lxcpath); - }; - - extern void lsm_init(void); - extern int lsm_enabled(void); - extern const char *lsm_name(void); - extern char *lsm_process_label_get(pid_t pid); -+extern int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath); - extern int lsm_process_label_set(const char *label, struct lxc_conf *conf, -- bool use_default, bool on_exec); -+ bool on_exec); - extern int lsm_process_label_fd_get(pid_t pid, bool on_exec); - extern int lsm_process_label_set_at(int label_fd, const char *label, - bool on_exec); -+extern void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath); - - #endif /* __LXC_LSM_H */ -diff --git a/src/lxc/lsm/nop.c b/src/lxc/lsm/nop.c -index 7bb8121b..9397f2bf 100644 ---- a/src/lxc/lsm/nop.c -+++ b/src/lxc/lsm/nop.c -@@ -30,7 +30,7 @@ static char *nop_process_label_get(pid_t pid) - } - - static int nop_process_label_set(const char *label, struct lxc_conf *conf, -- bool use_default, bool on_exec) -+ bool on_exec) - { - return 0; - } -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index c88c18e3..9f7b7bc3 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -75,15 +75,13 @@ static char *selinux_process_label_get(pid_t pid) - * Notes: This relies on /proc being available. - */ - static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf, -- bool use_default, bool on_exec) -+ bool on_exec) - { - int ret; - const char *label; - - label = inlabel ? inlabel : conf->lsm_se_context; - if (!label) { -- if (!use_default) -- return -EINVAL; - - label = DEFAULT_LABEL; - } -diff --git a/src/lxc/start.c b/src/lxc/start.c -index f67e3f66..71bc841b 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -863,9 +863,19 @@ int lxc_init(const char *name, struct lxc_handler *handler) - } - TRACE("Initialized cgroup driver"); - -+ ret = lsm_process_prepare(conf, handler->lxcpath); -+ if (ret < 0) { -+ ERROR("Failed to initialize LSM"); -+ goto out_destroy_cgroups; -+ } -+ TRACE("Initialized LSM"); -+ - INFO("Container \"%s\" is initialized", name); - return 0; - -+out_destroy_cgroups: -+ handler->cgroup_ops->destroy(handler->cgroup_ops, handler); -+ - out_delete_terminal: - lxc_terminal_delete(&handler->conf->console); - -@@ -956,6 +966,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler) - while (namespace_count--) - free(namespaces[namespace_count]); - -+ lsm_process_cleanup(handler->conf, handler->lxcpath); -+ - cgroup_ops->destroy(cgroup_ops, handler); - cgroup_exit(cgroup_ops); - -@@ -1236,7 +1248,7 @@ static int do_start(void *data) - } - - /* Set the label to change to when we exec(2) the container's init. */ -- ret = lsm_process_label_set(NULL, handler->conf, 1, 1); -+ ret = lsm_process_label_set(NULL, handler->conf, true); - if (ret < 0) - goto out_warn_father; - --- -2.11.0 - diff --git a/debian/patches/extra/0005-tests-add-test-for-generated-apparmor-profiles.patch b/debian/patches/extra/0005-tests-add-test-for-generated-apparmor-profiles.patch deleted file mode 100644 index aa22978..0000000 --- a/debian/patches/extra/0005-tests-add-test-for-generated-apparmor-profiles.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Tue, 24 Jul 2018 13:59:04 +0200 -Subject: [PATCH] tests: add test for generated apparmor profiles - -Signed-off-by: Wolfgang Bumiller -(cherry picked from commit e7311a84e5bd0758931033b1a0ce649baa720a58) ---- - src/tests/Makefile.am | 2 + - src/tests/lxc-test-apparmor-generated | 84 +++++++++++++++++++++++++++++++++++ - 2 files changed, 86 insertions(+) - create mode 100755 src/tests/lxc-test-apparmor-generated - -diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am -index a2179c3c..df5d35df 100644 ---- a/src/tests/Makefile.am -+++ b/src/tests/Makefile.am -@@ -79,6 +79,7 @@ if DISTRO_UBUNTU - bin_SCRIPTS += \ - lxc-test-lxc-attach \ - lxc-test-apparmor-mount \ -+ lxc-test-apparmor-generated \ - lxc-test-checkpoint-restore \ - lxc-test-snapdeps \ - lxc-test-symlink \ -@@ -112,6 +113,7 @@ EXTRA_DIST = \ - lxc-test-rootfs \ - lxc-test-autostart \ - lxc-test-apparmor-mount \ -+ lxc-test-apparmor-generated \ - lxc-test-checkpoint-restore \ - lxc-test-cloneconfig \ - lxc-test-createconfig \ -diff --git a/src/tests/lxc-test-apparmor-generated b/src/tests/lxc-test-apparmor-generated -new file mode 100755 -index 00000000..be2e3261 ---- /dev/null -+++ b/src/tests/lxc-test-apparmor-generated -@@ -0,0 +1,84 @@ -+#!/bin/sh -+ -+# lxc: linux Container library -+ -+# This is a test script for generated apparmor profiles -+ -+# This library is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public -+# License as published by the Free Software Foundation; either -+# version 2.1 of the License, or (at your option) any later version. -+ -+# This library is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+ -+# You should have received a copy of the GNU Lesser General Public -+# License along with this library; if not, write to the Free Software -+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ -+if ! which apparmor_parser >/dev/null 2>&1; then -+ echo 'SKIP: test for generated apparmor profiles: apparmor_parser missing' -+fi -+exit 0 -+ -+DONE=0 -+KNOWN_RELEASES="precise trusty xenial yakkety zesty" -+LOGFILE="/tmp/lxc-test-$$.log" -+cleanup() { -+ lxc-destroy -n $CONTAINER_NAME >/dev/null 2>&1 || true -+ -+ if [ $DONE -eq 0 ]; then -+ [ -f "$LOGFILE" ] && cat "$LOGFILE" >&2 -+ rm -f "$LOGFILE" -+ echo "FAIL" -+ exit 1 -+ fi -+ rm -f "$LOGFILE" -+ echo "PASS" -+} -+ -+ARCH=i386 -+if type dpkg >/dev/null 2>&1; then -+ ARCH=$(dpkg --print-architecture) -+fi -+ -+trap cleanup EXIT HUP INT TERM -+set -eu -+ -+# Create a container -+CONTAINER_NAME=lxc-test-apparmor-generated -+ -+# default release is trusty, or the systems release if recognized -+release=trusty -+if [ -f /etc/lsb-release ]; then -+ . /etc/lsb-release -+ rels=$(ubuntu-distro-info --supported 2>/dev/null) || -+ rels="$KNOWN_RELEASES" -+ for r in $rels; do -+ [ "$DISTRIB_CODENAME" = "$r" ] && release="$r" -+ done -+fi -+ -+lxc-create -t download -n $CONTAINER_NAME -B dir -- -d ubuntu -r $release -a $ARCH -+CONTAINER_PATH=$(dirname $(lxc-info -n $CONTAINER_NAME -c lxc.rootfs.path -H) | sed -e 's/dir://') -+cp $CONTAINER_PATH/config $CONTAINER_PATH/config.bak -+ -+# Set the profile to be auto-generated -+echo "lxc.apparmor.profile = generated" >> $CONTAINER_PATH/config -+ -+# Start it -+lxc-start -n $CONTAINER_NAME -lDEBUG -o "$LOGFILE" -+lxc-wait -n $CONTAINER_NAME -t 5 -s RUNNING || (echo "Container didn't start" && exit 1) -+pid=`lxc-info -p -H -n $CONTAINER_NAME` -+profile=`cat /proc/$pid/attr/current` -+expected_profile="lxc-${CONTAINER_NAME}_//&:lxc-${CONTAINER_NAME}_<-var-lib-lxc>:unconfined (enforce)" -+lxc-stop -n $CONTAINER_NAME -k -+if [ "x$profile" != "x$expected_profile" ]; then -+ echo "FAIL: container was in profile $profile" >&2 -+ echo "expected profile: $expected_profile" >&2 -+ exit 1 -+fi -+ -+DONE=1 --- -2.11.0 - diff --git a/debian/patches/extra/0006-conf-fix-path-lxcpath-mixups-in-tty-setup.patch b/debian/patches/extra/0006-conf-fix-path-lxcpath-mixups-in-tty-setup.patch deleted file mode 100644 index b88bda7..0000000 --- a/debian/patches/extra/0006-conf-fix-path-lxcpath-mixups-in-tty-setup.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Mon, 20 Aug 2018 10:02:35 +0200 -Subject: [PATCH] conf: fix path/lxcpath mixups in tty setup - -Signed-off-by: Wolfgang Bumiller -Fixes: 6947153da ("conf: use mknod() to create dummy mount target") ---- - src/lxc/conf.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 8c9dce36..0f05bf4e 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -902,7 +902,7 @@ static int lxc_setup_ttys(struct lxc_conf *conf) - if (ret < 0 || (size_t)ret >= sizeof(lxcpath)) - return -1; - -- ret = mknod(path, S_IFREG | 0000, 0); -+ ret = mknod(lxcpath, S_IFREG | 0000, 0); - if (ret < 0 && errno != EEXIST) { - SYSERROR("Failed to create \"%s\"", lxcpath); - return -1; -@@ -916,12 +916,12 @@ static int lxc_setup_ttys(struct lxc_conf *conf) - - ret = mount(tty->name, lxcpath, "none", MS_BIND, 0); - if (ret < 0) { -- WARN("Failed to bind mount \"%s\" onto \"%s\"", -- tty->name, path); -+ SYSWARN("Failed to bind mount \"%s\" onto \"%s\"", -+ tty->name, lxcpath); - continue; - } - DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, -- path); -+ lxcpath); - - ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", - ttydir, i + 1); --- -2.11.0 - diff --git a/debian/patches/extra/0007-apparmor-allow-various-remount-bind-options.patch b/debian/patches/extra/0007-apparmor-allow-various-remount-bind-options.patch deleted file mode 100644 index d1851c2..0000000 --- a/debian/patches/extra/0007-apparmor-allow-various-remount-bind-options.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Thu, 15 Nov 2018 11:51:34 +0100 -Subject: [PATCH] apparmor: allow various remount,bind options - -Signed-off-by: Wolfgang Bumiller ---- - config/apparmor/abstractions/container-base | 10 ++++++++++ - config/apparmor/abstractions/container-base.in | 11 ++++++++++- - src/lxc/lsm/apparmor.c | 26 +++++++++----------------- - 3 files changed, 29 insertions(+), 18 deletions(-) - -diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base -index 4c3a4ba8..fbd70fdf 100644 ---- a/config/apparmor/abstractions/container-base -+++ b/config/apparmor/abstractions/container-base -@@ -124,6 +124,16 @@ - mount options=(rw,bind) /sy[^s]*{,/**}, - mount options=(rw,bind) /sys?*{,/**}, - -+ # allow various ro-bind-*re*-mounts -+ mount options=(ro,remount,bind), -+ mount options=(ro,remount,bind,nosuid), -+ mount options=(ro,remount,bind,noexec), -+ mount options=(ro,remount,bind,nodev), -+ mount options=(ro,remount,bind,nosuid,noexec), -+ mount options=(ro,remount,bind,noexec,nodev), -+ mount options=(ro,remount,bind,nodev,nosuid), -+ mount options=(ro,remount,bind,nosuid,noexec,nodev), -+ - # allow moving mounts except for /proc, /sys and /dev - mount options=(rw,move) /[^spd]*{,/**}, - mount options=(rw,move) /d[^e]*{,/**}, -diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in -index 0844fdbb..39abf348 100644 ---- a/config/apparmor/abstractions/container-base.in -+++ b/config/apparmor/abstractions/container-base.in -@@ -123,6 +123,16 @@ - mount options=(rw,bind) /sy[^s]*{,/**}, - mount options=(rw,bind) /sys?*{,/**}, - -+ # allow various ro-bind-*re*-mounts -+ mount options=(ro,remount,bind), -+ mount options=(ro,remount,bind,nosuid), -+ mount options=(ro,remount,bind,noexec), -+ mount options=(ro,remount,bind,nodev), -+ mount options=(ro,remount,bind,nosuid,noexec), -+ mount options=(ro,remount,bind,noexec,nodev), -+ mount options=(ro,remount,bind,nodev,nosuid), -+ mount options=(ro,remount,bind,nosuid,noexec,nodev), -+ - # allow moving mounts except for /proc, /sys and /dev - mount options=(rw,move) /[^spd]*{,/**}, - mount options=(rw,move) /d[^e]*{,/**}, -@@ -140,4 +150,3 @@ - mount options=(rw,move) /s[^y]*{,/**}, - mount options=(rw,move) /sy[^s]*{,/**}, - mount options=(rw,move) /sys?*{,/**}, -- -diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c -index ec3f805d..bba4f6cb 100644 ---- a/src/lxc/lsm/apparmor.c -+++ b/src/lxc/lsm/apparmor.c -@@ -163,23 +163,15 @@ static const char AA_PROFILE_BASE[] = - " mount options=(rw,bind) /sy[^s]*{,/**},\n" - " mount options=(rw,bind) /sys?*{,/**},\n" - "\n" --" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n" --" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n" --" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n" --" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n" --" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n" --" mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**},\n" --" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n" --" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n" --" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n" --" mount options=(ro,remount,bind) -> /dev?*{,/**},\n" --" mount options=(ro,remount,bind) -> /p[^r]*{,/**},\n" --" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n" --" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n" --" mount options=(ro,remount,bind) -> /proc?*{,/**},\n" --" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n" --" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n" --" mount options=(ro,remount,bind) -> /sys?*{,/**},\n" -+" # allow various ro-bind-*re*-mounts\n" -+" mount options=(ro,remount,bind),\n" -+" mount options=(ro,remount,bind,nosuid),\n" -+" mount options=(ro,remount,bind,noexec),\n" -+" mount options=(ro,remount,bind,nodev),\n" -+" mount options=(ro,remount,bind,nosuid,noexec),\n" -+" mount options=(ro,remount,bind,noexec,nodev),\n" -+" mount options=(ro,remount,bind,nodev,nosuid),\n" -+" mount options=(ro,remount,bind,nosuid,noexec,nodev),\n" - "\n" - " # allow moving mounts except for /proc, /sys and /dev\n" - " mount options=(rw,move) /[^spd]*{,/**},\n" --- -2.11.0 - diff --git a/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch b/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch index d5f3b9d..2bf6663 100644 --- a/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch +++ b/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch @@ -11,10 +11,10 @@ Signed-off-by: Wolfgang Bumiller 1 file changed, 5 insertions(+) diff --git a/src/lxc/network.c b/src/lxc/network.c -index dd294cd9..871a6e6b 100644 +index d0f14e63..9337ad4d 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c -@@ -194,6 +194,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd +@@ -195,6 +195,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd netdev->upscript, "up", argv); if (err < 0) goto out_delete; diff --git a/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch b/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch index 902fe4e..7f13f8c 100644 --- a/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch +++ b/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch @@ -14,7 +14,7 @@ if a rw /sys is desired, set "lxc.mount.auto" accordingly 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base -index a5e6c35f..4c3a4ba8 100644 +index 07747655..fbd70fdf 100644 --- a/config/apparmor/abstractions/container-base +++ b/config/apparmor/abstractions/container-base @@ -82,7 +82,6 @@ @@ -38,7 +38,7 @@ index a5e6c35f..4c3a4ba8 100644 # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts. # mount options=(rw,make-slave) -> **, diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in -index 11ec5c45..0844fdbb 100644 +index 1a3ead89..39abf348 100644 --- a/config/apparmor/abstractions/container-base.in +++ b/config/apparmor/abstractions/container-base.in @@ -82,7 +82,6 @@ diff --git a/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch b/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch index 239e7c9..792f717 100644 --- a/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch +++ b/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch @@ -15,37 +15,37 @@ being used in order to combat this. Signed-off-by: Wolfgang Bumiller --- - src/lxc/cgroups/cgfsng.c | 92 +++++++++++++++++++++++++++++++++++++++--------- + src/lxc/cgroups/cgfsng.c | 94 +++++++++++++++++++++++++++++++++++++++--------- src/lxc/cgroups/cgroup.h | 18 +++++++--- - src/lxc/commands.c | 85 +++++++++++++++++++++++++++++++++----------- + src/lxc/commands.c | 87 +++++++++++++++++++++++++++++++++----------- src/lxc/commands.h | 2 ++ src/lxc/criu.c | 4 +-- src/lxc/start.c | 28 +++++++++++---- - 6 files changed, 180 insertions(+), 49 deletions(-) + 6 files changed, 183 insertions(+), 50 deletions(-) diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 56c8db54..265cf2cb 100644 +index ab99b47c..ac8f469b 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c -@@ -817,6 +817,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char +@@ -818,6 +818,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char new->mountpoint = mountpoint; - new->base_cgroup = base_cgroup; - new->fullcgpath = NULL; -+ new->innercgpath = NULL; + new->container_base_path = container_base_path; + new->container_full_path = NULL; ++ new->container_inner_path = NULL; + new->monitor_full_path = NULL; new->version = type; - newentry = append_null_to_list((void ***)h); -@@ -1057,6 +1058,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies, +@@ -1059,6 +1060,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies, - free(h->fullcgpath); - h->fullcgpath = NULL; + free(h->container_full_path); + h->container_full_path = NULL; + -+ free(h->innercgpath); -+ h->innercgpath = NULL; ++ free(h->container_inner_path); ++ h->container_inner_path = NULL; } return 0; -@@ -1068,6 +1072,7 @@ struct generic_userns_exec_data { +@@ -1070,6 +1074,7 @@ struct generic_userns_exec_data { struct lxc_conf *conf; uid_t origuid; /* target uid in parent namespace */ char *path; @@ -53,7 +53,7 @@ index 56c8db54..265cf2cb 100644 }; static int cgroup_rmdir_wrapper(void *data) -@@ -1109,6 +1114,7 @@ static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler) +@@ -1112,6 +1117,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, wrap.container_cgroup = ops->container_cgroup; wrap.hierarchies = ops->hierarchies; wrap.conf = handler->conf; @@ -61,43 +61,39 @@ index 56c8db54..265cf2cb 100644 if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap, -@@ -1189,22 +1195,29 @@ on_error: - return bret; +@@ -1323,17 +1329,26 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname) + return cg_unified_create_cgroup(h, cgname); } --static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname) -+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner) +-static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname) ++static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner) { int ret; + char *path; -- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL); -- if (dir_exists(h->fullcgpath)) { -+ if (inner) { -+ path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL); -+ h->innercgpath = path; -+ } else { -+ path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL); -+ h->fullcgpath = path; -+ } -+ if (dir_exists(path)) { - ERROR("The cgroup \"%s\" already existed", h->fullcgpath); - return false; - } - - if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) { + if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) { ERROR("Failed to handle legacy cpuset controller"); return false; } -- ret = mkdir_p(h->fullcgpath, 0755); -+ ret = mkdir_p(path, 0755); +- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); +- ret = mkdir_eexist_on_last(h->container_full_path, 0755); ++ if (inner) { ++ path = must_make_path(h->container_full_path, CGROUP_NAMESPACE_SUBDIR, NULL); ++ h->container_inner_path = path; ++ ret = mkdir(path, 0755); ++ } else { ++ path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); ++ h->container_full_path = path; ++ ret = mkdir_eexist_on_last(path, 0755); ++ } ++ if (ret < 0) { - ERROR("Failed to create cgroup \"%s\"", h->fullcgpath); + ERROR("Failed to create cgroup \"%s\"", h->container_full_path); return false; -@@ -1225,11 +1238,29 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname) - h->fullcgpath = NULL; +@@ -1425,11 +1440,29 @@ on_error: + return bret; } +static inline bool cgfsng_create_inner(struct cgroup_ops *ops) @@ -106,9 +102,9 @@ index 56c8db54..265cf2cb 100644 + bool ret = true; + char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL); + for (i = 0; ops->hierarchies[i]; i++) { -+ if (!create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) { ++ if (!container_create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) { + SYSERROR("Failed to create %s namespace subdirectory: %s", -+ ops->hierarchies[i]->fullcgpath, strerror(errno)); ++ ops->hierarchies[i]->container_full_path, strerror(errno)); + ret = false; + break; + } @@ -120,14 +116,14 @@ index 56c8db54..265cf2cb 100644 /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; * next cgroup_pattern-1, -2, ..., -999. */ - static inline bool cgfsng_create(struct cgroup_ops *ops, -- struct lxc_handler *handler) -+ struct lxc_handler *handler, -+ bool inner) + __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, +- struct lxc_handler *handler) ++ struct lxc_handler *handler, ++ bool inner) { int i; size_t len; -@@ -1238,10 +1269,17 @@ static inline bool cgfsng_create(struct cgroup_ops *ops, +@@ -1438,10 +1471,17 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_conf *conf = handler->conf; if (ops->container_cgroup) { @@ -145,43 +141,54 @@ index 56c8db54..265cf2cb 100644 if (!conf) return false; -@@ -1282,7 +1320,7 @@ again: +@@ -1482,7 +1522,7 @@ again: } for (i = 0; ops->hierarchies[i]; i++) { -- if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) { -+ if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) { - int j; - ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath); - free(ops->hierarchies[i]->fullcgpath); -@@ -1304,7 +1342,7 @@ out_free: - return false; +- if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) { ++ if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) { + ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path); + free(ops->hierarchies[i]->container_full_path); + ops->hierarchies[i]->container_full_path = NULL; +@@ -1505,7 +1545,8 @@ out_free: } --static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid) -+static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid, bool inner) + __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, +- bool monitor) ++ bool monitor, ++ bool inner) { - int i, len; - char pidstr[25]; -@@ -1317,8 +1355,13 @@ static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid) - int ret; - char *fullpath; + int len; + char pidstr[INTTYPE_TO_STRLEN(pid_t)]; +@@ -1521,6 +1562,9 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, + if (monitor) + path = must_make_path(ops->hierarchies[i]->monitor_full_path, + "cgroup.procs", NULL); ++ else if (inner) ++ path = must_make_path(ops->hierarchies[i]->container_inner_path, ++ "cgroup.procs", NULL); + else + path = must_make_path(ops->hierarchies[i]->container_full_path, + "cgroup.procs", NULL); +@@ -1538,12 +1582,12 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, + + __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid) + { +- return __do_cgroup_enter(ops, pid, true); ++ return __do_cgroup_enter(ops, pid, true, false); + } -- fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, -- "cgroup.procs", NULL); -+ if (inner) -+ fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, -+ CGROUP_NAMESPACE_SUBDIR, -+ "cgroup.procs", NULL); -+ else -+ fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, -+ "cgroup.procs", NULL); - ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); - if (ret != 0) { - SYSERROR("Failed to enter cgroup \"%s\"", fullpath); -@@ -1392,9 +1435,15 @@ static int chown_cgroup_wrapper(void *data) +-static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid) ++static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid, bool inner) + { +- return __do_cgroup_enter(ops, pid, false); ++ return __do_cgroup_enter(ops, pid, false, inner); + } + + static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid, +@@ -1609,9 +1653,15 @@ static int chown_cgroup_wrapper(void *data) char *fullpath; - char *path = arg->hierarchies[i]->fullcgpath; + char *path = arg->hierarchies[i]->container_full_path; + if (arg->inner) + path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL); @@ -196,7 +203,7 @@ index 56c8db54..265cf2cb 100644 /* Failures to chown() these are inconvenient but not * detrimental We leave these owned by the container launcher, -@@ -1413,8 +1462,11 @@ static int chown_cgroup_wrapper(void *data) +@@ -1630,8 +1680,11 @@ static int chown_cgroup_wrapper(void *data) (void)chowmod(fullpath, destuid, nsgid, 0664); free(fullpath); @@ -209,7 +216,7 @@ index 56c8db54..265cf2cb 100644 fullpath = must_make_path(path, "cgroup.subtree_control", NULL); (void)chowmod(fullpath, destuid, nsgid, 0664); -@@ -1423,12 +1475,15 @@ static int chown_cgroup_wrapper(void *data) +@@ -1640,13 +1693,17 @@ static int chown_cgroup_wrapper(void *data) fullpath = must_make_path(path, "cgroup.threads", NULL); (void)chowmod(fullpath, destuid, nsgid, 0664); free(fullpath); @@ -221,12 +228,14 @@ index 56c8db54..265cf2cb 100644 return 0; } --static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf) -+static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner) + __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops, +- struct lxc_conf *conf) ++ struct lxc_conf *conf, ++ bool inner) { struct generic_userns_exec_data wrap; -@@ -1439,6 +1494,7 @@ static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf) +@@ -1657,6 +1714,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops, wrap.path = NULL; wrap.hierarchies = ops->hierarchies; wrap.conf = conf; @@ -234,27 +243,27 @@ index 56c8db54..265cf2cb 100644 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, "chown_cgroup_wrapper") < 0) { -@@ -1818,7 +1874,8 @@ static bool cgfsng_unfreeze(struct cgroup_ops *ops) +@@ -2038,7 +2096,8 @@ __cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops) } - static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, -- const char *controller) -+ const char *controller, -+ bool inner) + __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, +- const char *controller) ++ const char *controller, ++ bool inner) { struct hierarchy *h; -@@ -1829,6 +1886,9 @@ static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, +@@ -2049,6 +2108,9 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, return NULL; } + if (inner) -+ return h->innercgpath ? h->innercgpath + strlen(h->mountpoint) : NULL; ++ return h->container_inner_path ? h->container_inner_path + strlen(h->mountpoint) : NULL; + - return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL; + return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL; } -@@ -1860,7 +1920,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, +@@ -2080,7 +2142,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, int fret = -1, idx = 0; char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL; @@ -263,7 +272,7 @@ index 56c8db54..265cf2cb 100644 /* not running */ if (!container_cgroup) return 0; -@@ -1940,7 +2000,7 @@ static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, +@@ -2161,7 +2223,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, continue; } @@ -273,12 +282,12 @@ index 56c8db54..265cf2cb 100644 if (!path) continue; diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h -index 8f4af06c..b12c1f4c 100644 +index d4dcd506..59445b5a 100644 --- a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h -@@ -28,6 +28,12 @@ - #include - #include +@@ -32,6 +32,12 @@ + #define MONITOR_CGROUP "lxc.monitor" + #define PIVOT_CGROUP "lxc.pivot" +/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace + * will be moved into an additional subdirectory "cgns/" inside the cgroup in @@ -289,38 +298,38 @@ index 8f4af06c..b12c1f4c 100644 struct lxc_handler; struct lxc_conf; struct lxc_list; -@@ -65,6 +71,9 @@ typedef enum { - * @fullcgpath - * - The full path to the containers cgroup. +@@ -72,6 +78,9 @@ typedef enum { + * @monitor_full_path + * - The full path to the monitor's cgroup. * -+ * @innercgpath ++ * @container_inner_path + * - The full path to the container's inner cgroup when protect_limits is used. + * * @version * - legacy hierarchy * If the hierarchy is a legacy hierarchy this will be set to -@@ -78,6 +87,7 @@ struct hierarchy { +@@ -85,6 +94,7 @@ struct hierarchy { char *mountpoint; - char *base_cgroup; - char *fullcgpath; -+ char *innercgpath; + char *container_base_path; + char *container_full_path; ++ char *container_inner_path; + char *monitor_full_path; int version; }; - -@@ -124,9 +134,9 @@ struct cgroup_ops { - - bool (*data_init)(struct cgroup_ops *ops); - void (*destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); -- bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler); -- bool (*enter)(struct cgroup_ops *ops, pid_t pid); +@@ -139,9 +149,9 @@ struct cgroup_ops { + void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); + bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); + bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid); +- bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); +- bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid); - const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); -+ bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner); -+ bool (*enter)(struct cgroup_ops *ops, pid_t pid, bool inner); ++ bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner); ++ bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid, bool inner); + const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner); - bool (*escape)(const struct cgroup_ops *ops); + bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); int (*num_hierarchies)(struct cgroup_ops *ops); bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); -@@ -137,7 +147,7 @@ struct cgroup_ops { +@@ -152,7 +162,7 @@ struct cgroup_ops { bool (*unfreeze)(struct cgroup_ops *ops); bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf, bool with_devices); @@ -330,10 +339,10 @@ index 8f4af06c..b12c1f4c 100644 const char *lxcpath, pid_t pid); bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler, diff --git a/src/lxc/commands.c b/src/lxc/commands.c -index 30d6b604..e1bad635 100644 +index 133384d7..b41a7600 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c -@@ -424,20 +424,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, +@@ -427,20 +427,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, return lxc_cmd_rsp_send(fd, &rsp); } @@ -356,7 +365,7 @@ index 30d6b604..e1bad635 100644 { int ret, stopped; struct lxc_cmd_rr cmd = { -@@ -450,8 +438,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, +@@ -453,8 +441,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, cmd.req.data = subsystem; cmd.req.datalen = 0; @@ -377,7 +386,7 @@ index 30d6b604..e1bad635 100644 ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); if (ret < 0) -@@ -466,6 +464,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, +@@ -469,6 +467,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, return cmd.rsp.data; } @@ -420,7 +429,7 @@ index 30d6b604..e1bad635 100644 static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, struct lxc_handler *handler) { -@@ -473,10 +507,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, +@@ -476,10 +510,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, struct lxc_cmd_rsp rsp; struct cgroup_ops *cgroup_ops = handler->cgroup_ops; @@ -446,11 +455,20 @@ index 30d6b604..e1bad635 100644 if (!path) return -1; +@@ -651,7 +696,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, + * lxc_unfreeze() would do another cmd (GET_CGROUP) which would + * deadlock us. + */ +- if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer")) ++ if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer", false)) + return 0; + + if (cgroup_ops->unfreeze(cgroup_ops)) diff --git a/src/lxc/commands.h b/src/lxc/commands.h -index 816cd748..e16c0d79 100644 +index 2c024b65..7c4c00b1 100644 --- a/src/lxc/commands.h +++ b/src/lxc/commands.h -@@ -93,6 +93,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, +@@ -88,6 +88,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, */ extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, const char *subsystem); @@ -460,10 +478,10 @@ index 816cd748..e16c0d79 100644 extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath); extern char *lxc_cmd_get_name(const char *hashed_sock); diff --git a/src/lxc/criu.c b/src/lxc/criu.c -index dc567d34..398e8e94 100644 +index 3d857b54..ec9bcb7e 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c -@@ -328,7 +328,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts) +@@ -332,7 +332,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, } else { const char *p; @@ -472,34 +490,34 @@ index dc567d34..398e8e94 100644 if (!p) { ERROR("failed to get cgroup path for %s", controllers[0]); goto err; -@@ -971,7 +971,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ +@@ -976,7 +976,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ goto out_fini_handler; handler->cgroup_ops = cgroup_ops; -- if (!cgroup_ops->create(cgroup_ops, handler)) { -+ if (!cgroup_ops->create(cgroup_ops, handler, false)) { +- if (!cgroup_ops->payload_create(cgroup_ops, handler)) { ++ if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) { ERROR("failed creating groups"); goto out_fini_handler; } diff --git a/src/lxc/start.c b/src/lxc/start.c -index 23b7de06..cf053d20 100644 +index dae3bcfe..f3b29d6c 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c -@@ -1599,7 +1599,7 @@ static int lxc_spawn(struct lxc_handler *handler) +@@ -1649,7 +1649,7 @@ static int lxc_spawn(struct lxc_handler *handler) } } -- if (!cgroup_ops->create(cgroup_ops, handler)) { -+ if (!cgroup_ops->create(cgroup_ops, handler, false)) { +- if (!cgroup_ops->payload_create(cgroup_ops, handler)) { ++ if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) { ERROR("Failed creating cgroups"); goto out_delete_net; } -@@ -1693,10 +1693,10 @@ static int lxc_spawn(struct lxc_handler *handler) +@@ -1743,10 +1743,10 @@ static int lxc_spawn(struct lxc_handler *handler) goto out_delete_net; } -- if (!cgroup_ops->enter(cgroup_ops, handler->pid)) -+ if (!cgroup_ops->enter(cgroup_ops, handler->pid, false)) +- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid)) ++ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, false)) goto out_delete_net; - if (!cgroup_ops->chown(cgroup_ops, handler->conf)) @@ -507,7 +525,7 @@ index 23b7de06..cf053d20 100644 goto out_delete_net; /* Now we're ready to preserve the network namespace */ -@@ -1765,16 +1765,30 @@ static int lxc_spawn(struct lxc_handler *handler) +@@ -1813,16 +1813,30 @@ static int lxc_spawn(struct lxc_handler *handler) } } @@ -522,11 +540,11 @@ index 23b7de06..cf053d20 100644 TRACE("Set up legacy device cgroup controller limits"); + if (cgns_supported()) { -+ if (!cgroup_ops->create(cgroup_ops, handler, true)) { ++ if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) { + ERROR("failed to create inner cgroup separation layer"); + goto out_delete_net; + } -+ if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) { ++ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) { + ERROR("failed to enter inner cgroup separation layer"); + goto out_delete_net; + } diff --git a/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch b/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch index 53848b0..23497c5 100644 --- a/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch +++ b/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch @@ -17,10 +17,10 @@ Signed-off-by: Wolfgang Bumiller 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c -index cadd8275..8cc18932 100644 +index 11c80866..8b5e2542 100644 --- a/src/lxc/initutils.c +++ b/src/lxc/initutils.c -@@ -58,14 +58,15 @@ static char *copy_global_config_value(char *p) +@@ -63,14 +63,15 @@ static char *copy_global_config_value(char *p) const char *lxc_global_config_value(const char *option_name) { static const char * const options[][2] = { @@ -45,7 +45,7 @@ index cadd8275..8cc18932 100644 }; diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h -index b815cd19..4d005679 100644 +index 6bf23a70..b542e601 100644 --- a/src/lxc/initutils.h +++ b/src/lxc/initutils.h @@ -42,6 +42,7 @@ @@ -57,18 +57,18 @@ index b815cd19..4d005679 100644 #ifndef PR_SET_MM #define PR_SET_MM 35 diff --git a/src/lxc/start.c b/src/lxc/start.c -index cf053d20..827a9ee9 100644 +index f3b29d6c..1cf792aa 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c -@@ -1772,17 +1772,20 @@ static int lxc_spawn(struct lxc_handler *handler) +@@ -1820,17 +1820,20 @@ static int lxc_spawn(struct lxc_handler *handler) TRACE("Set up legacy device cgroup controller limits"); if (cgns_supported()) { -- if (!cgroup_ops->create(cgroup_ops, handler, true)) { +- if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) { - ERROR("failed to create inner cgroup separation layer"); - goto out_delete_net; - } -- if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) { +- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) { - ERROR("failed to enter inner cgroup separation layer"); - goto out_delete_net; - } @@ -77,11 +77,11 @@ index cf053d20..827a9ee9 100644 - goto out_delete_net; + const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits"); + if (!strcmp(tmp, "both") || !strcmp(tmp, wants_to_map_ids ? "unprivileged" : "privileged")) { -+ if (!cgroup_ops->create(cgroup_ops, handler, true)) { ++ if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) { + ERROR("failed to create inner cgroup separation layer"); + goto out_delete_net; + } -+ if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) { ++ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) { + ERROR("failed to enter inner cgroup separation layer"); + goto out_delete_net; + } diff --git a/debian/patches/pve/0006-PVE-Config-namespace-separation.patch b/debian/patches/pve/0006-PVE-Config-namespace-separation.patch index 20e6b90..fa6da72 100644 --- a/debian/patches/pve/0006-PVE-Config-namespace-separation.patch +++ b/debian/patches/pve/0006-PVE-Config-namespace-separation.patch @@ -13,10 +13,10 @@ Signed-off-by: Wolfgang Bumiller 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h -index b12c1f4c..6b8df1b3 100644 +index 59445b5a..7d6135c1 100644 --- a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h -@@ -32,7 +32,7 @@ +@@ -36,7 +36,7 @@ * will be moved into an additional subdirectory "cgns/" inside the cgroup in * order to prevent it from accessing the outer limiting cgroup. */ @@ -26,7 +26,7 @@ index b12c1f4c..6b8df1b3 100644 struct lxc_handler; struct lxc_conf; diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h -index 4d005679..653869b5 100644 +index b542e601..78d3f2b1 100644 --- a/src/lxc/initutils.h +++ b/src/lxc/initutils.h @@ -42,7 +42,7 @@ diff --git a/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch b/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch index 4d2aee0..5f1bdb6 100644 --- a/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch +++ b/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch @@ -20,10 +20,10 @@ Signed-off-by: Wolfgang Bumiller create mode 100644 config/init/systemd/lxc-monitord.service.in diff --git a/.gitignore b/.gitignore -index 0d266c20..b2a4b020 100644 +index 45377714..69e6e7ff 100644 --- a/.gitignore +++ b/.gitignore -@@ -115,6 +115,7 @@ config/bash/lxc +@@ -116,6 +116,7 @@ config/bash/lxc config/init/common/lxc-containers config/init/common/lxc-net config/init/systemd/lxc-autostart-helper @@ -80,10 +80,10 @@ index 00000000..40635168 +[Install] +WantedBy=multi-user.target diff --git a/configure.ac b/configure.ac -index 1acc4615..bcf7ab64 100644 +index 9a6ba83c..9f3b8fb3 100644 --- a/configure.ac +++ b/configure.ac -@@ -671,6 +671,7 @@ AC_CONFIG_FILES([ +@@ -747,6 +747,7 @@ AC_CONFIG_FILES([ config/init/systemd/lxc.service config/init/systemd/lxc@.service config/init/systemd/lxc-net.service @@ -92,10 +92,10 @@ index 1acc4615..bcf7ab64 100644 config/init/sysvinit/lxc-containers config/init/sysvinit/lxc-net diff --git a/lxc.spec.in b/lxc.spec.in -index 87978feb..fcf307fa 100644 +index 7fcd811f..59597f46 100644 --- a/lxc.spec.in +++ b/lxc.spec.in -@@ -235,6 +235,7 @@ fi +@@ -247,6 +247,7 @@ fi %{_unitdir}/lxc-net.service %{_unitdir}/lxc.service %{_unitdir}/lxc@.service @@ -104,10 +104,10 @@ index 87978feb..fcf307fa 100644 %{_sysconfdir}/rc.d/init.d/lxc %{_sysconfdir}/rc.d/init.d/lxc-net diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c -index 0b9d7fd0..fdfdd283 100644 +index 3b931b36..d3cc3597 100644 --- a/src/lxc/cmd/lxc_monitord.c +++ b/src/lxc/cmd/lxc_monitord.c -@@ -355,17 +355,44 @@ static void lxc_monitord_sig_handler(int sig) +@@ -359,17 +359,44 @@ static void lxc_monitord_sig_handler(int sig) int main(int argc, char *argv[]) { @@ -156,7 +156,7 @@ index 0b9d7fd0..fdfdd283 100644 "NOTE: lxc-monitord is intended for use by lxc internally\n" " and does not need to be run by hand\n\n"); exit(EXIT_FAILURE); -@@ -388,9 +415,6 @@ int main(int argc, char *argv[]) +@@ -392,9 +419,6 @@ int main(int argc, char *argv[]) INFO("Failed to open log file %s, log will be lost", lxcpath); lxc_log_options_no_override(); @@ -166,7 +166,7 @@ index 0b9d7fd0..fdfdd283 100644 if (sigfillset(&mask) || sigdelset(&mask, SIGILL) || sigdelset(&mask, SIGSEGV) || -@@ -423,15 +447,17 @@ int main(int argc, char *argv[]) +@@ -427,15 +451,17 @@ int main(int argc, char *argv[]) goto on_error; monitord_created = true; @@ -191,14 +191,14 @@ index 0b9d7fd0..fdfdd283 100644 + close(pipefd); + } - if (lxc_monitord_mainloop_add(&mon)) { + if (lxc_monitord_mainloop_add(&monitor)) { ERROR("Failed to add mainloop handlers"); -@@ -442,7 +468,7 @@ int main(int argc, char *argv[]) - lxc_raw_getpid(), mon.lxcpath); +@@ -446,7 +472,7 @@ int main(int argc, char *argv[]) + lxc_raw_getpid(), monitor.lxcpath); for (;;) { -- ret = lxc_mainloop(&mon.descr, 1000 * 30); -+ ret = lxc_mainloop(&mon.descr, persistent ? -1 : 1000 * 30); +- ret = lxc_mainloop(&monitor.descr, 1000 * 30); ++ ret = lxc_mainloop(&monitor.descr, persistent ? -1 : 1000 * 30); if (ret) { ERROR("mainloop returned an error"); break; diff --git a/debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch b/debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch new file mode 100644 index 0000000..d6e0bdb --- /dev/null +++ b/debian/patches/pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch @@ -0,0 +1,46 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Wed, 2 Jan 2019 14:37:58 +0100 +Subject: [PATCH] PVE: [Config] Disable lxc.monitor cgroup + +When not using relative cgroups this makes lxc unusable +within systemd service files as the service cgroup becomes +empty. + +Signed-off-by: Wolfgang Bumiller +--- + src/lxc/start.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/src/lxc/start.c b/src/lxc/start.c +index 1cf792aa..24f387de 100644 +--- a/src/lxc/start.c ++++ b/src/lxc/start.c +@@ -1954,15 +1954,15 @@ int __lxc_start(const char *name, struct lxc_handler *handler, + goto out_fini_nonet; + } + +- if (!cgroup_ops->monitor_create(cgroup_ops, handler)) { +- ERROR("Failed to create monitor cgroup"); +- goto out_fini_nonet; +- } +- +- if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) { +- ERROR("Failed to enter monitor cgroup"); +- goto out_fini_nonet; +- } ++ //if (!cgroup_ops->monitor_create(cgroup_ops, handler)) { ++ // ERROR("Failed to create monitor cgroup"); ++ // goto out_fini_nonet; ++ //} ++ ++ //if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) { ++ // ERROR("Failed to enter monitor cgroup"); ++ // goto out_fini_nonet; ++ //} + + if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) { + /* If the backing store is a device, mount it here and now. */ +-- +2.11.0 + diff --git a/debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch b/debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch deleted file mode 100644 index c958fb8..0000000 --- a/debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Mon, 20 Nov 2017 10:51:36 +0100 -Subject: [PATCH] PVE: [Deprecated] Make lxc@.service forking - -Previously the init process' output was dumped into the log -files since the service used Type=simple and -StandardOutput/Error=syslog. - -Deprecated, we provide pve-container@.service instead. - -Signed-off-by: Wolfgang Bumiller ---- - config/init/systemd/lxc@.service.in | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/config/init/systemd/lxc@.service.in b/config/init/systemd/lxc@.service.in -index a2aa2211..f312763c 100644 ---- a/config/init/systemd/lxc@.service.in -+++ b/config/init/systemd/lxc@.service.in -@@ -1,15 +1,15 @@ - [Unit] - Description=LXC Container: %i - # This pulls in apparmor, dev-setup, lxc-net --After=lxc.service -+After=lxc.service lxc-monitord.service - Wants=lxc.service - Documentation=man:lxc-start man:lxc - - [Service] --Type=simple -+Type=forking - KillMode=mixed - TimeoutStopSec=120s --ExecStart=@BINDIR@/lxc-start -F -n %i -+ExecStart=@BINDIR@/lxc-start -n %i - ExecStop=@BINDIR@/lxc-stop -n %i - # Environment=BOOTUP=serial - # Environment=CONSOLETYPE=serial --- -2.11.0 - diff --git a/debian/patches/series b/debian/patches/series index 18116a4..ed2f2fc 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -5,11 +5,4 @@ pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch pve/0006-PVE-Config-namespace-separation.patch pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch -pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch -extra/0001-confile-add-lxc.monitor.signal.pdeath.patch -extra/0002-tests-add-lxc.monitor.signal.pdeath.patch -extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch -extra/0004-apparmor-profile-generation.patch -extra/0005-tests-add-test-for-generated-apparmor-profiles.patch -extra/0006-conf-fix-path-lxcpath-mixups-in-tty-setup.patch -extra/0007-apparmor-allow-various-remount-bind-options.patch +pve/0008-PVE-Config-Disable-lxc.monitor-cgroup.patch diff --git a/debian/rules b/debian/rules index 14fd894..49e81e8 100755 --- a/debian/rules +++ b/debian/rules @@ -25,7 +25,8 @@ override_dh_auto_configure: --disable-lua \ --disable-examples \ --enable-seccomp \ - --disable-static + --disable-static \ + --with-cgroup-pattern='lxc/%n' override_dh_fixperms: dh_fixperms -Xusr/lib/$(DEB_HOST_MULTIARCH)/lxc/lxc-user-nic diff --git a/lxc b/lxc index 56fb4ef..dfaaf1c 160000 --- a/lxc +++ b/lxc @@ -1 +1 @@ -Subproject commit 56fb4efa7a2f2e45b46177785e1fa62978e3ff34 +Subproject commit dfaaf1cf5a9136c2caf9aab147e0f51dcb86bafb -- 2.39.2