From: Wolfgang Bumiller Date: Thu, 19 Jul 2018 09:33:35 +0000 (+0200) Subject: bump version to 3.0.1+pve1-1 X-Git-Url: https://git.proxmox.com/?p=lxc.git;a=commitdiff_plain;h=2d8021b3bf638225cdb807cbc9bf34e36a361a40 bump version to 3.0.1+pve1-1 Signed-off-by: Wolfgang Bumiller --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..00314a6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +lxc-templates diff --git a/Makefile b/Makefile index e556f15..6f7ef62 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ PACKAGE=lxc-pve -LXCVER=3.0.0 -DEBREL=3 +LXCVER=3.0.1+pve1 +DEBREL=1 SRCDIR=lxc BUILDSRC := $(SRCDIR).tmp @@ -26,8 +26,7 @@ $(DEB2): $(DEB1) $(DEB1): | submodule rm -f *.deb rm -rf $(BUILDSRC) - mkdir $(BUILDSRC) - cp -a $(SRCDIR)/* $(BUILDSRC)/ + cp -a $(SRCDIR) $(BUILDSRC) cp -a debian $(BUILDSRC)/debian mkdir $(BUILDSRC)/debian/config for i in config/*.conf.in; do \ diff --git a/debian/changelog b/debian/changelog index 2eeed35..464968d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +lxc (3.0.1+pve1-1) unstable; urgency=medium + + * update to current stable-3.0 (3.0.1 with some upstream fixups) + + * merged apparmor profile generation branch + + * fix 'pct enter' not attaching to the inner namespaced cgroup + + -- Proxmox Support Team Thu, 26 Jul 2018 10:41:29 +0200 + lxc (3.0.0-3) unstable; urgency=medium * merge bug fixes from upstream: diff --git a/debian/patches/0001-lxc.service-start-after-a-potential-syslog.service.patch b/debian/patches/0001-lxc.service-start-after-a-potential-syslog.service.patch deleted file mode 100644 index 34d7e8e..0000000 --- a/debian/patches/0001-lxc.service-start-after-a-potential-syslog.service.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Fri, 10 Feb 2017 09:13:40 +0100 -Subject: [PATCH] lxc.service: start after a potential syslog.service - -Signed-off-by: Wolfgang Bumiller ---- - 
config/init/systemd/lxc.service.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in -index cd619967..77541917 100644 ---- a/config/init/systemd/lxc.service.in -+++ b/config/init/systemd/lxc.service.in -@@ -1,6 +1,6 @@ - [Unit] - Description=LXC Container Initialization and Autoboot Code --After=network.target lxc-net.service -+After=syslog.service network.target lxc-net.service - Wants=lxc-net.service - Documentation=man:lxc-autostart man:lxc - --- -2.11.0 - diff --git a/debian/patches/0002-pve-run-lxcnetaddbr-when-instantiating-veths.patch b/debian/patches/0002-pve-run-lxcnetaddbr-when-instantiating-veths.patch deleted file mode 100644 index f09bf8b..0000000 --- a/debian/patches/0002-pve-run-lxcnetaddbr-when-instantiating-veths.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Fri, 10 Feb 2017 09:15:37 +0100 -Subject: [PATCH] pve: run lxcnetaddbr when instantiating veths - -FIXME: Why aren't we using regular up-scripts? 
- -Signed-off-by: Wolfgang Bumiller ---- - src/lxc/network.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/lxc/network.c b/src/lxc/network.c -index e31066cb..d136bad1 100644 ---- a/src/lxc/network.c -+++ b/src/lxc/network.c -@@ -216,6 +216,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd - netdev->upscript, "up", argv); - if (err < 0) - goto out_delete; -+ } else if (netdev->link[0] == '\0') { -+ err = run_script(handler->name, "net", "/usr/share/lxc/lxcnetaddbr", "up", -+ "veth", veth1, (char*) NULL); -+ if (err) -+ goto out_delete; - } - - DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, --- -2.11.0 - diff --git a/debian/patches/0003-deny-rw-mounting-of-sys-and-proc.patch b/debian/patches/0003-deny-rw-mounting-of-sys-and-proc.patch deleted file mode 100644 index 2ff706b..0000000 --- a/debian/patches/0003-deny-rw-mounting-of-sys-and-proc.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= -Date: Wed, 9 Nov 2016 09:14:26 +0100 -Subject: [PATCH] deny rw mounting of /sys and /proc - -this would allow root in a privileged container to change -the permissions of /sys on the host, which could lock out -non-root users. 
- -if a rw /sys is desired, set "lxc.mount.auto" accordingly ---- - config/apparmor/abstractions/container-base | 6 +++++- - config/apparmor/abstractions/container-base.in | 6 +++++- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base -index a5e6c35f..4c3a4ba8 100644 ---- a/config/apparmor/abstractions/container-base -+++ b/config/apparmor/abstractions/container-base -@@ -82,7 +82,6 @@ - deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, - mount fstype=proc -> /proc/, - mount fstype=sysfs -> /sys/, -- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, - deny /sys/firmware/efi/efivars/** rwklx, - deny /sys/kernel/security/** rwklx, - mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, -@@ -91,6 +90,11 @@ - # deny reads from debugfs - deny /sys/kernel/debug/{,**} rwklx, - -+ # prevent rw mounting of /sys, because that allows changing its global permissions -+ deny mount -> /proc/, -+ deny mount -> /sys/, -+# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, -+ - # allow paths to be made slave, shared, private or unbindable - # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts. 
- # mount options=(rw,make-slave) -> **, -diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in -index 16529bbf..54f9ddf0 100644 ---- a/config/apparmor/abstractions/container-base.in -+++ b/config/apparmor/abstractions/container-base.in -@@ -82,7 +82,6 @@ - deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, - mount fstype=proc -> /proc/, - mount fstype=sysfs -> /sys/, -- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, - deny /sys/firmware/efi/efivars/** rwklx, - deny /sys/kernel/security/** rwklx, - mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, -@@ -91,6 +90,11 @@ - # deny reads from debugfs - deny /sys/kernel/debug/{,**} rwklx, - -+ # prevent rw mounting of /sys, because that allows changing its global permissions -+ deny mount -> /proc/, -+ deny mount -> /sys/, -+# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, -+ - # allow paths to be made slave, shared, private or unbindable - # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts. - # mount options=(rw,make-slave) -> **, --- -2.11.0 - diff --git a/debian/patches/0004-separate-the-limiting-from-the-namespaced-cgroup-roo.patch b/debian/patches/0004-separate-the-limiting-from-the-namespaced-cgroup-roo.patch deleted file mode 100644 index 53f786e..0000000 --- a/debian/patches/0004-separate-the-limiting-from-the-namespaced-cgroup-roo.patch +++ /dev/null @@ -1,599 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Wed, 28 Mar 2018 13:37:28 +0200 -Subject: [PATCH] separate the limiting from the namespaced cgroup root - -When cgroup namespaces are enabled a privileged container -with mixed cgroups has full write access to its own root -cgroup effectively allowing it to overwrite values written -from the outside or configured via lxc.cgroup.*. 
- -This patch causes an additional 'ns/' directory to be -created in all cgroups if cgroup namespaces and cgfsng are -being used in order to combat this. - -Signed-off-by: Wolfgang Bumiller ---- - src/lxc/cgroups/cgfsng.c | 88 +++++++++++++++++++++++++++++++++++++++--------- - src/lxc/cgroups/cgroup.c | 17 +++++----- - src/lxc/cgroups/cgroup.h | 23 ++++++++----- - src/lxc/commands.c | 85 +++++++++++++++++++++++++++++++++++----------- - src/lxc/commands.h | 2 ++ - src/lxc/criu.c | 4 +-- - src/lxc/start.c | 28 +++++++++++---- - 7 files changed, 186 insertions(+), 61 deletions(-) - -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 10c7ab2c..b48f997f 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -101,6 +101,7 @@ struct hierarchy { - char *mountpoint; - char *base_cgroup; - char *fullcgpath; -+ char *innercgpath; - int version; - }; - -@@ -955,6 +956,7 @@ static struct hierarchy *add_hierarchy(char **clist, char *mountpoint, - new->mountpoint = mountpoint; - new->base_cgroup = base_cgroup; - new->fullcgpath = NULL; -+ new->innercgpath = NULL; - new->version = type; - - newentry = append_null_to_list((void ***)&hierarchies); -@@ -1587,6 +1589,8 @@ static int cgroup_rmdir(char *container_cgroup) - - free(h->fullcgpath); - h->fullcgpath = NULL; -+ free(h->innercgpath); -+ h->innercgpath = NULL; - } - - return 0; -@@ -1597,6 +1601,7 @@ struct generic_userns_exec_data { - struct lxc_conf *conf; - uid_t origuid; /* target uid in parent namespace */ - char *path; -+ bool inner; - }; - - static int cgroup_rmdir_wrapper(void *data) -@@ -1641,6 +1646,7 @@ static void cgfsng_destroy(void *hdata, struct lxc_conf *conf) - wrap.origuid = 0; - wrap.d = hdata; - wrap.conf = conf; -+ wrap.inner = false; - - if (conf && !lxc_list_empty(&conf->id_map)) - ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap, -@@ -1730,22 +1736,29 @@ on_error: - return bret; - } - --static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname) 
-+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner) - { - int ret; - -- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL); -- if (dir_exists(h->fullcgpath)) { -- ERROR("The cgroup \"%s\" already existed", h->fullcgpath); -+ char *path; -+ if (inner) { -+ path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL); -+ h->innercgpath = path; -+ } else { -+ path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL); -+ h->fullcgpath = path; -+ } -+ if (dir_exists(path)) { // it must not already exist -+ ERROR("Path \"%s\" already existed.", path); - return false; - } - -- if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) { -+ if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) { - ERROR("Failed to handle legacy cpuset controller"); - return false; - } - -- ret = mkdir_p(h->fullcgpath, 0755); -+ ret = mkdir_p(path, 0755); - if (ret < 0) { - ERROR("Failed to create cgroup \"%s\"", h->fullcgpath); - return false; -@@ -1766,10 +1779,26 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname) - h->fullcgpath = NULL; - } - -+static inline bool cgfsng_create_inner(struct cgfsng_handler_data *d) -+{ -+ size_t i; -+ bool ret = true; -+ char *cgname = must_make_path(d->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL); -+ for (i = 0; hierarchies[i]; i++) { -+ if (!create_path_for_hierarchy(hierarchies[i], cgname, true)) { -+ SYSERROR("Failed to create %s namespace subdirectory: %s", hierarchies[i]->fullcgpath, strerror(errno)); -+ ret = false; -+ break; -+ } -+ } -+ free(cgname); -+ return ret; -+} -+ - /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; - * next cgroup_pattern-1, -2, ..., -999. 
- */ --static inline bool cgfsng_create(void *hdata) -+static inline bool cgfsng_create(void *hdata, bool inner) - { - int i; - size_t len; -@@ -1781,10 +1810,17 @@ static inline bool cgfsng_create(void *hdata) - return false; - - if (d->container_cgroup) { -+ if (inner) -+ return cgfsng_create_inner(d); - WARN("cgfsng_create called a second time"); - return false; - } - -+ if (inner) { -+ ERROR("cgfsng_create called twice for innner cgroup"); -+ return false; -+ } -+ - if (d->cgroup_meta.dir) - tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false); - else -@@ -1821,7 +1857,7 @@ again: - } - - for (i = 0; hierarchies[i]; i++) { -- if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) { -+ if (!create_path_for_hierarchy(hierarchies[i], container_cgroup, false)) { - int j; - ERROR("Failed to create cgroup \"%s\"", hierarchies[i]->fullcgpath); - free(hierarchies[i]->fullcgpath); -@@ -1843,7 +1879,7 @@ out_free: - return false; - } - --static bool cgfsng_enter(void *hdata, pid_t pid) -+static bool cgfsng_enter(void *hdata, pid_t pid, bool inner) - { - int i, len; - char pidstr[25]; -@@ -1856,8 +1892,13 @@ static bool cgfsng_enter(void *hdata, pid_t pid) - int ret; - char *fullpath; - -- fullpath = must_make_path(hierarchies[i]->fullcgpath, -- "cgroup.procs", NULL); -+ if (inner) -+ fullpath = must_make_path(hierarchies[i]->fullcgpath, -+ CGROUP_NAMESPACE_SUBDIR, -+ "cgroup.procs", NULL); -+ else -+ fullpath = must_make_path(hierarchies[i]->fullcgpath, -+ "cgroup.procs", NULL); - ret = lxc_write_to_file(fullpath, pidstr, len, false); - if (ret != 0) { - SYSERROR("Failed to enter cgroup \"%s\"", fullpath); -@@ -1933,9 +1974,15 @@ static int chown_cgroup_wrapper(void *data) - char *fullpath; - char *path = hierarchies[i]->fullcgpath; - -+ if (arg->inner) -+ path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL); -+ - ret = chowmod(path, destuid, nsgid, 0775); -- if (ret < 0) -+ if (ret < 0) { -+ if (arg->inner) -+ 
free(path); - return -1; -+ } - - /* Failures to chown() these are inconvenient but not - * detrimental We leave these owned by the container launcher, -@@ -1954,8 +2001,11 @@ static int chown_cgroup_wrapper(void *data) - (void)chowmod(fullpath, destuid, 0, 0664); - free(fullpath); - -- if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC) -+ if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC) { -+ if (arg->inner) -+ free(path); - continue; -+ } - - fullpath = must_make_path(path, "cgroup.subtree_control", NULL); - (void)chowmod(fullpath, destuid, nsgid, 0664); -@@ -1964,12 +2014,14 @@ static int chown_cgroup_wrapper(void *data) - fullpath = must_make_path(path, "cgroup.threads", NULL); - (void)chowmod(fullpath, destuid, nsgid, 0664); - free(fullpath); -+ if (arg->inner) -+ free(path); - } - - return 0; - } - --static bool cgfsng_chown(void *hdata, struct lxc_conf *conf) -+static bool cgfsng_chown(void *hdata, struct lxc_conf *conf, bool inner) - { - struct cgfsng_handler_data *d = hdata; - struct generic_userns_exec_data wrap; -@@ -1984,6 +2036,7 @@ static bool cgfsng_chown(void *hdata, struct lxc_conf *conf) - wrap.path = NULL; - wrap.d = d; - wrap.conf = conf; -+ wrap.inner = inner; - - if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, - "chown_cgroup_wrapper") < 0) { -@@ -2366,7 +2419,7 @@ static bool cgfsng_unfreeze(void *hdata) - return true; - } - --static const char *cgfsng_get_cgroup(void *hdata, const char *controller) -+static const char *cgfsng_get_cgroup(void *hdata, const char *controller, bool inner) - { - struct hierarchy *h; - -@@ -2377,6 +2430,9 @@ static const char *cgfsng_get_cgroup(void *hdata, const char *controller) - return NULL; - } - -+ if (inner && h->innercgpath) -+ return h->innercgpath + strlen(h->mountpoint); -+ - return h->fullcgpath ? 
h->fullcgpath + strlen(h->mountpoint) : NULL; - } - -@@ -2408,7 +2464,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, - int fret = -1, idx = 0; - char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL; - -- container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller); -+ container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller); - /* not running */ - if (!container_cgroup) - return 0; -diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c -index 9e7b26e0..ec45dd39 100644 ---- a/src/lxc/cgroups/cgroup.c -+++ b/src/lxc/cgroups/cgroup.c -@@ -73,19 +73,19 @@ void cgroup_destroy(struct lxc_handler *handler) - } - - /* Create the container cgroups for all requested controllers. */ --bool cgroup_create(struct lxc_handler *handler) -+bool cgroup_create(struct lxc_handler *handler, bool inner) - { - if (ops) -- return ops->create(handler->cgroup_data); -+ return ops->create(handler->cgroup_data, inner); - - return false; - } - - /* Enter the container init into its new cgroups for all requested controllers. 
*/ --bool cgroup_enter(struct lxc_handler *handler) -+bool cgroup_enter(struct lxc_handler *handler, bool inner) - { - if (ops) -- return ops->enter(handler->cgroup_data, handler->pid); -+ return ops->enter(handler->cgroup_data, handler->pid, inner); - - return false; - } -@@ -99,10 +99,11 @@ bool cgroup_create_legacy(struct lxc_handler *handler) - } - - const char *cgroup_get_cgroup(struct lxc_handler *handler, -- const char *subsystem) -+ const char *subsystem, -+ bool inner) - { - if (ops) -- return ops->get_cgroup(handler->cgroup_data, subsystem); -+ return ops->get_cgroup(handler->cgroup_data, subsystem, inner); - - return NULL; - } -@@ -148,10 +149,10 @@ bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices) - return false; - } - --bool cgroup_chown(struct lxc_handler *handler) -+bool cgroup_chown(struct lxc_handler *handler, bool inner) - { - if (ops && ops->chown) -- return ops->chown(handler->cgroup_data, handler->conf); -+ return ops->chown(handler->cgroup_data, handler->conf, inner); - - return true; - } -diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h -index 0f04e8b7..3a63133d 100644 ---- a/src/lxc/cgroups/cgroup.h -+++ b/src/lxc/cgroups/cgroup.h -@@ -28,6 +28,12 @@ - #include - #include - -+/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace -+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in -+ * order to prevent it from accessing the outer limiting cgroup. 
-+ */ -+#define CGROUP_NAMESPACE_SUBDIR "cgns" -+ - struct lxc_handler; - struct lxc_conf; - struct lxc_list; -@@ -45,10 +51,10 @@ struct cgroup_ops { - - void *(*init)(struct lxc_handler *handler); - void (*destroy)(void *hdata, struct lxc_conf *conf); -- bool (*create)(void *hdata); -- bool (*enter)(void *hdata, pid_t pid); -+ bool (*create)(void *hdata, bool inner); -+ bool (*enter)(void *hdata, pid_t pid, bool inner); - bool (*create_legacy)(void *hdata, pid_t pid); -- const char *(*get_cgroup)(void *hdata, const char *subsystem); -+ const char *(*get_cgroup)(void *hdata, const char *subsystem, bool inner); - bool (*escape)(); - int (*num_hierarchies)(); - bool (*get_hierarchies)(int n, char ***out); -@@ -56,7 +62,7 @@ struct cgroup_ops { - int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath); - bool (*unfreeze)(void *hdata); - bool (*setup_limits)(void *hdata, struct lxc_conf *conf, bool with_devices); -- bool (*chown)(void *hdata, struct lxc_conf *conf); -+ bool (*chown)(void *hdata, struct lxc_conf *conf, bool inner); - bool (*attach)(const char *name, const char *lxcpath, pid_t pid); - bool (*mount_cgroup)(void *hdata, const char *root, int type); - int (*nrtasks)(void *hdata); -@@ -67,15 +73,16 @@ extern bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid); - extern bool cgroup_mount(const char *root, struct lxc_handler *handler, int type); - extern void cgroup_destroy(struct lxc_handler *handler); - extern bool cgroup_init(struct lxc_handler *handler); --extern bool cgroup_create(struct lxc_handler *handler); -+extern bool cgroup_create(struct lxc_handler *handler, bool inner); - extern bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices); --extern bool cgroup_chown(struct lxc_handler *handler); --extern bool cgroup_enter(struct lxc_handler *handler); -+extern bool cgroup_chown(struct lxc_handler *handler, bool inner); -+extern bool cgroup_enter(struct lxc_handler *handler, 
bool inner); - extern void cgroup_cleanup(struct lxc_handler *handler); - extern bool cgroup_create_legacy(struct lxc_handler *handler); - extern int cgroup_nrtasks(struct lxc_handler *handler); - extern const char *cgroup_get_cgroup(struct lxc_handler *handler, -- const char *subsystem); -+ const char *subsystem, -+ bool inner); - extern bool cgroup_escape(); - extern int cgroup_num_hierarchies(); - extern bool cgroup_get_hierarchies(int i, char ***out); -diff --git a/src/lxc/commands.c b/src/lxc/commands.c -index 54e9f75c..df5a9907 100644 ---- a/src/lxc/commands.c -+++ b/src/lxc/commands.c -@@ -426,20 +426,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, - return lxc_cmd_rsp_send(fd, &rsp); - } - --/* -- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a -- * particular subsystem. This is the cgroup path relative to the root -- * of the cgroup filesystem. -- * -- * @name : name of container to connect to -- * @lxcpath : the lxcpath in which the container is running -- * @subsystem : the subsystem being asked about -- * -- * Returns the path on success, NULL on failure. The caller must free() the -- * returned path. 
-- */ --char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, -- const char *subsystem) -+char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, -+ const char *subsystem, bool inner) - { - int ret, stopped; - struct lxc_cmd_rr cmd = { -@@ -452,8 +440,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, - - cmd.req.data = subsystem; - cmd.req.datalen = 0; -- if (subsystem) -- cmd.req.datalen = strlen(subsystem) + 1; -+ if (subsystem) { -+ size_t subsyslen = strlen(subsystem); -+ if (inner) { -+ char *data = alloca(subsyslen+2); -+ memcpy(data, subsystem, subsyslen+1); -+ data[subsyslen+1] = 1; -+ cmd.req.datalen = subsyslen+2, -+ cmd.req.data = data; -+ } else { -+ cmd.req.datalen = subsyslen+1; -+ } -+ } - - ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); - if (ret < 0) -@@ -468,16 +466,63 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, - return cmd.rsp.data; - } - -+/* -+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a -+ * particular subsystem. This is the cgroup path relative to the root -+ * of the cgroup filesystem. -+ * -+ * @name : name of container to connect to -+ * @lxcpath : the lxcpath in which the container is running -+ * @subsystem : the subsystem being asked about -+ * -+ * Returns the path on success, NULL on failure. The caller must free() the -+ * returned path. -+ */ -+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, -+ const char *subsystem) -+{ -+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false); -+} -+ -+/* -+ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path -+ * for a particular subsystem. This is the cgroup path relative to the root -+ * of the cgroup filesystem. 
-+ * -+ * @name : name of container to connect to -+ * @lxcpath : the lxcpath in which the container is running -+ * @subsystem : the subsystem being asked about -+ * -+ * Returns the path on success, NULL on failure. The caller must free() the -+ * returned path. -+ */ -+char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath, -+ const char *subsystem) -+{ -+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true); -+} -+ - static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler) - { - const char *path; - struct lxc_cmd_rsp rsp; - -- if (req->datalen > 0) -- path = cgroup_get_cgroup(handler, req->data); -- else -- path = cgroup_get_cgroup(handler, NULL); -+ if (req->datalen > 0) { -+ const char *subsystem; -+ size_t subsyslen; -+ bool inner = false; -+ subsystem = req->data; -+ subsyslen = strlen(subsystem); -+ if (req->datalen == subsyslen+2) -+ inner = (subsystem[subsyslen+1] == 1); -+ -+ path = cgroup_get_cgroup(handler, req->data, inner); -+ } else { -+ // FIXME: cgroup separation for cgroup v2 cannot be handled -+ // like we used to do v1 here... need to figure this out... 
-+ path = cgroup_get_cgroup(handler, NULL, false); -+ } - if (!path) - return -1; - -diff --git a/src/lxc/commands.h b/src/lxc/commands.h -index 816cd748..e16c0d79 100644 ---- a/src/lxc/commands.h -+++ b/src/lxc/commands.h -@@ -93,6 +93,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, - */ - extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, - const char *subsystem); -+extern char *lxc_cmd_get_attach_cgroup_path(const char *name, -+ const char *lxcpath, const char *subsystem); - extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath); - extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath); - extern char *lxc_cmd_get_name(const char *hashed_sock); -diff --git a/src/lxc/criu.c b/src/lxc/criu.c -index f60a6e15..7c8a8aee 100644 ---- a/src/lxc/criu.c -+++ b/src/lxc/criu.c -@@ -324,7 +324,7 @@ static void exec_criu(struct criu_opts *opts) - } else { - const char *p; - -- p = cgroup_get_cgroup(opts->handler, controllers[0]); -+ p = cgroup_get_cgroup(opts->handler, controllers[0], false); - if (!p) { - ERROR("failed to get cgroup path for %s", controllers[0]); - goto err; -@@ -958,7 +958,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ - goto out_fini_handler; - } - -- if (!cgroup_create(handler)) { -+ if (!cgroup_create(handler, false)) { - ERROR("failed creating groups"); - goto out_fini_handler; - } -diff --git a/src/lxc/start.c b/src/lxc/start.c -index f66f50a7..772eacc2 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -1556,7 +1556,7 @@ static int lxc_spawn(struct lxc_handler *handler) - - cgroups_connected = true; - -- if (!cgroup_create(handler)) { -+ if (!cgroup_create(handler, false)) { - ERROR("Failed creating cgroups"); - goto out_delete_net; - } -@@ -1650,10 +1650,10 @@ static int lxc_spawn(struct lxc_handler *handler) - goto out_delete_net; - } - -- if (!cgroup_enter(handler)) -+ if (!cgroup_enter(handler, 
false)) - goto out_delete_net; - -- if (!cgroup_chown(handler)) -+ if (!cgroup_chown(handler, false)) - goto out_delete_net; - - /* Now we're ready to preserve the network namespace */ -@@ -1714,16 +1714,30 @@ static int lxc_spawn(struct lxc_handler *handler) - } - } - -- ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE); -- if (ret < 0) -- goto out_delete_net; -- - if (!cgroup_setup_limits(handler, true)) { - ERROR("Failed to setup legacy device cgroup controller limits"); - goto out_delete_net; - } - TRACE("Set up legacy device cgroup controller limits"); - -+ if (cgns_supported()) { -+ if (!cgroup_create(handler, true)) { -+ ERROR("failed to create inner cgroup separation layer"); -+ goto out_delete_net; -+ } -+ if (!cgroup_enter(handler, true)) { -+ ERROR("failed to enter inner cgroup separation layer"); -+ goto out_delete_net; -+ } -+ if (!cgroup_chown(handler, true)) { -+ ERROR("failed chown inner cgroup separation layer"); -+ goto out_delete_net; -+ } -+ } -+ -+ if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE)) -+ goto out_delete_net; -+ - cgroup_disconnect(); - cgroups_connected = false; - --- -2.11.0 - diff --git a/debian/patches/0005-start-initutils-make-cgroupns-separation-level-confi.patch b/debian/patches/0005-start-initutils-make-cgroupns-separation-level-confi.patch deleted file mode 100644 index e672b53..0000000 --- a/debian/patches/0005-start-initutils-make-cgroupns-separation-level-confi.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Wed, 28 Mar 2018 13:41:46 +0200 -Subject: [PATCH] start/initutils: make cgroupns separation level configurable - -Adds a new global config variable `lxc.cgroup.separate` -which controls whether a separation directory for cgroup -namespaces should be used. -Can be empty, "privileged", "unprivileged" or "both". 
- -Signed-off-by: Wolfgang Bumiller ---- - src/lxc/initutils.c | 17 +++++++++-------- - src/lxc/initutils.h | 1 + - src/lxc/start.c | 25 ++++++++++++++----------- - 3 files changed, 24 insertions(+), 19 deletions(-) - -diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c -index 56926fb5..c5f19ca8 100644 ---- a/src/lxc/initutils.c -+++ b/src/lxc/initutils.c -@@ -49,14 +49,15 @@ static char *copy_global_config_value(char *p) - const char *lxc_global_config_value(const char *option_name) - { - static const char * const options[][2] = { -- { "lxc.bdev.lvm.vg", DEFAULT_VG }, -- { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL }, -- { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT }, -- { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL }, -- { "lxc.lxcpath", NULL }, -- { "lxc.default_config", NULL }, -- { "lxc.cgroup.pattern", NULL }, -- { "lxc.cgroup.use", NULL }, -+ { "lxc.bdev.lvm.vg", DEFAULT_VG }, -+ { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL }, -+ { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT }, -+ { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL }, -+ { "lxc.lxcpath", NULL }, -+ { "lxc.default_config", NULL }, -+ { "lxc.cgroup.pattern", NULL }, -+ { "lxc.cgroup.use", NULL }, -+ { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT }, - { NULL, NULL }, - }; - -diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h -index ec44554e..6532f301 100644 ---- a/src/lxc/initutils.h -+++ b/src/lxc/initutils.h -@@ -42,6 +42,7 @@ - #define DEFAULT_THIN_POOL "lxc" - #define DEFAULT_ZFSROOT "lxc" - #define DEFAULT_RBDPOOL "lxc" -+#define DEFAULT_CGPROTECT "privileged" - - #ifndef PR_SET_MM - #define PR_SET_MM 35 -diff --git a/src/lxc/start.c b/src/lxc/start.c -index 772eacc2..ae13aae9 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -1721,17 +1721,20 @@ static int lxc_spawn(struct lxc_handler *handler) - TRACE("Set up legacy device cgroup controller limits"); - - if (cgns_supported()) { -- if (!cgroup_create(handler, true)) { -- ERROR("failed to create inner cgroup separation layer"); -- goto 
out_delete_net; -- } -- if (!cgroup_enter(handler, true)) { -- ERROR("failed to enter inner cgroup separation layer"); -- goto out_delete_net; -- } -- if (!cgroup_chown(handler, true)) { -- ERROR("failed chown inner cgroup separation layer"); -- goto out_delete_net; -+ const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits"); -+ if (!strcmp(tmp, "both") || !strcmp(tmp, wants_to_map_ids ? "unprivileged" : "privileged")) { -+ if (!cgroup_create(handler, true)) { -+ ERROR("failed to create inner cgroup separation layer"); -+ goto out_delete_net; -+ } -+ if (!cgroup_enter(handler, true)) { -+ ERROR("failed to enter inner cgroup separation layer"); -+ goto out_delete_net; -+ } -+ if (!cgroup_chown(handler, true)) { -+ ERROR("failed chown inner cgroup separation layer"); -+ goto out_delete_net; -+ } - } - } - --- -2.11.0 - diff --git a/debian/patches/0006-rename-cgroup-namespace-directory-to-ns.patch b/debian/patches/0006-rename-cgroup-namespace-directory-to-ns.patch deleted file mode 100644 index 7a5bf96..0000000 --- a/debian/patches/0006-rename-cgroup-namespace-directory-to-ns.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Fri, 23 Dec 2016 15:57:24 +0100 -Subject: [PATCH] rename cgroup namespace directory to ns - -Signed-off-by: Wolfgang Bumiller ---- - src/lxc/cgroups/cgroup.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h -index 3a63133d..544742df 100644 ---- a/src/lxc/cgroups/cgroup.h -+++ b/src/lxc/cgroups/cgroup.h -@@ -32,7 +32,7 @@ - * will be moved into an additional subdirectory "cgns/" inside the cgroup in - * order to prevent it from accessing the outer limiting cgroup. 
- */ --#define CGROUP_NAMESPACE_SUBDIR "cgns" -+#define CGROUP_NAMESPACE_SUBDIR "ns" - - struct lxc_handler; - struct lxc_conf; --- -2.11.0 - diff --git a/debian/patches/0007-possibility-to-run-lxc-monitord-as-a-regular-daemon.patch b/debian/patches/0007-possibility-to-run-lxc-monitord-as-a-regular-daemon.patch deleted file mode 100644 index 411830c..0000000 --- a/debian/patches/0007-possibility-to-run-lxc-monitord-as-a-regular-daemon.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Mon, 20 Nov 2017 10:49:41 +0100 -Subject: [PATCH] possibility to run lxc-monitord as a regular daemon - -lxc-monitord instances are spawned on demand and, if this -happens from a service, the daemon is considered part of -it by systemd, as it is running in the same cgroups. This -can be avoided by leaving it running permanently. - -Signed-off-by: Wolfgang Bumiller ---- - config/init/systemd/Makefile.am | 10 +++-- - config/init/systemd/lxc-monitord.service.in | 12 ++++++ - configure.ac | 1 + - lxc.spec.in | 1 + - src/lxc/cmd/lxc_monitord.c | 60 +++++++++++++++++++++-------- - 5 files changed, 63 insertions(+), 21 deletions(-) - create mode 100644 config/init/systemd/lxc-monitord.service.in - -diff --git a/config/init/systemd/Makefile.am b/config/init/systemd/Makefile.am -index c448850d..4a4fde5e 100644 ---- a/config/init/systemd/Makefile.am -+++ b/config/init/systemd/Makefile.am -@@ -2,19 +2,21 @@ EXTRA_DIST = \ - lxc-apparmor-load \ - lxc.service.in \ - lxc@.service.in \ -- lxc-net.service.in -+ lxc-net.service.in \ -+ lxc-monitord.service.in - - if INIT_SCRIPT_SYSTEMD --BUILT_SOURCES = lxc.service lxc@.service lxc-net.service -+BUILT_SOURCES = lxc.service lxc@.service lxc-net.service lxc-monitord.service - --install-systemd: lxc.service lxc@.service lxc-net.service lxc-apparmor-load -+install-systemd: lxc.service lxc@.service lxc-net.service lxc-monitord.service lxc-apparmor-load - $(MKDIR_P) 
$(DESTDIR)$(SYSTEMD_UNIT_DIR) -- $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/ -+ $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service lxc-monitord.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/ - - uninstall-systemd: - rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc.service - rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc@.service - rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-net.service -+ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-monitord.service - rmdir $(DESTDIR)$(SYSTEMD_UNIT_DIR) || : - - pkglibexec_SCRIPTS = lxc-apparmor-load -diff --git a/config/init/systemd/lxc-monitord.service.in b/config/init/systemd/lxc-monitord.service.in -new file mode 100644 -index 00000000..40635168 ---- /dev/null -+++ b/config/init/systemd/lxc-monitord.service.in -@@ -0,0 +1,12 @@ -+[Unit] -+Description=LXC Container Monitoring Daemon -+After=syslog.service network.target -+ -+[Service] -+Type=simple -+ExecStart=@LIBEXECDIR@/lxc/lxc-monitord --daemon -+StandardOutput=syslog -+StandardError=syslog -+ -+[Install] -+WantedBy=multi-user.target -diff --git a/configure.ac b/configure.ac -index 50c99836..efe56991 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -658,6 +658,7 @@ AC_CONFIG_FILES([ - config/init/systemd/lxc.service - config/init/systemd/lxc@.service - config/init/systemd/lxc-net.service -+ config/init/systemd/lxc-monitord.service - config/init/sysvinit/Makefile - config/init/sysvinit/lxc-containers - config/init/sysvinit/lxc-net -diff --git a/lxc.spec.in b/lxc.spec.in -index 004ced26..1adbb6bd 100644 ---- a/lxc.spec.in -+++ b/lxc.spec.in -@@ -235,6 +235,7 @@ fi - %{_unitdir}/lxc-net.service - %{_unitdir}/lxc.service - %{_unitdir}/lxc@.service -+%{_unitdir}/lxc-monitord.service - %else - %{_sysconfdir}/rc.d/init.d/lxc - %{_sysconfdir}/rc.d/init.d/lxc-net -diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c -index 99f2bdb8..2bc44dea 100644 ---- a/src/lxc/cmd/lxc_monitord.c -+++ b/src/lxc/cmd/lxc_monitord.c -@@ -346,17 +346,44 @@ static void 
lxc_monitord_sig_handler(int sig) - - int main(int argc, char *argv[]) - { -- int ret, pipefd; -+ int ret, pipefd = -1; - char logpath[PATH_MAX]; - sigset_t mask; -- char *lxcpath = argv[1]; -+ const char *lxcpath = NULL; - bool mainloop_opened = false; - bool monitord_created = false; -+ bool persistent = false; - struct lxc_log log; - -- if (argc != 3) { -+ if (argc > 1 && !strcmp(argv[1], "--daemon")) { -+ persistent = true; -+ --argc; -+ ++argv; -+ } -+ -+ if (argc > 1) { -+ lxcpath = argv[1]; -+ --argc; -+ ++argv; -+ } else { -+ lxcpath = lxc_global_config_value("lxc.lxcpath"); -+ if (!lxcpath) { -+ ERROR("Out of memory getting lxcpath"); -+ exit(EXIT_FAILURE); -+ } -+ } -+ -+ if (argc > 1) { -+ if (lxc_safe_int(argv[1], &pipefd) < 0) -+ exit(EXIT_FAILURE); -+ --argc; -+ ++argv; -+ } -+ -+ if (argc != 1 || (persistent != (pipefd == -1))) { - fprintf(stderr, -- "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n" -+ "Usage: lxc-monitord lxcpath sync-pipe-fd\n" -+ " lxc-monitord --daemon lxcpath\n\n" - "NOTE: lxc-monitord is intended for use by lxc internally\n" - " and does not need to be run by hand\n\n"); - exit(EXIT_FAILURE); -@@ -378,9 +405,6 @@ int main(int argc, char *argv[]) - INFO("Failed to open log file %s, log will be lost.", lxcpath); - lxc_log_options_no_override(); - -- if (lxc_safe_int(argv[2], &pipefd) < 0) -- exit(EXIT_FAILURE); -- - if (sigfillset(&mask) || - sigdelset(&mask, SIGILL) || - sigdelset(&mask, SIGSEGV) || -@@ -412,15 +436,17 @@ int main(int argc, char *argv[]) - goto on_error; - monitord_created = true; - -- /* sync with parent, we're ignoring the return from write -- * because regardless if it works or not, the following -- * close will sync us with the parent process. the -- * if-empty-statement construct is to quiet the -- * warn-unused-result warning. 
-- */ -- if (write(pipefd, "S", 1)) -- ; -- close(pipefd); -+ if (pipefd != -1) { -+ /* sync with parent, we're ignoring the return from write -+ * because regardless if it works or not, the following -+ * close will sync us with the parent process. the -+ * if-empty-statement construct is to quiet the -+ * warn-unused-result warning. -+ */ -+ if (write(pipefd, "S", 1)) -+ ; -+ close(pipefd); -+ } - - if (lxc_monitord_mainloop_add(&mon)) { - ERROR("Failed to add mainloop handlers."); -@@ -430,7 +456,7 @@ int main(int argc, char *argv[]) - NOTICE("lxc-monitord with pid %d is now monitoring lxcpath %s.", - lxc_raw_getpid(), mon.lxcpath); - for (;;) { -- ret = lxc_mainloop(&mon.descr, 1000 * 30); -+ ret = lxc_mainloop(&mon.descr, persistent ? -1 : 1000 * 30); - if (ret) { - ERROR("mainloop returned an error"); - break; --- -2.11.0 - diff --git a/debian/patches/0008-Make-lxc-.service-forking.patch b/debian/patches/0008-Make-lxc-.service-forking.patch deleted file mode 100644 index 558cb90..0000000 --- a/debian/patches/0008-Make-lxc-.service-forking.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Wolfgang Bumiller -Date: Mon, 20 Nov 2017 10:51:36 +0100 -Subject: [PATCH] Make lxc@.service forking - -Previously the init process' output was dumped into the log -files since the service used Type=simple and -StandardOutput/Error=syslog. 
- -Signed-off-by: Wolfgang Bumiller ---- - config/init/systemd/lxc@.service.in | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/config/init/systemd/lxc@.service.in b/config/init/systemd/lxc@.service.in -index a2aa2211..f312763c 100644 ---- a/config/init/systemd/lxc@.service.in -+++ b/config/init/systemd/lxc@.service.in -@@ -1,15 +1,15 @@ - [Unit] - Description=LXC Container: %i - # This pulls in apparmor, dev-setup, lxc-net --After=lxc.service -+After=lxc.service lxc-monitord.service - Wants=lxc.service - Documentation=man:lxc-start man:lxc - - [Service] --Type=simple -+Type=forking - KillMode=mixed - TimeoutStopSec=120s --ExecStart=@BINDIR@/lxc-start -F -n %i -+ExecStart=@BINDIR@/lxc-start -n %i - ExecStop=@BINDIR@/lxc-stop -n %i - # Environment=BOOTUP=serial - # Environment=CONSOLETYPE=serial --- -2.11.0 - diff --git a/debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch b/debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch new file mode 100644 index 0000000..22ede6e --- /dev/null +++ b/debian/patches/extra/0001-confile-add-lxc.monitor.signal.pdeath.patch @@ -0,0 +1,170 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 16 Jul 2018 11:07:58 +0200 +Subject: [PATCH] confile: add lxc.monitor.signal.pdeath + +Set the signal to be sent to the container's init when the lxc monitor exits. +By default it is set to SIGKILL which will cause all container processes to be +killed when the lxc monitor process dies. +To ensure that containers stay alive even if lxc monitor dies set this to 0. 
+ +Signed-off-by: Christian Brauner +(cherry picked from commit 258f80519f3bb0a41c26083020154e9a61df8468) +--- + doc/lxc.container.conf.sgml.in | 15 +++++++++++++++ + src/lxc/conf.c | 1 + + src/lxc/conf.h | 1 + + src/lxc/confile.c | 38 ++++++++++++++++++++++++++++++++++++++ + src/lxc/start.c | 9 +++++++++ + 5 files changed, 64 insertions(+) + +diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in +index 4ac26058..3e9e4e03 100644 +--- a/doc/lxc.container.conf.sgml.in ++++ b/doc/lxc.container.conf.sgml.in +@@ -2380,6 +2380,21 @@ dev/null proc/kcore none bind,relative 0 0 + + + ++ ++ ++ ++ ++ Set the signal to be sent to the container's init when the lxc ++ monitor exits. By default it is set to SIGKILL which will cause ++ all container processes to be killed when the lxc monitor process ++ dies. ++ To ensure that containers stay alive even if lxc monitor dies set ++ this to 0. ++ ++ ++ ++ ++ + + + +diff --git a/src/lxc/conf.c b/src/lxc/conf.c +index c5d6f5b1..d36987c8 100644 +--- a/src/lxc/conf.c ++++ b/src/lxc/conf.c +@@ -2683,6 +2683,7 @@ struct lxc_conf *lxc_conf_init(void) + new->console.name[0] = '\0'; + memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf)); + new->maincmd_fd = -1; ++ new->monitor_signal_pdeath = SIGKILL; + new->nbd_idx = -1; + new->rootfs.mount = strdup(default_rootfs_mount); + if (!new->rootfs.mount) { +diff --git a/src/lxc/conf.h b/src/lxc/conf.h +index ea3a71df..f7a879c3 100644 +--- a/src/lxc/conf.h ++++ b/src/lxc/conf.h +@@ -303,6 +303,7 @@ struct lxc_conf { + + /* unshare the mount namespace in the monitor */ + unsigned int monitor_unshare; ++ unsigned int monitor_signal_pdeath; + + /* list of included files */ + struct lxc_list includes; +diff --git a/src/lxc/confile.c b/src/lxc/confile.c +index 4f46d7bf..8a7505da 100644 +--- a/src/lxc/confile.c ++++ b/src/lxc/confile.c +@@ -111,6 +111,7 @@ lxc_config_define(log_file); + lxc_config_define(log_level); + lxc_config_define(log_syslog); + 
lxc_config_define(monitor); ++lxc_config_define(monitor_signal_pdeath); + lxc_config_define(mount); + lxc_config_define(mount_auto); + lxc_config_define(mount_fstab); +@@ -194,6 +195,7 @@ static struct lxc_config_t config[] = { + { "lxc.log.level", set_config_log_level, get_config_log_level, clr_config_log_level, }, + { "lxc.log.syslog", set_config_log_syslog, get_config_log_syslog, clr_config_log_syslog, }, + { "lxc.monitor.unshare", set_config_monitor, get_config_monitor, clr_config_monitor, }, ++ { "lxc.monitor.signal.pdeath", set_config_monitor_signal_pdeath, get_config_monitor_signal_pdeath, clr_config_monitor_signal_pdeath, }, + { "lxc.mount.auto", set_config_mount_auto, get_config_mount_auto, clr_config_mount_auto, }, + { "lxc.mount.entry", set_config_mount, get_config_mount, clr_config_mount, }, + { "lxc.mount.fstab", set_config_mount_fstab, get_config_mount_fstab, clr_config_mount_fstab, }, +@@ -976,6 +978,28 @@ static int set_config_monitor(const char *key, const char *value, + return -1; + } + ++static int set_config_monitor_signal_pdeath(const char *key, const char *value, ++ struct lxc_conf *lxc_conf, void *data) ++{ ++ if (lxc_config_value_empty(value)) { ++ lxc_conf->monitor_signal_pdeath = 0; ++ return 0; ++ } ++ ++ if (strcmp(key + 12, "signal.pdeath") == 0) { ++ int sig_n; ++ ++ sig_n = sig_parse(value); ++ if (sig_n < 0) ++ return -1; ++ ++ lxc_conf->monitor_signal_pdeath = sig_n; ++ return 0; ++ } ++ ++ return -EINVAL; ++} ++ + static int set_config_group(const char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) + { +@@ -3406,6 +3430,13 @@ static int get_config_monitor(const char *key, char *retv, int inlen, + return lxc_get_conf_int(c, retv, inlen, c->monitor_unshare); + } + ++static int get_config_monitor_signal_pdeath(const char *key, char *retv, ++ int inlen, struct lxc_conf *c, ++ void *data) ++{ ++ return lxc_get_conf_int(c, retv, inlen, c->monitor_signal_pdeath); ++} ++ + static int get_config_group(const char *key, 
char *retv, int inlen, + struct lxc_conf *c, void *data) + { +@@ -3957,6 +3988,13 @@ static inline int clr_config_monitor(const char *key, struct lxc_conf *c, + return 0; + } + ++static inline int clr_config_monitor_signal_pdeath(const char *key, ++ struct lxc_conf *c, void *data) ++{ ++ c->monitor_signal_pdeath = 0; ++ return 0; ++} ++ + static inline int clr_config_group(const char *key, struct lxc_conf *c, + void *data) + { +diff --git a/src/lxc/start.c b/src/lxc/start.c +index bccd5807..3343f9bf 100644 +--- a/src/lxc/start.c ++++ b/src/lxc/start.c +@@ -1383,6 +1383,15 @@ static int do_start(void *data) + goto out_warn_father; + } + ++ if (handler->conf->monitor_signal_pdeath != SIGKILL) { ++ ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath); ++ if (ret < 0) { ++ SYSERROR("Failed to set PR_SET_PDEATHSIG to %d", ++ handler->conf->monitor_signal_pdeath); ++ goto out_warn_father; ++ } ++ } ++ + /* After this call, we are in error because this ops should not return + * as it execs. 
+ */ +-- +2.11.0 + diff --git a/debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch b/debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch new file mode 100644 index 0000000..d49070a --- /dev/null +++ b/debian/patches/extra/0002-tests-add-lxc.monitor.signal.pdeath.patch @@ -0,0 +1,31 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 16 Jul 2018 15:22:13 +0200 +Subject: [PATCH] tests: add lxc.monitor.signal.pdeath + +Signed-off-by: Christian Brauner +(cherry picked from commit e53cd6d899bd50e07c8bb760371bfa2d5d4c56ef) +--- + src/tests/parse_config_file.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c +index 73b8fc3c..b6034ef2 100644 +--- a/src/tests/parse_config_file.c ++++ b/src/tests/parse_config_file.c +@@ -911,6 +911,12 @@ int main(int argc, char *argv[]) + goto non_test_error; + } + ++ ret = set_get_compare_clear_save_load(c, "lxc.monitor.signal.pdeath", "SIGKILL", tmpf, true); ++ if (ret == 0) { ++ lxc_error("%s\n", "lxc.hook.version"); ++ goto non_test_error; ++ } ++ + fret = EXIT_SUCCESS; + + non_test_error: +-- +2.11.0 + diff --git a/debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch b/debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch new file mode 100644 index 0000000..d513f4c --- /dev/null +++ b/debian/patches/extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch @@ -0,0 +1,45 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: KATOH Yasufumi +Date: Tue, 17 Jul 2018 01:14:06 +0900 +Subject: [PATCH] doc: Translate lxc.monitor.signal.pdeath into Japanese in + lxc.container.conf(5) + +Signed-off-by: KATOH Yasufumi +(cherry picked from commit fd5de0292195a2a6ba9dc6c3727de202b015c02c) +--- + doc/ja/lxc.container.conf.sgml.in | 19 +++++++++++++++++++ + 1 file changed, 19 
insertions(+) + +diff --git a/doc/ja/lxc.container.conf.sgml.in b/doc/ja/lxc.container.conf.sgml.in +index ee93f3bf..7aee768b 100644 +--- a/doc/ja/lxc.container.conf.sgml.in ++++ b/doc/ja/lxc.container.conf.sgml.in +@@ -3158,6 +3158,25 @@ by KATOH Yasufumi + + + ++ ++ ++ ++ ++ ++ lxc のモニタプロセスが終了した際に、コンテナの init プロセスに送出するシグナルを指定します。デフォルトでは、lxc のモニタプロセスが終了した場合には、すべてのコンテナ内のプロセスが停止するように SIGKILL が設定されています。 ++ lxc のモニタプロセスが終了しても、コンテナがすべて確実に動作しつづけるようにするには、この値を 0 に設定します。 ++ ++ ++ ++ ++ + + + +-- +2.11.0 + diff --git a/debian/patches/extra/0004-tests-lxc-test-apparmor-mount-show-a-log-on-error.patch b/debian/patches/extra/0004-tests-lxc-test-apparmor-mount-show-a-log-on-error.patch new file mode 100644 index 0000000..35b24c3 --- /dev/null +++ b/debian/patches/extra/0004-tests-lxc-test-apparmor-mount-show-a-log-on-error.patch @@ -0,0 +1,117 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Mon, 23 Jul 2018 17:23:08 +0200 +Subject: [PATCH] tests: lxc-test-apparmor-mount: show a log on error + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit d6523915861f2289505a11140874001099dfdfdc) +--- + src/tests/lxc-test-apparmor-mount | 24 ++++++++++++++++++++---- + 1 file changed, 20 insertions(+), 4 deletions(-) + +diff --git a/src/tests/lxc-test-apparmor-mount b/src/tests/lxc-test-apparmor-mount +index ddcee8a7..144467c8 100755 +--- a/src/tests/lxc-test-apparmor-mount ++++ b/src/tests/lxc-test-apparmor-mount +@@ -45,6 +45,7 @@ DONE=0 + KNOWN_RELEASES="precise trusty xenial yakkety zesty" + MOUNTSR=/sys/kernel/security/apparmor/features/mount + dnam=`mktemp -d` ++logfile=`mktemp` + cname=`basename $dnam` + cleanup() { + run_cmd lxc-destroy -f -n $cname || true +@@ -56,12 +57,21 @@ cleanup() { + rm -Rf $HDIR /run/user/$(id -u $TUSER) + deluser $TUSER + if [ $DONE -eq 0 ]; then ++ echo 'Failed container log:' >&2 ++ cat "$logfile" >&2 ++ echo 'End log' >&2 ++ rm -f "$logfile" + echo "FAIL" + exit 1 + fi ++ rm -f 
"$logfile" + echo "PASS" + } + ++clear_log() { ++ truncate -s0 "$logfile" ++} ++ + trap cleanup exit + + # Only run on a normally configured ubuntu lxc system +@@ -74,6 +84,8 @@ if [ "$(id -u)" != "0" ]; then + exit 1 + fi + ++chmod 0666 "$logfile" ++ + # This would be much simpler if we could run it as + # root. However, in order to not have the bind mount + # of an empty directory over the securitfs 'mount' directory +@@ -160,7 +172,7 @@ fi + run_cmd lxc-create -t download -n $cname -- -d ubuntu -r $release -a $ARCH + + echo "test default confined container" +-run_cmd lxc-start -n $cname -d ++run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" + run_cmd lxc-wait -n $cname -s RUNNING + pid=`run_cmd lxc-info -p -H -n $cname` + profile=`cat /proc/$pid/attr/current` +@@ -169,10 +181,11 @@ if [ "x$profile" != "x${default_profile}" ]; then + exit 1 + fi + run_cmd lxc-stop -n $cname -k ++clear_log + + echo "test regular unconfined container" + echo "lxc.apparmor.profile = unconfined" >> $HDIR/.local/share/lxc/$cname/config +-run_cmd lxc-start -n $cname -d ++run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" + run_cmd lxc-wait -n $cname -s RUNNING + pid=`run_cmd lxc-info -p -H -n $cname` + profile=`cat /proc/$pid/attr/current` +@@ -181,6 +194,7 @@ if [ "x$profile" != "xunconfined" ]; then + exit 1 + fi + run_cmd lxc-stop -n $cname -k ++clear_log + + echo "masking $MOUNTSR" + mount --bind $dnam $MOUNTSR +@@ -198,7 +212,7 @@ fi + + echo "test regular unconfined container" + echo "lxc.apparmor.profile = unconfined" >> $HDIR/.local/share/lxc/$cname/config +-run_cmd lxc-start -n $cname -d ++run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" + run_cmd lxc-wait -n $cname -s RUNNING + pid=`run_cmd lxc-info -p -H -n $cname` + if [ "$pid" = "-1" ]; then +@@ -211,11 +225,12 @@ if [ "x$profile" != "xunconfined" ]; then + exit 1 + fi + run_cmd lxc-stop -n $cname -k ++clear_log + + echo "testing override" + sed -i '/apparmor.profile/d' $HDIR/.local/share/lxc/$cname/config + echo 
"lxc.apparmor.allow_incomplete = 1" >> $HDIR/.local/share/lxc/$cname/config +-run_cmd lxc-start -n $cname -d ++run_cmd lxc-start -n $cname -d -lDEBUG -o "$logfile" + run_cmd lxc-wait -n $cname -s RUNNING + pid=`run_cmd lxc-info -p -H -n $cname` + if [ "$pid" = "-1" ]; then +@@ -228,5 +243,6 @@ if [ "x$profile" != "x${default_profile}" ]; then + exit 1 + fi + run_cmd lxc-stop -n $cname -k ++clear_log + + DONE=1 +-- +2.11.0 + diff --git a/debian/patches/extra/0005-tests-lxc-test-apparmor-mount-check-environment-earl.patch b/debian/patches/extra/0005-tests-lxc-test-apparmor-mount-check-environment-earl.patch new file mode 100644 index 0000000..afa0eda --- /dev/null +++ b/debian/patches/extra/0005-tests-lxc-test-apparmor-mount-check-environment-earl.patch @@ -0,0 +1,54 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Tue, 24 Jul 2018 11:49:14 +0200 +Subject: [PATCH] tests: lxc-test-apparmor-mount: check environment early + +don't kill all my processes when running it as user... + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit 39e2cbec3cc9f49e3edd65b2aa4bdd3983a25fea) +--- + src/tests/lxc-test-apparmor-mount | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/src/tests/lxc-test-apparmor-mount b/src/tests/lxc-test-apparmor-mount +index 144467c8..56d598f4 100755 +--- a/src/tests/lxc-test-apparmor-mount ++++ b/src/tests/lxc-test-apparmor-mount +@@ -23,6 +23,16 @@ + + set -e + ++# Only run on a normally configured ubuntu lxc system ++if [ ! -d /sys/class/net/lxcbr0 ]; then ++ echo "lxcbr0 is not configured." ++ exit 1 ++fi ++if [ "$(id -u)" != "0" ]; then ++ echo "ERROR: Must run as root." ++ exit 1 ++fi ++ + if [ -f /proc/self/ns/cgroup ]; then + default_profile="lxc-container-default-cgns (enforce)" + else +@@ -74,16 +84,6 @@ clear_log() { + + trap cleanup exit + +-# Only run on a normally configured ubuntu lxc system +-if [ ! 
-d /sys/class/net/lxcbr0 ]; then +- echo "lxcbr0 is not configured." +- exit 1 +-fi +-if [ "$(id -u)" != "0" ]; then +- echo "ERROR: Must run as root." +- exit 1 +-fi +- + chmod 0666 "$logfile" + + # This would be much simpler if we could run it as +-- +2.11.0 + diff --git a/debian/patches/extra/0006-lsm-fixup-lsm_process_label_set_at-return-values.patch b/debian/patches/extra/0006-lsm-fixup-lsm_process_label_set_at-return-values.patch new file mode 100644 index 0000000..162dfdb --- /dev/null +++ b/debian/patches/extra/0006-lsm-fixup-lsm_process_label_set_at-return-values.patch @@ -0,0 +1,74 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Thu, 12 Jul 2018 15:16:40 +0200 +Subject: [PATCH] lsm: fixup lsm_process_label_set_at return values + +Always return -1 on error (some code paths returned -1, some +returned negative error codes), don't assume 'errno' is set +afterwards, as the function already prints errors and not +all code paths will have a usable errno value. 
+ +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit c68d5b0dd63ea8226698ae3ff8a5336a60c171c3) +--- + src/lxc/lsm/apparmor.c | 2 +- + src/lxc/lsm/lsm.c | 12 ++++++++---- + 2 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c +index 1507917c..95b61943 100644 +--- a/src/lxc/lsm/apparmor.c ++++ b/src/lxc/lsm/apparmor.c +@@ -241,7 +241,7 @@ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf + ret = lsm_process_label_set_at(label_fd, label, on_exec); + close(label_fd); + if (ret < 0) { +- SYSERROR("Failed to change apparmor profile to %s", label); ++ ERROR("Failed to change apparmor profile to %s", label); + return -1; + } + +diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c +index f4500ae2..8d7de2db 100644 +--- a/src/lxc/lsm/lsm.c ++++ b/src/lxc/lsm/lsm.c +@@ -142,18 +142,20 @@ int lsm_process_label_set_at(int label_fd, const char *label, bool on_exec) + + if (on_exec) { + ERROR("Changing AppArmor profile on exec not supported"); +- return -EINVAL; ++ return -1; + } + + len = strlen(label) + strlen("changeprofile ") + 1; + command = malloc(len); + if (!command) +- return -1; ++ goto on_error; + + ret = snprintf(command, len, "changeprofile %s", label); + if (ret < 0 || (size_t)ret >= len) { ++ int saved_errno = errno; + free(command); +- return -1; ++ errno = saved_errno; ++ goto on_error; + } + + ret = lxc_write_nointr(label_fd, command, len - 1); +@@ -161,9 +163,11 @@ int lsm_process_label_set_at(int label_fd, const char *label, bool on_exec) + } else if (strcmp(name, "SELinux") == 0) { + ret = lxc_write_nointr(label_fd, label, strlen(label)); + } else { +- ret = -EINVAL; ++ errno = EINVAL; ++ ret = -1; + } + if (ret < 0) { ++on_error: + SYSERROR("Failed to set %s label \"%s\"", name, label); + return -1; + } +-- +2.11.0 + diff --git a/debian/patches/extra/0007-apparmor-use-fopen_cloexec.patch b/debian/patches/extra/0007-apparmor-use-fopen_cloexec.patch new file mode 
100644 index 0000000..274fdc5 --- /dev/null +++ b/debian/patches/extra/0007-apparmor-use-fopen_cloexec.patch @@ -0,0 +1,44 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Wed, 25 Jul 2018 12:06:16 +0200 +Subject: [PATCH] apparmor: use fopen_cloexec + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit 7e556d185c49ff99825612bc7d6c93afc34113c8) +--- + src/lxc/lsm/apparmor.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c +index 95b61943..5fe6d525 100644 +--- a/src/lxc/lsm/apparmor.c ++++ b/src/lxc/lsm/apparmor.c +@@ -32,6 +32,7 @@ + #include "lsm.h" + #include "conf.h" + #include "utils.h" ++#include "initutils.h" + + lxc_log_define(apparmor, lsm); + +@@ -68,7 +69,7 @@ static int apparmor_enabled(void) + char e; + int ret; + +- fin = fopen(AA_ENABLED_FILE, "r"); ++ fin = fopen_cloexec(AA_ENABLED_FILE, "r"); + if (!fin) + return 0; + ret = fscanf(fin, "%c", &e); +@@ -95,7 +96,7 @@ static char *apparmor_process_label_get(pid_t pid) + return NULL; + } + again: +- f = fopen(path, "r"); ++ f = fopen_cloexec(path, "r"); + if (!f) { + SYSERROR("opening %s", path); + free(buf); +-- +2.11.0 + diff --git a/debian/patches/extra/0008-utils-add-must_concat-helper.patch b/debian/patches/extra/0008-utils-add-must_concat-helper.patch new file mode 100644 index 0000000..f30146c --- /dev/null +++ b/debian/patches/extra/0008-utils-add-must_concat-helper.patch @@ -0,0 +1,62 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Wed, 18 Jul 2018 12:43:37 +0200 +Subject: [PATCH] utils: add must_concat helper + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit eb5c2e6aeef233c4f3349d182e5c25315cca8de8) +--- + src/lxc/utils.c | 24 ++++++++++++++++++++++++ + src/lxc/utils.h | 1 + + 2 files changed, 25 insertions(+) + +diff --git a/src/lxc/utils.c b/src/lxc/utils.c +index 
dd6cdc91..9771b900 100644 +--- a/src/lxc/utils.c ++++ b/src/lxc/utils.c +@@ -2426,6 +2426,30 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args) + return fret; + } + ++char *must_concat(const char *first, ...) ++{ ++ va_list args; ++ char *cur, *dest; ++ size_t cur_len, it_len; ++ ++ dest = must_copy_string(first); ++ cur_len = it_len = strlen(first); ++ ++ va_start(args, first); ++ while ((cur = va_arg(args, char *)) != NULL) { ++ it_len = strlen(cur); ++ ++ dest = must_realloc(dest, cur_len + it_len + 1); ++ ++ (void)memcpy(dest + cur_len, cur, it_len); ++ cur_len += it_len; ++ } ++ va_end(args); ++ ++ dest[cur_len] = 0; ++ return dest; ++} ++ + char *must_make_path(const char *first, ...) + { + va_list args; +diff --git a/src/lxc/utils.h b/src/lxc/utils.h +index 295e7862..ed486853 100644 +--- a/src/lxc/utils.h ++++ b/src/lxc/utils.h +@@ -567,6 +567,7 @@ extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), + /* Concatenate all passed-in strings into one path. Do not fail. If any piece + * is not prefixed with '/', add a '/'. 
+ */ ++__attribute__((sentinel)) extern char *must_concat(const char *first, ...); + __attribute__((sentinel)) extern char *must_make_path(const char *first, ...); + __attribute__((sentinel)) extern char *must_append_path(char *first, ...); + +-- +2.11.0 + diff --git a/debian/patches/extra/0009-apparmor-update-current-profiles.patch b/debian/patches/extra/0009-apparmor-update-current-profiles.patch new file mode 100644 index 0000000..5c0e7fd --- /dev/null +++ b/debian/patches/extra/0009-apparmor-update-current-profiles.patch @@ -0,0 +1,51 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Wed, 25 Jul 2018 12:11:23 +0200 +Subject: [PATCH] apparmor: update current profiles + +remove cgmanager rules and add fstype=cgroup2 variants for +the existing fstype=cgroup rules + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit 6e6aca3e3e71ae0cfad69456acd1dc503feaf964) +--- + config/apparmor/abstractions/container-base.in | 1 - + config/apparmor/profiles/lxc-default-cgns | 1 + + config/apparmor/profiles/lxc-default-with-nesting | 1 + + 3 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in +index 54f9ddf0..0844fdbb 100644 +--- a/config/apparmor/abstractions/container-base.in ++++ b/config/apparmor/abstractions/container-base.in +@@ -84,7 +84,6 @@ + mount fstype=sysfs -> /sys/, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, +- mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, + mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/, + + # deny reads from debugfs +diff --git a/config/apparmor/profiles/lxc-default-cgns b/config/apparmor/profiles/lxc-default-cgns +index ff599ef8..f69eb994 100644 +--- a/config/apparmor/profiles/lxc-default-cgns ++++ b/config/apparmor/profiles/lxc-default-cgns +@@ -9,4 +9,5 @@ profile 
lxc-container-default-cgns flags=(attach_disconnected,mediate_deleted) { + # the newinstance option (but, right now, we don't). + deny mount fstype=devpts, + mount fstype=cgroup -> /sys/fs/cgroup/**, ++ mount fstype=cgroup2 -> /sys/fs/cgroup/**, + } +diff --git a/config/apparmor/profiles/lxc-default-with-nesting b/config/apparmor/profiles/lxc-default-with-nesting +index 6e5745f9..cd198beb 100644 +--- a/config/apparmor/profiles/lxc-default-with-nesting ++++ b/config/apparmor/profiles/lxc-default-with-nesting +@@ -11,4 +11,5 @@ profile lxc-container-default-with-nesting flags=(attach_disconnected,mediate_de + mount fstype=sysfs -> /var/cache/lxc/**, + mount options=(rw,bind), + mount fstype=cgroup -> /sys/fs/cgroup/**, ++ mount fstype=cgroup2 -> /sys/fs/cgroup/**, + } +-- +2.11.0 + diff --git a/debian/patches/extra/0010-apparmor-profile-generation.patch b/debian/patches/extra/0010-apparmor-profile-generation.patch new file mode 100644 index 0000000..7b0294f --- /dev/null +++ b/debian/patches/extra/0010-apparmor-profile-generation.patch @@ -0,0 +1,1605 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Wed, 25 Jul 2018 12:11:31 +0200 +Subject: [PATCH] apparmor: profile generation + +This copies lxd's apparmor profile generation. This tries to +detect features such as cgroup namespaces, apparmor +namespaces and stacking support, and has profile parts +conditionally for unprivileged containers. + +This introduces the following changes to the configuration: + lxc.apparmor.profile = generated + The fixed value 'generated' will cause this + functionality to be used, otherwise there should be no + functional changes happening unless specifically + requested with the next key: + lxc.apparmor.allow_nesting + This is a boolean which, if enabled, causes the + following changes: When generated apparmor profiles are + used, they will contain the necessary changes to allow + creating a nested container. 
In addition to the usual + mount points, /dev/.lxc/proc and /dev/.lxc/sys will + contain procfs and sysfs mount points without the lxcfs + overlays, which, if generated apparmor profiles are + being used, will not be read/writable directly. + lxc.apparmor.raw + A list of raw apparmor profile lines to append to the + profile. Only valid when using generated profiles. + +The following apparmor profile lines have not been copied +from lxd: + + mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/, + mount none -> /var/lib/lxd/shmounts/, + mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**, + +They should be added via lxc.apparmor.raw entries by lxd. + +In order for apparmor_parser's cache to be of use, this adds +a --with-apparmor-cache-dir ./configure option. + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit 1800f9247357948fd11c9da73b1943a8a7b6882b) +--- + configure.ac | 8 + + src/lxc/Makefile.am | 1 + + src/lxc/conf.c | 43 ++- + src/lxc/conf.h | 8 +- + src/lxc/confile.c | 95 +++++ + src/lxc/criu.c | 3 +- + src/lxc/lsm/apparmor.c | 974 ++++++++++++++++++++++++++++++++++++++++++++++--- + src/lxc/lsm/lsm.c | 30 +- + src/lxc/lsm/lsm.h | 8 +- + src/lxc/lsm/nop.c | 2 +- + src/lxc/lsm/selinux.c | 4 +- + src/lxc/start.c | 14 +- + 12 files changed, 1134 insertions(+), 56 deletions(-) + +diff --git a/configure.ac b/configure.ac +index ea312bf3..739e100a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -469,6 +469,13 @@ AC_ARG_WITH([cgroup-pattern], + [pattern for container cgroups] + )], [], [with_cgroup_pattern=['lxc/%n']]) + ++# The path for the apparmor_parser's cache for generated apparmor profiles ++AC_ARG_WITH([apparmor-cache-dir], ++ [AC_HELP_STRING( ++ [--with-apparmor-cache-dir=dir], ++ [path for apparmor_parser cache] ++ )], [], [with_apparmor_cache_dir=['${localstatedir}/cache/lxc/apparmor']]) ++ + # Container log path. By default, use $lxcpath. 
+ AC_MSG_CHECKING([Whether to place logfiles in container config path]) + AC_ARG_ENABLE([configpath-log], +@@ -515,6 +522,7 @@ AS_AC_EXPAND(LXCBINHOOKDIR, "$libexecdir/lxc/hooks") + AS_AC_EXPAND(LXCINITDIR, "$libexecdir") + AS_AC_EXPAND(LOGPATH, "$with_log_path") + AS_AC_EXPAND(RUNTIME_PATH, "$with_runtime_path") ++AS_AC_EXPAND(APPARMOR_CACHE_DIR, "$with_apparmor_cache_dir") + AC_SUBST(DEFAULT_CGROUP_PATTERN, ["$with_cgroup_pattern"]) + + # We need the install path so criu knows where to reference the hook scripts. +diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am +index c5e46ac2..1359eb3e 100644 +--- a/src/lxc/Makefile.am ++++ b/src/lxc/Makefile.am +@@ -174,6 +174,7 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ + -DDEFAULT_CGROUP_PATTERN=\"$(DEFAULT_CGROUP_PATTERN)\" \ + -DRUNTIME_PATH=\"$(RUNTIME_PATH)\" \ + -DSBINDIR=\"$(SBINDIR)\" \ ++ -DAPPARMOR_CACHE_DIR=\"$(APPARMOR_CACHE_DIR)\" \ + -I $(top_srcdir)/src \ + -I $(top_srcdir)/src/lxc \ + -I $(top_srcdir)/src/lxc/storage \ +diff --git a/src/lxc/conf.c b/src/lxc/conf.c +index d36987c8..df805e6c 100644 +--- a/src/lxc/conf.c ++++ b/src/lxc/conf.c +@@ -2334,7 +2334,23 @@ static int setup_mount(const struct lxc_conf *conf, + return ret; + } + +-FILE *make_anonymous_mount_file(struct lxc_list *mount) ++/* ++ * In order for nested containers to be able to mount /proc and /sys they need ++ * to see a "pure" proc and sysfs mount points with nothing mounted on top ++ * (like lxcfs). ++ * For this we provide proc and sysfs in /dev/.lxc/{proc,sys} while using an ++ * apparmor rule to deny access to them. This is mostly for convenience: The ++ * container's root user can mount them anyway and thus has access to the two ++ * file systems. But a non-root user in the container should not be allowed to ++ * access them as a side effect without explicitly allowing it. 
++ */ ++static const char nesting_helpers[] = ++"proc dev/.lxc/proc proc create=dir,optional\n" ++"sys dev/.lxc/sys sysfs create=dir,optional\n" ++; ++ ++FILE *make_anonymous_mount_file(struct lxc_list *mount, ++ bool include_nesting_helpers) + { + int ret; + char *mount_entry; +@@ -2376,6 +2392,13 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount) + goto on_error; + } + ++ if (include_nesting_helpers) { ++ ret = lxc_write_nointr(fd, nesting_helpers, ++ sizeof(nesting_helpers) - 1); ++ if (ret != sizeof(nesting_helpers) - 1) ++ goto on_error; ++ } ++ + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) + goto on_error; +@@ -2396,7 +2419,7 @@ static int setup_mount_entries(const struct lxc_conf *conf, + int ret; + FILE *f; + +- f = make_anonymous_mount_file(mount); ++ f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting); + if (!f) + return -1; + +@@ -2712,6 +2735,7 @@ struct lxc_conf *lxc_conf_init(void) + lxc_list_init(&new->groups); + lxc_list_init(&new->state_clients); + new->lsm_aa_profile = NULL; ++ lxc_list_init(&new->lsm_aa_raw); + new->lsm_se_context = NULL; + new->tmp_umount_proc = false; + +@@ -3996,6 +4020,19 @@ void lxc_clear_includes(struct lxc_conf *conf) + } + } + ++int lxc_clear_apparmor_raw(struct lxc_conf *c) ++{ ++ struct lxc_list *it, *next; ++ ++ lxc_list_for_each_safe (it, &c->lsm_aa_raw, next) { ++ lxc_list_del(it); ++ free(it->elem); ++ free(it); ++ } ++ ++ return 0; ++} ++ + void lxc_conf_free(struct lxc_conf *conf) + { + if (!conf) +@@ -4023,6 +4060,7 @@ void lxc_conf_free(struct lxc_conf *conf) + free(conf->syslog); + lxc_free_networks(&conf->network); + free(conf->lsm_aa_profile); ++ free(conf->lsm_aa_profile_computed); + free(conf->lsm_se_context); + lxc_seccomp_free(conf); + lxc_clear_config_caps(conf); +@@ -4039,6 +4077,7 @@ void lxc_conf_free(struct lxc_conf *conf) + lxc_clear_limits(conf, "lxc.prlimit"); + lxc_clear_sysctls(conf, "lxc.sysctl"); + lxc_clear_procs(conf, "lxc.proc"); ++ lxc_clear_apparmor_raw(conf); + 
free(conf->cgroup_meta.dir); + free(conf->cgroup_meta.controllers); + free(conf); +diff --git a/src/lxc/conf.h b/src/lxc/conf.h +index f7a879c3..f5085d8c 100644 +--- a/src/lxc/conf.h ++++ b/src/lxc/conf.h +@@ -272,7 +272,11 @@ struct lxc_conf { + }; + + char *lsm_aa_profile; ++ char *lsm_aa_profile_computed; ++ bool lsm_aa_profile_created; ++ unsigned int lsm_aa_allow_nesting; + unsigned int lsm_aa_allow_incomplete; ++ struct lxc_list lsm_aa_raw; + char *lsm_se_context; + bool tmp_umount_proc; + char *seccomp; /* filename with the seccomp rules */ +@@ -417,7 +421,8 @@ extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, + extern void tmp_proc_unmount(struct lxc_conf *lxc_conf); + extern void remount_all_slave(void); + extern void suggest_default_idmap(void); +-extern FILE *make_anonymous_mount_file(struct lxc_list *mount); ++extern FILE *make_anonymous_mount_file(struct lxc_list *mount, ++ bool include_nesting_helpers); + extern struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings); + extern unsigned long add_required_remount_flags(const char *s, const char *d, + unsigned long flags); +@@ -431,5 +436,6 @@ extern int setup_sysctl_parameters(struct lxc_list *sysctls); + extern int lxc_clear_sysctls(struct lxc_conf *c, const char *key); + extern int setup_proc_filesystem(struct lxc_list *procs, pid_t pid); + extern int lxc_clear_procs(struct lxc_conf *c, const char *key); ++extern int lxc_clear_apparmor_raw(struct lxc_conf *c); + + #endif /* __LXC_CONF_H */ +diff --git a/src/lxc/confile.c b/src/lxc/confile.c +index 8a7505da..097b8806 100644 +--- a/src/lxc/confile.c ++++ b/src/lxc/confile.c +@@ -84,7 +84,9 @@ lxc_log_define(confile, lxc); + + lxc_config_define(autodev); + lxc_config_define(apparmor_allow_incomplete); ++lxc_config_define(apparmor_allow_nesting); + lxc_config_define(apparmor_profile); ++lxc_config_define(apparmor_raw); + lxc_config_define(cap_drop); + lxc_config_define(cap_keep); + lxc_config_define(cgroup_controller); 
+@@ -158,6 +160,8 @@ static struct lxc_config_t config[] = { + { "lxc.arch", set_config_personality, get_config_personality, clr_config_personality, }, + { "lxc.apparmor.profile", set_config_apparmor_profile, get_config_apparmor_profile, clr_config_apparmor_profile, }, + { "lxc.apparmor.allow_incomplete", set_config_apparmor_allow_incomplete, get_config_apparmor_allow_incomplete, clr_config_apparmor_allow_incomplete, }, ++ { "lxc.apparmor.allow_nesting", set_config_apparmor_allow_nesting, get_config_apparmor_allow_nesting, clr_config_apparmor_allow_nesting, }, ++ { "lxc.apparmor.raw", set_config_apparmor_raw, get_config_apparmor_raw, clr_config_apparmor_raw, }, + { "lxc.autodev", set_config_autodev, get_config_autodev, clr_config_autodev, }, + { "lxc.cap.drop", set_config_cap_drop, get_config_cap_drop, clr_config_cap_drop, }, + { "lxc.cap.keep", set_config_cap_keep, get_config_cap_keep, clr_config_cap_keep, }, +@@ -1118,6 +1122,52 @@ static int set_config_apparmor_allow_incomplete(const char *key, + return 0; + } + ++static int set_config_apparmor_allow_nesting(const char *key, ++ const char *value, ++ struct lxc_conf *lxc_conf, ++ void *data) ++{ ++ if (lxc_config_value_empty(value)) ++ return clr_config_apparmor_allow_nesting(key, lxc_conf, NULL); ++ ++ if (lxc_safe_uint(value, &lxc_conf->lsm_aa_allow_nesting) < 0) ++ return -1; ++ ++ if (lxc_conf->lsm_aa_allow_nesting > 1) ++ return -1; ++ ++ return 0; ++} ++ ++static int set_config_apparmor_raw(const char *key, ++ const char *value, ++ struct lxc_conf *lxc_conf, ++ void *data) ++{ ++ char *elem; ++ struct lxc_list *list; ++ ++ if (lxc_config_value_empty(value)) ++ return lxc_clear_apparmor_raw(lxc_conf); ++ ++ list = malloc(sizeof(*list)); ++ if (!list) { ++ errno = ENOMEM; ++ return -1; ++ } ++ ++ elem = strdup(value); ++ if (!elem) { ++ free(list); ++ return -1; ++ } ++ list->elem = elem; ++ ++ lxc_list_add_tail(&lxc_conf->lsm_aa_raw, list); ++ ++ return 0; ++} ++ + static int set_config_selinux_context(const 
char *key, const char *value, + struct lxc_conf *lxc_conf, void *data) + { +@@ -2959,6 +3009,34 @@ static int get_config_apparmor_allow_incomplete(const char *key, char *retv, + c->lsm_aa_allow_incomplete); + } + ++static int get_config_apparmor_allow_nesting(const char *key, char *retv, ++ int inlen, struct lxc_conf *c, ++ void *data) ++{ ++ return lxc_get_conf_int(c, retv, inlen, ++ c->lsm_aa_allow_nesting); ++} ++ ++static int get_config_apparmor_raw(const char *key, char *retv, ++ int inlen, struct lxc_conf *c, ++ void *data) ++{ ++ int len; ++ struct lxc_list *it; ++ int fulllen = 0; ++ ++ if (!retv) ++ inlen = 0; ++ else ++ memset(retv, 0, inlen); ++ ++ lxc_list_for_each(it, &c->lsm_aa_raw) { ++ strprint(retv, inlen, "%s\n", (char *)it->elem); ++ } ++ ++ return fulllen; ++} ++ + static int get_config_selinux_context(const char *key, char *retv, int inlen, + struct lxc_conf *c, void *data) + { +@@ -3749,6 +3827,21 @@ static inline int clr_config_apparmor_allow_incomplete(const char *key, + return 0; + } + ++static inline int clr_config_apparmor_allow_nesting(const char *key, ++ struct lxc_conf *c, ++ void *data) ++{ ++ c->lsm_aa_allow_nesting = 0; ++ return 0; ++} ++ ++static inline int clr_config_apparmor_raw(const char *key, ++ struct lxc_conf *c, ++ void *data) ++{ ++ return lxc_clear_apparmor_raw(c); ++} ++ + static inline int clr_config_selinux_context(const char *key, + struct lxc_conf *c, void *data) + { +@@ -4941,7 +5034,9 @@ int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv, + + if (!strcmp(key, "lxc.apparmor")) { + strprint(retv, inlen, "allow_incomplete\n"); ++ strprint(retv, inlen, "allow_nesting\n"); + strprint(retv, inlen, "profile\n"); ++ strprint(retv, inlen, "raw\n"); + } else if (!strcmp(key, "lxc.cgroup")) { + strprint(retv, inlen, "dir\n"); + } else if (!strcmp(key, "lxc.selinux")) { +diff --git a/src/lxc/criu.c b/src/lxc/criu.c +index 456d19cf..02d301ac 100644 +--- a/src/lxc/criu.c ++++ b/src/lxc/criu.c +@@ -378,7 
+378,8 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts) + DECLARE_ARG(opts->user->action_script); + } + +- mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list); ++ mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list, ++ opts->c->lxc_conf->lsm_aa_allow_nesting); + if (!mnts) + goto err; + +diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c +index 5fe6d525..ec3f805d 100644 +--- a/src/lxc/lsm/apparmor.c ++++ b/src/lxc/lsm/apparmor.c +@@ -33,11 +33,18 @@ + #include "conf.h" + #include "utils.h" + #include "initutils.h" ++#include "caps.h" ++#include "parse.h" + + lxc_log_define(apparmor, lsm); + + /* set by lsm_apparmor_drv_init if true */ + static int aa_enabled = 0; ++static bool aa_parser_available = false; ++static bool aa_supports_unix = false; ++static bool aa_can_stack = false; ++static bool aa_is_stacked = false; ++static bool aa_admin = false; + + static int mount_features_enabled = 0; + +@@ -46,6 +53,332 @@ static int mount_features_enabled = 0; + #define AA_MOUNT_RESTR "/sys/kernel/security/apparmor/features/mount/mask" + #define AA_ENABLED_FILE "/sys/module/apparmor/parameters/enabled" + #define AA_UNCHANGED "unchanged" ++#define AA_GENERATED "generated" ++ ++#define AA_CMD_LOAD 'r' ++#define AA_CMD_UNLOAD 'R' ++#define AA_CMD_PARSE 'Q' ++ ++static const char AA_PROFILE_BASE[] = ++" ### Base profile\n" ++" capability,\n" ++" dbus,\n" ++" file,\n" ++" network,\n" ++" umount,\n" ++"\n" ++" # Allow us to receive signals from anywhere.\n" ++" signal (receive),\n" ++"\n" ++" # Allow us to send signals to ourselves\n" ++" signal peer=@{profile_name},\n" ++"\n" ++" # Allow other processes to read our /proc entries, futexes, perf tracing and\n" ++" # kcmp for now (they will need 'read' in the first place). 
Administrators can\n" ++" # override with:\n" ++" # deny ptrace (readby) ...\n" ++" ptrace (readby),\n" ++"\n" ++" # Allow other processes to trace us by default (they will need 'trace' in\n" ++" # the first place). Administrators can override with:\n" ++" # deny ptrace (tracedby) ...\n" ++" ptrace (tracedby),\n" ++"\n" ++" # Allow us to ptrace ourselves\n" ++" ptrace peer=@{profile_name},\n" ++"\n" ++" # ignore DENIED message on / remount\n" ++" deny mount options=(ro, remount) -> /,\n" ++" deny mount options=(ro, remount, silent) -> /,\n" ++"\n" ++" # allow tmpfs mounts everywhere\n" ++" mount fstype=tmpfs,\n" ++"\n" ++" # allow hugetlbfs mounts everywhere\n" ++" mount fstype=hugetlbfs,\n" ++"\n" ++" # allow mqueue mounts everywhere\n" ++" mount fstype=mqueue,\n" ++"\n" ++" # allow fuse mounts everywhere\n" ++" mount fstype=fuse,\n" ++" mount fstype=fuse.*,\n" ++"\n" ++" # deny access under /proc/bus to avoid e.g. messing with pci devices directly\n" ++" deny @{PROC}/bus/** wklx,\n" ++"\n" ++" # deny writes in /proc/sys/fs but allow binfmt_misc to be mounted\n" ++" mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,\n" ++" deny @{PROC}/sys/fs/** wklx,\n" ++"\n" ++" # allow efivars to be mounted, writing to it will be blocked though\n" ++" mount fstype=efivarfs -> /sys/firmware/efi/efivars/,\n" ++"\n" ++" # block some other dangerous paths\n" ++" deny @{PROC}/kcore rwklx,\n" ++" deny @{PROC}/sysrq-trigger rwklx,\n" ++"\n" ++" # deny writes in /sys except for /sys/fs/cgroup, also allow\n" ++" # fusectl, securityfs and debugfs to be mounted there (read-only)\n" ++" mount fstype=fusectl -> /sys/fs/fuse/connections/,\n" ++" mount fstype=securityfs -> /sys/kernel/security/,\n" ++" mount fstype=debugfs -> /sys/kernel/debug/,\n" ++" deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,\n" ++" mount fstype=proc -> /proc/,\n" ++" mount fstype=sysfs -> /sys/,\n" ++" mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,\n" ++" deny /sys/firmware/efi/efivars/** 
rwklx,\n" ++" # note, /sys/kernel/security/** handled below\n" ++" mount options=(ro, nosuid, nodev, noexec, remount, strictatime) -> /sys/fs/cgroup/,\n" ++"\n" ++" # deny reads from debugfs\n" ++" deny /sys/kernel/debug/{,**} rwklx,\n" ++"\n" ++" # allow paths to be made slave, shared, private or unbindable\n" ++" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" ++"# mount options=(rw,make-slave) -> **,\n" ++"# mount options=(rw,make-rslave) -> **,\n" ++"# mount options=(rw,make-shared) -> **,\n" ++"# mount options=(rw,make-rshared) -> **,\n" ++"# mount options=(rw,make-private) -> **,\n" ++"# mount options=(rw,make-rprivate) -> **,\n" ++"# mount options=(rw,make-unbindable) -> **,\n" ++"# mount options=(rw,make-runbindable) -> **,\n" ++"\n" ++" # allow bind-mounts of anything except /proc, /sys and /dev\n" ++" mount options=(rw,bind) /[^spd]*{,/**},\n" ++" mount options=(rw,bind) /d[^e]*{,/**},\n" ++" mount options=(rw,bind) /de[^v]*{,/**},\n" ++" mount options=(rw,bind) /dev/.[^l]*{,/**},\n" ++" mount options=(rw,bind) /dev/.l[^x]*{,/**},\n" ++" mount options=(rw,bind) /dev/.lx[^c]*{,/**},\n" ++" mount options=(rw,bind) /dev/.lxc?*{,/**},\n" ++" mount options=(rw,bind) /dev/[^.]*{,/**},\n" ++" mount options=(rw,bind) /dev?*{,/**},\n" ++" mount options=(rw,bind) /p[^r]*{,/**},\n" ++" mount options=(rw,bind) /pr[^o]*{,/**},\n" ++" mount options=(rw,bind) /pro[^c]*{,/**},\n" ++" mount options=(rw,bind) /proc?*{,/**},\n" ++" mount options=(rw,bind) /s[^y]*{,/**},\n" ++" mount options=(rw,bind) /sy[^s]*{,/**},\n" ++" mount options=(rw,bind) /sys?*{,/**},\n" ++"\n" ++" # allow read-only bind-mounts of anything except /proc, /sys and /dev\n" ++" mount options=(ro,remount,bind) -> /[^spd]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /d[^e]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /de[^v]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**},\n" ++" mount options=(ro,remount,bind) -> 
/dev/.l[^x]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**},\n" ++" mount options=(ro,remount,bind) -> /dev/[^.]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /dev?*{,/**},\n" ++" mount options=(ro,remount,bind) -> /p[^r]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /pr[^o]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /pro[^c]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /proc?*{,/**},\n" ++" mount options=(ro,remount,bind) -> /s[^y]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /sy[^s]*{,/**},\n" ++" mount options=(ro,remount,bind) -> /sys?*{,/**},\n" ++"\n" ++" # allow moving mounts except for /proc, /sys and /dev\n" ++" mount options=(rw,move) /[^spd]*{,/**},\n" ++" mount options=(rw,move) /d[^e]*{,/**},\n" ++" mount options=(rw,move) /de[^v]*{,/**},\n" ++" mount options=(rw,move) /dev/.[^l]*{,/**},\n" ++" mount options=(rw,move) /dev/.l[^x]*{,/**},\n" ++" mount options=(rw,move) /dev/.lx[^c]*{,/**},\n" ++" mount options=(rw,move) /dev/.lxc?*{,/**},\n" ++" mount options=(rw,move) /dev/[^.]*{,/**},\n" ++" mount options=(rw,move) /dev?*{,/**},\n" ++" mount options=(rw,move) /p[^r]*{,/**},\n" ++" mount options=(rw,move) /pr[^o]*{,/**},\n" ++" mount options=(rw,move) /pro[^c]*{,/**},\n" ++" mount options=(rw,move) /proc?*{,/**},\n" ++" mount options=(rw,move) /s[^y]*{,/**},\n" ++" mount options=(rw,move) /sy[^s]*{,/**},\n" ++" mount options=(rw,move) /sys?*{,/**},\n" ++"\n" ++" # generated by: lxc-generate-aa-rules.py container-rules.base\n" ++" deny /proc/sys/[^kn]*{,/**} wklx,\n" ++" deny /proc/sys/k[^e]*{,/**} wklx,\n" ++" deny /proc/sys/ke[^r]*{,/**} wklx,\n" ++" deny /proc/sys/ker[^n]*{,/**} wklx,\n" ++" deny /proc/sys/kern[^e]*{,/**} wklx,\n" ++" deny /proc/sys/kerne[^l]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/[^smhd]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/d[^o]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/do[^m]*{,/**} wklx,\n" ++" deny 
/proc/sys/kernel/dom[^a]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/doma[^i]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/domai[^n]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/domain[^n]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/domainn[^a]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/domainna[^m]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/domainnam[^e]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/domainname?*{,/**} wklx,\n" ++" deny /proc/sys/kernel/h[^o]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/ho[^s]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/hos[^t]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/host[^n]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/hostn[^a]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/hostna[^m]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/hostnam[^e]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/hostname?*{,/**} wklx,\n" ++" deny /proc/sys/kernel/m[^s]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/ms[^g]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/msg*/** wklx,\n" ++" deny /proc/sys/kernel/s[^he]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/se[^m]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/sem*/** wklx,\n" ++" deny /proc/sys/kernel/sh[^m]*{,/**} wklx,\n" ++" deny /proc/sys/kernel/shm*/** wklx,\n" ++" deny /proc/sys/kernel?*{,/**} wklx,\n" ++" deny /proc/sys/n[^e]*{,/**} wklx,\n" ++" deny /proc/sys/ne[^t]*{,/**} wklx,\n" ++" deny /proc/sys/net?*{,/**} wklx,\n" ++" deny /sys/[^fdck]*{,/**} wklx,\n" ++" deny /sys/c[^l]*{,/**} wklx,\n" ++" deny /sys/cl[^a]*{,/**} wklx,\n" ++" deny /sys/cla[^s]*{,/**} wklx,\n" ++" deny /sys/clas[^s]*{,/**} wklx,\n" ++" deny /sys/class/[^n]*{,/**} wklx,\n" ++" deny /sys/class/n[^e]*{,/**} wklx,\n" ++" deny /sys/class/ne[^t]*{,/**} wklx,\n" ++" deny /sys/class/net?*{,/**} wklx,\n" ++" deny /sys/class?*{,/**} wklx,\n" ++" deny /sys/d[^e]*{,/**} wklx,\n" ++" deny /sys/de[^v]*{,/**} wklx,\n" ++" deny /sys/dev[^i]*{,/**} wklx,\n" ++" deny /sys/devi[^c]*{,/**} wklx,\n" ++" deny /sys/devic[^e]*{,/**} wklx,\n" ++" deny /sys/device[^s]*{,/**} wklx,\n" ++" deny 
/sys/devices/[^v]*{,/**} wklx,\n" ++" deny /sys/devices/v[^i]*{,/**} wklx,\n" ++" deny /sys/devices/vi[^r]*{,/**} wklx,\n" ++" deny /sys/devices/vir[^t]*{,/**} wklx,\n" ++" deny /sys/devices/virt[^u]*{,/**} wklx,\n" ++" deny /sys/devices/virtu[^a]*{,/**} wklx,\n" ++" deny /sys/devices/virtua[^l]*{,/**} wklx,\n" ++" deny /sys/devices/virtual/[^n]*{,/**} wklx,\n" ++" deny /sys/devices/virtual/n[^e]*{,/**} wklx,\n" ++" deny /sys/devices/virtual/ne[^t]*{,/**} wklx,\n" ++" deny /sys/devices/virtual/net?*{,/**} wklx,\n" ++" deny /sys/devices/virtual?*{,/**} wklx,\n" ++" deny /sys/devices?*{,/**} wklx,\n" ++" deny /sys/f[^s]*{,/**} wklx,\n" ++" deny /sys/fs/[^c]*{,/**} wklx,\n" ++" deny /sys/fs/c[^g]*{,/**} wklx,\n" ++" deny /sys/fs/cg[^r]*{,/**} wklx,\n" ++" deny /sys/fs/cgr[^o]*{,/**} wklx,\n" ++" deny /sys/fs/cgro[^u]*{,/**} wklx,\n" ++" deny /sys/fs/cgrou[^p]*{,/**} wklx,\n" ++" deny /sys/fs/cgroup?*{,/**} wklx,\n" ++" deny /sys/fs?*{,/**} wklx,\n" ++; ++ ++static const char AA_PROFILE_UNIX_SOCKETS[] = ++"\n" ++" ### Feature: unix\n" ++" # Allow receive via unix sockets from anywhere\n" ++" unix (receive),\n" ++"\n" ++" # Allow all unix sockets in the container\n" ++" unix peer=(label=@{profile_name}),\n" ++; ++ ++static const char AA_PROFILE_CGROUP_NAMESPACES[] = ++"\n" ++" ### Feature: cgroup namespace\n" ++" mount fstype=cgroup -> /sys/fs/cgroup/**,\n" ++" mount fstype=cgroup2 -> /sys/fs/cgroup/**,\n" ++; ++ ++/* '_BASE' because we still need to append generated change_profile rules */ ++static const char AA_PROFILE_STACKING_BASE[] = ++"\n" ++" ### Feature: apparmor stacking\n" ++" ### Configuration: apparmor profile loading (in namespace)\n" ++" deny /sys/k[^e]*{,/**} wklx,\n" ++" deny /sys/ke[^r]*{,/**} wklx,\n" ++" deny /sys/ker[^n]*{,/**} wklx,\n" ++" deny /sys/kern[^e]*{,/**} wklx,\n" ++" deny /sys/kerne[^l]*{,/**} wklx,\n" ++" deny /sys/kernel/[^s]*{,/**} wklx,\n" ++" deny /sys/kernel/s[^e]*{,/**} wklx,\n" ++" deny /sys/kernel/se[^c]*{,/**} wklx,\n" ++" deny 
/sys/kernel/sec[^u]*{,/**} wklx,\n" ++" deny /sys/kernel/secu[^r]*{,/**} wklx,\n" ++" deny /sys/kernel/secur[^i]*{,/**} wklx,\n" ++" deny /sys/kernel/securi[^t]*{,/**} wklx,\n" ++" deny /sys/kernel/securit[^y]*{,/**} wklx,\n" ++" deny /sys/kernel/security/[^a]*{,/**} wklx,\n" ++" deny /sys/kernel/security/a[^p]*{,/**} wklx,\n" ++" deny /sys/kernel/security/ap[^p]*{,/**} wklx,\n" ++" deny /sys/kernel/security/app[^a]*{,/**} wklx,\n" ++" deny /sys/kernel/security/appa[^r]*{,/**} wklx,\n" ++" deny /sys/kernel/security/appar[^m]*{,/**} wklx,\n" ++" deny /sys/kernel/security/apparm[^o]*{,/**} wklx,\n" ++" deny /sys/kernel/security/apparmo[^r]*{,/**} wklx,\n" ++" deny /sys/kernel/security/apparmor?*{,/**} wklx,\n" ++" deny /sys/kernel/security?*{,/**} wklx,\n" ++" deny /sys/kernel?*{,/**} wklx,\n" ++; ++ ++static const char AA_PROFILE_NO_STACKING[] = ++"\n" ++" ### Feature: apparmor stacking (not present)\n" ++" deny /sys/k*{,/**} rwklx,\n" ++; ++ ++/* '_BASE' because we need to append change_profile for stacking */ ++static const char AA_PROFILE_NESTING_BASE[] = ++"\n" ++" ### Configuration: nesting\n" ++" pivot_root,\n" ++" ptrace,\n" ++" signal,\n" ++"\n" ++ /* NOTE: See conf.c's "nesting_helpers" for details. */ ++" deny /dev/.lxc/proc/** rw,\n" ++" deny /dev/.lxc/sys/** rw,\n" ++"\n" ++" mount fstype=proc -> /usr/lib/*/lxc/**,\n" ++" mount fstype=sysfs -> /usr/lib/*/lxc/**,\n" ++" mount options=(rw,bind),\n" ++" mount options=(rw,rbind),\n" ++" mount options=(rw,make-rshared),\n" ++"\n" ++ /* FIXME: What's the state here on apparmor's side? 
*/ ++" # there doesn't seem to be a way to ask for:\n" ++" # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n" ++" # as we always get mount to $cdir/proc/sys with those flags denied\n" ++" # So allow all mounts until that is straightened out:\n" ++" mount,\n" ++; ++ ++static const char AA_PROFILE_UNPRIVILEGED[] = ++"\n" ++" ### Configuration: unprivileged container\n" ++" pivot_root,\n" ++"\n" ++" # Allow modifying mount propagation\n" ++" mount options=(rw,make-slave) -> **,\n" ++" mount options=(rw,make-rslave) -> **,\n" ++" mount options=(rw,make-shared) -> **,\n" ++" mount options=(rw,make-rshared) -> **,\n" ++" mount options=(rw,make-private) -> **,\n" ++" mount options=(rw,make-rprivate) -> **,\n" ++" mount options=(rw,make-unbindable) -> **,\n" ++" mount options=(rw,make-runbindable) -> **,\n" ++"\n" ++" # Allow all bind-mounts\n" ++" mount options=(rw,bind),\n" ++" mount options=(rw,rbind),\n" ++"\n" ++" # Allow remounting things read-only\n" ++" mount options=(ro,remount),\n" ++; + + static bool check_mount_feature_enabled(void) + { +@@ -144,11 +477,6 @@ static bool apparmor_am_unconfined(void) + return ret; + } + +-/* aa stacking is not yet supported */ +-static bool aa_stacking_supported(void) { +- return false; +-} +- + static bool aa_needs_transition(char *curlabel) + { + if (!curlabel) +@@ -160,61 +488,546 @@ static bool aa_needs_transition(char *curlabel) + return true; + } + ++static inline void uint64hex(char *buf, uint64_t num) ++{ ++ size_t i; ++ ++ buf[16] = 0; ++ for (i = 16; i--;) { ++ char c = (char)(num & 0xf); ++ buf[i] = c + (c < 0xa ? 
'0' : 'a' - 0xa); ++ num >>= 4; ++ } ++} ++ ++static inline char *shorten_apparmor_name(char *name) ++{ ++ size_t len = strlen(name); ++ if (len + 7 > 253) { ++ uint64_t hash; ++ hash = fnv_64a_buf(name, len, FNV1A_64_INIT); ++ name = must_realloc(name, 16 + 1); ++ uint64hex(name, hash); ++ } ++ ++ return name; ++} ++ ++/* Replace slashes with hyphens */ ++static inline void sanitize_path(char *path) ++{ ++ size_t i; ++ ++ for (i = 0; path[i]; i++) ++ if (path[i] == '/') ++ path[i] = '-'; ++} ++ ++static inline char *apparmor_dir(const char *ctname, const char *lxcpath) ++{ ++ return must_make_path(lxcpath, ctname, "apparmor", NULL); ++} ++ ++ ++static inline char *apparmor_profile_full(const char *ctname, const char *lxcpath) ++{ ++ return shorten_apparmor_name(must_concat("lxc-", ctname, "_<", lxcpath, ">", NULL)); ++} ++ ++/* Like apparmor_profile_full() but with slashes replaced by hyphens */ ++static inline char *apparmor_namespace(const char *ctname, const char *lxcpath) ++{ ++ char *full; ++ ++ full = apparmor_profile_full(ctname, lxcpath); ++ sanitize_path(full); ++ ++ return full; ++} ++ ++/* FIXME: This is currently run only in the context of a constructor (via the ++ * initial lsm_init() called due to its __attribute__((constructor)), so we ++ * do not have ERROR/... macros available, so there are some fprintf(stderr)s ++ * in there. ++ */ ++static bool check_apparmor_parser_version() ++{ ++ struct lxc_popen_FILE *parserpipe; ++ int rc; ++ int major = 0, minor = 0, micro = 0; ++ ++ parserpipe = lxc_popen("apparmor_parser --version"); ++ if (!parserpipe) { ++ fprintf(stderr, "Failed to run check for apparmor_parser\n"); ++ return false; ++ } ++ ++ rc = fscanf(parserpipe->f, "AppArmor parser version %d.%d.%d", &major, &minor, &micro); ++ if (rc < 1) { ++ lxc_pclose(parserpipe); ++ /* We stay silent for now as this most likely means the shell ++ * lxc_popen executed failed to find the apparmor_parser binary. ++ * See the FIXME comment above for details.
++ */ ++ return false; ++ } ++ ++ rc = lxc_pclose(parserpipe); ++ if (rc < 0) { ++ fprintf(stderr, "Error waiting for child process\n"); ++ return false; ++ } ++ if (rc != 0) { ++ fprintf(stderr, "'apparmor_parser --version' executed with an error status\n"); ++ return false; ++ } ++ ++ aa_supports_unix = (major > 2) || ++ (major == 2 && minor > 10) || ++ (major == 2 && minor == 10 && micro >= 95); ++ ++ return true; ++} ++ ++static bool file_is_yes(const char *path) ++{ ++ ssize_t rd; ++ int fd; ++ char buf[8]; /* we actually just expect "yes" or "no" */ ++ ++ fd = open(path, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ return false; ++ ++ rd = read(fd, buf, sizeof(buf)); ++ close(fd); ++ ++ return rd >= 4 && strncmp(buf, "yes\n", 4) == 0; ++} ++ ++static bool apparmor_can_stack() ++{ ++ int major, minor, scanned; ++ FILE *f; ++ ++ if (!file_is_yes("/sys/kernel/security/apparmor/features/domain/stack")) ++ return false; ++ ++ f = fopen_cloexec("/sys/kernel/security/apparmor/features/domain/version", "r"); ++ if (!f) ++ return false; ++ ++ scanned = fscanf(f, "%d.%d", &major, &minor); ++ fclose(f); ++ if (scanned != 2) ++ return false; ++ ++ return major > 1 || (major == 1 && minor >= 2); ++} ++ ++static void must_append_sized_full(char **buf, size_t *bufsz, const char *data, ++ size_t size, bool append_newline) ++{ ++ size_t newsize = *bufsz + size; ++ ++ if (append_newline) ++ ++newsize; ++ ++ *buf = must_realloc(*buf, newsize); ++ memcpy(*buf + *bufsz, data, size); ++ ++ if (append_newline) ++ (*buf)[newsize - 1] = '\n'; ++ ++ *bufsz = newsize; ++} ++ ++static void must_append_sized(char **buf, size_t *bufsz, const char *data, size_t size) ++{ ++ return must_append_sized_full(buf, bufsz, data, size, false); ++} ++ ++static bool is_privileged(struct lxc_conf *conf) ++{ ++ return lxc_list_empty(&conf->id_map); ++} ++ ++static char *get_apparmor_profile_content(struct lxc_conf *conf, const char *lxcpath) ++{ ++ char *profile, *profile_name_full; ++ size_t size; ++ 
struct lxc_list *it; ++ ++ profile_name_full = apparmor_profile_full(conf->name, lxcpath); ++ ++ profile = must_concat( ++"#include <tunables/global>\n" ++"profile \"", profile_name_full, "\" flags=(attach_disconnected,mediate_deleted) {\n", ++ NULL); ++ size = strlen(profile); ++ ++ must_append_sized(&profile, &size, AA_PROFILE_BASE, ++ sizeof(AA_PROFILE_BASE) - 1); ++ ++ if (aa_supports_unix) ++ must_append_sized(&profile, &size, AA_PROFILE_UNIX_SOCKETS, ++ sizeof(AA_PROFILE_UNIX_SOCKETS) - 1); ++ ++ if (file_exists("/proc/self/ns/cgroup")) ++ must_append_sized(&profile, &size, AA_PROFILE_CGROUP_NAMESPACES, ++ sizeof(AA_PROFILE_CGROUP_NAMESPACES) - 1); ++ ++ if (aa_can_stack && !aa_is_stacked) { ++ char *namespace, *temp; ++ ++ must_append_sized(&profile, &size, AA_PROFILE_STACKING_BASE, ++ sizeof(AA_PROFILE_STACKING_BASE) - 1); ++ ++ namespace = apparmor_namespace(conf->name, lxcpath); ++ temp = must_concat(" change_profile -> \":", namespace, ":*\",\n" ++ " change_profile -> \":", namespace, "://*\",\n", ++ NULL); ++ free(namespace); ++ ++ must_append_sized(&profile, &size, temp, strlen(temp)); ++ free(temp); ++ } else { ++ must_append_sized(&profile, &size, AA_PROFILE_NO_STACKING, ++ sizeof(AA_PROFILE_NO_STACKING) - 1); ++ } ++ ++ if (conf->lsm_aa_allow_nesting) { ++ must_append_sized(&profile, &size, AA_PROFILE_NESTING_BASE, ++ sizeof(AA_PROFILE_NESTING_BASE) - 1); ++ ++ if (!aa_can_stack || aa_is_stacked) { ++ char *temp; ++ ++ temp = must_concat(" change_profile -> \"", ++ profile_name_full, "\",\n", NULL); ++ must_append_sized(&profile, &size, temp, strlen(temp)); ++ free(temp); ++ } ++ } ++ ++ if (!is_privileged(conf) || am_host_unpriv()) ++ must_append_sized(&profile, &size, AA_PROFILE_UNPRIVILEGED, ++ sizeof(AA_PROFILE_UNPRIVILEGED) - 1); ++ ++ lxc_list_for_each(it, &conf->lsm_aa_raw) { ++ const char *line = it->elem; ++ ++ must_append_sized_full(&profile, &size, line, strlen(line), true); ++ } ++ ++ /* include terminating \0 byte */ ++ must_append_sized(&profile, 
&size, "}\n", 3); ++ ++ free(profile_name_full); ++ ++ return profile; ++} ++ + /* +- * apparmor_process_label_set: Set AppArmor process profile +- * +- * @label : the profile to set +- * @conf : the container configuration to use if @label is NULL +- * @default : use the default profile if @label is NULL +- * @on_exec : this is ignored. Apparmor profile will be changed immediately +- * +- * Returns 0 on success, < 0 on failure +- * +- * Notes: This relies on /proc being available. ++ * apparmor_parser creates a cache file using the parsed file's name as a name. ++ * This means there may be multiple containers with the same name but different ++ * lxcpaths. Therefore we need a sanitized version of the complete profile name ++ * as profile file-name. ++ * We already get this exactly from apparmor_namespace(). + */ +-static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf, +- bool use_default, bool on_exec) ++static char *make_apparmor_profile_path(const char *ctname, const char *lxcpath) + { +- int label_fd, ret; +- pid_t tid; +- const char *label = inlabel ? 
inlabel : conf->lsm_aa_profile; +- char *curlabel; ++ char *ret, *filename; + +- if (!aa_enabled) +- return 0; ++ filename = apparmor_namespace(ctname, lxcpath); ++ ret = must_make_path(lxcpath, ctname, "apparmor", filename, NULL); ++ free(filename); ++ ++ return ret; ++} ++ ++static char *make_apparmor_namespace_path(const char *ctname, const char *lxcpath) ++{ ++ char *ret, *namespace; ++ ++ namespace = apparmor_namespace(ctname, lxcpath); ++ ret = must_make_path("/sys/kernel/security/apparmor/policy/namespaces", namespace, NULL); ++ free(namespace); ++ ++ return ret; ++} ++ ++static bool make_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath) ++{ ++ char *path; ++ ++ if (!aa_can_stack || aa_is_stacked) ++ return true; ++ ++ path = make_apparmor_namespace_path(conf->name, lxcpath); ++ errno = 0; ++ if (mkdir(path, 0755) < 0 && errno != EEXIST) { ++ SYSERROR("Error creating AppArmor namespace: %s", path); ++ free(path); ++ return false; ++ } ++ free(path); ++ ++ return true; ++} ++ ++static void remove_apparmor_namespace(struct lxc_conf *conf, const char *lxcpath) ++{ ++ char *path; ++ ++ path = make_apparmor_namespace_path(conf->name, lxcpath); ++ if (rmdir(path) != 0) ++ SYSERROR("Error removing AppArmor namespace"); ++ free(path); ++} ++ ++struct apparmor_parser_args { ++ char cmd; ++ char *file; ++}; ++ ++static int apparmor_parser_exec(void *data) ++{ ++ struct apparmor_parser_args *args = data; ++ char cmdbuf[] = { '-', args->cmd, 'W', 'L', 0 }; ++ ++ execlp("apparmor_parser", "apparmor_parser", cmdbuf, APPARMOR_CACHE_DIR, args->file, NULL); ++ ++ return -1; ++} ++ ++static int run_apparmor_parser(char command, ++ struct lxc_conf *conf, ++ const char *lxcpath) ++{ ++ char output[MAXPATHLEN]; ++ int ret; ++ struct apparmor_parser_args args = { ++ .cmd = command, ++ .file = make_apparmor_profile_path(conf->name, lxcpath), ++ }; ++ ++ ret = run_command(output, sizeof(output), apparmor_parser_exec, (void*)&args); ++ if (ret < 0) { ++ ERROR("Failed to 
run apparmor_parser on \"%s\": %s", args.file, output); ++ ret = -1; ++ } ++ ++ ++ free(args.file); ++ return ret; ++} ++ ++static void remove_apparmor_profile(struct lxc_conf *conf, const char *lxcpath) ++{ ++ char *path; ++ ++ /* It's ok if these deletes fail: if the container was never started, ++ * we'll have never written a profile or cached it. ++ */ ++ ++ path = make_apparmor_profile_path(conf->name, lxcpath); ++ (void)unlink(path); ++ free(path); ++ ++ /* Also remove the apparmor/ subdirectory */ ++ path = apparmor_dir(conf->name, lxcpath); ++ (void)rmdir(path); ++ free(path); ++} ++ ++static int load_apparmor_profile(struct lxc_conf *conf, const char *lxcpath) ++{ ++ struct stat profile_sb; ++ size_t content_len; ++ int ret = -1; ++ size_t old_len = 0; ++ char *profile_path = NULL, *old_content = NULL, *new_content = NULL; ++ int profile_fd = -1; ++ ++ if (!make_apparmor_namespace(conf, lxcpath)) ++ return -1; ++ ++ /* In order to avoid forcing a profile parse (potentially slow) on ++ * every container start, let's use apparmor's binary policy cache, ++ * which checks mtime of the files to figure out if the policy needs to ++ * be regenerated. ++ * ++ * Since it uses mtimes, we shouldn't just always write out our local ++ * apparmor template; instead we should check to see whether the ++ * template is the same as ours. If it isn't we should write our ++ * version out so that the new changes are reflected and we definitely ++ * force a recompile. 
++ */ ++ ++ profile_path = make_apparmor_profile_path(conf->name, lxcpath); ++ profile_fd = open(profile_path, O_RDONLY | O_CLOEXEC); ++ if (profile_fd >= 0) { ++ if (fstat(profile_fd, &profile_sb) < 0) { ++ SYSERROR("Error accessing old profile from %s", ++ profile_path); ++ goto out; ++ } ++ old_len = profile_sb.st_size; ++ old_content = lxc_strmmap(NULL, old_len, PROT_READ, ++ MAP_PRIVATE, profile_fd, 0); ++ if (!old_content) { ++ SYSERROR("Failed to mmap old profile from %s", ++ profile_path); ++ goto out; ++ } ++ } else if (errno != ENOENT) { ++ SYSERROR("Error reading old profile from %s", profile_path); ++ goto out; ++ } ++ ++ new_content = get_apparmor_profile_content(conf, lxcpath); ++ if (!new_content) ++ goto out; ++ ++ content_len = strlen(new_content); ++ ++ if (!old_content || old_len != content_len || memcmp(old_content, new_content, content_len) != 0) { ++ char *path; ++ ++ ret = mkdir_p(APPARMOR_CACHE_DIR, 0755); ++ if (ret < 0) { ++ SYSERROR("Error creating AppArmor profile cache directory " APPARMOR_CACHE_DIR); ++ goto out; ++ } ++ ++ path = apparmor_dir(conf->name, lxcpath); ++ ret = mkdir_p(path, 0755); ++ if (ret < 0) { ++ SYSERROR("Error creating AppArmor profile directory: %s", path); ++ free(path); ++ goto out; ++ } ++ free(path); ++ ++ ret = lxc_write_to_file(profile_path, new_content, content_len, false, 0600); ++ if (ret < 0) { ++ SYSERROR("Error writing profile to %s", profile_path); ++ goto out; ++ } ++ } ++ ++ ret = run_apparmor_parser(AA_CMD_LOAD, conf, lxcpath); ++ if (ret != 0) ++ goto out_remove_profile; ++ ++ conf->lsm_aa_profile_created = true; ++ ++ goto out_ok; ++ ++out_remove_profile: ++ remove_apparmor_profile(conf, lxcpath); ++out: ++ remove_apparmor_namespace(conf, lxcpath); ++out_ok: ++ if (profile_fd >= 0) { ++ if (old_content) ++ lxc_strmunmap(old_content, old_len); ++ close(profile_fd); ++ } ++ free(profile_path); ++ free(new_content); ++ return ret; ++} ++ ++/* ++ * Ensure that the container's policy namespace is 
unloaded to free kernel ++ * memory. This does not delete the policy from disk or cache. ++ */ ++static void apparmor_cleanup(struct lxc_conf *conf, const char *lxcpath) ++{ ++ if (!aa_admin) ++ return; ++ ++ if (!conf->lsm_aa_profile_created) ++ return; ++ ++ remove_apparmor_namespace(conf, lxcpath); ++ (void)run_apparmor_parser(AA_CMD_UNLOAD, conf, lxcpath); ++ ++ remove_apparmor_profile(conf, lxcpath); ++} ++ ++static int apparmor_prepare(struct lxc_conf *conf, const char *lxcpath) ++{ ++ int ret = -1; ++ const char *label; ++ char *curlabel = NULL, *genlabel = NULL; ++ ++ if (!aa_enabled) { ++ ERROR("AppArmor not enabled"); ++ return -1; ++ } ++ ++ label = conf->lsm_aa_profile; + + /* user may request that we just ignore apparmor */ + if (label && strcmp(label, AA_UNCHANGED) == 0) { +- INFO("apparmor profile unchanged per user request"); ++ INFO("AppArmor profile unchanged per user request"); ++ conf->lsm_aa_profile_computed = must_copy_string(label); + return 0; + } + ++ if (label && strcmp(label, AA_GENERATED) == 0) { ++ if (!aa_parser_available) { ++ ERROR("Cannot use generated profile: apparmor_parser not available"); ++ goto out; ++ } ++ ++ /* auto-generate profile based on available/requested security features */ ++ if (load_apparmor_profile(conf, lxcpath) != 0) { ++ ERROR("Failed to load generated AppArmor profile"); ++ goto out; ++ } ++ ++ genlabel = apparmor_profile_full(conf->name, lxcpath); ++ if (!genlabel) { ++ ERROR("Failed to build AppArmor profile name"); ++ goto out; ++ } ++ ++ if (aa_can_stack && !aa_is_stacked) { ++ char *namespace = apparmor_namespace(conf->name, lxcpath); ++ size_t llen = strlen(genlabel); ++ must_append_sized(&genlabel, &llen, "//&:", sizeof("//&:") - 1); ++ must_append_sized(&genlabel, &llen, namespace, strlen(namespace)); ++ must_append_sized(&genlabel, &llen, ":", sizeof(":")); /* with the nul byte */ ++ free(namespace); ++ } ++ ++ label = genlabel; ++ } ++ + curlabel = apparmor_process_label_get(lxc_raw_getpid()); + +- 
if (!aa_stacking_supported() && aa_needs_transition(curlabel)) { ++ if (!aa_can_stack && aa_needs_transition(curlabel)) { + /* we're already confined, and stacking isn't supported */ + + if (!label || strcmp(curlabel, label) == 0) { + /* no change requested */ +- free(curlabel); +- return 0; ++ ret = 0; ++ goto out; + } + +- ERROR("already apparmor confined, but new label requested."); +- free(curlabel); +- return -1; ++ ERROR("Already AppArmor confined, but new label requested."); ++ goto out; + } +- free(curlabel); + + if (!label) { +- if (use_default) { +- if (cgns_supported()) +- label = AA_DEF_PROFILE_CGNS; +- else +- label = AA_DEF_PROFILE; +- } ++ if (cgns_supported()) ++ label = AA_DEF_PROFILE_CGNS; + else +- label = "unconfined"; ++ label = AA_DEF_PROFILE; + } + + if (!check_mount_feature_enabled() && strcmp(label, "unconfined") != 0) { +@@ -223,30 +1036,78 @@ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf + ERROR("If you really want to start this container, set"); + ERROR("lxc.apparmor.allow_incomplete = 1"); + ERROR("in your container configuration file"); +- return -1; ++ goto out; + } + } + ++ conf->lsm_aa_profile_computed = must_copy_string(label); ++ ret = 0; ++ ++out: ++ if (genlabel) { ++ free(genlabel); ++ if (ret != 0) ++ apparmor_cleanup(conf, lxcpath); ++ } ++ free(curlabel); ++ return ret; ++} ++ ++/* ++ * apparmor_process_label_set: Set AppArmor process profile ++ * ++ * @label : the profile to set ++ * @conf : the container configuration to use if @label is NULL ++ * @default : use the default profile if @label is NULL ++ * @on_exec : this is ignored. Apparmor profile will be changed immediately ++ * ++ * Returns 0 on success, < 0 on failure ++ * ++ * Notes: This relies on /proc being available. 
++ */ ++static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf, ++ bool on_exec) ++{ ++ int label_fd, ret; ++ pid_t tid; ++ const char *label; ++ ++ if (!aa_enabled) { ++ ERROR("AppArmor not enabled"); ++ return -1; ++ } ++ ++ label = inlabel ? inlabel : conf->lsm_aa_profile_computed; ++ if (!label) { ++ ERROR("LSM wasn't prepared"); ++ return -1; ++ } ++ ++ /* user may request that we just ignore apparmor */ ++ if (strcmp(label, AA_UNCHANGED) == 0) { ++ INFO("AppArmor profile unchanged per user request"); ++ return 0; ++ } + + if (strcmp(label, "unconfined") == 0 && apparmor_am_unconfined()) { +- INFO("apparmor profile unchanged"); ++ INFO("AppArmor profile unchanged"); + return 0; + } + tid = lxc_raw_gettid(); + label_fd = lsm_process_label_fd_get(tid, on_exec); + if (label_fd < 0) { +- SYSERROR("Failed to change apparmor profile to %s", label); ++ SYSERROR("Failed to change AppArmor profile to %s", label); + return -1; + } + + ret = lsm_process_label_set_at(label_fd, label, on_exec); + close(label_fd); + if (ret < 0) { +- ERROR("Failed to change apparmor profile to %s", label); ++ ERROR("Failed to change AppArmor profile to %s", label); + return -1; + } + +- INFO("Changed apparmor profile to %s", label); ++ INFO("Changed AppArmor profile to %s", label); + return 0; + } + +@@ -255,12 +1116,39 @@ static struct lsm_drv apparmor_drv = { + .enabled = apparmor_enabled, + .process_label_get = apparmor_process_label_get, + .process_label_set = apparmor_process_label_set, ++ .prepare = apparmor_prepare, ++ .cleanup = apparmor_cleanup, + }; + + struct lsm_drv *lsm_apparmor_drv_init(void) + { ++ bool have_mac_admin = false; ++ + if (!apparmor_enabled()) + return NULL; ++ ++ /* We only support generated profiles when apparmor_parser is usable */ ++ if (!check_apparmor_parser_version()) ++ goto out; ++ ++ aa_parser_available = true; ++ ++ aa_can_stack = apparmor_can_stack(); ++ if (aa_can_stack) ++ aa_is_stacked = 
file_is_yes("/sys/kernel/security/apparmor/.ns_stacked"); ++ ++ #if HAVE_LIBCAP ++ have_mac_admin = lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE); ++ #endif ++ ++ if (!have_mac_admin) ++ WARN("Per-container AppArmor profiles are disabled because the mac_admin capability is missing"); ++ else if (am_host_unpriv() && !aa_is_stacked) ++ WARN("Per-container AppArmor profiles are disabled because LXC is running in an unprivileged container without stacking"); ++ else ++ aa_admin = true; ++ ++out: + aa_enabled = 1; + return &apparmor_drv; + } +diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c +index 8d7de2db..46e21206 100644 +--- a/src/lxc/lsm/lsm.c ++++ b/src/lxc/lsm/lsm.c +@@ -177,11 +177,37 @@ on_error: + } + + int lsm_process_label_set(const char *label, struct lxc_conf *conf, +- bool use_default, bool on_exec) ++ bool on_exec) + { + if (!drv) { + ERROR("LSM driver not inited"); + return -1; + } +- return drv->process_label_set(label, conf, use_default, on_exec); ++ return drv->process_label_set(label, conf, on_exec); ++} ++ ++int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath) ++{ ++ if (!drv) { ++ ERROR("LSM driver not inited"); ++ return 0; ++ } ++ ++ if (!drv->prepare) ++ return 0; ++ ++ return drv->prepare(conf, lxcpath); ++} ++ ++void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath) ++{ ++ if (!drv) { ++ ERROR("LSM driver not inited"); ++ return; ++ } ++ ++ if (!drv->cleanup) ++ return; ++ ++ drv->cleanup(conf, lxcpath); + } +diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h +index cafb2ac7..52e656d6 100644 +--- a/src/lxc/lsm/lsm.h ++++ b/src/lxc/lsm/lsm.h +@@ -38,17 +38,21 @@ struct lsm_drv { + int (*enabled)(void); + char *(*process_label_get)(pid_t pid); + int (*process_label_set)(const char *label, struct lxc_conf *conf, +- bool use_default, bool on_exec); ++ bool on_exec); ++ int (*prepare)(struct lxc_conf *conf, const char *lxcpath); ++ void (*cleanup)(struct lxc_conf *conf, const char *lxcpath); + }; + + extern void 
lsm_init(void); + extern int lsm_enabled(void); + extern const char *lsm_name(void); + extern char *lsm_process_label_get(pid_t pid); ++extern int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath); + extern int lsm_process_label_set(const char *label, struct lxc_conf *conf, +- bool use_default, bool on_exec); ++ bool on_exec); + extern int lsm_process_label_fd_get(pid_t pid, bool on_exec); + extern int lsm_process_label_set_at(int label_fd, const char *label, + bool on_exec); ++extern void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath); + + #endif /* __LXC_LSM_H */ +diff --git a/src/lxc/lsm/nop.c b/src/lxc/lsm/nop.c +index 7bb8121b..9397f2bf 100644 +--- a/src/lxc/lsm/nop.c ++++ b/src/lxc/lsm/nop.c +@@ -30,7 +30,7 @@ static char *nop_process_label_get(pid_t pid) + } + + static int nop_process_label_set(const char *label, struct lxc_conf *conf, +- bool use_default, bool on_exec) ++ bool on_exec) + { + return 0; + } +diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c +index c88c18e3..9f7b7bc3 100644 +--- a/src/lxc/lsm/selinux.c ++++ b/src/lxc/lsm/selinux.c +@@ -75,15 +75,13 @@ static char *selinux_process_label_get(pid_t pid) + * Notes: This relies on /proc being available. + */ + static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf, +- bool use_default, bool on_exec) ++ bool on_exec) + { + int ret; + const char *label; + + label = inlabel ? 
inlabel : conf->lsm_se_context; + if (!label) { +- if (!use_default) +- return -EINVAL; + + label = DEFAULT_LABEL; + } +diff --git a/src/lxc/start.c b/src/lxc/start.c +index 3343f9bf..6d6dc7ee 100644 +--- a/src/lxc/start.c ++++ b/src/lxc/start.c +@@ -863,9 +863,19 @@ int lxc_init(const char *name, struct lxc_handler *handler) + } + TRACE("Initialized cgroup driver"); + ++ ret = lsm_process_prepare(conf, handler->lxcpath); ++ if (ret < 0) { ++ ERROR("Failed to initialize LSM"); ++ goto out_destroy_cgroups; ++ } ++ TRACE("Initialized LSM"); ++ + INFO("Container \"%s\" is initialized", name); + return 0; + ++out_destroy_cgroups: ++ handler->cgroup_ops->destroy(handler->cgroup_ops, handler); ++ + out_delete_terminal: + lxc_terminal_delete(&handler->conf->console); + +@@ -956,6 +966,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler) + while (namespace_count--) + free(namespaces[namespace_count]); + ++ lsm_process_cleanup(handler->conf, handler->lxcpath); ++ + cgroup_ops->destroy(cgroup_ops, handler); + cgroup_exit(cgroup_ops); + +@@ -1235,7 +1247,7 @@ static int do_start(void *data) + } + + /* Set the label to change to when we exec(2) the container's init. */ +- ret = lsm_process_label_set(NULL, handler->conf, 1, 1); ++ ret = lsm_process_label_set(NULL, handler->conf, true); + if (ret < 0) + goto out_warn_father; + +-- +2.11.0 + diff --git a/debian/patches/extra/0011-apparmor-allow-start-container-to-change-to-lxc.patch b/debian/patches/extra/0011-apparmor-allow-start-container-to-change-to-lxc.patch new file mode 100644 index 0000000..6bffc13 --- /dev/null +++ b/debian/patches/extra/0011-apparmor-allow-start-container-to-change-to-lxc.patch @@ -0,0 +1,29 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Tue, 24 Jul 2018 16:42:26 +0200 +Subject: [PATCH] apparmor: allow start-container to change to lxc-** + +For generated profiles with apparmor namespaces we get +profile names with slashes in them. 
To match those, we need +to allow changing to lxc-**, not just lxc-*. + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit 242a9fa7ee7e9f524de5a23917faa846ea525622) +--- + config/apparmor/abstractions/start-container | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/config/apparmor/abstractions/start-container b/config/apparmor/abstractions/start-container +index 414d058b..3df9883e 100644 +--- a/config/apparmor/abstractions/start-container ++++ b/config/apparmor/abstractions/start-container +@@ -40,5 +40,6 @@ + pivot_root /usr/lib*/*/lxc/**, + + change_profile -> lxc-*, ++ change_profile -> lxc-**, + change_profile -> unconfined, + change_profile -> :lxc-*:unconfined, +-- +2.11.0 + diff --git a/debian/patches/extra/0012-tests-add-test-for-generated-apparmor-profiles.patch b/debian/patches/extra/0012-tests-add-test-for-generated-apparmor-profiles.patch new file mode 100644 index 0000000..aa22978 --- /dev/null +++ b/debian/patches/extra/0012-tests-add-test-for-generated-apparmor-profiles.patch @@ -0,0 +1,126 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Tue, 24 Jul 2018 13:59:04 +0200 +Subject: [PATCH] tests: add test for generated apparmor profiles + +Signed-off-by: Wolfgang Bumiller +(cherry picked from commit e7311a84e5bd0758931033b1a0ce649baa720a58) +--- + src/tests/Makefile.am | 2 + + src/tests/lxc-test-apparmor-generated | 84 +++++++++++++++++++++++++++++++++++ + 2 files changed, 86 insertions(+) + create mode 100755 src/tests/lxc-test-apparmor-generated + +diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am +index a2179c3c..df5d35df 100644 +--- a/src/tests/Makefile.am ++++ b/src/tests/Makefile.am +@@ -79,6 +79,7 @@ if DISTRO_UBUNTU + bin_SCRIPTS += \ + lxc-test-lxc-attach \ + lxc-test-apparmor-mount \ ++ lxc-test-apparmor-generated \ + lxc-test-checkpoint-restore \ + lxc-test-snapdeps \ + lxc-test-symlink \ +@@ -112,6 +113,7 @@ EXTRA_DIST = \ + lxc-test-rootfs \ + 
lxc-test-autostart \ + lxc-test-apparmor-mount \ ++ lxc-test-apparmor-generated \ + lxc-test-checkpoint-restore \ + lxc-test-cloneconfig \ + lxc-test-createconfig \ +diff --git a/src/tests/lxc-test-apparmor-generated b/src/tests/lxc-test-apparmor-generated +new file mode 100755 +index 00000000..be2e3261 +--- /dev/null ++++ b/src/tests/lxc-test-apparmor-generated +@@ -0,0 +1,84 @@ ++#!/bin/sh ++ ++# lxc: linux Container library ++ ++# This is a test script for generated apparmor profiles ++ ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++ ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++ ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ ++if ! 
which apparmor_parser >/dev/null 2>&1; then ++ echo 'SKIP: test for generated apparmor profiles: apparmor_parser missing' ++fi ++exit 0 ++ ++DONE=0 ++KNOWN_RELEASES="precise trusty xenial yakkety zesty" ++LOGFILE="/tmp/lxc-test-$$.log" ++cleanup() { ++ lxc-destroy -n $CONTAINER_NAME >/dev/null 2>&1 || true ++ ++ if [ $DONE -eq 0 ]; then ++ [ -f "$LOGFILE" ] && cat "$LOGFILE" >&2 ++ rm -f "$LOGFILE" ++ echo "FAIL" ++ exit 1 ++ fi ++ rm -f "$LOGFILE" ++ echo "PASS" ++} ++ ++ARCH=i386 ++if type dpkg >/dev/null 2>&1; then ++ ARCH=$(dpkg --print-architecture) ++fi ++ ++trap cleanup EXIT HUP INT TERM ++set -eu ++ ++# Create a container ++CONTAINER_NAME=lxc-test-apparmor-generated ++ ++# default release is trusty, or the systems release if recognized ++release=trusty ++if [ -f /etc/lsb-release ]; then ++ . /etc/lsb-release ++ rels=$(ubuntu-distro-info --supported 2>/dev/null) || ++ rels="$KNOWN_RELEASES" ++ for r in $rels; do ++ [ "$DISTRIB_CODENAME" = "$r" ] && release="$r" ++ done ++fi ++ ++lxc-create -t download -n $CONTAINER_NAME -B dir -- -d ubuntu -r $release -a $ARCH ++CONTAINER_PATH=$(dirname $(lxc-info -n $CONTAINER_NAME -c lxc.rootfs.path -H) | sed -e 's/dir://') ++cp $CONTAINER_PATH/config $CONTAINER_PATH/config.bak ++ ++# Set the profile to be auto-generated ++echo "lxc.apparmor.profile = generated" >> $CONTAINER_PATH/config ++ ++# Start it ++lxc-start -n $CONTAINER_NAME -lDEBUG -o "$LOGFILE" ++lxc-wait -n $CONTAINER_NAME -t 5 -s RUNNING || (echo "Container didn't start" && exit 1) ++pid=`lxc-info -p -H -n $CONTAINER_NAME` ++profile=`cat /proc/$pid/attr/current` ++expected_profile="lxc-${CONTAINER_NAME}_</var/lib/lxc>//&:lxc-${CONTAINER_NAME}_<-var-lib-lxc>:unconfined (enforce)" ++lxc-stop -n $CONTAINER_NAME -k ++if [ "x$profile" != "x$expected_profile" ]; then ++ echo "FAIL: container was in profile $profile" >&2 ++ echo "expected profile: $expected_profile" >&2 ++ exit 1 ++fi ++ ++DONE=1 +-- +2.11.0 + diff --git 
a/debian/patches/fixes/0001-conf-ret-try-devpts-mount-without-gid-5-on-error.patch b/debian/patches/fixes/0001-conf-ret-try-devpts-mount-without-gid-5-on-error.patch deleted file mode 100644 index 217982a..0000000 --- a/debian/patches/fixes/0001-conf-ret-try-devpts-mount-without-gid-5-on-error.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Thu, 12 Apr 2018 11:12:06 +0200 -Subject: [PATCH] conf: ret-try devpts mount without gid=5 on error - -We should always default to mounting devpts with gid=5 but we should fallback -to mounting without gid=5. This let's us cover use-cases such as container -started with only a single mapping e.g.: - -lxc.idmap = u 1000 1000 1 -lxc.idmap = g 1000 1000 1 - -Closes #2257. - -Signed-off-by: Christian Brauner ---- - src/lxc/conf.c | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index fe30800d..a604adbb 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -1503,7 +1503,7 @@ static struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf, unsigned id, - static int lxc_setup_devpts(struct lxc_conf *conf) - { - int ret; -- const char *default_devpts_mntopts; -+ const char *default_devpts_mntopts = "gid=5,newinstance,ptmxmode=0666,mode=0620"; - char devpts_mntopts[256]; - - if (conf->pts <= 0) { -@@ -1512,11 +1512,6 @@ static int lxc_setup_devpts(struct lxc_conf *conf) - return 0; - } - -- if (!find_mapped_nsid_entry(conf, 5, ID_TYPE_GID)) -- default_devpts_mntopts = "newinstance,ptmxmode=0666,mode=0620"; -- else -- default_devpts_mntopts = "newinstance,ptmxmode=0666,mode=0620,gid=5"; -- - ret = snprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%d", - default_devpts_mntopts, conf->pts); - if (ret < 0 || (size_t)ret >= sizeof(devpts_mntopts)) -@@ -1540,11 +1535,16 @@ static int lxc_setup_devpts(struct lxc_conf *conf) - return -1; - } - -- /* Mount new devpts 
instance. */ -+ /* mount new devpts instance */ - ret = mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, devpts_mntopts); - if (ret < 0) { -- SYSERROR("Failed to mount new devpts instance"); -- return -1; -+ /* try mounting without gid=5 */ -+ ret = mount("devpts", "/dev/pts", "devpts", -+ MS_NOSUID | MS_NOEXEC, devpts_mntopts + sizeof("gid=5")); -+ if (ret < 0) { -+ SYSERROR("Failed to mount new devpts instance"); -+ return -1; -+ } - } - DEBUG("Mount new devpts instance with options \"%s\"", devpts_mntopts); - --- -2.11.0 - diff --git a/debian/patches/fixes/0002-Fix-the-memory-leak-in-cgfsng_attach.patch b/debian/patches/fixes/0002-Fix-the-memory-leak-in-cgfsng_attach.patch deleted file mode 100644 index 04e258f..0000000 --- a/debian/patches/fixes/0002-Fix-the-memory-leak-in-cgfsng_attach.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: LiFeng -Date: Tue, 24 Apr 2018 12:53:57 -0400 -Subject: [PATCH] Fix the memory leak in cgfsng_attach - -Signed-off-by: LiFeng ---- - src/lxc/cgroups/cgfsng.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index b48f997f..689e4fe5 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -2549,6 +2549,7 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid) - continue; - - fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs"); -+ free(path); - ret = lxc_write_to_file(fullpath, pidstr, len, false); - if (ret < 0) { - SYSERROR("Failed to attach %d to %s", (int)pid, fullpath); --- -2.11.0 - diff --git a/debian/patches/fixes/0003-Fix-memory-leak-in-list_active_containers.patch b/debian/patches/fixes/0003-Fix-memory-leak-in-list_active_containers.patch deleted file mode 100644 index bf0b177..0000000 --- a/debian/patches/fixes/0003-Fix-memory-leak-in-list_active_containers.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 
0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: LiFeng -Date: Tue, 24 Apr 2018 15:10:15 -0400 -Subject: [PATCH] Fix memory leak in list_active_containers - -Signed-off-by: LiFeng ---- - src/lxc/lxccontainer.c | 25 +++++++++++++++++++++---- - 1 file changed, 21 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index 6d41b6cf..6a58d81c 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -4973,32 +4973,49 @@ int list_active_containers(const char *lxcpath, char ***nret, - char *recvpath = lxc_cmd_get_lxcpath(p); - if (!recvpath) - continue; -- if (strncmp(lxcpath, recvpath, lxcpath_len) != 0) -+ if (strncmp(lxcpath, recvpath, lxcpath_len) != 0) { -+ free(recvpath); - continue; -+ } -+ free(recvpath); - p = lxc_cmd_get_name(p); - if (!p) - continue; - } - -- if (array_contains(&ct_name, p, ct_name_cnt)) -+ if (array_contains(&ct_name, p, ct_name_cnt)) { -+ if (is_hashed) -+ free(p); - continue; -+ } - -- if (!add_to_array(&ct_name, p, ct_name_cnt)) -+ if (!add_to_array(&ct_name, p, ct_name_cnt)) { -+ if (is_hashed) -+ free(p); - goto free_cret_list; -+ } - - ct_name_cnt++; - -- if (!cret) -+ if (!cret) { -+ if (is_hashed) -+ free(p); - continue; -+ } - - c = lxc_container_new(p, lxcpath); - if (!c) { - INFO("Container %s:%s is running but could not be loaded", - lxcpath, p); - remove_from_array(&ct_name, p, ct_name_cnt--); -+ if (is_hashed) -+ free(p); - continue; - } - -+ if (is_hashed) -+ free(p); -+ - /* - * If this is an anonymous container, then is_defined *can* - * return false. So we don't do that check. 
Count on the --- -2.11.0 - diff --git a/debian/patches/fixes/0004-pam-cgfs-ignore-the-system-umask-when-creating-the-c.patch b/debian/patches/fixes/0004-pam-cgfs-ignore-the-system-umask-when-creating-the-c.patch deleted file mode 100644 index 547192e..0000000 --- a/debian/patches/fixes/0004-pam-cgfs-ignore-the-system-umask-when-creating-the-c.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jonathan Calmels -Date: Mon, 16 Apr 2018 12:30:33 -0700 -Subject: [PATCH] pam-cgfs: ignore the system umask when creating the cgroup - hierarchy - -Fixes: #2277 -Signed-off-by: Jonathan Calmels ---- - src/lxc/pam/pam_cgfs.c | 18 ++++++++++++++++-- - 1 file changed, 16 insertions(+), 2 deletions(-) - -diff --git a/src/lxc/pam/pam_cgfs.c b/src/lxc/pam/pam_cgfs.c -index 3df8daef..5c272b0e 100644 ---- a/src/lxc/pam/pam_cgfs.c -+++ b/src/lxc/pam/pam_cgfs.c -@@ -223,6 +223,20 @@ static bool cgv2_prune_empty_cgroups(const char *user); - static bool cgv2_remove(const char *cgroup); - static bool is_cgv2(char *line); - -+static int do_mkdir(const char *path, mode_t mode) -+{ -+ int saved_errno; -+ mode_t mask; -+ int r; -+ -+ mask = umask(0); -+ r = mkdir(path, mode); -+ saved_errno = errno; -+ umask(mask); -+ errno = saved_errno; -+ return (r); -+} -+ - /* Create directory and (if necessary) its parents. 
*/ - static bool mkdir_parent(const char *root, char *path) - { -@@ -252,7 +266,7 @@ static bool mkdir_parent(const char *root, char *path) - if (file_exists(path)) - goto next; - -- if (mkdir(path, 0755) < 0) { -+ if (do_mkdir(path, 0755) < 0) { - pam_cgfs_debug("Failed to create %s: %s.\n", path, strerror(errno)); - return false; - } -@@ -1963,7 +1977,7 @@ static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h, - cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL); - if (slash) - *slash = '/'; -- if (mkdir(cgpath, 0755) < 0 && errno != EEXIST) { -+ if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) { - pam_cgfs_debug("Failed to create '%s'", cgpath); - free(cgpath); - return false; --- -2.11.0 - diff --git a/debian/patches/fixes/0005-attach-always-drop-supplementary-groups.patch b/debian/patches/fixes/0005-attach-always-drop-supplementary-groups.patch deleted file mode 100644 index 9cd29db..0000000 --- a/debian/patches/fixes/0005-attach-always-drop-supplementary-groups.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Mon, 9 Apr 2018 18:01:38 +0200 -Subject: [PATCH] attach: always drop supplementary groups - -Closes #1704. 
- -Signed-off-by: Christian Brauner ---- - src/lxc/attach.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 9a8a836d..3b715272 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -870,12 +870,12 @@ static int attach_child_main(struct attach_clone_payload *payload) - ret = lxc_switch_uid_gid(new_uid, new_gid); - if (ret < 0) - goto on_error; -- -- ret = lxc_setgroups(0, NULL); -- if (ret < 0) -- goto on_error; - } - -+ ret = lxc_setgroups(0, NULL); -+ if (ret < 0) -+ goto on_error; -+ - if ((init_ctx->container && init_ctx->container->lxc_conf && - init_ctx->container->lxc_conf->no_new_privs) || - (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) { --- -2.11.0 - diff --git a/debian/patches/fixes/0006-storage-rsync-free-memory-on-error.patch b/debian/patches/fixes/0006-storage-rsync-free-memory-on-error.patch deleted file mode 100644 index 2918f42..0000000 --- a/debian/patches/fixes/0006-storage-rsync-free-memory-on-error.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: C0deAi -Date: Fri, 6 Apr 2018 12:46:53 -0400 -Subject: [PATCH] storage/rsync: free memory on error - -Closes #2262. 
- -Signed-off-by: C0deAi -Signed-off-by: Christian Brauner ---- - src/lxc/storage/rsync.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/lxc/storage/rsync.c b/src/lxc/storage/rsync.c -index c3080a56..83871ae8 100644 ---- a/src/lxc/storage/rsync.c -+++ b/src/lxc/storage/rsync.c -@@ -73,8 +73,10 @@ int lxc_rsync_exec(const char *src, const char *dest) - return -1; - - ret = snprintf(s, l, "%s", src); -- if (ret < 0 || (size_t)ret >= l) -+ if (ret < 0 || (size_t)ret >= l) { -+ free(s); - return -1; -+ } - - s[l - 2] = '/'; - s[l - 1] = '\0'; --- -2.11.0 - diff --git a/debian/patches/fixes/0007-tools-utils-free-memory-on-error.patch b/debian/patches/fixes/0007-tools-utils-free-memory-on-error.patch deleted file mode 100644 index f62d635..0000000 --- a/debian/patches/fixes/0007-tools-utils-free-memory-on-error.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: C0deAi -Date: Fri, 6 Apr 2018 12:56:50 -0400 -Subject: [PATCH] tools/utils: free memory on error - -Closes #2262. 
- -Signed-off-by: C0deAi -Signed-off-by: Christian Brauner ---- - src/lxc/tools/tool_utils.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/lxc/tools/tool_utils.c b/src/lxc/tools/tool_utils.c -index ca325456..71e5036b 100644 ---- a/src/lxc/tools/tool_utils.c -+++ b/src/lxc/tools/tool_utils.c -@@ -794,8 +794,11 @@ int lxc_config_define_add(struct lxc_list *defines, char *arg) - return -1; - - dent->elem = parse_line(arg); -- if (!dent->elem) -+ if (!dent->elem) { -+ free(dent); - return -1; -+ } -+ - lxc_list_add_tail(defines, dent); - return 0; - } --- -2.11.0 - diff --git a/debian/patches/fixes/0008-fix-signal-sending-in-lxc.init.patch b/debian/patches/fixes/0008-fix-signal-sending-in-lxc.init.patch deleted file mode 100644 index cdad9c8..0000000 --- a/debian/patches/fixes/0008-fix-signal-sending-in-lxc.init.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Tycho Andersen -Date: Wed, 4 Apr 2018 17:45:29 -0600 -Subject: [PATCH] fix signal sending in lxc.init - -The problem here is that these two clauses were ordered backwards: we first -check if the signal came from not the init pid, and if it did, then we give -a notice and return. The comment notes that this is intended to protect -against SIGCHLD, but we don't in fact know if the signal is a SIGCHLD yet, -because that's tested in the next hunk. - -The symptom is that if I e.g. send SIGTERM from the outside world to the -container init, it ignores it and gives this notice. If we re-order these -clauses, it forwards non SIGCHLD signals, and ignores SIGCHLD signals from -things that aren't the real container process. 
- -Signed-off-by: Tycho Andersen ---- - src/lxc/start.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/src/lxc/start.c b/src/lxc/start.c -index ae13aae9..1982270e 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -380,6 +380,12 @@ static int signal_handler(int fd, uint32_t events, void *data, - return hdlr->init_died ? LXC_MAINLOOP_CLOSE : 0; - } - -+ if (siginfo.ssi_signo != SIGCHLD) { -+ kill(hdlr->pid, siginfo.ssi_signo); -+ INFO("Forwarded signal %d to pid %d", siginfo.ssi_signo, hdlr->pid); -+ return hdlr->init_died ? LXC_MAINLOOP_CLOSE : 0; -+ } -+ - /* More robustness, protect ourself from a SIGCHLD sent - * by a process different from the container init. - */ -@@ -389,12 +395,6 @@ static int signal_handler(int fd, uint32_t events, void *data, - return hdlr->init_died ? LXC_MAINLOOP_CLOSE : 0; - } - -- if (siginfo.ssi_signo != SIGCHLD) { -- kill(hdlr->pid, siginfo.ssi_signo); -- INFO("Forwarded signal %d to pid %d", siginfo.ssi_signo, hdlr->pid); -- return hdlr->init_died ? LXC_MAINLOOP_CLOSE : 0; -- } -- - if (siginfo.ssi_code == CLD_STOPPED) { - INFO("Container init process was stopped"); - return hdlr->init_died ? LXC_MAINLOOP_CLOSE : 0; --- -2.11.0 - diff --git a/debian/patches/fixes/0009-conf-fix-temporary-file-creation.patch b/debian/patches/fixes/0009-conf-fix-temporary-file-creation.patch deleted file mode 100644 index 301d1f1..0000000 --- a/debian/patches/fixes/0009-conf-fix-temporary-file-creation.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Fri, 6 Apr 2018 10:54:02 +0200 -Subject: [PATCH] conf: fix temporary file creation - -lxc_make_tmpfile() uses mkstemp() internally, and thus expects the -template to contain 'XXXXXX' and be writable. 
- -Signed-off-by: Thomas Moschny -Signed-off-by: Christian Brauner ---- - src/lxc/conf.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index a604adbb..c1193675 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -2277,9 +2277,12 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount) - - fd = memfd_create(".lxc_mount_file", MFD_CLOEXEC); - if (fd < 0) { -+ char template[] = P_tmpdir "/.lxc_mount_file_XXXXXX"; -+ - if (errno != ENOSYS) - return NULL; -- fd = lxc_make_tmpfile((char *){P_tmpdir "/.lxc_mount_file"}, true); -+ -+ fd = lxc_make_tmpfile(template, true); - if (fd < 0) { - SYSERROR("Could not create temporary mount file"); - return NULL; --- -2.11.0 - diff --git a/debian/patches/fixes/0010-ringbuf-fix-temporary-file-creation.patch b/debian/patches/fixes/0010-ringbuf-fix-temporary-file-creation.patch deleted file mode 100644 index 7c6e7d1..0000000 --- a/debian/patches/fixes/0010-ringbuf-fix-temporary-file-creation.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Fri, 6 Apr 2018 10:54:41 +0200 -Subject: [PATCH] ringbuf: fix temporary file creation - -lxc_make_tmpfile() uses mkstemp() internally, and thus expects the -template to contain 'XXXXXX' and be writable. 
- -Signed-off-by: Thomas Moschny -Signed-off-by: Christian Brauner ---- - src/lxc/ringbuf.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/lxc/ringbuf.c b/src/lxc/ringbuf.c -index 1299fe70..7aa2e631 100644 ---- a/src/lxc/ringbuf.c -+++ b/src/lxc/ringbuf.c -@@ -52,10 +52,12 @@ int lxc_ringbuf_create(struct lxc_ringbuf *buf, size_t size) - - memfd = memfd_create(".lxc_ringbuf", MFD_CLOEXEC); - if (memfd < 0) { -+ char template[] = P_tmpdir "/.lxc_ringbuf_XXXXXX"; -+ - if (errno != ENOSYS) - goto on_error; - -- memfd = lxc_make_tmpfile((char *){P_tmpdir"/.lxc_ringbuf_XXXXXX"}, true); -+ memfd = lxc_make_tmpfile(template, true); - } - if (memfd < 0) - goto on_error; --- -2.11.0 - diff --git a/debian/patches/pve/0001-PVE-Config-lxc.service-start-after-a-potential-syslo.patch b/debian/patches/pve/0001-PVE-Config-lxc.service-start-after-a-potential-syslo.patch new file mode 100644 index 0000000..634abee --- /dev/null +++ b/debian/patches/pve/0001-PVE-Config-lxc.service-start-after-a-potential-syslo.patch @@ -0,0 +1,26 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Fri, 10 Feb 2017 09:13:40 +0100 +Subject: [PATCH] PVE: [Config] lxc.service: start after a potential + syslog.service + +Signed-off-by: Wolfgang Bumiller +--- + config/init/systemd/lxc.service.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in +index cd619967..77541917 100644 +--- a/config/init/systemd/lxc.service.in ++++ b/config/init/systemd/lxc.service.in +@@ -1,6 +1,6 @@ + [Unit] + Description=LXC Container Initialization and Autoboot Code +-After=network.target lxc-net.service ++After=syslog.service network.target lxc-net.service + Wants=lxc-net.service + Documentation=man:lxc-autostart man:lxc + +-- +2.11.0 + diff --git a/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch 
b/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch new file mode 100644 index 0000000..2cb20d9 --- /dev/null +++ b/debian/patches/pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch @@ -0,0 +1,31 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Fri, 10 Feb 2017 09:15:37 +0100 +Subject: [PATCH] PVE: [Down] run lxcnetaddbr when instantiating veths + +FIXME: Why aren't we using regular up-scripts? + +Signed-off-by: Wolfgang Bumiller +--- + src/lxc/network.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/lxc/network.c b/src/lxc/network.c +index 50b5293c..07c7d204 100644 +--- a/src/lxc/network.c ++++ b/src/lxc/network.c +@@ -226,6 +226,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd + netdev->upscript, "up", argv); + if (err < 0) + goto out_delete; ++ } else if (netdev->link[0] == '\0') { ++ err = run_script(handler->name, "net", "/usr/share/lxc/lxcnetaddbr", "up", ++ "veth", veth1, (char*) NULL); ++ if (err) ++ goto out_delete; + } + + DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2, +-- +2.11.0 + diff --git a/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch b/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch new file mode 100644 index 0000000..0fec1ba --- /dev/null +++ b/debian/patches/pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch @@ -0,0 +1,66 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= +Date: Wed, 9 Nov 2016 09:14:26 +0100 +Subject: [PATCH] PVE: [Config] deny rw mounting of /sys and /proc + +this would allow root in a privileged container to change +the permissions of /sys on the host, which could lock out +non-root users. 
+ +if a rw /sys is desired, set "lxc.mount.auto" accordingly +--- + config/apparmor/abstractions/container-base | 6 +++++- + config/apparmor/abstractions/container-base.in | 6 +++++- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base +index a5e6c35f..4c3a4ba8 100644 +--- a/config/apparmor/abstractions/container-base ++++ b/config/apparmor/abstractions/container-base +@@ -82,7 +82,6 @@ + deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, + mount fstype=proc -> /proc/, + mount fstype=sysfs -> /sys/, +- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, + mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, +@@ -91,6 +90,11 @@ + # deny reads from debugfs + deny /sys/kernel/debug/{,**} rwklx, + ++ # prevent rw mounting of /sys, because that allows changing its global permissions ++ deny mount -> /proc/, ++ deny mount -> /sys/, ++# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, ++ + # allow paths to be made slave, shared, private or unbindable + # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts. 
+ # mount options=(rw,make-slave) -> **, +diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in +index 16529bbf..54f9ddf0 100644 +--- a/config/apparmor/abstractions/container-base.in ++++ b/config/apparmor/abstractions/container-base.in +@@ -82,7 +82,6 @@ + deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, + mount fstype=proc -> /proc/, + mount fstype=sysfs -> /sys/, +- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, + mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/, +@@ -91,6 +90,11 @@ + # deny reads from debugfs + deny /sys/kernel/debug/{,**} rwklx, + ++ # prevent rw mounting of /sys, because that allows changing its global permissions ++ deny mount -> /proc/, ++ deny mount -> /sys/, ++# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/, ++ + # allow paths to be made slave, shared, private or unbindable + # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts. + # mount options=(rw,make-slave) -> **, +-- +2.11.0 + diff --git a/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch b/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch new file mode 100644 index 0000000..59bea81 --- /dev/null +++ b/debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch @@ -0,0 +1,547 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Wed, 28 Mar 2018 13:37:28 +0200 +Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup + root + +When cgroup namespaces are enabled a privileged container +with mixed cgroups has full write access to its own root +cgroup effectively allowing it to overwrite values written +from the outside or configured via lxc.cgroup.*. 
+ +This patch causes an additional 'ns/' directory to be +created in all cgroups if cgroup namespaces and cgfsng are +being used in order to combat this. + +Signed-off-by: Wolfgang Bumiller +--- + src/lxc/cgroups/cgfsng.c | 92 +++++++++++++++++++++++++++++++++++++++--------- + src/lxc/cgroups/cgroup.h | 18 +++++++--- + src/lxc/commands.c | 85 +++++++++++++++++++++++++++++++++----------- + src/lxc/commands.h | 2 ++ + src/lxc/criu.c | 4 +-- + src/lxc/start.c | 28 +++++++++++---- + 6 files changed, 180 insertions(+), 49 deletions(-) + +diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c +index 935b868b..9281cee0 100644 +--- a/src/lxc/cgroups/cgfsng.c ++++ b/src/lxc/cgroups/cgfsng.c +@@ -818,6 +818,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char + new->mountpoint = mountpoint; + new->base_cgroup = base_cgroup; + new->fullcgpath = NULL; ++ new->innercgpath = NULL; + new->version = type; + + newentry = append_null_to_list((void ***)h); +@@ -1060,6 +1061,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies, + + free(h->fullcgpath); + h->fullcgpath = NULL; ++ ++ free(h->innercgpath); ++ h->innercgpath = NULL; + } + + return 0; +@@ -1071,6 +1075,7 @@ struct generic_userns_exec_data { + struct lxc_conf *conf; + uid_t origuid; /* target uid in parent namespace */ + char *path; ++ bool inner; + }; + + static int cgroup_rmdir_wrapper(void *data) +@@ -1112,6 +1117,7 @@ static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler) + wrap.container_cgroup = ops->container_cgroup; + wrap.hierarchies = ops->hierarchies; + wrap.conf = handler->conf; ++ wrap.inner = false; + + if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) + ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap, +@@ -1192,22 +1198,29 @@ on_error: + return bret; + } + +-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname) ++static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool 
inner) + { + int ret; ++ char *path; + +- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL); +- if (dir_exists(h->fullcgpath)) { ++ if (inner) { ++ path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL); ++ h->innercgpath = path; ++ } else { ++ path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL); ++ h->fullcgpath = path; ++ } ++ if (dir_exists(path)) { + ERROR("The cgroup \"%s\" already existed", h->fullcgpath); + return false; + } + +- if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) { ++ if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) { + ERROR("Failed to handle legacy cpuset controller"); + return false; + } + +- ret = mkdir_p(h->fullcgpath, 0755); ++ ret = mkdir_p(path, 0755); + if (ret < 0) { + ERROR("Failed to create cgroup \"%s\"", h->fullcgpath); + return false; +@@ -1228,11 +1241,29 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname) + h->fullcgpath = NULL; + } + ++static inline bool cgfsng_create_inner(struct cgroup_ops *ops) ++{ ++ size_t i; ++ bool ret = true; ++ char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL); ++ for (i = 0; ops->hierarchies[i]; i++) { ++ if (!create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) { ++ SYSERROR("Failed to create %s namespace subdirectory: %s", ++ ops->hierarchies[i]->fullcgpath, strerror(errno)); ++ ret = false; ++ break; ++ } ++ } ++ free(cgname); ++ return ret; ++} ++ + /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; + * next cgroup_pattern-1, -2, ..., -999. 
+ */ + static inline bool cgfsng_create(struct cgroup_ops *ops, +- struct lxc_handler *handler) ++ struct lxc_handler *handler, ++ bool inner) + { + int i; + size_t len; +@@ -1241,10 +1272,17 @@ static inline bool cgfsng_create(struct cgroup_ops *ops, + struct lxc_conf *conf = handler->conf; + + if (ops->container_cgroup) { ++ if (inner) ++ return cgfsng_create_inner(ops); + WARN("cgfsng_create called a second time: %s", ops->container_cgroup); + return false; + } + ++ if (inner) { ++ ERROR("cgfsng_create called twice for inner cgroup"); ++ return false; ++ } ++ + if (!conf) + return false; + +@@ -1285,7 +1323,7 @@ again: + } + + for (i = 0; ops->hierarchies[i]; i++) { +- if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) { ++ if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) { + int j; + ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath); + free(ops->hierarchies[i]->fullcgpath); +@@ -1307,7 +1345,7 @@ out_free: + return false; + } + +-static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid) ++static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid, bool inner) + { + int i, len; + char pidstr[25]; +@@ -1320,8 +1358,13 @@ static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid) + int ret; + char *fullpath; + +- fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, +- "cgroup.procs", NULL); ++ if (inner) ++ fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, ++ CGROUP_NAMESPACE_SUBDIR, ++ "cgroup.procs", NULL); ++ else ++ fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, ++ "cgroup.procs", NULL); + ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); + if (ret != 0) { + SYSERROR("Failed to enter cgroup \"%s\"", fullpath); +@@ -1395,9 +1438,15 @@ static int chown_cgroup_wrapper(void *data) + char *fullpath; + char *path = arg->hierarchies[i]->fullcgpath; + ++ if (arg->inner) ++ path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL); ++ + ret = chowmod(path, 
destuid, nsgid, 0775); +- if (ret < 0) ++ if (ret < 0) { ++ if (arg->inner) ++ free(path); + return -1; ++ } + + /* Failures to chown() these are inconvenient but not + * detrimental We leave these owned by the container launcher, +@@ -1416,8 +1465,11 @@ static int chown_cgroup_wrapper(void *data) + (void)chowmod(fullpath, destuid, nsgid, 0664); + free(fullpath); + +- if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) ++ if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) { ++ if (arg->inner) ++ free(path); + continue; ++ } + + fullpath = must_make_path(path, "cgroup.subtree_control", NULL); + (void)chowmod(fullpath, destuid, nsgid, 0664); +@@ -1426,12 +1478,15 @@ static int chown_cgroup_wrapper(void *data) + fullpath = must_make_path(path, "cgroup.threads", NULL); + (void)chowmod(fullpath, destuid, nsgid, 0664); + free(fullpath); ++ ++ if (arg->inner) ++ free(path); + } + + return 0; + } + +-static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf) ++static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner) + { + struct generic_userns_exec_data wrap; + +@@ -1442,6 +1497,7 @@ static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf) + wrap.path = NULL; + wrap.hierarchies = ops->hierarchies; + wrap.conf = conf; ++ wrap.inner = inner; + + if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, + "chown_cgroup_wrapper") < 0) { +@@ -1821,7 +1877,8 @@ static bool cgfsng_unfreeze(struct cgroup_ops *ops) + } + + static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, +- const char *controller) ++ const char *controller, ++ bool inner) + { + struct hierarchy *h; + +@@ -1832,6 +1889,9 @@ static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, + return NULL; + } + ++ if (inner) ++ return h->innercgpath ? h->innercgpath + strlen(h->mountpoint) : NULL; ++ + return h->fullcgpath ? 
h->fullcgpath + strlen(h->mountpoint) : NULL; + } + +@@ -1863,7 +1923,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, + int fret = -1, idx = 0; + char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL; + +- container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller); ++ container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller); + /* not running */ + if (!container_cgroup) + return 0; +@@ -1943,7 +2003,7 @@ static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, + continue; + } + +- path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]); ++ path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]); + /* not running */ + if (!path) + continue; +diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h +index 8f4af06c..b12c1f4c 100644 +--- a/src/lxc/cgroups/cgroup.h ++++ b/src/lxc/cgroups/cgroup.h +@@ -28,6 +28,12 @@ + #include + #include + ++/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace ++ * will be moved into an additional subdirectory "cgns/" inside the cgroup in ++ * order to prevent it from accessing the outer limiting cgroup. ++ */ ++#define CGROUP_NAMESPACE_SUBDIR "cgns" ++ + struct lxc_handler; + struct lxc_conf; + struct lxc_list; +@@ -65,6 +71,9 @@ typedef enum { + * @fullcgpath + * - The full path to the containers cgroup. + * ++ * @innercgpath ++ * - The full path to the container's inner cgroup when protect_limits is used. 
++ * + * @version + * - legacy hierarchy + * If the hierarchy is a legacy hierarchy this will be set to +@@ -78,6 +87,7 @@ struct hierarchy { + char *mountpoint; + char *base_cgroup; + char *fullcgpath; ++ char *innercgpath; + int version; + }; + +@@ -124,9 +134,9 @@ struct cgroup_ops { + + bool (*data_init)(struct cgroup_ops *ops); + void (*destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); +- bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler); +- bool (*enter)(struct cgroup_ops *ops, pid_t pid); +- const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); ++ bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner); ++ bool (*enter)(struct cgroup_ops *ops, pid_t pid, bool inner); ++ const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner); + bool (*escape)(const struct cgroup_ops *ops); + int (*num_hierarchies)(struct cgroup_ops *ops); + bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); +@@ -137,7 +147,7 @@ struct cgroup_ops { + bool (*unfreeze)(struct cgroup_ops *ops); + bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf, + bool with_devices); +- bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf); ++ bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner); + bool (*attach)(struct cgroup_ops *ops, const char *name, + const char *lxcpath, pid_t pid); + bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler, +diff --git a/src/lxc/commands.c b/src/lxc/commands.c +index 30d6b604..e1bad635 100644 +--- a/src/lxc/commands.c ++++ b/src/lxc/commands.c +@@ -424,20 +424,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, + return lxc_cmd_rsp_send(fd, &rsp); + } + +-/* +- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a +- * particular subsystem. This is the cgroup path relative to the root +- * of the cgroup filesystem. 
+- * +- * @name : name of container to connect to +- * @lxcpath : the lxcpath in which the container is running +- * @subsystem : the subsystem being asked about +- * +- * Returns the path on success, NULL on failure. The caller must free() the +- * returned path. +- */ +-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, +- const char *subsystem) ++char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, ++ const char *subsystem, bool inner) + { + int ret, stopped; + struct lxc_cmd_rr cmd = { +@@ -450,8 +438,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + + cmd.req.data = subsystem; + cmd.req.datalen = 0; +- if (subsystem) +- cmd.req.datalen = strlen(subsystem) + 1; ++ if (subsystem) { ++ size_t subsyslen = strlen(subsystem); ++ if (inner) { ++ char *data = alloca(subsyslen+2); ++ memcpy(data, subsystem, subsyslen+1); ++ data[subsyslen+1] = 1; ++ cmd.req.datalen = subsyslen+2, ++ cmd.req.data = data; ++ } else { ++ cmd.req.datalen = subsyslen+1; ++ } ++ } + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); + if (ret < 0) +@@ -466,6 +464,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + return cmd.rsp.data; + } + ++/* ++ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a ++ * particular subsystem. This is the cgroup path relative to the root ++ * of the cgroup filesystem. ++ * ++ * @name : name of container to connect to ++ * @lxcpath : the lxcpath in which the container is running ++ * @subsystem : the subsystem being asked about ++ * ++ * Returns the path on success, NULL on failure. The caller must free() the ++ * returned path. ++ */ ++char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, ++ const char *subsystem) ++{ ++ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false); ++} ++ ++/* ++ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path ++ * for a particular subsystem. 
This is the cgroup path relative to the root ++ * of the cgroup filesystem. ++ * ++ * @name : name of container to connect to ++ * @lxcpath : the lxcpath in which the container is running ++ * @subsystem : the subsystem being asked about ++ * ++ * Returns the path on success, NULL on failure. The caller must free() the ++ * returned path. ++ */ ++char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath, ++ const char *subsystem) ++{ ++ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true); ++} ++ + static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) + { +@@ -473,10 +507,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, + struct lxc_cmd_rsp rsp; + struct cgroup_ops *cgroup_ops = handler->cgroup_ops; + +- if (req->datalen > 0) +- path = cgroup_ops->get_cgroup(cgroup_ops, req->data); +- else +- path = cgroup_ops->get_cgroup(cgroup_ops, NULL); ++ if (req->datalen > 0) { ++ const char *subsystem; ++ size_t subsyslen; ++ bool inner = false; ++ subsystem = req->data; ++ subsyslen = strlen(subsystem); ++ if (req->datalen == subsyslen+2) ++ inner = (subsystem[subsyslen+1] == 1); ++ ++ path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner); ++ } else { ++ // FIXME: cgroup separation for cgroup v2 cannot be handled ++ // like we used to do v1 here... need to figure this out... 
++ path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false); ++ } + if (!path) + return -1; + +diff --git a/src/lxc/commands.h b/src/lxc/commands.h +index 816cd748..e16c0d79 100644 +--- a/src/lxc/commands.h ++++ b/src/lxc/commands.h +@@ -93,6 +93,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, + */ + extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + const char *subsystem); ++extern char *lxc_cmd_get_attach_cgroup_path(const char *name, ++ const char *lxcpath, const char *subsystem); + extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath); + extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath); + extern char *lxc_cmd_get_name(const char *hashed_sock); +diff --git a/src/lxc/criu.c b/src/lxc/criu.c +index c3642162..456d19cf 100644 +--- a/src/lxc/criu.c ++++ b/src/lxc/criu.c +@@ -328,7 +328,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts) + } else { + const char *p; + +- p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]); ++ p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false); + if (!p) { + ERROR("failed to get cgroup path for %s", controllers[0]); + goto err; +@@ -971,7 +971,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ + goto out_fini_handler; + handler->cgroup_ops = cgroup_ops; + +- if (!cgroup_ops->create(cgroup_ops, handler)) { ++ if (!cgroup_ops->create(cgroup_ops, handler, false)) { + ERROR("failed creating groups"); + goto out_fini_handler; + } +diff --git a/src/lxc/start.c b/src/lxc/start.c +index 739866d8..6944b310 100644 +--- a/src/lxc/start.c ++++ b/src/lxc/start.c +@@ -1597,7 +1597,7 @@ static int lxc_spawn(struct lxc_handler *handler) + } + } + +- if (!cgroup_ops->create(cgroup_ops, handler)) { ++ if (!cgroup_ops->create(cgroup_ops, handler, false)) { + ERROR("Failed creating cgroups"); + goto out_delete_net; + } +@@ -1691,10 +1691,10 @@ static int 
lxc_spawn(struct lxc_handler *handler) + goto out_delete_net; + } + +- if (!cgroup_ops->enter(cgroup_ops, handler->pid)) ++ if (!cgroup_ops->enter(cgroup_ops, handler->pid, false)) + goto out_delete_net; + +- if (!cgroup_ops->chown(cgroup_ops, handler->conf)) ++ if (!cgroup_ops->chown(cgroup_ops, handler->conf, false)) + goto out_delete_net; + + /* Now we're ready to preserve the network namespace */ +@@ -1755,16 +1755,30 @@ static int lxc_spawn(struct lxc_handler *handler) + } + } + +- ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE); +- if (ret < 0) +- goto out_delete_net; +- + if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) { + ERROR("Failed to setup legacy device cgroup controller limits"); + goto out_delete_net; + } + TRACE("Set up legacy device cgroup controller limits"); + ++ if (cgns_supported()) { ++ if (!cgroup_ops->create(cgroup_ops, handler, true)) { ++ ERROR("failed to create inner cgroup separation layer"); ++ goto out_delete_net; ++ } ++ if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) { ++ ERROR("failed to enter inner cgroup separation layer"); ++ goto out_delete_net; ++ } ++ if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) { ++ ERROR("failed chown inner cgroup separation layer"); ++ goto out_delete_net; ++ } ++ } ++ ++ if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE)) ++ goto out_delete_net; ++ + if (handler->ns_clone_flags & CLONE_NEWCGROUP) { + /* Now we're ready to preserve the cgroup namespace */ + ret = lxc_try_preserve_ns(handler->pid, "cgroup"); +-- +2.11.0 + diff --git a/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch b/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch new file mode 100644 index 0000000..5881202 --- /dev/null +++ b/debian/patches/pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch @@ -0,0 +1,97 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang 
Bumiller +Date: Wed, 28 Mar 2018 13:41:46 +0200 +Subject: [PATCH] PVE: [Up] start/initutils: make cgroupns separation level + configurable + +Adds a new global config variable `lxc.cgroup.protect_limits` +which controls whether a separation directory for cgroup +namespaces should be used. +Can be empty, "privileged", "unprivileged" or "both". + +Signed-off-by: Wolfgang Bumiller +--- + src/lxc/initutils.c | 17 +++++++++-------- + src/lxc/initutils.h | 1 + + src/lxc/start.c | 25 ++++++++++++++----------- + 3 files changed, 24 insertions(+), 19 deletions(-) + +diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c +index 6ab39a7d..2c142bd4 100644 +--- a/src/lxc/initutils.c ++++ b/src/lxc/initutils.c +@@ -57,14 +57,15 @@ static char *copy_global_config_value(char *p) + const char *lxc_global_config_value(const char *option_name) + { + static const char * const options[][2] = { +- { "lxc.bdev.lvm.vg", DEFAULT_VG }, +- { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL }, +- { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT }, +- { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL }, +- { "lxc.lxcpath", NULL }, +- { "lxc.default_config", NULL }, +- { "lxc.cgroup.pattern", NULL }, +- { "lxc.cgroup.use", NULL }, ++ { "lxc.bdev.lvm.vg", DEFAULT_VG }, ++ { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL }, ++ { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT }, ++ { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL }, ++ { "lxc.lxcpath", NULL }, ++ { "lxc.default_config", NULL }, ++ { "lxc.cgroup.pattern", NULL }, ++ { "lxc.cgroup.use", NULL }, ++ { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT }, + { NULL, NULL }, + }; + +diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h +index b815cd19..4d005679 100644 +--- a/src/lxc/initutils.h ++++ b/src/lxc/initutils.h +@@ -42,6 +42,7 @@ + #define DEFAULT_THIN_POOL "lxc" + #define DEFAULT_ZFSROOT "lxc" + #define DEFAULT_RBDPOOL "lxc" ++#define DEFAULT_CGPROTECT "privileged" + + #ifndef PR_SET_MM + #define PR_SET_MM 35 +diff --git a/src/lxc/start.c b/src/lxc/start.c +index 
6944b310..bccd5807 100644 +--- a/src/lxc/start.c ++++ b/src/lxc/start.c +@@ -1762,17 +1762,20 @@ static int lxc_spawn(struct lxc_handler *handler) + TRACE("Set up legacy device cgroup controller limits"); + + if (cgns_supported()) { +- if (!cgroup_ops->create(cgroup_ops, handler, true)) { +- ERROR("failed to create inner cgroup separation layer"); +- goto out_delete_net; +- } +- if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) { +- ERROR("failed to enter inner cgroup separation layer"); +- goto out_delete_net; +- } +- if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) { +- ERROR("failed chown inner cgroup separation layer"); +- goto out_delete_net; ++ const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits"); ++ if (!strcmp(tmp, "both") || !strcmp(tmp, wants_to_map_ids ? "unprivileged" : "privileged")) { ++ if (!cgroup_ops->create(cgroup_ops, handler, true)) { ++ ERROR("failed to create inner cgroup separation layer"); ++ goto out_delete_net; ++ } ++ if (!cgroup_ops->enter(cgroup_ops, handler->pid, true)) { ++ ERROR("failed to enter inner cgroup separation layer"); ++ goto out_delete_net; ++ } ++ if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) { ++ ERROR("failed chown inner cgroup separation layer"); ++ goto out_delete_net; ++ } + } + } + +-- +2.11.0 + diff --git a/debian/patches/pve/0006-PVE-Config-rename-cgroup-namespace-directory-to-ns.patch b/debian/patches/pve/0006-PVE-Config-rename-cgroup-namespace-directory-to-ns.patch new file mode 100644 index 0000000..45d340b --- /dev/null +++ b/debian/patches/pve/0006-PVE-Config-rename-cgroup-namespace-directory-to-ns.patch @@ -0,0 +1,26 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Fri, 23 Dec 2016 15:57:24 +0100 +Subject: [PATCH] PVE: [Config] rename cgroup namespace directory to ns + +Signed-off-by: Wolfgang Bumiller +--- + src/lxc/cgroups/cgroup.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git 
a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h +index b12c1f4c..6b8df1b3 100644 +--- a/src/lxc/cgroups/cgroup.h ++++ b/src/lxc/cgroups/cgroup.h +@@ -32,7 +32,7 @@ + * will be moved into an additional subdirectory "cgns/" inside the cgroup in + * order to prevent it from accessing the outer limiting cgroup. + */ +-#define CGROUP_NAMESPACE_SUBDIR "cgns" ++#define CGROUP_NAMESPACE_SUBDIR "ns" + + struct lxc_handler; + struct lxc_conf; +-- +2.11.0 + diff --git a/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch b/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch new file mode 100644 index 0000000..edd62a2 --- /dev/null +++ b/debian/patches/pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch @@ -0,0 +1,207 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Mon, 20 Nov 2017 10:49:41 +0100 +Subject: [PATCH] PVE: [Up] possibility to run lxc-monitord as a regular daemon + +lxc-monitord instances are spawned on demand and, if this +happens from a service, the daemon is considered part of +it by systemd, as it is running in the same cgroups. This +can be avoided by leaving it running permanently. 
+ +Signed-off-by: Wolfgang Bumiller +--- + .gitignore | 1 + + config/init/systemd/Makefile.am | 10 +++-- + config/init/systemd/lxc-monitord.service.in | 12 ++++++ + configure.ac | 1 + + lxc.spec.in | 1 + + src/lxc/cmd/lxc_monitord.c | 60 +++++++++++++++++++++-------- + 6 files changed, 64 insertions(+), 21 deletions(-) + create mode 100644 config/init/systemd/lxc-monitord.service.in + +diff --git a/.gitignore b/.gitignore +index 0d266c20..b2a4b020 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -115,6 +115,7 @@ config/bash/lxc + config/init/common/lxc-containers + config/init/common/lxc-net + config/init/systemd/lxc-autostart-helper ++config/init/systemd/lxc-monitord.service + config/init/systemd/lxc-net.service + config/init/systemd/lxc.service + config/init/systemd/lxc@.service +diff --git a/config/init/systemd/Makefile.am b/config/init/systemd/Makefile.am +index c448850d..4a4fde5e 100644 +--- a/config/init/systemd/Makefile.am ++++ b/config/init/systemd/Makefile.am +@@ -2,19 +2,21 @@ EXTRA_DIST = \ + lxc-apparmor-load \ + lxc.service.in \ + lxc@.service.in \ +- lxc-net.service.in ++ lxc-net.service.in \ ++ lxc-monitord.service.in + + if INIT_SCRIPT_SYSTEMD +-BUILT_SOURCES = lxc.service lxc@.service lxc-net.service ++BUILT_SOURCES = lxc.service lxc@.service lxc-net.service lxc-monitord.service + +-install-systemd: lxc.service lxc@.service lxc-net.service lxc-apparmor-load ++install-systemd: lxc.service lxc@.service lxc-net.service lxc-monitord.service lxc-apparmor-load + $(MKDIR_P) $(DESTDIR)$(SYSTEMD_UNIT_DIR) +- $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/ ++ $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service lxc-monitord.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/ + + uninstall-systemd: + rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc.service + rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc@.service + rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-net.service ++ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-monitord.service + rmdir 
$(DESTDIR)$(SYSTEMD_UNIT_DIR) || : + + pkglibexec_SCRIPTS = lxc-apparmor-load +diff --git a/config/init/systemd/lxc-monitord.service.in b/config/init/systemd/lxc-monitord.service.in +new file mode 100644 +index 00000000..40635168 +--- /dev/null ++++ b/config/init/systemd/lxc-monitord.service.in +@@ -0,0 +1,12 @@ ++[Unit] ++Description=LXC Container Monitoring Daemon ++After=syslog.service network.target ++ ++[Service] ++Type=simple ++ExecStart=@LIBEXECDIR@/lxc/lxc-monitord --daemon ++StandardOutput=syslog ++StandardError=syslog ++ ++[Install] ++WantedBy=multi-user.target +diff --git a/configure.ac b/configure.ac +index 59a0d410..ea312bf3 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -674,6 +674,7 @@ AC_CONFIG_FILES([ + config/init/systemd/lxc.service + config/init/systemd/lxc@.service + config/init/systemd/lxc-net.service ++ config/init/systemd/lxc-monitord.service + config/init/sysvinit/Makefile + config/init/sysvinit/lxc-containers + config/init/sysvinit/lxc-net +diff --git a/lxc.spec.in b/lxc.spec.in +index 87978feb..fcf307fa 100644 +--- a/lxc.spec.in ++++ b/lxc.spec.in +@@ -235,6 +235,7 @@ fi + %{_unitdir}/lxc-net.service + %{_unitdir}/lxc.service + %{_unitdir}/lxc@.service ++%{_unitdir}/lxc-monitord.service + %else + %{_sysconfdir}/rc.d/init.d/lxc + %{_sysconfdir}/rc.d/init.d/lxc-net +diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c +index 38eee0a9..ffa6deca 100644 +--- a/src/lxc/cmd/lxc_monitord.c ++++ b/src/lxc/cmd/lxc_monitord.c +@@ -354,17 +354,44 @@ static void lxc_monitord_sig_handler(int sig) + + int main(int argc, char *argv[]) + { +- int ret, pipefd; ++ int ret, pipefd = -1; + char logpath[PATH_MAX]; + sigset_t mask; +- char *lxcpath = argv[1]; ++ const char *lxcpath = NULL; + bool mainloop_opened = false; + bool monitord_created = false; ++ bool persistent = false; + struct lxc_log log; + +- if (argc != 3) { ++ if (argc > 1 && !strcmp(argv[1], "--daemon")) { ++ persistent = true; ++ --argc; ++ ++argv; ++ } ++ ++ if (argc > 1) 
{ ++ lxcpath = argv[1]; ++ --argc; ++ ++argv; ++ } else { ++ lxcpath = lxc_global_config_value("lxc.lxcpath"); ++ if (!lxcpath) { ++ ERROR("Out of memory getting lxcpath"); ++ exit(EXIT_FAILURE); ++ } ++ } ++ ++ if (argc > 1) { ++ if (lxc_safe_int(argv[1], &pipefd) < 0) ++ exit(EXIT_FAILURE); ++ --argc; ++ ++argv; ++ } ++ ++ if (argc != 1 || (persistent != (pipefd == -1))) { + fprintf(stderr, +- "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n" ++ "Usage: lxc-monitord lxcpath sync-pipe-fd\n" ++ " lxc-monitord --daemon lxcpath\n\n" + "NOTE: lxc-monitord is intended for use by lxc internally\n" + " and does not need to be run by hand\n\n"); + exit(EXIT_FAILURE); +@@ -387,9 +414,6 @@ int main(int argc, char *argv[]) + INFO("Failed to open log file %s, log will be lost", lxcpath); + lxc_log_options_no_override(); + +- if (lxc_safe_int(argv[2], &pipefd) < 0) +- exit(EXIT_FAILURE); +- + if (sigfillset(&mask) || + sigdelset(&mask, SIGILL) || + sigdelset(&mask, SIGSEGV) || +@@ -422,15 +446,17 @@ int main(int argc, char *argv[]) + goto on_error; + monitord_created = true; + +- /* sync with parent, we're ignoring the return from write +- * because regardless if it works or not, the following +- * close will sync us with the parent process. the +- * if-empty-statement construct is to quiet the +- * warn-unused-result warning. +- */ +- if (write(pipefd, "S", 1)) +- ; +- close(pipefd); ++ if (pipefd != -1) { ++ /* sync with parent, we're ignoring the return from write ++ * because regardless if it works or not, the following ++ * close will sync us with the parent process. the ++ * if-empty-statement construct is to quiet the ++ * warn-unused-result warning. 
++ */ ++ if (write(pipefd, "S", 1)) ++ ; ++ close(pipefd); ++ } + + if (lxc_monitord_mainloop_add(&mon)) { + ERROR("Failed to add mainloop handlers"); +@@ -441,7 +467,7 @@ int main(int argc, char *argv[]) + lxc_raw_getpid(), mon.lxcpath); + + for (;;) { +- ret = lxc_mainloop(&mon.descr, 1000 * 30); ++ ret = lxc_mainloop(&mon.descr, persistent ? -1 : 1000 * 30); + if (ret) { + ERROR("mainloop returned an error"); + break; +-- +2.11.0 + diff --git a/debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch b/debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch new file mode 100644 index 0000000..c958fb8 --- /dev/null +++ b/debian/patches/pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch @@ -0,0 +1,42 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Wolfgang Bumiller +Date: Mon, 20 Nov 2017 10:51:36 +0100 +Subject: [PATCH] PVE: [Deprecated] Make lxc@.service forking + +Previously the init process' output was dumped into the log +files since the service used Type=simple and +StandardOutput/Error=syslog. + +Deprecated, we provide pve-container@.service instead. 
+ +Signed-off-by: Wolfgang Bumiller +--- + config/init/systemd/lxc@.service.in | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/config/init/systemd/lxc@.service.in b/config/init/systemd/lxc@.service.in +index a2aa2211..f312763c 100644 +--- a/config/init/systemd/lxc@.service.in ++++ b/config/init/systemd/lxc@.service.in +@@ -1,15 +1,15 @@ + [Unit] + Description=LXC Container: %i + # This pulls in apparmor, dev-setup, lxc-net +-After=lxc.service ++After=lxc.service lxc-monitord.service + Wants=lxc.service + Documentation=man:lxc-start man:lxc + + [Service] +-Type=simple ++Type=forking + KillMode=mixed + TimeoutStopSec=120s +-ExecStart=@BINDIR@/lxc-start -F -n %i ++ExecStart=@BINDIR@/lxc-start -n %i + ExecStop=@BINDIR@/lxc-stop -n %i + # Environment=BOOTUP=serial + # Environment=CONSOLETYPE=serial +-- +2.11.0 + diff --git a/debian/patches/series b/debian/patches/series index fdc018c..348b685 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,18 +1,20 @@ -0001-lxc.service-start-after-a-potential-syslog.service.patch -0002-pve-run-lxcnetaddbr-when-instantiating-veths.patch -0003-deny-rw-mounting-of-sys-and-proc.patch -0004-separate-the-limiting-from-the-namespaced-cgroup-roo.patch -0005-start-initutils-make-cgroupns-separation-level-confi.patch -0006-rename-cgroup-namespace-directory-to-ns.patch -0007-possibility-to-run-lxc-monitord-as-a-regular-daemon.patch -0008-Make-lxc-.service-forking.patch -fixes/0001-conf-ret-try-devpts-mount-without-gid-5-on-error.patch -fixes/0002-Fix-the-memory-leak-in-cgfsng_attach.patch -fixes/0003-Fix-memory-leak-in-list_active_containers.patch -fixes/0004-pam-cgfs-ignore-the-system-umask-when-creating-the-c.patch -fixes/0005-attach-always-drop-supplementary-groups.patch -fixes/0006-storage-rsync-free-memory-on-error.patch -fixes/0007-tools-utils-free-memory-on-error.patch -fixes/0008-fix-signal-sending-in-lxc.init.patch -fixes/0009-conf-fix-temporary-file-creation.patch 
-fixes/0010-ringbuf-fix-temporary-file-creation.patch +pve/0001-PVE-Config-lxc.service-start-after-a-potential-syslo.patch +pve/0002-PVE-Down-run-lxcnetaddbr-when-instantiating-veths.patch +pve/0003-PVE-Config-deny-rw-mounting-of-sys-and-proc.patch +pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch +pve/0005-PVE-Up-start-initutils-make-cgroupns-separation-leve.patch +pve/0006-PVE-Config-rename-cgroup-namespace-directory-to-ns.patch +pve/0007-PVE-Up-possibility-to-run-lxc-monitord-as-a-regular-.patch +pve/0008-PVE-Deprecated-Make-lxc-.service-forking.patch +extra/0001-confile-add-lxc.monitor.signal.pdeath.patch +extra/0002-tests-add-lxc.monitor.signal.pdeath.patch +extra/0003-doc-Translate-lxc.monitor.signal.pdeath-into-Japanes.patch +extra/0004-tests-lxc-test-apparmor-mount-show-a-log-on-error.patch +extra/0005-tests-lxc-test-apparmor-mount-check-environment-earl.patch +extra/0006-lsm-fixup-lsm_process_label_set_at-return-values.patch +extra/0007-apparmor-use-fopen_cloexec.patch +extra/0008-utils-add-must_concat-helper.patch +extra/0009-apparmor-update-current-profiles.patch +extra/0010-apparmor-profile-generation.patch +extra/0011-apparmor-allow-start-container-to-change-to-lxc.patch +extra/0012-tests-add-test-for-generated-apparmor-profiles.patch diff --git a/debian/rules b/debian/rules index 6169797..14fd894 100755 --- a/debian/rules +++ b/debian/rules @@ -24,7 +24,8 @@ override_dh_auto_configure: --disable-python \ --disable-lua \ --disable-examples \ - --enable-seccomp + --enable-seccomp \ + --disable-static override_dh_fixperms: dh_fixperms -Xusr/lib/$(DEB_HOST_MULTIARCH)/lxc/lxc-user-nic diff --git a/lxc b/lxc index 5b66b6e..4d87983 160000 --- a/lxc +++ b/lxc @@ -1 +1 @@ -Subproject commit 5b66b6ee3e3cd2575a4b9b2eb8190b2b05ab4b42 +Subproject commit 4d879838422a8a4bf79ff540fa0dbcce4a82d872