--- /dev/null
+From 10bc10054434f20870f812bb710eef5b5e22040b Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 10 Feb 2017 09:13:40 +0100
+Subject: [PATCH 1/8] lxc.service: start after a potential syslog.service
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ config/init/systemd/lxc.service.in | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
+index cd61996..7754191 100644
+--- a/config/init/systemd/lxc.service.in
++++ b/config/init/systemd/lxc.service.in
+@@ -1,6 +1,6 @@
+ [Unit]
+ Description=LXC Container Initialization and Autoboot Code
+-After=network.target lxc-net.service
++After=syslog.service network.target lxc-net.service
+ Wants=lxc-net.service
+ Documentation=man:lxc-autostart man:lxc
+
+--
+2.1.4
+
+++ /dev/null
-From 2e386b0ab03ebc04bd3b08fa3cf9aa14c596b883 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Tue, 15 Nov 2016 09:20:24 +0100
-Subject: [PATCH 1/2] separate the limiting from the namespaced cgroup root
-
-When cgroup namespaces are enabled a privileged container
-with mixed cgroups has full write access to its own root
-cgroup effectively allowing it to overwrite values written
-from the outside or configured via lxc.cgroup.*.
-
-This patch causes an additional 'ns/' directory to be
-created in all cgroups if cgroup namespaces and cgfsng are
-being used in order to combat this.
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/cgroups/cgfs.c | 19 ++++++--
- src/lxc/cgroups/cgfsng.c | 81 +++++++++++++++++++++++++++-----
- src/lxc/cgroups/cgmanager.c | 19 ++++++--
- src/lxc/cgroups/cgroup.c | 16 +++----
- src/lxc/cgroups/cgroup.h | 22 +++++----
- src/lxc/commands.c | 112 ++++++++++++++++++++++++++++++--------------
- src/lxc/commands.h | 2 +
- src/lxc/criu.c | 4 +-
- src/lxc/start.c | 21 +++++++--
- 9 files changed, 219 insertions(+), 77 deletions(-)
-
-diff --git a/src/lxc/cgroups/cgfs.c b/src/lxc/cgroups/cgfs.c
-index 8499200..b78b78d 100644
---- a/src/lxc/cgroups/cgfs.c
-+++ b/src/lxc/cgroups/cgfs.c
-@@ -2383,12 +2383,15 @@ static void cgfs_destroy(void *hdata, struct lxc_conf *conf)
- free(d);
- }
-
--static inline bool cgfs_create(void *hdata)
-+static inline bool cgfs_create(void *hdata, bool inner)
- {
- struct cgfs_data *d = hdata;
- struct cgroup_process_info *i;
- struct cgroup_meta_data *md;
-
-+ if (inner)
-+ return true;
-+
- if (!d)
- return false;
- md = d->meta;
-@@ -2399,12 +2402,15 @@ static inline bool cgfs_create(void *hdata)
- return true;
- }
-
--static inline bool cgfs_enter(void *hdata, pid_t pid)
-+static inline bool cgfs_enter(void *hdata, pid_t pid, bool inner)
- {
- struct cgfs_data *d = hdata;
- struct cgroup_process_info *i;
- int ret;
-
-+ if (inner)
-+ return true;
-+
- if (!d)
- return false;
- i = d->info;
-@@ -2428,10 +2434,12 @@ static inline bool cgfs_create_legacy(void *hdata, pid_t pid)
- return true;
- }
-
--static const char *cgfs_get_cgroup(void *hdata, const char *subsystem)
-+static const char *cgfs_get_cgroup(void *hdata, const char *subsystem, bool inner)
- {
- struct cgfs_data *d = hdata;
-
-+ (void)inner;
-+
- if (!d)
- return NULL;
- return lxc_cgroup_get_hierarchy_path_data(subsystem, d);
-@@ -2646,13 +2654,16 @@ static bool do_cgfs_chown(char *cgroup_path, struct lxc_conf *conf)
- return true;
- }
-
--static bool cgfs_chown(void *hdata, struct lxc_conf *conf)
-+static bool cgfs_chown(void *hdata, struct lxc_conf *conf, bool inner)
- {
- struct cgfs_data *d = hdata;
- struct cgroup_process_info *info_ptr;
- char *cgpath;
- bool r = true;
-
-+ if (inner)
-+ return true;
-+
- if (!d)
- return false;
-
-diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
-index 2b772e2..f7df3cf 100644
---- a/src/lxc/cgroups/cgfsng.c
-+++ b/src/lxc/cgroups/cgfsng.c
-@@ -72,6 +72,7 @@ struct hierarchy {
- char *mountpoint;
- char *base_cgroup;
- char *fullcgpath;
-+ char *innercgpath;
- };
-
- /*
-@@ -814,6 +815,7 @@ static void add_controller(char **clist, char *mountpoint, char *base_cgroup)
- new->mountpoint = mountpoint;
- new->base_cgroup = base_cgroup;
- new->fullcgpath = NULL;
-+ new->innercgpath = false;
-
- newentry = append_null_to_list((void ***)&hierarchies);
- hierarchies[newentry] = new;
-@@ -1286,6 +1288,8 @@ static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
- free(h->fullcgpath);
- h->fullcgpath = NULL;
- }
-+ free(h->innercgpath);
-+ h->innercgpath = NULL;
- }
- }
-
-@@ -1299,18 +1303,25 @@ struct cgroup_ops *cgfsng_ops_init(void)
- return &cgfsng_ops;
- }
-
--static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
-+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
- {
-- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
-- if (dir_exists(h->fullcgpath)) { // it must not already exist
-- ERROR("Path \"%s\" already existed.", h->fullcgpath);
-+ char *path;
-+ if (inner) {
-+ path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL);
-+ h->innercgpath = path;
-+ } else {
-+ path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
-+ h->fullcgpath = path;
-+ }
-+ if (dir_exists(path)) { // it must not already exist
-+ ERROR("Path \"%s\" already existed.", path);
- return false;
- }
-- if (!handle_cpuset_hierarchy(h, cgname)) {
-+ if (!inner && !handle_cpuset_hierarchy(h, cgname)) {
- ERROR("Failed to handle cgroupfs v1 cpuset controller.");
- return false;
- }
-- return mkdir_p(h->fullcgpath, 0755) == 0;
-+ return mkdir_p(path, 0755) == 0;
- }
-
- static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
-@@ -1325,7 +1336,8 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
- * Try to create the same cgroup in all hierarchies.
- * Start with cgroup_pattern; next cgroup_pattern-1, -2, ..., -999
- */
--static inline bool cgfsng_create(void *hdata)
-+static inline bool cgfsng_create_inner(struct cgfsng_handler_data*);
-+static inline bool cgfsng_create(void *hdata, bool inner)
- {
- struct cgfsng_handler_data *d = hdata;
- char *tmp, *cgname, *offset;
-@@ -1335,9 +1347,15 @@ static inline bool cgfsng_create(void *hdata)
- if (!d)
- return false;
- if (d->container_cgroup) {
-+ if (inner)
-+ return cgfsng_create_inner(d);
- WARN("cgfsng_create called a second time");
- return false;
- }
-+ if (inner) {
-+ ERROR("cgfsng_create called twice for innner cgroup");
-+ return false;
-+ }
-
- tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
- if (!tmp) {
-@@ -1358,7 +1376,7 @@ again:
- if (idx)
- snprintf(offset, 5, "-%d", idx);
- for (i = 0; hierarchies[i]; i++) {
-- if (!create_path_for_hierarchy(hierarchies[i], cgname)) {
-+ if (!create_path_for_hierarchy(hierarchies[i], cgname, false)) {
- int j;
- SYSERROR("Failed to create %s: %s", hierarchies[i]->fullcgpath, strerror(errno));
- free(hierarchies[i]->fullcgpath);
-@@ -1378,7 +1396,24 @@ out_free:
- return false;
- }
-
--static bool cgfsng_enter(void *hdata, pid_t pid)
-+static inline bool cgfsng_create_inner(struct cgfsng_handler_data *d)
-+{
-+ size_t i;
-+ bool ret = true;
-+ char *cgname = must_make_path(d->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
-+ for (i = 0; hierarchies[i]; i++) {
-+ if (!create_path_for_hierarchy(hierarchies[i], cgname, true)) {
-+ SYSERROR("Failed to create %s namespace subdirectory: %s", hierarchies[i]->fullcgpath, strerror(errno));
-+ ret = false;
-+ break;
-+ }
-+ }
-+ free(cgname);
-+ return ret;
-+}
-+
-+
-+static bool cgfsng_enter(void *hdata, pid_t pid, bool inner)
- {
- char pidstr[25];
- int i, len;
-@@ -1388,7 +1423,13 @@ static bool cgfsng_enter(void *hdata, pid_t pid)
- return false;
-
- for (i = 0; hierarchies[i]; i++) {
-- char *fullpath = must_make_path(hierarchies[i]->fullcgpath,
-+ char *fullpath;
-+ if (inner)
-+ fullpath = must_make_path(hierarchies[i]->fullcgpath,
-+ CGROUP_NAMESPACE_SUBDIR,
-+ "cgroup.procs", NULL);
-+ else
-+ fullpath = must_make_path(hierarchies[i]->fullcgpath,
- "cgroup.procs", NULL);
- if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
- SYSERROR("Failed to enter %s", fullpath);
-@@ -1404,6 +1445,7 @@ static bool cgfsng_enter(void *hdata, pid_t pid)
- struct chown_data {
- struct cgfsng_handler_data *d;
- uid_t origuid; // target uid in parent namespace
-+ bool inner;
- };
-
- /*
-@@ -1432,13 +1474,20 @@ static int chown_cgroup_wrapper(void *data)
- for (i = 0; hierarchies[i]; i++) {
- char *fullpath, *path = hierarchies[i]->fullcgpath;
-
-+ if (arg->inner)
-+ path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
-+
- if (chown(path, destuid, 0) < 0) {
- SYSERROR("Error chowning %s to %d", path, (int) destuid);
-+ if (arg->inner)
-+ free(path);
- return -1;
- }
-
- if (chmod(path, 0775) < 0) {
- SYSERROR("Error chmoding %s", path);
-+ if (arg->inner)
-+ free(path);
- return -1;
- }
-
-@@ -1462,12 +1511,14 @@ static int chown_cgroup_wrapper(void *data)
- if (chmod(fullpath, 0664) < 0)
- WARN("Error chmoding %s: %m", path);
- free(fullpath);
-+
-+ free(path);
- }
-
- return 0;
- }
-
--static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
-+static bool cgfsns_chown(void *hdata, struct lxc_conf *conf, bool inner)
- {
- struct cgfsng_handler_data *d = hdata;
- struct chown_data wrap;
-@@ -1480,6 +1531,7 @@ static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
-
- wrap.d = d;
- wrap.origuid = geteuid();
-+ wrap.inner = inner;
-
- if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap) < 0) {
- ERROR("Error requesting cgroup chown in new namespace");
-@@ -1774,12 +1826,15 @@ static bool cgfsng_unfreeze(void *hdata)
- return true;
- }
-
--static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
-+static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem, bool inner)
- {
- struct hierarchy *h = get_hierarchy(subsystem);
- if (!h)
- return NULL;
-
-+ if (inner && h->innercgpath)
-+ return h->innercgpath + strlen(h->mountpoint);
-+
- return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
- }
-
-@@ -1814,7 +1869,7 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
- char *path, *fullpath;
- struct hierarchy *h = hierarchies[i];
-
-- path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
-+ path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
- if (!path) // not running
- continue;
-
-diff --git a/src/lxc/cgroups/cgmanager.c b/src/lxc/cgroups/cgmanager.c
-index f2756b0..ac966b6 100644
---- a/src/lxc/cgroups/cgmanager.c
-+++ b/src/lxc/cgroups/cgmanager.c
-@@ -609,7 +609,7 @@ static inline void cleanup_cgroups(char *path)
- cgm_remove_cgroup(slist[i], path);
- }
-
--static inline bool cgm_create(void *hdata)
-+static inline bool cgm_create(void *hdata, bool inner)
- {
- struct cgm_data *d = hdata;
- char **slist = subsystems;
-@@ -617,6 +617,9 @@ static inline bool cgm_create(void *hdata)
- int32_t existed;
- char result[MAXPATHLEN], *tmp, *cgroup_path;
-
-+ if (inner)
-+ return true;
-+
- if (!d)
- return false;
- // XXX we should send a hint to the cgmanager that when these
-@@ -709,13 +712,16 @@ static bool lxc_cgmanager_enter(pid_t pid, const char *controller,
- return true;
- }
-
--static inline bool cgm_enter(void *hdata, pid_t pid)
-+static inline bool cgm_enter(void *hdata, pid_t pid, bool inner)
- {
- struct cgm_data *d = hdata;
- char **slist = subsystems;
- bool ret = false;
- int i;
-
-+ if (inner)
-+ return true;
-+
- if (!d || !d->cgroup_path)
- return false;
-
-@@ -737,10 +743,12 @@ out:
- return ret;
- }
-
--static const char *cgm_get_cgroup(void *hdata, const char *subsystem)
-+static const char *cgm_get_cgroup(void *hdata, const char *subsystem, bool inner)
- {
- struct cgm_data *d = hdata;
-
-+ (void)inner;
-+
- if (!d || !d->cgroup_path)
- return NULL;
- return d->cgroup_path;
-@@ -1541,10 +1549,13 @@ out:
- return ret;
- }
-
--static bool cgm_chown(void *hdata, struct lxc_conf *conf)
-+static bool cgm_chown(void *hdata, struct lxc_conf *conf, bool inner)
- {
- struct cgm_data *d = hdata;
-
-+ if (inner)
-+ return true;
-+
- if (!d || !d->cgroup_path)
- return false;
- if (!cgm_dbus_connect()) {
-diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
-index 78472d4..4d26e72 100644
---- a/src/lxc/cgroups/cgroup.c
-+++ b/src/lxc/cgroups/cgroup.c
-@@ -80,10 +80,10 @@ void cgroup_destroy(struct lxc_handler *handler)
- }
-
- /* Create the container cgroups for all requested controllers */
--bool cgroup_create(struct lxc_handler *handler)
-+bool cgroup_create(struct lxc_handler *handler, bool inner)
- {
- if (ops)
-- return ops->create(handler->cgroup_data);
-+ return ops->create(handler->cgroup_data, inner);
- return false;
- }
-
-@@ -91,10 +91,10 @@ bool cgroup_create(struct lxc_handler *handler)
- * Enter the container init into its new cgroups for all
- * requested controllers
- */
--bool cgroup_enter(struct lxc_handler *handler)
-+bool cgroup_enter(struct lxc_handler *handler, bool inner)
- {
- if (ops)
-- return ops->enter(handler->cgroup_data, handler->pid);
-+ return ops->enter(handler->cgroup_data, handler->pid, inner);
- return false;
- }
-
-@@ -105,10 +105,10 @@ bool cgroup_create_legacy(struct lxc_handler *handler)
- return true;
- }
-
--const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem)
-+const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem, bool inner)
- {
- if (ops)
-- return ops->get_cgroup(handler->cgroup_data, subsystem);
-+ return ops->get_cgroup(handler->cgroup_data, subsystem, inner);
- return NULL;
- }
-
-@@ -150,10 +150,10 @@ bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
- return false;
- }
-
--bool cgroup_chown(struct lxc_handler *handler)
-+bool cgroup_chown(struct lxc_handler *handler, bool inner)
- {
- if (ops && ops->chown)
-- return ops->chown(handler->cgroup_data, handler->conf);
-+ return ops->chown(handler->cgroup_data, handler->conf, inner);
- return true;
- }
-
-diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
-index 11b251e..f36c6f0 100644
---- a/src/lxc/cgroups/cgroup.h
-+++ b/src/lxc/cgroups/cgroup.h
-@@ -28,6 +28,12 @@
- #include <stddef.h>
- #include <sys/types.h>
-
-+/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace
-+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
-+ * order to prevent it from accessing the outer limiting cgroup.
-+ */
-+#define CGROUP_NAMESPACE_SUBDIR "cgns"
-+
- struct lxc_handler;
- struct lxc_conf;
- struct lxc_list;
-@@ -43,10 +49,10 @@ struct cgroup_ops {
-
- void *(*init)(const char *name);
- void (*destroy)(void *hdata, struct lxc_conf *conf);
-- bool (*create)(void *hdata);
-- bool (*enter)(void *hdata, pid_t pid);
-+ bool (*create)(void *hdata, bool inner);
-+ bool (*enter)(void *hdata, pid_t pid, bool inner);
- bool (*create_legacy)(void *hdata, pid_t pid);
-- const char *(*get_cgroup)(void *hdata, const char *subsystem);
-+ const char *(*get_cgroup)(void *hdata, const char *subsystem, bool inner);
- bool (*escape)();
- int (*num_hierarchies)();
- bool (*get_hierarchies)(int n, char ***out);
-@@ -54,7 +60,7 @@ struct cgroup_ops {
- int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
- bool (*unfreeze)(void *hdata);
- bool (*setup_limits)(void *hdata, struct lxc_list *cgroup_conf, bool with_devices);
-- bool (*chown)(void *hdata, struct lxc_conf *conf);
-+ bool (*chown)(void *hdata, struct lxc_conf *conf, bool inner);
- bool (*attach)(const char *name, const char *lxcpath, pid_t pid);
- bool (*mount_cgroup)(void *hdata, const char *root, int type);
- int (*nrtasks)(void *hdata);
-@@ -66,14 +72,14 @@ extern bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid);
- extern bool cgroup_mount(const char *root, struct lxc_handler *handler, int type);
- extern void cgroup_destroy(struct lxc_handler *handler);
- extern bool cgroup_init(struct lxc_handler *handler);
--extern bool cgroup_create(struct lxc_handler *handler);
-+extern bool cgroup_create(struct lxc_handler *handler, bool inner);
- extern bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices);
--extern bool cgroup_chown(struct lxc_handler *handler);
--extern bool cgroup_enter(struct lxc_handler *handler);
-+extern bool cgroup_chown(struct lxc_handler *handler, bool inner);
-+extern bool cgroup_enter(struct lxc_handler *handler, bool inner);
- extern void cgroup_cleanup(struct lxc_handler *handler);
- extern bool cgroup_create_legacy(struct lxc_handler *handler);
- extern int cgroup_nrtasks(struct lxc_handler *handler);
--extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem);
-+extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem, bool inner);
- extern bool cgroup_escape();
- extern int cgroup_num_hierarchies();
- extern bool cgroup_get_hierarchies(int i, char ***out);
-diff --git a/src/lxc/commands.c b/src/lxc/commands.c
-index b17879b..5ef682f 100644
---- a/src/lxc/commands.c
-+++ b/src/lxc/commands.c
-@@ -128,15 +128,15 @@ static int fill_sock_name(char *path, int len, const char *name,
- static const char *lxc_cmd_str(lxc_cmd_t cmd)
- {
- static const char * const cmdname[LXC_CMD_MAX] = {
-- [LXC_CMD_CONSOLE] = "console",
-- [LXC_CMD_STOP] = "stop",
-- [LXC_CMD_GET_STATE] = "get_state",
-- [LXC_CMD_GET_INIT_PID] = "get_init_pid",
-- [LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
-- [LXC_CMD_GET_CGROUP] = "get_cgroup",
-- [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
-- [LXC_CMD_GET_NAME] = "get_name",
-- [LXC_CMD_GET_LXCPATH] = "get_lxcpath",
-+ [LXC_CMD_CONSOLE] = "console",
-+ [LXC_CMD_STOP] = "stop",
-+ [LXC_CMD_GET_STATE] = "get_state",
-+ [LXC_CMD_GET_INIT_PID] = "get_init_pid",
-+ [LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
-+ [LXC_CMD_GET_CGROUP] = "get_cgroup",
-+ [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
-+ [LXC_CMD_GET_NAME] = "get_name",
-+ [LXC_CMD_GET_LXCPATH] = "get_lxcpath",
- };
-
- if (cmd >= LXC_CMD_MAX)
-@@ -429,30 +429,28 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
- return lxc_cmd_rsp_send(fd, &rsp);
- }
-
--/*
-- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
-- * particular subsystem. This is the cgroup path relative to the root
-- * of the cgroup filesystem.
-- *
-- * @name : name of container to connect to
-- * @lxcpath : the lxcpath in which the container is running
-- * @subsystem : the subsystem being asked about
-- *
-- * Returns the path on success, NULL on failure. The caller must free() the
-- * returned path.
-- */
--char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-- const char *subsystem)
-+static char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-+ const char *subsystem, bool inner)
- {
- int ret, stopped;
-+ size_t subsyslen = strlen(subsystem);
-+
- struct lxc_cmd_rr cmd = {
- .req = {
- .cmd = LXC_CMD_GET_CGROUP,
-- .datalen = strlen(subsystem)+1,
-+ .datalen = subsyslen+1,
- .data = subsystem,
- },
- };
-
-+ if (inner) {
-+ char *data = alloca(subsyslen+2);
-+ memcpy(data, subsystem, subsyslen+1);
-+ data[subsyslen+1] = 1;
-+ cmd.req.datalen = subsyslen+2,
-+ cmd.req.data = data;
-+ }
-+
- ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
- if (ret < 0)
- return NULL;
-@@ -471,16 +469,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
- return cmd.rsp.data;
- }
-
-+/*
-+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
-+ * particular subsystem. This is the cgroup path relative to the root
-+ * of the cgroup filesystem.
-+ *
-+ * @name : name of container to connect to
-+ * @lxcpath : the lxcpath in which the container is running
-+ * @subsystem : the subsystem being asked about
-+ *
-+ * Returns the path on success, NULL on failure. The caller must free() the
-+ * returned path.
-+ */
-+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
-+ const char *subsystem)
-+{
-+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
-+}
-+
- static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
- struct lxc_handler *handler)
- {
- struct lxc_cmd_rsp rsp;
- const char *path;
-+ const char *subsystem;
-+ size_t subsyslen;
-+ bool inner = false;
-
- if (req->datalen < 1)
- return -1;
-
-- path = cgroup_get_cgroup(handler, req->data);
-+ subsystem = req->data;
-+ subsyslen = strlen(subsystem);
-+ if (req->datalen == subsyslen+2)
-+ inner = (subsystem[subsyslen+1] == 1);
-+
-+ path = cgroup_get_cgroup(handler, req->data, inner);
- if (!path)
- return -1;
- rsp.datalen = strlen(path) + 1,
-@@ -491,6 +515,24 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
- }
-
- /*
-+ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
-+ * for a particular subsystem. This is the cgroup path relative to the root
-+ * of the cgroup filesystem.
-+ *
-+ * @name : name of container to connect to
-+ * @lxcpath : the lxcpath in which the container is running
-+ * @subsystem : the subsystem being asked about
-+ *
-+ * Returns the path on success, NULL on failure. The caller must free() the
-+ * returned path.
-+ */
-+char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
-+ const char *subsystem)
-+{
-+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
-+}
-+
-+/*
- * lxc_cmd_get_config_item: Get config item the running container
- *
- * @name : name of container to connect to
-@@ -841,16 +883,16 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
- typedef int (*callback)(int, struct lxc_cmd_req *, struct lxc_handler *);
-
- callback cb[LXC_CMD_MAX] = {
-- [LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
-- [LXC_CMD_CONSOLE_WINCH] = lxc_cmd_console_winch_callback,
-- [LXC_CMD_STOP] = lxc_cmd_stop_callback,
-- [LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
-- [LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
-- [LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
-- [LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
-- [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
-- [LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
-- [LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
-+ [LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
-+ [LXC_CMD_CONSOLE_WINCH] = lxc_cmd_console_winch_callback,
-+ [LXC_CMD_STOP] = lxc_cmd_stop_callback,
-+ [LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
-+ [LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
-+ [LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
-+ [LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
-+ [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
-+ [LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
-+ [LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
- };
-
- if (req->cmd >= LXC_CMD_MAX) {
-diff --git a/src/lxc/commands.h b/src/lxc/commands.h
-index 184eefa..6430b33 100644
---- a/src/lxc/commands.h
-+++ b/src/lxc/commands.h
-@@ -77,6 +77,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
- */
- extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
- const char *subsystem);
-+extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
-+ const char *lxcpath, const char *subsystem);
- extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
- extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
- extern char *lxc_cmd_get_name(const char *hashed_sock);
-diff --git a/src/lxc/criu.c b/src/lxc/criu.c
-index 125e674..5a9e36b 100644
---- a/src/lxc/criu.c
-+++ b/src/lxc/criu.c
-@@ -284,7 +284,7 @@ static void exec_criu(struct criu_opts *opts)
- } else {
- const char *p;
-
-- p = cgroup_get_cgroup(opts->handler, controllers[0]);
-+ p = cgroup_get_cgroup(opts->handler, controllers[0], false);
- if (!p) {
- ERROR("failed to get cgroup path for %s", controllers[0]);
- goto err;
-@@ -797,7 +797,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
- goto out_fini_handler;
- }
-
-- if (!cgroup_create(handler)) {
-+ if (!cgroup_create(handler, false)) {
- ERROR("failed creating groups");
- goto out_fini_handler;
- }
-diff --git a/src/lxc/start.c b/src/lxc/start.c
-index 71206e0..c9d78b7 100644
---- a/src/lxc/start.c
-+++ b/src/lxc/start.c
-@@ -1121,7 +1121,7 @@ static int lxc_spawn(struct lxc_handler *handler)
-
- cgroups_connected = true;
-
-- if (!cgroup_create(handler)) {
-+ if (!cgroup_create(handler, false)) {
- ERROR("Failed creating cgroups.");
- goto out_delete_net;
- }
-@@ -1208,10 +1208,10 @@ static int lxc_spawn(struct lxc_handler *handler)
- goto out_delete_net;
- }
-
-- if (!cgroup_enter(handler))
-+ if (!cgroup_enter(handler, false))
- goto out_delete_net;
-
-- if (!cgroup_chown(handler))
-+ if (!cgroup_chown(handler, false))
- goto out_delete_net;
-
- if (failed_before_rename)
-@@ -1254,6 +1254,21 @@ static int lxc_spawn(struct lxc_handler *handler)
- goto out_delete_net;
- }
-
-+ if (cgns_supported()) {
-+ if (!cgroup_create(handler, true)) {
-+ ERROR("failed to create inner cgroup separation layer");
-+ goto out_delete_net;
-+ }
-+ if (!cgroup_enter(handler, true)) {
-+ ERROR("failed to enter inner cgroup separation layer");
-+ goto out_delete_net;
-+ }
-+ if (!cgroup_chown(handler, true)) {
-+ ERROR("failed chown inner cgroup separation layer");
-+ goto out_delete_net;
-+ }
-+ }
-+
- cgroup_disconnect();
- cgroups_connected = false;
-
---
-2.1.4
-
--- /dev/null
+From e68a4291abec1c140fffbc8c954ff9596b17aad4 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 10 Feb 2017 09:14:55 +0100
+Subject: [PATCH 2/8] jessie/systemd: remove Delegate flag to silence warnings
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ config/init/systemd/lxc.service.in | 1 -
+ config/init/systemd/lxc@.service.in | 1 -
+ 2 files changed, 2 deletions(-)
+
+diff --git a/config/init/systemd/lxc.service.in b/config/init/systemd/lxc.service.in
+index 7754191..bdd5828 100644
+--- a/config/init/systemd/lxc.service.in
++++ b/config/init/systemd/lxc.service.in
+@@ -12,7 +12,6 @@ ExecStart=@LIBEXECDIR@/lxc/lxc-containers start
+ ExecStop=@LIBEXECDIR@/lxc/lxc-containers stop
+ # Environment=BOOTUP=serial
+ # Environment=CONSOLETYPE=serial
+-Delegate=yes
+ StandardOutput=syslog
+ StandardError=syslog
+
+diff --git a/config/init/systemd/lxc@.service.in b/config/init/systemd/lxc@.service.in
+index 44d11e8..6b8b5ff 100644
+--- a/config/init/systemd/lxc@.service.in
++++ b/config/init/systemd/lxc@.service.in
+@@ -13,7 +13,6 @@ TimeoutStopSec=120s
+ ExecStart=@BINDIR@/lxc-start -F -n %i
+ # Environment=BOOTUP=serial
+ # Environment=CONSOLETYPE=serial
+-Delegate=yes
+ StandardOutput=syslog
+ StandardError=syslog
+
+--
+2.1.4
+
+++ /dev/null
-From e1fecf743d507ea7df458ed7e14222d02fe76cae Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Wed, 16 Nov 2016 09:53:42 +0100
-Subject: [PATCH 2/2] start/initutils: make cgroupns separation level
- configurable
-
-Adds a new global config variable `lxc.cgroup.separate`
-which controls whether a separation directory for cgroup
-namespaces should be used.
-Can be empty, "privileged", "unprivileged" or "both".
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/initutils.c | 17 +++++++++--------
- src/lxc/initutils.h | 1 +
- src/lxc/start.c | 28 ++++++++++++++++------------
- 3 files changed, 26 insertions(+), 20 deletions(-)
-
-diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c
-index 3213bd3..d07fd10 100644
---- a/src/lxc/initutils.c
-+++ b/src/lxc/initutils.c
-@@ -88,14 +88,15 @@ static char *copy_global_config_value(char *p)
- const char *lxc_global_config_value(const char *option_name)
- {
- static const char * const options[][2] = {
-- { "lxc.bdev.lvm.vg", DEFAULT_VG },
-- { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
-- { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
-- { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
-- { "lxc.lxcpath", NULL },
-- { "lxc.default_config", NULL },
-- { "lxc.cgroup.pattern", NULL },
-- { "lxc.cgroup.use", NULL },
-+ { "lxc.bdev.lvm.vg", DEFAULT_VG },
-+ { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
-+ { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
-+ { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
-+ { "lxc.lxcpath", NULL },
-+ { "lxc.default_config", NULL },
-+ { "lxc.cgroup.pattern", NULL },
-+ { "lxc.cgroup.use", NULL },
-+ { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT },
- { NULL, NULL },
- };
-
-diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
-index c021fd6..443ad02 100644
---- a/src/lxc/initutils.h
-+++ b/src/lxc/initutils.h
-@@ -43,6 +43,7 @@
- #define DEFAULT_THIN_POOL "lxc"
- #define DEFAULT_ZFSROOT "lxc"
- #define DEFAULT_RBDPOOL "lxc"
-+#define DEFAULT_CGPROTECT "privileged"
-
- extern void lxc_setup_fs(void);
- extern const char *lxc_global_config_value(const char *option_name);
-diff --git a/src/lxc/start.c b/src/lxc/start.c
-index c9d78b7..fe2e335 100644
---- a/src/lxc/start.c
-+++ b/src/lxc/start.c
-@@ -1067,6 +1067,7 @@ static int lxc_spawn(struct lxc_handler *handler)
- int saved_ns_fd[LXC_NS_MAX];
- int preserve_mask = 0, i, flags;
- int netpipepair[2], nveths;
-+ bool privileged = lxc_list_empty(&handler->conf->id_map);
-
- netpipe = -1;
-
-@@ -1130,7 +1131,7 @@ static int lxc_spawn(struct lxc_handler *handler)
- * it readonly.
- * If the container is unprivileged then skip rootfs pinning.
- */
-- if (lxc_list_empty(&handler->conf->id_map)) {
-+ if (privileged) {
- handler->pinfd = pin_rootfs(handler->conf->rootfs.path);
- if (handler->pinfd == -1)
- INFO("Failed to pin the rootfs for container \"%s\".", handler->name);
-@@ -1255,17 +1256,20 @@ static int lxc_spawn(struct lxc_handler *handler)
- }
-
- if (cgns_supported()) {
-- if (!cgroup_create(handler, true)) {
-- ERROR("failed to create inner cgroup separation layer");
-- goto out_delete_net;
-- }
-- if (!cgroup_enter(handler, true)) {
-- ERROR("failed to enter inner cgroup separation layer");
-- goto out_delete_net;
-- }
-- if (!cgroup_chown(handler, true)) {
-- ERROR("failed chown inner cgroup separation layer");
-- goto out_delete_net;
-+ const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits");
-+ if (!strcmp(tmp, "both") || !strcmp(tmp, privileged ? "privileged" : "unprivileged")) {
-+ if (!cgroup_create(handler, true)) {
-+ ERROR("failed to create inner cgroup separation layer");
-+ goto out_delete_net;
-+ }
-+ if (!cgroup_enter(handler, true)) {
-+ ERROR("failed to enter inner cgroup separation layer");
-+ goto out_delete_net;
-+ }
-+ if (!cgroup_chown(handler, true)) {
-+ ERROR("failed chown inner cgroup separation layer");
-+ goto out_delete_net;
-+ }
- }
- }
-
---
-2.1.4
-
--- /dev/null
+From 6b3de84e0654c3b0b13166d63af9961a3a757c6e Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 10 Feb 2017 09:15:37 +0100
+Subject: [PATCH 3/8] pve: run lxcnetaddbr when instantiating veths
+
+FIXME: Why aren't we using regular up-scripts?
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/conf.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/src/lxc/conf.c b/src/lxc/conf.c
+index a93124b..c4079bb 100644
+--- a/src/lxc/conf.c
++++ b/src/lxc/conf.c
+@@ -2683,8 +2683,13 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd
+ "veth", veth1, (char*) NULL);
+ if (err)
+ goto out_delete;
++ } else if (!netdev->link) {
++ err = run_script(handler->name, "net", "/usr/share/lxc/lxcnetaddbr", "up",
++ "veth", veth1, (char*) NULL);
++ if (err)
++ goto out_delete;
+ }
+-
++
+ DEBUG("instantiated veth '%s/%s', index is '%d'",
+ veth1, veth2, netdev->ifindex);
+
+--
+2.1.4
+
--- /dev/null
+From e7d6b0d2384070f2c34a46aaa20250ce31f96c9c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
+Date: Wed, 9 Nov 2016 09:14:26 +0100
+Subject: [PATCH 4/8] deny rw mounting of /sys and /proc
+
+mounting them rw would allow root in a privileged container
+to change the permissions of /sys on the host, which could
+lock out non-root users.
+
+if a rw /sys is desired, set "lxc.mount.auto" accordingly
+---
+ config/apparmor/abstractions/container-base | 6 +++++-
+ config/apparmor/abstractions/container-base.in | 6 +++++-
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base
+index 06290de..779aadd 100644
+--- a/config/apparmor/abstractions/container-base
++++ b/config/apparmor/abstractions/container-base
+@@ -84,7 +84,6 @@
+ deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
+ mount fstype=proc -> /proc/,
+ mount fstype=sysfs -> /sys/,
+- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
+ deny /sys/firmware/efi/efivars/** rwklx,
+ deny /sys/kernel/security/** rwklx,
+ mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
+@@ -93,6 +92,11 @@
+ # deny reads from debugfs
+ deny /sys/kernel/debug/{,**} rwklx,
+
++ # prevent rw mounting of /sys, because that allows changing its global permissions
++ deny mount -> /proc/,
++ deny mount -> /sys/,
++# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
++
+ # allow paths to be made slave, shared, private or unbindable
+ # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
+ # mount options=(rw,make-slave) -> **,
+diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in
+index 5bc9b28..5c8e441 100644
+--- a/config/apparmor/abstractions/container-base.in
++++ b/config/apparmor/abstractions/container-base.in
+@@ -84,7 +84,6 @@
+ deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
+ mount fstype=proc -> /proc/,
+ mount fstype=sysfs -> /sys/,
+- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
+ deny /sys/firmware/efi/efivars/** rwklx,
+ deny /sys/kernel/security/** rwklx,
+ mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
+@@ -93,6 +92,11 @@
+ # deny reads from debugfs
+ deny /sys/kernel/debug/{,**} rwklx,
+
++ # prevent rw mounting of /sys, because that allows changing its global permissions
++ deny mount -> /proc/,
++ deny mount -> /sys/,
++# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
++
+ # allow paths to be made slave, shared, private or unbindable
+ # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
+ # mount options=(rw,make-slave) -> **,
+--
+2.1.4
+
--- /dev/null
+From 6adbaea0d07553932f4cd78b5530cd5291c3b41f Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Tue, 15 Nov 2016 09:20:24 +0100
+Subject: [PATCH 5/8] separate the limiting from the namespaced cgroup root
+
+When cgroup namespaces are enabled, a privileged container
+with mixed cgroups has full write access to its own root
+cgroup, effectively allowing it to overwrite values written
+from the outside or configured via lxc.cgroup.*.
+
+This patch causes an additional 'ns/' directory to be
+created in all cgroups if cgroup namespaces and cgfsng are
+being used in order to combat this.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/cgroups/cgfs.c | 19 ++++++--
+ src/lxc/cgroups/cgfsng.c | 81 +++++++++++++++++++++++++++-----
+ src/lxc/cgroups/cgmanager.c | 19 ++++++--
+ src/lxc/cgroups/cgroup.c | 16 +++----
+ src/lxc/cgroups/cgroup.h | 22 +++++----
+ src/lxc/commands.c | 112 ++++++++++++++++++++++++++++++--------------
+ src/lxc/commands.h | 2 +
+ src/lxc/criu.c | 4 +-
+ src/lxc/start.c | 21 +++++++--
+ 9 files changed, 219 insertions(+), 77 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgfs.c b/src/lxc/cgroups/cgfs.c
+index 8499200..b78b78d 100644
+--- a/src/lxc/cgroups/cgfs.c
++++ b/src/lxc/cgroups/cgfs.c
+@@ -2383,12 +2383,15 @@ static void cgfs_destroy(void *hdata, struct lxc_conf *conf)
+ free(d);
+ }
+
+-static inline bool cgfs_create(void *hdata)
++static inline bool cgfs_create(void *hdata, bool inner)
+ {
+ struct cgfs_data *d = hdata;
+ struct cgroup_process_info *i;
+ struct cgroup_meta_data *md;
+
++ if (inner)
++ return true;
++
+ if (!d)
+ return false;
+ md = d->meta;
+@@ -2399,12 +2402,15 @@ static inline bool cgfs_create(void *hdata)
+ return true;
+ }
+
+-static inline bool cgfs_enter(void *hdata, pid_t pid)
++static inline bool cgfs_enter(void *hdata, pid_t pid, bool inner)
+ {
+ struct cgfs_data *d = hdata;
+ struct cgroup_process_info *i;
+ int ret;
+
++ if (inner)
++ return true;
++
+ if (!d)
+ return false;
+ i = d->info;
+@@ -2428,10 +2434,12 @@ static inline bool cgfs_create_legacy(void *hdata, pid_t pid)
+ return true;
+ }
+
+-static const char *cgfs_get_cgroup(void *hdata, const char *subsystem)
++static const char *cgfs_get_cgroup(void *hdata, const char *subsystem, bool inner)
+ {
+ struct cgfs_data *d = hdata;
+
++ (void)inner;
++
+ if (!d)
+ return NULL;
+ return lxc_cgroup_get_hierarchy_path_data(subsystem, d);
+@@ -2646,13 +2654,16 @@ static bool do_cgfs_chown(char *cgroup_path, struct lxc_conf *conf)
+ return true;
+ }
+
+-static bool cgfs_chown(void *hdata, struct lxc_conf *conf)
++static bool cgfs_chown(void *hdata, struct lxc_conf *conf, bool inner)
+ {
+ struct cgfs_data *d = hdata;
+ struct cgroup_process_info *info_ptr;
+ char *cgpath;
+ bool r = true;
+
++ if (inner)
++ return true;
++
+ if (!d)
+ return false;
+
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index 2b772e2..f7df3cf 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -72,6 +72,7 @@ struct hierarchy {
+ char *mountpoint;
+ char *base_cgroup;
+ char *fullcgpath;
++ char *innercgpath;
+ };
+
+ /*
+@@ -814,6 +815,7 @@ static void add_controller(char **clist, char *mountpoint, char *base_cgroup)
+ new->mountpoint = mountpoint;
+ new->base_cgroup = base_cgroup;
+ new->fullcgpath = NULL;
++ new->innercgpath = false;
+
+ newentry = append_null_to_list((void ***)&hierarchies);
+ hierarchies[newentry] = new;
+@@ -1286,6 +1288,8 @@ static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
+ free(h->fullcgpath);
+ h->fullcgpath = NULL;
+ }
++ free(h->innercgpath);
++ h->innercgpath = NULL;
+ }
+ }
+
+@@ -1299,18 +1303,25 @@ struct cgroup_ops *cgfsng_ops_init(void)
+ return &cgfsng_ops;
+ }
+
+-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
++static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
+ {
+- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
+- if (dir_exists(h->fullcgpath)) { // it must not already exist
+- ERROR("Path \"%s\" already existed.", h->fullcgpath);
++ char *path;
++ if (inner) {
++ path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL);
++ h->innercgpath = path;
++ } else {
++ path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
++ h->fullcgpath = path;
++ }
++ if (dir_exists(path)) { // it must not already exist
++ ERROR("Path \"%s\" already existed.", path);
+ return false;
+ }
+- if (!handle_cpuset_hierarchy(h, cgname)) {
++ if (!inner && !handle_cpuset_hierarchy(h, cgname)) {
+ ERROR("Failed to handle cgroupfs v1 cpuset controller.");
+ return false;
+ }
+- return mkdir_p(h->fullcgpath, 0755) == 0;
++ return mkdir_p(path, 0755) == 0;
+ }
+
+ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
+@@ -1325,7 +1336,8 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
+ * Try to create the same cgroup in all hierarchies.
+ * Start with cgroup_pattern; next cgroup_pattern-1, -2, ..., -999
+ */
+-static inline bool cgfsng_create(void *hdata)
++static inline bool cgfsng_create_inner(struct cgfsng_handler_data*);
++static inline bool cgfsng_create(void *hdata, bool inner)
+ {
+ struct cgfsng_handler_data *d = hdata;
+ char *tmp, *cgname, *offset;
+@@ -1335,9 +1347,15 @@ static inline bool cgfsng_create(void *hdata)
+ if (!d)
+ return false;
+ if (d->container_cgroup) {
++ if (inner)
++ return cgfsng_create_inner(d);
+ WARN("cgfsng_create called a second time");
+ return false;
+ }
++ if (inner) {
++ ERROR("cgfsng_create called twice for innner cgroup");
++ return false;
++ }
+
+ tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
+ if (!tmp) {
+@@ -1358,7 +1376,7 @@ again:
+ if (idx)
+ snprintf(offset, 5, "-%d", idx);
+ for (i = 0; hierarchies[i]; i++) {
+- if (!create_path_for_hierarchy(hierarchies[i], cgname)) {
++ if (!create_path_for_hierarchy(hierarchies[i], cgname, false)) {
+ int j;
+ SYSERROR("Failed to create %s: %s", hierarchies[i]->fullcgpath, strerror(errno));
+ free(hierarchies[i]->fullcgpath);
+@@ -1378,7 +1396,24 @@ out_free:
+ return false;
+ }
+
+-static bool cgfsng_enter(void *hdata, pid_t pid)
++static inline bool cgfsng_create_inner(struct cgfsng_handler_data *d)
++{
++ size_t i;
++ bool ret = true;
++ char *cgname = must_make_path(d->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
++ for (i = 0; hierarchies[i]; i++) {
++ if (!create_path_for_hierarchy(hierarchies[i], cgname, true)) {
++ SYSERROR("Failed to create %s namespace subdirectory: %s", hierarchies[i]->fullcgpath, strerror(errno));
++ ret = false;
++ break;
++ }
++ }
++ free(cgname);
++ return ret;
++}
++
++
++static bool cgfsng_enter(void *hdata, pid_t pid, bool inner)
+ {
+ char pidstr[25];
+ int i, len;
+@@ -1388,7 +1423,13 @@ static bool cgfsng_enter(void *hdata, pid_t pid)
+ return false;
+
+ for (i = 0; hierarchies[i]; i++) {
+- char *fullpath = must_make_path(hierarchies[i]->fullcgpath,
++ char *fullpath;
++ if (inner)
++ fullpath = must_make_path(hierarchies[i]->fullcgpath,
++ CGROUP_NAMESPACE_SUBDIR,
++ "cgroup.procs", NULL);
++ else
++ fullpath = must_make_path(hierarchies[i]->fullcgpath,
+ "cgroup.procs", NULL);
+ if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
+ SYSERROR("Failed to enter %s", fullpath);
+@@ -1404,6 +1445,7 @@ static bool cgfsng_enter(void *hdata, pid_t pid)
+ struct chown_data {
+ struct cgfsng_handler_data *d;
+ uid_t origuid; // target uid in parent namespace
++ bool inner;
+ };
+
+ /*
+@@ -1432,13 +1474,20 @@ static int chown_cgroup_wrapper(void *data)
+ for (i = 0; hierarchies[i]; i++) {
+ char *fullpath, *path = hierarchies[i]->fullcgpath;
+
++ if (arg->inner)
++ path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
++
+ if (chown(path, destuid, 0) < 0) {
+ SYSERROR("Error chowning %s to %d", path, (int) destuid);
++ if (arg->inner)
++ free(path);
+ return -1;
+ }
+
+ if (chmod(path, 0775) < 0) {
+ SYSERROR("Error chmoding %s", path);
++ if (arg->inner)
++ free(path);
+ return -1;
+ }
+
+@@ -1462,12 +1511,14 @@ static int chown_cgroup_wrapper(void *data)
+ if (chmod(fullpath, 0664) < 0)
+ WARN("Error chmoding %s: %m", path);
+ free(fullpath);
++
++ free(path);
+ }
+
+ return 0;
+ }
+
+-static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
++static bool cgfsns_chown(void *hdata, struct lxc_conf *conf, bool inner)
+ {
+ struct cgfsng_handler_data *d = hdata;
+ struct chown_data wrap;
+@@ -1480,6 +1531,7 @@ static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
+
+ wrap.d = d;
+ wrap.origuid = geteuid();
++ wrap.inner = inner;
+
+ if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap) < 0) {
+ ERROR("Error requesting cgroup chown in new namespace");
+@@ -1774,12 +1826,15 @@ static bool cgfsng_unfreeze(void *hdata)
+ return true;
+ }
+
+-static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
++static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem, bool inner)
+ {
+ struct hierarchy *h = get_hierarchy(subsystem);
+ if (!h)
+ return NULL;
+
++ if (inner && h->innercgpath)
++ return h->innercgpath + strlen(h->mountpoint);
++
+ return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
+ }
+
+@@ -1814,7 +1869,7 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
+ char *path, *fullpath;
+ struct hierarchy *h = hierarchies[i];
+
+- path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
++ path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
+ if (!path) // not running
+ continue;
+
+diff --git a/src/lxc/cgroups/cgmanager.c b/src/lxc/cgroups/cgmanager.c
+index f2756b0..ac966b6 100644
+--- a/src/lxc/cgroups/cgmanager.c
++++ b/src/lxc/cgroups/cgmanager.c
+@@ -609,7 +609,7 @@ static inline void cleanup_cgroups(char *path)
+ cgm_remove_cgroup(slist[i], path);
+ }
+
+-static inline bool cgm_create(void *hdata)
++static inline bool cgm_create(void *hdata, bool inner)
+ {
+ struct cgm_data *d = hdata;
+ char **slist = subsystems;
+@@ -617,6 +617,9 @@ static inline bool cgm_create(void *hdata)
+ int32_t existed;
+ char result[MAXPATHLEN], *tmp, *cgroup_path;
+
++ if (inner)
++ return true;
++
+ if (!d)
+ return false;
+ // XXX we should send a hint to the cgmanager that when these
+@@ -709,13 +712,16 @@ static bool lxc_cgmanager_enter(pid_t pid, const char *controller,
+ return true;
+ }
+
+-static inline bool cgm_enter(void *hdata, pid_t pid)
++static inline bool cgm_enter(void *hdata, pid_t pid, bool inner)
+ {
+ struct cgm_data *d = hdata;
+ char **slist = subsystems;
+ bool ret = false;
+ int i;
+
++ if (inner)
++ return true;
++
+ if (!d || !d->cgroup_path)
+ return false;
+
+@@ -737,10 +743,12 @@ out:
+ return ret;
+ }
+
+-static const char *cgm_get_cgroup(void *hdata, const char *subsystem)
++static const char *cgm_get_cgroup(void *hdata, const char *subsystem, bool inner)
+ {
+ struct cgm_data *d = hdata;
+
++ (void)inner;
++
+ if (!d || !d->cgroup_path)
+ return NULL;
+ return d->cgroup_path;
+@@ -1541,10 +1549,13 @@ out:
+ return ret;
+ }
+
+-static bool cgm_chown(void *hdata, struct lxc_conf *conf)
++static bool cgm_chown(void *hdata, struct lxc_conf *conf, bool inner)
+ {
+ struct cgm_data *d = hdata;
+
++ if (inner)
++ return true;
++
+ if (!d || !d->cgroup_path)
+ return false;
+ if (!cgm_dbus_connect()) {
+diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
+index 78472d4..4d26e72 100644
+--- a/src/lxc/cgroups/cgroup.c
++++ b/src/lxc/cgroups/cgroup.c
+@@ -80,10 +80,10 @@ void cgroup_destroy(struct lxc_handler *handler)
+ }
+
+ /* Create the container cgroups for all requested controllers */
+-bool cgroup_create(struct lxc_handler *handler)
++bool cgroup_create(struct lxc_handler *handler, bool inner)
+ {
+ if (ops)
+- return ops->create(handler->cgroup_data);
++ return ops->create(handler->cgroup_data, inner);
+ return false;
+ }
+
+@@ -91,10 +91,10 @@ bool cgroup_create(struct lxc_handler *handler)
+ * Enter the container init into its new cgroups for all
+ * requested controllers
+ */
+-bool cgroup_enter(struct lxc_handler *handler)
++bool cgroup_enter(struct lxc_handler *handler, bool inner)
+ {
+ if (ops)
+- return ops->enter(handler->cgroup_data, handler->pid);
++ return ops->enter(handler->cgroup_data, handler->pid, inner);
+ return false;
+ }
+
+@@ -105,10 +105,10 @@ bool cgroup_create_legacy(struct lxc_handler *handler)
+ return true;
+ }
+
+-const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem)
++const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem, bool inner)
+ {
+ if (ops)
+- return ops->get_cgroup(handler->cgroup_data, subsystem);
++ return ops->get_cgroup(handler->cgroup_data, subsystem, inner);
+ return NULL;
+ }
+
+@@ -150,10 +150,10 @@ bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
+ return false;
+ }
+
+-bool cgroup_chown(struct lxc_handler *handler)
++bool cgroup_chown(struct lxc_handler *handler, bool inner)
+ {
+ if (ops && ops->chown)
+- return ops->chown(handler->cgroup_data, handler->conf);
++ return ops->chown(handler->cgroup_data, handler->conf, inner);
+ return true;
+ }
+
+diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
+index 11b251e..f36c6f0 100644
+--- a/src/lxc/cgroups/cgroup.h
++++ b/src/lxc/cgroups/cgroup.h
+@@ -28,6 +28,12 @@
+ #include <stddef.h>
+ #include <sys/types.h>
+
++/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace
++ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
++ * order to prevent it from accessing the outer limiting cgroup.
++ */
++#define CGROUP_NAMESPACE_SUBDIR "cgns"
++
+ struct lxc_handler;
+ struct lxc_conf;
+ struct lxc_list;
+@@ -43,10 +49,10 @@ struct cgroup_ops {
+
+ void *(*init)(const char *name);
+ void (*destroy)(void *hdata, struct lxc_conf *conf);
+- bool (*create)(void *hdata);
+- bool (*enter)(void *hdata, pid_t pid);
++ bool (*create)(void *hdata, bool inner);
++ bool (*enter)(void *hdata, pid_t pid, bool inner);
+ bool (*create_legacy)(void *hdata, pid_t pid);
+- const char *(*get_cgroup)(void *hdata, const char *subsystem);
++ const char *(*get_cgroup)(void *hdata, const char *subsystem, bool inner);
+ bool (*escape)();
+ int (*num_hierarchies)();
+ bool (*get_hierarchies)(int n, char ***out);
+@@ -54,7 +60,7 @@ struct cgroup_ops {
+ int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
+ bool (*unfreeze)(void *hdata);
+ bool (*setup_limits)(void *hdata, struct lxc_list *cgroup_conf, bool with_devices);
+- bool (*chown)(void *hdata, struct lxc_conf *conf);
++ bool (*chown)(void *hdata, struct lxc_conf *conf, bool inner);
+ bool (*attach)(const char *name, const char *lxcpath, pid_t pid);
+ bool (*mount_cgroup)(void *hdata, const char *root, int type);
+ int (*nrtasks)(void *hdata);
+@@ -66,14 +72,14 @@ extern bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid);
+ extern bool cgroup_mount(const char *root, struct lxc_handler *handler, int type);
+ extern void cgroup_destroy(struct lxc_handler *handler);
+ extern bool cgroup_init(struct lxc_handler *handler);
+-extern bool cgroup_create(struct lxc_handler *handler);
++extern bool cgroup_create(struct lxc_handler *handler, bool inner);
+ extern bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices);
+-extern bool cgroup_chown(struct lxc_handler *handler);
+-extern bool cgroup_enter(struct lxc_handler *handler);
++extern bool cgroup_chown(struct lxc_handler *handler, bool inner);
++extern bool cgroup_enter(struct lxc_handler *handler, bool inner);
+ extern void cgroup_cleanup(struct lxc_handler *handler);
+ extern bool cgroup_create_legacy(struct lxc_handler *handler);
+ extern int cgroup_nrtasks(struct lxc_handler *handler);
+-extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem);
++extern const char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem, bool inner);
+ extern bool cgroup_escape();
+ extern int cgroup_num_hierarchies();
+ extern bool cgroup_get_hierarchies(int i, char ***out);
+diff --git a/src/lxc/commands.c b/src/lxc/commands.c
+index b17879b..5ef682f 100644
+--- a/src/lxc/commands.c
++++ b/src/lxc/commands.c
+@@ -128,15 +128,15 @@ static int fill_sock_name(char *path, int len, const char *name,
+ static const char *lxc_cmd_str(lxc_cmd_t cmd)
+ {
+ static const char * const cmdname[LXC_CMD_MAX] = {
+- [LXC_CMD_CONSOLE] = "console",
+- [LXC_CMD_STOP] = "stop",
+- [LXC_CMD_GET_STATE] = "get_state",
+- [LXC_CMD_GET_INIT_PID] = "get_init_pid",
+- [LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
+- [LXC_CMD_GET_CGROUP] = "get_cgroup",
+- [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
+- [LXC_CMD_GET_NAME] = "get_name",
+- [LXC_CMD_GET_LXCPATH] = "get_lxcpath",
++ [LXC_CMD_CONSOLE] = "console",
++ [LXC_CMD_STOP] = "stop",
++ [LXC_CMD_GET_STATE] = "get_state",
++ [LXC_CMD_GET_INIT_PID] = "get_init_pid",
++ [LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags",
++ [LXC_CMD_GET_CGROUP] = "get_cgroup",
++ [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item",
++ [LXC_CMD_GET_NAME] = "get_name",
++ [LXC_CMD_GET_LXCPATH] = "get_lxcpath",
+ };
+
+ if (cmd >= LXC_CMD_MAX)
+@@ -429,30 +429,28 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
+ return lxc_cmd_rsp_send(fd, &rsp);
+ }
+
+-/*
+- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
+- * particular subsystem. This is the cgroup path relative to the root
+- * of the cgroup filesystem.
+- *
+- * @name : name of container to connect to
+- * @lxcpath : the lxcpath in which the container is running
+- * @subsystem : the subsystem being asked about
+- *
+- * Returns the path on success, NULL on failure. The caller must free() the
+- * returned path.
+- */
+-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+- const char *subsystem)
++static char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
++ const char *subsystem, bool inner)
+ {
+ int ret, stopped;
++ size_t subsyslen = strlen(subsystem);
++
+ struct lxc_cmd_rr cmd = {
+ .req = {
+ .cmd = LXC_CMD_GET_CGROUP,
+- .datalen = strlen(subsystem)+1,
++ .datalen = subsyslen+1,
+ .data = subsystem,
+ },
+ };
+
++ if (inner) {
++ char *data = alloca(subsyslen+2);
++ memcpy(data, subsystem, subsyslen+1);
++ data[subsyslen+1] = 1;
++ cmd.req.datalen = subsyslen+2,
++ cmd.req.data = data;
++ }
++
+ ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
+ if (ret < 0)
+ return NULL;
+@@ -471,16 +469,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+ return cmd.rsp.data;
+ }
+
++/*
++ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
++ * particular subsystem. This is the cgroup path relative to the root
++ * of the cgroup filesystem.
++ *
++ * @name : name of container to connect to
++ * @lxcpath : the lxcpath in which the container is running
++ * @subsystem : the subsystem being asked about
++ *
++ * Returns the path on success, NULL on failure. The caller must free() the
++ * returned path.
++ */
++char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
++ const char *subsystem)
++{
++ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
++}
++
+ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
+ struct lxc_handler *handler)
+ {
+ struct lxc_cmd_rsp rsp;
+ const char *path;
++ const char *subsystem;
++ size_t subsyslen;
++ bool inner = false;
+
+ if (req->datalen < 1)
+ return -1;
+
+- path = cgroup_get_cgroup(handler, req->data);
++ subsystem = req->data;
++ subsyslen = strlen(subsystem);
++ if (req->datalen == subsyslen+2)
++ inner = (subsystem[subsyslen+1] == 1);
++
++ path = cgroup_get_cgroup(handler, req->data, inner);
+ if (!path)
+ return -1;
+ rsp.datalen = strlen(path) + 1,
+@@ -491,6 +515,24 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
+ }
+
+ /*
++ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
++ * for a particular subsystem. This is the cgroup path relative to the root
++ * of the cgroup filesystem.
++ *
++ * @name : name of container to connect to
++ * @lxcpath : the lxcpath in which the container is running
++ * @subsystem : the subsystem being asked about
++ *
++ * Returns the path on success, NULL on failure. The caller must free() the
++ * returned path.
++ */
++char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
++ const char *subsystem)
++{
++ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
++}
++
++/*
+ * lxc_cmd_get_config_item: Get config item the running container
+ *
+ * @name : name of container to connect to
+@@ -841,16 +883,16 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req,
+ typedef int (*callback)(int, struct lxc_cmd_req *, struct lxc_handler *);
+
+ callback cb[LXC_CMD_MAX] = {
+- [LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
+- [LXC_CMD_CONSOLE_WINCH] = lxc_cmd_console_winch_callback,
+- [LXC_CMD_STOP] = lxc_cmd_stop_callback,
+- [LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
+- [LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
+- [LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
+- [LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
+- [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
+- [LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
+- [LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
++ [LXC_CMD_CONSOLE] = lxc_cmd_console_callback,
++ [LXC_CMD_CONSOLE_WINCH] = lxc_cmd_console_winch_callback,
++ [LXC_CMD_STOP] = lxc_cmd_stop_callback,
++ [LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback,
++ [LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback,
++ [LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback,
++ [LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback,
++ [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback,
++ [LXC_CMD_GET_NAME] = lxc_cmd_get_name_callback,
++ [LXC_CMD_GET_LXCPATH] = lxc_cmd_get_lxcpath_callback,
+ };
+
+ if (req->cmd >= LXC_CMD_MAX) {
+diff --git a/src/lxc/commands.h b/src/lxc/commands.h
+index 184eefa..6430b33 100644
+--- a/src/lxc/commands.h
++++ b/src/lxc/commands.h
+@@ -77,6 +77,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
+ */
+ extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
+ const char *subsystem);
++extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
++ const char *lxcpath, const char *subsystem);
+ extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
+ extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
+ extern char *lxc_cmd_get_name(const char *hashed_sock);
+diff --git a/src/lxc/criu.c b/src/lxc/criu.c
+index 8a0702f..5843f97 100644
+--- a/src/lxc/criu.c
++++ b/src/lxc/criu.c
+@@ -283,7 +283,7 @@ static void exec_criu(struct criu_opts *opts)
+ } else {
+ const char *p;
+
+- p = cgroup_get_cgroup(opts->handler, controllers[0]);
++ p = cgroup_get_cgroup(opts->handler, controllers[0], false);
+ if (!p) {
+ ERROR("failed to get cgroup path for %s", controllers[0]);
+ goto err;
+@@ -795,7 +795,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
+ goto out_fini_handler;
+ }
+
+- if (!cgroup_create(handler)) {
++ if (!cgroup_create(handler, false)) {
+ ERROR("failed creating groups");
+ goto out_fini_handler;
+ }
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index c2c14a7..e889421 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -1104,7 +1104,7 @@ static int lxc_spawn(struct lxc_handler *handler)
+
+ cgroups_connected = true;
+
+- if (!cgroup_create(handler)) {
++ if (!cgroup_create(handler, false)) {
+ ERROR("Failed creating cgroups.");
+ goto out_delete_net;
+ }
+@@ -1191,10 +1191,10 @@ static int lxc_spawn(struct lxc_handler *handler)
+ goto out_delete_net;
+ }
+
+- if (!cgroup_enter(handler))
++ if (!cgroup_enter(handler, false))
+ goto out_delete_net;
+
+- if (!cgroup_chown(handler))
++ if (!cgroup_chown(handler, false))
+ goto out_delete_net;
+
+ if (failed_before_rename)
+@@ -1237,6 +1237,21 @@ static int lxc_spawn(struct lxc_handler *handler)
+ goto out_delete_net;
+ }
+
++ if (cgns_supported()) {
++ if (!cgroup_create(handler, true)) {
++ ERROR("failed to create inner cgroup separation layer");
++ goto out_delete_net;
++ }
++ if (!cgroup_enter(handler, true)) {
++ ERROR("failed to enter inner cgroup separation layer");
++ goto out_delete_net;
++ }
++ if (!cgroup_chown(handler, true)) {
++ ERROR("failed chown inner cgroup separation layer");
++ goto out_delete_net;
++ }
++ }
++
+ cgroup_disconnect();
+ cgroups_connected = false;
+
+--
+2.1.4
+
--- /dev/null
+From af72260927efd412210ec85842e1ef70ccc0c5e8 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Wed, 16 Nov 2016 09:53:42 +0100
+Subject: [PATCH 6/8] start/initutils: make cgroupns separation level
+ configurable
+
+Adds a new global config variable `lxc.cgroup.protect_limits`
+which controls whether a separation directory for cgroup
+namespaces should be used.
+Can be empty, "privileged" (the default), "unprivileged" or "both".
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/initutils.c | 17 +++++++++--------
+ src/lxc/initutils.h | 1 +
+ src/lxc/start.c | 28 ++++++++++++++++------------
+ 3 files changed, 26 insertions(+), 20 deletions(-)
+
+diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c
+index 8d9016c..0630293 100644
+--- a/src/lxc/initutils.c
++++ b/src/lxc/initutils.c
+@@ -88,14 +88,15 @@ static char *copy_global_config_value(char *p)
+ const char *lxc_global_config_value(const char *option_name)
+ {
+ static const char * const options[][2] = {
+- { "lxc.bdev.lvm.vg", DEFAULT_VG },
+- { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
+- { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
+- { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
+- { "lxc.lxcpath", NULL },
+- { "lxc.default_config", NULL },
+- { "lxc.cgroup.pattern", NULL },
+- { "lxc.cgroup.use", NULL },
++ { "lxc.bdev.lvm.vg", DEFAULT_VG },
++ { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL },
++ { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT },
++ { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL },
++ { "lxc.lxcpath", NULL },
++ { "lxc.default_config", NULL },
++ { "lxc.cgroup.pattern", NULL },
++ { "lxc.cgroup.use", NULL },
++ { "lxc.cgroup.protect_limits", DEFAULT_CGPROTECT },
+ { NULL, NULL },
+ };
+
+diff --git a/src/lxc/initutils.h b/src/lxc/initutils.h
+index c021fd6..443ad02 100644
+--- a/src/lxc/initutils.h
++++ b/src/lxc/initutils.h
+@@ -43,6 +43,7 @@
+ #define DEFAULT_THIN_POOL "lxc"
+ #define DEFAULT_ZFSROOT "lxc"
+ #define DEFAULT_RBDPOOL "lxc"
++#define DEFAULT_CGPROTECT "privileged"
+
+ extern void lxc_setup_fs(void);
+ extern const char *lxc_global_config_value(const char *option_name);
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index e889421..4217c5d 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -1050,6 +1050,7 @@ static int lxc_spawn(struct lxc_handler *handler)
+ int saved_ns_fd[LXC_NS_MAX];
+ int preserve_mask = 0, i, flags;
+ int netpipepair[2], nveths;
++ bool privileged = lxc_list_empty(&handler->conf->id_map);
+
+ netpipe = -1;
+
+@@ -1113,7 +1114,7 @@ static int lxc_spawn(struct lxc_handler *handler)
+ * it readonly.
+ * If the container is unprivileged then skip rootfs pinning.
+ */
+- if (lxc_list_empty(&handler->conf->id_map)) {
++ if (privileged) {
+ handler->pinfd = pin_rootfs(handler->conf->rootfs.path);
+ if (handler->pinfd == -1)
+ INFO("Failed to pin the rootfs for container \"%s\".", handler->name);
+@@ -1238,17 +1239,20 @@ static int lxc_spawn(struct lxc_handler *handler)
+ }
+
+ if (cgns_supported()) {
+- if (!cgroup_create(handler, true)) {
+- ERROR("failed to create inner cgroup separation layer");
+- goto out_delete_net;
+- }
+- if (!cgroup_enter(handler, true)) {
+- ERROR("failed to enter inner cgroup separation layer");
+- goto out_delete_net;
+- }
+- if (!cgroup_chown(handler, true)) {
+- ERROR("failed chown inner cgroup separation layer");
+- goto out_delete_net;
++ const char *tmp = lxc_global_config_value("lxc.cgroup.protect_limits");
++ if (!strcmp(tmp, "both") || !strcmp(tmp, privileged ? "privileged" : "unprivileged")) {
++ if (!cgroup_create(handler, true)) {
++ ERROR("failed to create inner cgroup separation layer");
++ goto out_delete_net;
++ }
++ if (!cgroup_enter(handler, true)) {
++ ERROR("failed to enter inner cgroup separation layer");
++ goto out_delete_net;
++ }
++ if (!cgroup_chown(handler, true)) {
++ ERROR("failed chown inner cgroup separation layer");
++ goto out_delete_net;
++ }
+ }
+ }
+
+--
+2.1.4
+
--- /dev/null
+From 3790507952f3cda5c6dd9bb6f87c80d9b0ddadf7 Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 23 Dec 2016 15:57:24 +0100
+Subject: [PATCH 7/8] rename cgroup namespace directory to ns
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ src/lxc/cgroups/cgroup.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
+index f36c6f0..2c504c8 100644
+--- a/src/lxc/cgroups/cgroup.h
++++ b/src/lxc/cgroups/cgroup.h
+@@ -32,7 +32,7 @@
+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
+ * order to prevent it from accessing the outer limiting cgroup.
+ */
+-#define CGROUP_NAMESPACE_SUBDIR "cgns"
++#define CGROUP_NAMESPACE_SUBDIR "ns"
+
+ struct lxc_handler;
+ struct lxc_conf;
+--
+2.1.4
+
--- /dev/null
+From 1bdcf98811093349ca856dac4beb3f5bd0dd501b Mon Sep 17 00:00:00 2001
+From: Wolfgang Bumiller <w.bumiller@proxmox.com>
+Date: Fri, 10 Feb 2017 10:23:36 +0100
+Subject: [PATCH 8/8] possibility to run lxc-monitord as a regular daemon
+
+This includes an lxc-monitord.service, required by
+lxc@.service which is now of Type=forking.
+
+Previously the init process' output was dumped into the log
+files since the service used Type=simple and
+StandardOutput/Error=syslog. Using lxc-start's daemon mode,
+on the other hand, used a wait call spawning an lxc-monitord
+in the background, which could potentially stick around
+forever if there were clients connected to it. Since it was
+considered part of the lxc@foo.service unit by systemd, this
+also meant the unit was considered active until not only the
+container but also lxc-monitord exited.
+This is now corrected by creating a separate lxc-monitord
+unit which lxc@.service depends on.
+
+Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
+---
+ config/init/systemd/Makefile.am | 10 +++--
+ config/init/systemd/lxc-monitord.service.in | 12 ++++++
+ config/init/systemd/lxc@.service.in | 7 ++--
+ configure.ac | 1 +
+ lxc.spec.in | 1 +
+ src/lxc/lxc_monitord.c | 60 +++++++++++++++++++++--------
+ 6 files changed, 67 insertions(+), 24 deletions(-)
+ create mode 100644 config/init/systemd/lxc-monitord.service.in
+
+diff --git a/config/init/systemd/Makefile.am b/config/init/systemd/Makefile.am
+index c448850..4a4fde5 100644
+--- a/config/init/systemd/Makefile.am
++++ b/config/init/systemd/Makefile.am
+@@ -2,19 +2,21 @@ EXTRA_DIST = \
+ lxc-apparmor-load \
+ lxc.service.in \
+ lxc@.service.in \
+- lxc-net.service.in
++ lxc-net.service.in \
++ lxc-monitord.service.in
+
+ if INIT_SCRIPT_SYSTEMD
+-BUILT_SOURCES = lxc.service lxc@.service lxc-net.service
++BUILT_SOURCES = lxc.service lxc@.service lxc-net.service lxc-monitord.service
+
+-install-systemd: lxc.service lxc@.service lxc-net.service lxc-apparmor-load
++install-systemd: lxc.service lxc@.service lxc-net.service lxc-monitord.service lxc-apparmor-load
+ $(MKDIR_P) $(DESTDIR)$(SYSTEMD_UNIT_DIR)
+- $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
++ $(INSTALL_DATA) lxc.service lxc@.service lxc-net.service lxc-monitord.service $(DESTDIR)$(SYSTEMD_UNIT_DIR)/
+
+ uninstall-systemd:
+ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc.service
+ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc@.service
+ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-net.service
++ rm -f $(DESTDIR)$(SYSTEMD_UNIT_DIR)/lxc-monitord.service
+ rmdir $(DESTDIR)$(SYSTEMD_UNIT_DIR) || :
+
+ pkglibexec_SCRIPTS = lxc-apparmor-load
+diff --git a/config/init/systemd/lxc-monitord.service.in b/config/init/systemd/lxc-monitord.service.in
+new file mode 100644
+index 0000000..4063516
+--- /dev/null
++++ b/config/init/systemd/lxc-monitord.service.in
+@@ -0,0 +1,12 @@
++[Unit]
++Description=LXC Container Monitoring Daemon
++After=syslog.service network.target
++
++[Service]
++Type=simple
++ExecStart=@LIBEXECDIR@/lxc/lxc-monitord --daemon
++StandardOutput=syslog
++StandardError=syslog
++
++[Install]
++WantedBy=multi-user.target
+diff --git a/config/init/systemd/lxc@.service.in b/config/init/systemd/lxc@.service.in
+index 6b8b5ff..ffb9136 100644
+--- a/config/init/systemd/lxc@.service.in
++++ b/config/init/systemd/lxc@.service.in
+@@ -1,16 +1,17 @@
+ [Unit]
+ Description=LXC Container: %i
+ # This pulls in apparmor, dev-setup, lxc-net
+-After=lxc.service
++After=lxc.service lxc-monitord.service
+ Wants=lxc.service
++Requires = lxc-monitord.service
+ Documentation=man:lxc-start man:lxc
+
+ [Service]
+-Type=simple
++Type=forking
+ KillMode=mixed
+ KillSignal=SIGPWR
+ TimeoutStopSec=120s
+-ExecStart=@BINDIR@/lxc-start -F -n %i
++ExecStart=@BINDIR@/lxc-start -n %i
+ # Environment=BOOTUP=serial
+ # Environment=CONSOLETYPE=serial
+ StandardOutput=syslog
+diff --git a/configure.ac b/configure.ac
+index 42ece7a..c6b2a78 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -694,6 +694,7 @@ AC_CONFIG_FILES([
+ config/init/systemd/lxc.service
+ config/init/systemd/lxc@.service
+ config/init/systemd/lxc-net.service
++ config/init/systemd/lxc-monitord.service
+ config/init/sysvinit/Makefile
+ config/init/sysvinit/lxc-containers
+ config/init/sysvinit/lxc-net
+diff --git a/lxc.spec.in b/lxc.spec.in
+index 0e64907..f35d81c 100644
+--- a/lxc.spec.in
++++ b/lxc.spec.in
+@@ -259,6 +259,7 @@ fi
+ %{_unitdir}/lxc-net.service
+ %{_unitdir}/lxc.service
+ %{_unitdir}/lxc@.service
++%{_unitdir}/lxc-monitord.service
+ %else
+ %{_sysconfdir}/rc.d/init.d/lxc
+ %{_sysconfdir}/rc.d/init.d/lxc-net
+diff --git a/src/lxc/lxc_monitord.c b/src/lxc/lxc_monitord.c
+index 62e2121..ad40dbe 100644
+--- a/src/lxc/lxc_monitord.c
++++ b/src/lxc/lxc_monitord.c
+@@ -344,16 +344,43 @@ static void lxc_monitord_sig_handler(int sig)
+
+ int main(int argc, char *argv[])
+ {
+- int ret, pipefd;
++ int ret, pipefd = -1;
+ char logpath[PATH_MAX];
+ sigset_t mask;
+- char *lxcpath = argv[1];
++ const char *lxcpath = NULL;
+ bool mainloop_opened = false;
+ bool monitord_created = false;
++ bool persistent = false;
+
+- if (argc != 3) {
++ if (argc > 1 && !strcmp(argv[1], "--daemon")) {
++ persistent = true;
++ --argc;
++ ++argv;
++ }
++
++ if (argc > 1) {
++ lxcpath = argv[1];
++ --argc;
++ ++argv;
++ } else {
++ lxcpath = lxc_global_config_value("lxc.lxcpath");
++ if (!lxcpath) {
++ ERROR("Out of memory getting lxcpath");
++ exit(EXIT_FAILURE);
++ }
++ }
++
++ if (argc > 1) {
++ if (lxc_safe_int(argv[1], &pipefd) < 0)
++ exit(EXIT_FAILURE);
++ --argc;
++ ++argv;
++ }
++
++ if (argc != 1 || (persistent != (pipefd == -1))) {
+ fprintf(stderr,
+- "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n"
++ "Usage: lxc-monitord lxcpath sync-pipe-fd\n"
++ " lxc-monitord --daemon lxcpath\n\n"
+ "NOTE: lxc-monitord is intended for use by lxc internally\n"
+ " and does not need to be run by hand\n\n");
+ exit(EXIT_FAILURE);
+@@ -369,9 +396,6 @@ int main(int argc, char *argv[])
+ INFO("Failed to open log file %s, log will be lost.", lxcpath);
+ lxc_log_options_no_override();
+
+- if (lxc_safe_int(argv[2], &pipefd) < 0)
+- exit(EXIT_FAILURE);
+-
+ if (sigfillset(&mask) ||
+ sigdelset(&mask, SIGILL) ||
+ sigdelset(&mask, SIGSEGV) ||
+@@ -403,15 +427,17 @@ int main(int argc, char *argv[])
+ goto on_error;
+ monitord_created = true;
+
+- /* sync with parent, we're ignoring the return from write
+- * because regardless if it works or not, the following
+- * close will sync us with the parent process. the
+- * if-empty-statement construct is to quiet the
+- * warn-unused-result warning.
+- */
+- if (write(pipefd, "S", 1))
+- ;
+- close(pipefd);
++ if (pipefd != -1) {
++ /* sync with parent, we're ignoring the return from write
++ * because regardless if it works or not, the following
++ * close will sync us with the parent process. the
++ * if-empty-statement construct is to quiet the
++ * warn-unused-result warning.
++ */
++ if (write(pipefd, "S", 1))
++ ;
++ close(pipefd);
++ }
+
+ if (lxc_monitord_mainloop_add(&mon)) {
+ ERROR("Failed to add mainloop handlers.");
+@@ -421,7 +447,7 @@ int main(int argc, char *argv[])
+ NOTICE("lxc-monitord with pid %d is now monitoring lxcpath %s.",
+ getpid(), mon.lxcpath);
+ for (;;) {
+- ret = lxc_mainloop(&mon.descr, 1000 * 30);
++ ret = lxc_mainloop(&mon.descr, persistent ? -1 : 1000 * 30);
+ if (mon.clientfds_cnt <= 0) {
+ NOTICE("No remaining clients. lxc-monitord is exiting.");
+ break;
+--
+2.1.4
+
+++ /dev/null
-diff --git a/src/lxc/conf.c b/src/lxc/conf.c
-index 0e3421b..91c90b1 100644
---- a/src/lxc/conf.c
-+++ b/src/lxc/conf.c
-@@ -3083,9 +3083,13 @@ void lxc_delete_network(struct lxc_handler *handler)
- * namespace is destroyed but in case we did not moved the
- * interface to the network namespace, we have to destroy it
- */
-- if (netdev->ifindex != 0 &&
-- lxc_netdev_delete_by_index(netdev->ifindex))
-- WARN("failed to remove interface '%s'", netdev->name);
-+ if (netdev->ifindex != 0) {
-+ int err = lxc_netdev_delete_by_index(netdev->ifindex);
-+ if (err) {
-+ WARN("failed to remove interface '%s': %i: %s", netdev->name,
-+ err, strerror(-err));
-+ }
-+ }
- }
- }
-
+++ /dev/null
-From 77596df581ee381896a5d9a9152c046ff164c65a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
-Date: Wed, 9 Nov 2016 09:14:26 +0100
-Subject: [PATCH] deny rw mounting of /sys and /proc
-
-this would allow root in a privileged container to change
-the permissions of /sys on the host, which could lock out
-non-root users.
-
-if a rw /sys is desired, set "lxc.mount.auto" accordingly
----
- config/apparmor/abstractions/container-base | 6 +++++-
- config/apparmor/abstractions/container-base.in | 6 +++++-
- 2 files changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/config/apparmor/abstractions/container-base b/config/apparmor/abstractions/container-base
-index 06290de..3c64c66 100644
---- a/config/apparmor/abstractions/container-base
-+++ b/config/apparmor/abstractions/container-base
-@@ -84,7 +84,6 @@
- deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
- mount fstype=proc -> /proc/,
- mount fstype=sysfs -> /sys/,
-- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
- deny /sys/firmware/efi/efivars/** rwklx,
- deny /sys/kernel/security/** rwklx,
- mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
-@@ -93,6 +92,11 @@
- # deny reads from debugfs
- deny /sys/kernel/debug/{,**} rwklx,
-
-+ # prevent rw mounting of /sys, because that allows changing its global permissions
-+ deny mount -> /proc/,
-+ deny mount -> /sys/,
-+# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
-+
- # allow paths to be made slave, shared, private or unbindable
- # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
- # mount options=(rw,make-slave) -> **,
-diff --git a/config/apparmor/abstractions/container-base.in b/config/apparmor/abstractions/container-base.in
-index 5bc9b28..482214e 100644
---- a/config/apparmor/abstractions/container-base.in
-+++ b/config/apparmor/abstractions/container-base.in
-@@ -84,7 +84,6 @@
- deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
- mount fstype=proc -> /proc/,
- mount fstype=sysfs -> /sys/,
-- mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
- deny /sys/firmware/efi/efivars/** rwklx,
- deny /sys/kernel/security/** rwklx,
- mount options=(move) /sys/fs/cgroup/cgmanager/ -> /sys/fs/cgroup/cgmanager.lower/,
-@@ -93,6 +92,11 @@
- # deny reads from debugfs
- deny /sys/kernel/debug/{,**} rwklx,
-
-+ # prevent rw mounting of /sys, because that allows changing its global permissions
-+ deny mount -> /proc/,
-+ deny mount -> /sys/,
-+# mount options=(rw, nosuid, nodev, noexec, remount) -> /sys/,
-+
- # allow paths to be made slave, shared, private or unbindable
- # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.
- # mount options=(rw,make-slave) -> **,
---
-2.1.4
-
+++ /dev/null
-Index: new/config/init/systemd/lxc.service.in
-===================================================================
---- new.orig/config/init/systemd/lxc.service.in
-+++ new/config/init/systemd/lxc.service.in
-@@ -1,7 +1,7 @@
- [Unit]
- Description=LXC Container Initialization and Autoboot Code
--After=network.target lxc-net.service
--Wants=lxc-net.service
-+After=syslog.service network.target lxc-net.service
-+Wants=lxc-net.service
- Documentation=man:lxc-autostart man:lxc
-
- [Service]
+++ /dev/null
-This flag requires systemd 218 or newer
-
-Index: new/config/init/systemd/lxc.service.in
-===================================================================
---- new.orig/config/init/systemd/lxc.service.in
-+++ new/config/init/systemd/lxc.service.in
-@@ -12,7 +12,7 @@ ExecStart=@LIBEXECDIR@/lxc/lxc-container
- ExecStop=@LIBEXECDIR@/lxc/lxc-containers stop
- # Environment=BOOTUP=serial
- # Environment=CONSOLETYPE=serial
--Delegate=yes
-+#Delegate=yes
- StandardOutput=syslog
- StandardError=syslog
-
---- new.orig/config/init/systemd/lxc@.service.in
-+++ new/config/init/systemd/lxc@.service.in
-@@ -12,7 +12,7 @@ ExecStart=@LIBEXECDIR@/lxc/lxc-container
- ExecStart=/usr/bin/lxc-start -n %i
- # Environment=BOOTUP=serial
- # Environment=CONSOLETYPE=serial
--Delegate=yes
-+#Delegate=yes
- StandardOutput=syslog
- StandardError=syslog
-
+++ /dev/null
-From 9b5f49f361290267e00665ec9f0bdbfaeda39bc0 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Fri, 23 Dec 2016 15:57:24 +0100
-Subject: [PATCH] rename cgroup namespace directory to ns
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- src/lxc/cgroups/cgroup.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
-index f36c6f0..2c504c8 100644
---- a/src/lxc/cgroups/cgroup.h
-+++ b/src/lxc/cgroups/cgroup.h
-@@ -32,7 +32,7 @@
- * will be moved into an additional subdirectory "cgns/" inside the cgroup in
- * order to prevent it from accessing the outer limiting cgroup.
- */
--#define CGROUP_NAMESPACE_SUBDIR "cgns"
-+#define CGROUP_NAMESPACE_SUBDIR "ns"
-
- struct lxc_handler;
- struct lxc_conf;
---
-2.1.4
-
+++ /dev/null
-Index: new/src/lxc/conf.c
-===================================================================
---- new.orig/src/lxc/conf.c
-+++ new/src/lxc/conf.c
-@@ -2691,8 +2691,13 @@ static int instantiate_veth(struct lxc_h
- "veth", veth1, (char*) NULL);
- if (err)
- goto out_delete;
-+ } else if (!netdev->link) {
-+ err = run_script(handler->name, "net", "/usr/share/lxc/lxcnetaddbr", "up",
-+ "veth", veth1, (char*) NULL);
-+ if (err)
-+ goto out_delete;
- }
--
-+
- DEBUG("instantiated veth '%s/%s', index is '%d'",
- veth1, veth2, netdev->ifindex);
-
-fix-systemd-service-depends.patch
-use-forking-systemd-service.patch
-remove-systemd-delegate-flag.patch
-run-lxcnetaddbr.patch
-deny-rw-mounting-of-sys-and-proc.patch
-0001-separate-the-limiting-from-the-namespaced-cgroup-roo.patch
-0002-start-initutils-make-cgroupns-separation-level-confi.patch
-rename-cgns-subdir-to-ns.patch
+0001-lxc.service-start-after-a-potential-syslog.service.patch
+0002-jessie-systemd-remove-Delegate-flag-to-silence-warni.patch
+0003-pve-run-lxcnetaddbr-when-instantiating-veths.patch
+0004-deny-rw-mounting-of-sys-and-proc.patch
+0005-separate-the-limiting-from-the-namespaced-cgroup-roo.patch
+0006-start-initutils-make-cgroupns-separation-level-confi.patch
+0007-rename-cgroup-namespace-directory-to-ns.patch
+0008-possibility-to-run-lxc-monitord-as-a-regular-daemon.patch
+++ /dev/null
-From ba028c2a3f73eec5e45842cc742a20471ee0d921 Mon Sep 17 00:00:00 2001
-From: Wolfgang Bumiller <w.bumiller@proxmox.com>
-Date: Thu, 2 Feb 2017 11:15:22 +0100
-Subject: [PATCH] init: systemd: use forking mode in lxc@.service
-
-To avoid dumping the entire console outputs into the
-logs
-
-Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
----
- config/init/systemd/lxc@.service.in | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/config/init/systemd/lxc@.service.in b/config/init/systemd/lxc@.service.in
-index 44d11e8..52c6a27 100644
---- a/config/init/systemd/lxc@.service.in
-+++ b/config/init/systemd/lxc@.service.in
-@@ -6,11 +6,11 @@ Wants=lxc.service
- Documentation=man:lxc-start man:lxc
-
- [Service]
--Type=simple
-+Type=forking
- KillMode=mixed
- KillSignal=SIGPWR
- TimeoutStopSec=120s
--ExecStart=@BINDIR@/lxc-start -F -n %i
-+ExecStart=@BINDIR@/lxc-start -n %i
- # Environment=BOOTUP=serial
- # Environment=CONSOLETYPE=serial
- Delegate=yes
---
-2.1.4
-