]> git.proxmox.com Git - lxc.git/blame - debian/patches/pve/0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
fix issues with shell detection on attach
[lxc.git] / debian / patches / pve / 0004-PVE-Up-separate-the-limiting-from-the-namespaced-cgr.patch
CommitLineData
1513a0b5 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
308c8a3e 2From: Wolfgang Bumiller <w.bumiller@proxmox.com>
1513a0b5 3Date: Wed, 28 Mar 2018 13:37:28 +0200
2d8021b3
WB
4Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup
5 root
308c8a3e
WB
6
7When cgroup namespaces are enabled, a privileged container
8with mixed cgroups has full write access to its own root
9cgroup, effectively allowing it to overwrite values written
10from the outside or configured via lxc.cgroup.*.
11
12This patch causes an additional 'cgns/' directory to be
13created in all cgroups if cgroup namespaces and cgfsng are
14being used in order to combat this.
15
16Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
17---
8a25e884
WB
18 src/lxc/cgroups/cgfsng.c | 94 +++++++++++++++++++++++++++++++++-------
19 src/lxc/cgroups/cgroup.h | 18 ++++++--
20 src/lxc/commands.c | 87 ++++++++++++++++++++++++++++---------
21 src/lxc/commands.h | 2 +
22 src/lxc/criu.c | 4 +-
23 src/lxc/start.c | 28 +++++++++---
834bb4d7 24 6 files changed, 183 insertions(+), 50 deletions(-)
308c8a3e 25
308c8a3e 26diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
8a25e884 27index ab99b47c5..ac8f469bb 100644
308c8a3e
WB
28--- a/src/lxc/cgroups/cgfsng.c
29+++ b/src/lxc/cgroups/cgfsng.c
834bb4d7 30@@ -818,6 +818,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
07288e64 31 new->mountpoint = mountpoint;
834bb4d7
WB
32 new->container_base_path = container_base_path;
33 new->container_full_path = NULL;
34+ new->container_inner_path = NULL;
35 new->monitor_full_path = NULL;
1513a0b5 36 new->version = type;
07288e64 37
834bb4d7 38@@ -1059,6 +1060,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies,
1513a0b5 39
834bb4d7
WB
40 free(h->container_full_path);
41 h->container_full_path = NULL;
2d8021b3 42+
834bb4d7
WB
43+ free(h->container_inner_path);
44+ h->container_inner_path = NULL;
07288e64
WB
45 }
46
1513a0b5 47 return 0;
834bb4d7 48@@ -1070,6 +1074,7 @@ struct generic_userns_exec_data {
1513a0b5
WB
49 struct lxc_conf *conf;
50 uid_t origuid; /* target uid in parent namespace */
51 char *path;
52+ bool inner;
53 };
54
55 static int cgroup_rmdir_wrapper(void *data)
834bb4d7 56@@ -1112,6 +1117,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
2d8021b3
WB
57 wrap.container_cgroup = ops->container_cgroup;
58 wrap.hierarchies = ops->hierarchies;
59 wrap.conf = handler->conf;
1513a0b5
WB
60+ wrap.inner = false;
61
2d8021b3
WB
62 if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
63 ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
834bb4d7
WB
64@@ -1323,17 +1329,26 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
65 return cg_unified_create_cgroup(h, cgname);
308c8a3e
WB
66 }
67
834bb4d7
WB
68-static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
69+static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
308c8a3e 70 {
1513a0b5 71 int ret;
2d8021b3 72+ char *path;
1513a0b5 73
1513a0b5
WB
74- if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
75+ if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
76 ERROR("Failed to handle legacy cpuset controller");
07288e64 77 return false;
da73bbc6 78 }
308c8a3e 79
834bb4d7
WB
80- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
81- ret = mkdir_eexist_on_last(h->container_full_path, 0755);
82+ if (inner) {
83+ path = must_make_path(h->container_full_path, CGROUP_NAMESPACE_SUBDIR, NULL);
84+ h->container_inner_path = path;
85+ ret = mkdir(path, 0755);
86+ } else {
87+ path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
88+ h->container_full_path = path;
89+ ret = mkdir_eexist_on_last(path, 0755);
90+ }
91+
1513a0b5 92 if (ret < 0) {
834bb4d7 93 ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
1513a0b5 94 return false;
834bb4d7
WB
95@@ -1425,11 +1440,29 @@ on_error:
96 return bret;
f39a178a
WB
97 }
98
2d8021b3 99+static inline bool cgfsng_create_inner(struct cgroup_ops *ops)
f39a178a
WB
100+{
101+ size_t i;
102+ bool ret = true;
2d8021b3
WB
103+ char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
104+ for (i = 0; ops->hierarchies[i]; i++) {
834bb4d7 105+ if (!container_create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) {
2d8021b3 106+ SYSERROR("Failed to create %s namespace subdirectory: %s",
834bb4d7 107+ ops->hierarchies[i]->container_full_path, strerror(errno));
f39a178a
WB
108+ ret = false;
109+ break;
110+ }
111+ }
112+ free(cgname);
113+ return ret;
114+}
115+
1513a0b5
WB
116 /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
117 * next cgroup_pattern-1, -2, ..., -999.
308c8a3e 118 */
834bb4d7
WB
119 __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
120- struct lxc_handler *handler)
121+ struct lxc_handler *handler,
122+ bool inner)
308c8a3e 123 {
f39a178a
WB
124 int i;
125 size_t len;
834bb4d7 126@@ -1438,10 +1471,17 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
2d8021b3 127 struct lxc_conf *conf = handler->conf;
f39a178a 128
2d8021b3 129 if (ops->container_cgroup) {
308c8a3e 130+ if (inner)
2d8021b3
WB
131+ return cgfsng_create_inner(ops);
132 WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
308c8a3e
WB
133 return false;
134 }
1513a0b5 135
308c8a3e 136+ if (inner) {
2d8021b3 137+ ERROR("cgfsng_create called twice for inner cgroup");
308c8a3e
WB
138+ return false;
139+ }
1513a0b5 140+
2d8021b3
WB
141 if (!conf)
142 return false;
143
834bb4d7 144@@ -1482,7 +1522,7 @@ again:
f39a178a 145 }
1513a0b5 146
2d8021b3 147 for (i = 0; ops->hierarchies[i]; i++) {
834bb4d7
WB
148- if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
149+ if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) {
150 ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path);
151 free(ops->hierarchies[i]->container_full_path);
152 ops->hierarchies[i]->container_full_path = NULL;
153@@ -1505,7 +1545,8 @@ out_free:
308c8a3e
WB
154 }
155
834bb4d7
WB
156 __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
157- bool monitor)
158+ bool monitor,
159+ bool inner)
308c8a3e 160 {
834bb4d7
WB
161 int len;
162 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
163@@ -1521,6 +1562,9 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
164 if (monitor)
165 path = must_make_path(ops->hierarchies[i]->monitor_full_path,
166 "cgroup.procs", NULL);
167+ else if (inner)
168+ path = must_make_path(ops->hierarchies[i]->container_inner_path,
169+ "cgroup.procs", NULL);
170 else
171 path = must_make_path(ops->hierarchies[i]->container_full_path,
172 "cgroup.procs", NULL);
173@@ -1538,12 +1582,12 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
174
175 __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
176 {
177- return __do_cgroup_enter(ops, pid, true);
178+ return __do_cgroup_enter(ops, pid, true, false);
179 }
308c8a3e 180
834bb4d7
WB
181-static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
182+static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid, bool inner)
183 {
184- return __do_cgroup_enter(ops, pid, false);
185+ return __do_cgroup_enter(ops, pid, false, inner);
186 }
187
188 static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
189@@ -1609,9 +1653,15 @@ static int chown_cgroup_wrapper(void *data)
1513a0b5 190 char *fullpath;
834bb4d7 191 char *path = arg->hierarchies[i]->container_full_path;
308c8a3e
WB
192
193+ if (arg->inner)
02a2999d 194+ path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
308c8a3e 195+
1513a0b5
WB
196 ret = chowmod(path, destuid, nsgid, 0775);
197- if (ret < 0)
198+ if (ret < 0) {
308c8a3e
WB
199+ if (arg->inner)
200+ free(path);
201 return -1;
1513a0b5
WB
202+ }
203
204 /* Failures to chown() these are inconvenient but not
205 * detrimental We leave these owned by the container launcher,
834bb4d7 206@@ -1630,8 +1680,11 @@ static int chown_cgroup_wrapper(void *data)
2d8021b3 207 (void)chowmod(fullpath, destuid, nsgid, 0664);
1513a0b5 208 free(fullpath);
308c8a3e 209
2d8021b3
WB
210- if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
211+ if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) {
308c8a3e
WB
212+ if (arg->inner)
213+ free(path);
1513a0b5
WB
214 continue;
215+ }
308c8a3e 216
1513a0b5
WB
217 fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
218 (void)chowmod(fullpath, destuid, nsgid, 0664);
834bb4d7 219@@ -1640,13 +1693,17 @@ static int chown_cgroup_wrapper(void *data)
1513a0b5
WB
220 fullpath = must_make_path(path, "cgroup.threads", NULL);
221 (void)chowmod(fullpath, destuid, nsgid, 0664);
308c8a3e 222 free(fullpath);
2d8021b3 223+
f39a178a
WB
224+ if (arg->inner)
225+ free(path);
308c8a3e
WB
226 }
227
228 return 0;
229 }
230
834bb4d7
WB
231 __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
232- struct lxc_conf *conf)
233+ struct lxc_conf *conf,
234+ bool inner)
308c8a3e 235 {
1513a0b5 236 struct generic_userns_exec_data wrap;
2d8021b3 237
834bb4d7 238@@ -1657,6 +1714,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
1513a0b5 239 wrap.path = NULL;
2d8021b3 240 wrap.hierarchies = ops->hierarchies;
1513a0b5 241 wrap.conf = conf;
308c8a3e
WB
242+ wrap.inner = inner;
243
f39a178a
WB
244 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
245 "chown_cgroup_wrapper") < 0) {
834bb4d7 246@@ -2038,7 +2096,8 @@ __cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
07288e64
WB
247 }
248
834bb4d7
WB
249 __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
250- const char *controller)
251+ const char *controller,
252+ bool inner)
07288e64 253 {
1513a0b5
WB
254 struct hierarchy *h;
255
834bb4d7 256@@ -2049,6 +2108,9 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
07288e64 257 return NULL;
1513a0b5 258 }
07288e64 259
2d8021b3 260+ if (inner)
834bb4d7 261+ return h->container_inner_path ? h->container_inner_path + strlen(h->mountpoint) : NULL;
07288e64 262+
834bb4d7 263 return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
07288e64
WB
264 }
265
834bb4d7 266@@ -2080,7 +2142,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
1513a0b5
WB
267 int fret = -1, idx = 0;
268 char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
07288e64 269
1513a0b5
WB
270- container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
271+ container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller);
272 /* not running */
273 if (!container_cgroup)
274 return 0;
834bb4d7 275@@ -2161,7 +2223,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
2d8021b3
WB
276 continue;
277 }
f39a178a 278
2d8021b3
WB
279- path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
280+ path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]);
281 /* not running */
282 if (!path)
283 continue;
308c8a3e 284diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
8a25e884 285index d4dcd506b..59445b5a5 100644
308c8a3e
WB
286--- a/src/lxc/cgroups/cgroup.h
287+++ b/src/lxc/cgroups/cgroup.h
834bb4d7
WB
288@@ -32,6 +32,12 @@
289 #define MONITOR_CGROUP "lxc.monitor"
290 #define PIVOT_CGROUP "lxc.pivot"
02a2999d
WB
291
292+/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace
293+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
294+ * order to prevent it from accessing the outer limiting cgroup.
295+ */
296+#define CGROUP_NAMESPACE_SUBDIR "cgns"
297+
298 struct lxc_handler;
299 struct lxc_conf;
300 struct lxc_list;
834bb4d7
WB
301@@ -72,6 +78,9 @@ typedef enum {
302 * @monitor_full_path
303 * - The full path to the monitor's cgroup.
2d8021b3 304 *
834bb4d7 305+ * @container_inner_path
2d8021b3
WB
306+ * - The full path to the container's inner cgroup when protect_limits is used.
307+ *
308 * @version
309 * - legacy hierarchy
310 * If the hierarchy is a legacy hierarchy this will be set to
834bb4d7 311@@ -85,6 +94,7 @@ struct hierarchy {
2d8021b3 312 char *mountpoint;
834bb4d7
WB
313 char *container_base_path;
314 char *container_full_path;
315+ char *container_inner_path;
316 char *monitor_full_path;
2d8021b3
WB
317 int version;
318 };
834bb4d7
WB
319@@ -139,9 +149,9 @@ struct cgroup_ops {
320 void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
321 bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
322 bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid);
323- bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler);
324- bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid);
2d8021b3 325- const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
834bb4d7
WB
326+ bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner);
327+ bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid, bool inner);
2d8021b3 328+ const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner);
834bb4d7 329 bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf);
2d8021b3
WB
330 int (*num_hierarchies)(struct cgroup_ops *ops);
331 bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
834bb4d7 332@@ -152,7 +162,7 @@ struct cgroup_ops {
2d8021b3
WB
333 bool (*unfreeze)(struct cgroup_ops *ops);
334 bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
335 bool with_devices);
336- bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
337+ bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner);
338 bool (*attach)(struct cgroup_ops *ops, const char *name,
339 const char *lxcpath, pid_t pid);
340 bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
07288e64 341diff --git a/src/lxc/commands.c b/src/lxc/commands.c
8a25e884 342index 133384d72..b41a76000 100644
07288e64
WB
343--- a/src/lxc/commands.c
344+++ b/src/lxc/commands.c
834bb4d7 345@@ -427,20 +427,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
02a2999d
WB
346 return lxc_cmd_rsp_send(fd, &rsp);
347 }
07288e64 348
02a2999d
WB
349-/*
350- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
351- * particular subsystem. This is the cgroup path relative to the root
352- * of the cgroup filesystem.
353- *
354- * @name : name of container to connect to
355- * @lxcpath : the lxcpath in which the container is running
356- * @subsystem : the subsystem being asked about
357- *
358- * Returns the path on success, NULL on failure. The caller must free() the
359- * returned path.
360- */
361-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
1513a0b5
WB
362- const char *subsystem)
363+char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
364+ const char *subsystem, bool inner)
02a2999d
WB
365 {
366 int ret, stopped;
02a2999d 367 struct lxc_cmd_rr cmd = {
834bb4d7 368@@ -453,8 +441,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
1513a0b5
WB
369
370 cmd.req.data = subsystem;
371 cmd.req.datalen = 0;
372- if (subsystem)
373- cmd.req.datalen = strlen(subsystem) + 1;
374+ if (subsystem) {
375+ size_t subsyslen = strlen(subsystem);
376+ if (inner) {
377+ char *data = alloca(subsyslen+2);
378+ memcpy(data, subsystem, subsyslen+1);
379+ data[subsyslen+1] = 1;
380+ cmd.req.datalen = subsyslen+2,
381+ cmd.req.data = data;
382+ } else {
383+ cmd.req.datalen = subsyslen+1;
384+ }
02a2999d 385+ }
1513a0b5 386
02a2999d 387 ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
1513a0b5 388 if (ret < 0)
834bb4d7 389@@ -469,6 +467,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
02a2999d
WB
390 return cmd.rsp.data;
391 }
392
07288e64 393+/*
02a2999d
WB
394+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
395+ * particular subsystem. This is the cgroup path relative to the root
07288e64
WB
396+ * of the cgroup filesystem.
397+ *
398+ * @name : name of container to connect to
399+ * @lxcpath : the lxcpath in which the container is running
400+ * @subsystem : the subsystem being asked about
401+ *
402+ * Returns the path on success, NULL on failure. The caller must free() the
403+ * returned path.
404+ */
02a2999d 405+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
07288e64
WB
406+ const char *subsystem)
407+{
02a2999d 408+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
07288e64 409+}
f39a178a
WB
410+
411+/*
412+ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
413+ * for a particular subsystem. This is the cgroup path relative to the root
414+ * of the cgroup filesystem.
415+ *
416+ * @name : name of container to connect to
417+ * @lxcpath : the lxcpath in which the container is running
418+ * @subsystem : the subsystem being asked about
419+ *
420+ * Returns the path on success, NULL on failure. The caller must free() the
421+ * returned path.
422+ */
423+char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
424+ const char *subsystem)
425+{
426+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
427+}
07288e64 428+
02a2999d
WB
429 static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
430 struct lxc_handler *handler)
431 {
834bb4d7 432@@ -476,10 +510,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
1513a0b5 433 struct lxc_cmd_rsp rsp;
2d8021b3 434 struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
02a2999d 435
1513a0b5 436- if (req->datalen > 0)
2d8021b3 437- path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
1513a0b5 438- else
2d8021b3 439- path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
1513a0b5
WB
440+ if (req->datalen > 0) {
441+ const char *subsystem;
442+ size_t subsyslen;
443+ bool inner = false;
444+ subsystem = req->data;
445+ subsyslen = strlen(subsystem);
446+ if (req->datalen == subsyslen+2)
447+ inner = (subsystem[subsyslen+1] == 1);
07288e64 448+
2d8021b3 449+ path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner);
1513a0b5
WB
450+ } else {
451+ // FIXME: cgroup separation for cgroup v2 cannot be handled
452+ // like we used to do v1 here... need to figure this out...
2d8021b3 453+ path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false);
1513a0b5 454+ }
07288e64
WB
455 if (!path)
456 return -1;
1513a0b5 457
834bb4d7
WB
458@@ -651,7 +696,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
459 * lxc_unfreeze() would do another cmd (GET_CGROUP) which would
460 * deadlock us.
461 */
462- if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer"))
463+ if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer", false))
464 return 0;
465
466 if (cgroup_ops->unfreeze(cgroup_ops))
07288e64 467diff --git a/src/lxc/commands.h b/src/lxc/commands.h
8a25e884 468index 2c024b65d..7c4c00b1e 100644
07288e64
WB
469--- a/src/lxc/commands.h
470+++ b/src/lxc/commands.h
834bb4d7 471@@ -88,6 +88,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
07288e64
WB
472 */
473 extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
474 const char *subsystem);
475+extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
476+ const char *lxcpath, const char *subsystem);
477 extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
478 extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
479 extern char *lxc_cmd_get_name(const char *hashed_sock);
308c8a3e 480diff --git a/src/lxc/criu.c b/src/lxc/criu.c
8a25e884 481index 3d857b541..ec9bcb7e4 100644
308c8a3e
WB
482--- a/src/lxc/criu.c
483+++ b/src/lxc/criu.c
834bb4d7 484@@ -332,7 +332,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
07288e64
WB
485 } else {
486 const char *p;
487
2d8021b3
WB
488- p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
489+ p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false);
07288e64
WB
490 if (!p) {
491 ERROR("failed to get cgroup path for %s", controllers[0]);
492 goto err;
834bb4d7 493@@ -976,7 +976,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
308c8a3e 494 goto out_fini_handler;
2d8021b3 495 handler->cgroup_ops = cgroup_ops;
308c8a3e 496
834bb4d7
WB
497- if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
498+ if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
308c8a3e
WB
499 ERROR("failed creating groups");
500 goto out_fini_handler;
501 }
502diff --git a/src/lxc/start.c b/src/lxc/start.c
8a25e884 503index dae3bcfe5..f3b29d6cd 100644
308c8a3e
WB
504--- a/src/lxc/start.c
505+++ b/src/lxc/start.c
834bb4d7 506@@ -1649,7 +1649,7 @@ static int lxc_spawn(struct lxc_handler *handler)
2d8021b3
WB
507 }
508 }
308c8a3e 509
834bb4d7
WB
510- if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
511+ if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) {
1513a0b5 512 ERROR("Failed creating cgroups");
308c8a3e
WB
513 goto out_delete_net;
514 }
834bb4d7 515@@ -1743,10 +1743,10 @@ static int lxc_spawn(struct lxc_handler *handler)
308c8a3e
WB
516 goto out_delete_net;
517 }
518
834bb4d7
WB
519- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
520+ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, false))
308c8a3e
WB
521 goto out_delete_net;
522
2d8021b3
WB
523- if (!cgroup_ops->chown(cgroup_ops, handler->conf))
524+ if (!cgroup_ops->chown(cgroup_ops, handler->conf, false))
308c8a3e
WB
525 goto out_delete_net;
526
1513a0b5 527 /* Now we're ready to preserve the network namespace */
834bb4d7 528@@ -1813,16 +1813,30 @@ static int lxc_spawn(struct lxc_handler *handler)
1513a0b5 529 }
bc7e56ac
WB
530 }
531
1513a0b5
WB
532- ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
533- if (ret < 0)
bc7e56ac
WB
534- goto out_delete_net;
535-
2d8021b3 536 if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
1513a0b5 537 ERROR("Failed to setup legacy device cgroup controller limits");
bc7e56ac 538 goto out_delete_net;
308c8a3e 539 }
1513a0b5 540 TRACE("Set up legacy device cgroup controller limits");
308c8a3e
WB
541
542+ if (cgns_supported()) {
834bb4d7 543+ if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) {
308c8a3e
WB
544+ ERROR("failed to create inner cgroup separation layer");
545+ goto out_delete_net;
546+ }
834bb4d7 547+ if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) {
308c8a3e
WB
548+ ERROR("failed to enter inner cgroup separation layer");
549+ goto out_delete_net;
550+ }
2d8021b3 551+ if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) {
308c8a3e
WB
552+ ERROR("failed chown inner cgroup separation layer");
553+ goto out_delete_net;
554+ }
555+ }
bc7e56ac
WB
556+
557+ if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE))
558+ goto out_delete_net;
308c8a3e 559+
2d8021b3
WB
560 if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
561 /* Now we're ready to preserve the cgroup namespace */
562 ret = lxc_try_preserve_ns(handler->pid, "cgroup");
308c8a3e 563--
8a25e884 5642.20.1
308c8a3e 565