]> git.proxmox.com Git - lxc.git/blame - debian/patches/0004-separate-the-limiting-from-the-namespaced-cgroup-roo.patch
bump version to 3.0.0-3
[lxc.git] / debian / patches / 0004-separate-the-limiting-from-the-namespaced-cgroup-roo.patch
CommitLineData
1513a0b5 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
308c8a3e 2From: Wolfgang Bumiller <w.bumiller@proxmox.com>
1513a0b5
WB
3Date: Wed, 28 Mar 2018 13:37:28 +0200
4Subject: [PATCH] separate the limiting from the namespaced cgroup root
308c8a3e
WB
5
6When cgroup namespaces are enabled, a privileged container
7with mixed cgroups has full write access to its own root
8cgroup, effectively allowing it to overwrite values written
9from the outside or configured via lxc.cgroup.*.
10
11To combat this, this patch causes an additional 'cgns/'
12directory to be created in all cgroups when cgroup
13namespaces and cgfsng are being used.
14
15Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
16---
1513a0b5
WB
17 src/lxc/cgroups/cgfsng.c | 88 +++++++++++++++++++++++++++++++++++++++---------
18 src/lxc/cgroups/cgroup.c | 17 +++++-----
19 src/lxc/cgroups/cgroup.h | 23 ++++++++-----
20 src/lxc/commands.c | 85 +++++++++++++++++++++++++++++++++++-----------
21 src/lxc/commands.h | 2 ++
22 src/lxc/criu.c | 4 +--
23 src/lxc/start.c | 28 +++++++++++----
24 7 files changed, 186 insertions(+), 61 deletions(-)
308c8a3e 25
308c8a3e 26diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
1513a0b5 27index 10c7ab2c..b48f997f 100644
308c8a3e
WB
28--- a/src/lxc/cgroups/cgfsng.c
29+++ b/src/lxc/cgroups/cgfsng.c
1513a0b5 30@@ -101,6 +101,7 @@ struct hierarchy {
07288e64
WB
31 char *mountpoint;
32 char *base_cgroup;
33 char *fullcgpath;
34+ char *innercgpath;
1513a0b5 35 int version;
07288e64
WB
36 };
37
1513a0b5 38@@ -955,6 +956,7 @@ static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
07288e64
WB
39 new->mountpoint = mountpoint;
40 new->base_cgroup = base_cgroup;
41 new->fullcgpath = NULL;
f39a178a 42+ new->innercgpath = NULL;
1513a0b5 43 new->version = type;
07288e64 44
1513a0b5
WB
45 newentry = append_null_to_list((void ***)&hierarchies);
46@@ -1587,6 +1589,8 @@ static int cgroup_rmdir(char *container_cgroup)
47
48 free(h->fullcgpath);
49 h->fullcgpath = NULL;
50+ free(h->innercgpath);
51+ h->innercgpath = NULL;
07288e64
WB
52 }
53
1513a0b5
WB
54 return 0;
55@@ -1597,6 +1601,7 @@ struct generic_userns_exec_data {
56 struct lxc_conf *conf;
57 uid_t origuid; /* target uid in parent namespace */
58 char *path;
59+ bool inner;
60 };
61
62 static int cgroup_rmdir_wrapper(void *data)
63@@ -1641,6 +1646,7 @@ static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
64 wrap.origuid = 0;
65 wrap.d = hdata;
66 wrap.conf = conf;
67+ wrap.inner = false;
68
69 if (conf && !lxc_list_empty(&conf->id_map))
70 ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap,
71@@ -1730,22 +1736,29 @@ on_error:
72 return bret;
308c8a3e
WB
73 }
74
75-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
76+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner)
77 {
1513a0b5
WB
78 int ret;
79
308c8a3e 80- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
1513a0b5
WB
81- if (dir_exists(h->fullcgpath)) {
82- ERROR("The cgroup \"%s\" already existed", h->fullcgpath);
308c8a3e
WB
83+ char *path;
84+ if (inner) {
02a2999d 85+ path = must_make_path(h->fullcgpath, CGROUP_NAMESPACE_SUBDIR, NULL);
07288e64 86+ h->innercgpath = path;
308c8a3e
WB
87+ } else {
88+ path = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
89+ h->fullcgpath = path;
07288e64
WB
90+ }
91+ if (dir_exists(path)) { // it must not already exist
92+ ERROR("Path \"%s\" already existed.", path);
93 return false;
94 }
1513a0b5
WB
95
96- if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
97+ if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
98 ERROR("Failed to handle legacy cpuset controller");
07288e64 99 return false;
da73bbc6 100 }
308c8a3e 101
1513a0b5
WB
102- ret = mkdir_p(h->fullcgpath, 0755);
103+ ret = mkdir_p(path, 0755);
104 if (ret < 0) {
105 ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
106 return false;
107@@ -1766,10 +1779,26 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
f39a178a
WB
108 h->fullcgpath = NULL;
109 }
110
111+static inline bool cgfsng_create_inner(struct cgfsng_handler_data *d)
112+{
113+ size_t i;
114+ bool ret = true;
115+ char *cgname = must_make_path(d->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL);
116+ for (i = 0; hierarchies[i]; i++) {
117+ if (!create_path_for_hierarchy(hierarchies[i], cgname, true)) {
118+ SYSERROR("Failed to create %s namespace subdirectory: %s", hierarchies[i]->fullcgpath, strerror(errno));
119+ ret = false;
120+ break;
121+ }
122+ }
123+ free(cgname);
124+ return ret;
125+}
126+
1513a0b5
WB
127 /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
128 * next cgroup_pattern-1, -2, ..., -999.
308c8a3e
WB
129 */
130-static inline bool cgfsng_create(void *hdata)
308c8a3e
WB
131+static inline bool cgfsng_create(void *hdata, bool inner)
132 {
f39a178a
WB
133 int i;
134 size_t len;
1513a0b5 135@@ -1781,10 +1810,17 @@ static inline bool cgfsng_create(void *hdata)
308c8a3e 136 return false;
f39a178a 137
308c8a3e
WB
138 if (d->container_cgroup) {
139+ if (inner)
140+ return cgfsng_create_inner(d);
141 WARN("cgfsng_create called a second time");
142 return false;
143 }
1513a0b5 144
308c8a3e
WB
145+ if (inner) {
146+ ERROR("cgfsng_create called twice for innner cgroup");
147+ return false;
148+ }
1513a0b5 149+
f39a178a
WB
150 if (d->cgroup_meta.dir)
151 tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false);
1513a0b5
WB
152 else
153@@ -1821,7 +1857,7 @@ again:
f39a178a 154 }
1513a0b5 155
308c8a3e 156 for (i = 0; hierarchies[i]; i++) {
1513a0b5
WB
157- if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) {
158+ if (!create_path_for_hierarchy(hierarchies[i], container_cgroup, false)) {
308c8a3e 159 int j;
1513a0b5 160 ERROR("Failed to create cgroup \"%s\"", hierarchies[i]->fullcgpath);
308c8a3e 161 free(hierarchies[i]->fullcgpath);
1513a0b5 162@@ -1843,7 +1879,7 @@ out_free:
308c8a3e
WB
163 return false;
164 }
165
166-static bool cgfsng_enter(void *hdata, pid_t pid)
308c8a3e
WB
167+static bool cgfsng_enter(void *hdata, pid_t pid, bool inner)
168 {
308c8a3e 169 int i, len;
1513a0b5
WB
170 char pidstr[25];
171@@ -1856,8 +1892,13 @@ static bool cgfsng_enter(void *hdata, pid_t pid)
172 int ret;
173 char *fullpath;
308c8a3e 174
1513a0b5
WB
175- fullpath = must_make_path(hierarchies[i]->fullcgpath,
176- "cgroup.procs", NULL);
308c8a3e 177+ if (inner)
02a2999d 178+ fullpath = must_make_path(hierarchies[i]->fullcgpath,
1513a0b5
WB
179+ CGROUP_NAMESPACE_SUBDIR,
180+ "cgroup.procs", NULL);
308c8a3e
WB
181+ else
182+ fullpath = must_make_path(hierarchies[i]->fullcgpath,
1513a0b5
WB
183+ "cgroup.procs", NULL);
184 ret = lxc_write_to_file(fullpath, pidstr, len, false);
185 if (ret != 0) {
186 SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
187@@ -1933,9 +1974,15 @@ static int chown_cgroup_wrapper(void *data)
188 char *fullpath;
189 char *path = hierarchies[i]->fullcgpath;
308c8a3e
WB
190
191+ if (arg->inner)
02a2999d 192+ path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL);
308c8a3e 193+
1513a0b5
WB
194 ret = chowmod(path, destuid, nsgid, 0775);
195- if (ret < 0)
196+ if (ret < 0) {
308c8a3e
WB
197+ if (arg->inner)
198+ free(path);
199 return -1;
1513a0b5
WB
200+ }
201
202 /* Failures to chown() these are inconvenient but not
203 * detrimental We leave these owned by the container launcher,
204@@ -1954,8 +2001,11 @@ static int chown_cgroup_wrapper(void *data)
205 (void)chowmod(fullpath, destuid, 0, 0664);
206 free(fullpath);
308c8a3e 207
1513a0b5
WB
208- if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
209+ if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC) {
308c8a3e
WB
210+ if (arg->inner)
211+ free(path);
1513a0b5
WB
212 continue;
213+ }
308c8a3e 214
1513a0b5
WB
215 fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
216 (void)chowmod(fullpath, destuid, nsgid, 0664);
217@@ -1964,12 +2014,14 @@ static int chown_cgroup_wrapper(void *data)
218 fullpath = must_make_path(path, "cgroup.threads", NULL);
219 (void)chowmod(fullpath, destuid, nsgid, 0664);
308c8a3e 220 free(fullpath);
f39a178a
WB
221+ if (arg->inner)
222+ free(path);
308c8a3e
WB
223 }
224
225 return 0;
226 }
227
1513a0b5
WB
228-static bool cgfsng_chown(void *hdata, struct lxc_conf *conf)
229+static bool cgfsng_chown(void *hdata, struct lxc_conf *conf, bool inner)
308c8a3e
WB
230 {
231 struct cgfsng_handler_data *d = hdata;
1513a0b5
WB
232 struct generic_userns_exec_data wrap;
233@@ -1984,6 +2036,7 @@ static bool cgfsng_chown(void *hdata, struct lxc_conf *conf)
234 wrap.path = NULL;
308c8a3e 235 wrap.d = d;
1513a0b5 236 wrap.conf = conf;
308c8a3e
WB
237+ wrap.inner = inner;
238
f39a178a
WB
239 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
240 "chown_cgroup_wrapper") < 0) {
1513a0b5 241@@ -2366,7 +2419,7 @@ static bool cgfsng_unfreeze(void *hdata)
07288e64
WB
242 return true;
243 }
244
1513a0b5
WB
245-static const char *cgfsng_get_cgroup(void *hdata, const char *controller)
246+static const char *cgfsng_get_cgroup(void *hdata, const char *controller, bool inner)
07288e64 247 {
1513a0b5
WB
248 struct hierarchy *h;
249
250@@ -2377,6 +2430,9 @@ static const char *cgfsng_get_cgroup(void *hdata, const char *controller)
07288e64 251 return NULL;
1513a0b5 252 }
07288e64
WB
253
254+ if (inner && h->innercgpath)
255+ return h->innercgpath + strlen(h->mountpoint);
256+
257 return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
258 }
259
1513a0b5
WB
260@@ -2408,7 +2464,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
261 int fret = -1, idx = 0;
262 char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
07288e64 263
1513a0b5
WB
264- container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
265+ container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller);
266 /* not running */
267 if (!container_cgroup)
268 return 0;
308c8a3e 269diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
1513a0b5 270index 9e7b26e0..ec45dd39 100644
308c8a3e
WB
271--- a/src/lxc/cgroups/cgroup.c
272+++ b/src/lxc/cgroups/cgroup.c
1513a0b5 273@@ -73,19 +73,19 @@ void cgroup_destroy(struct lxc_handler *handler)
308c8a3e
WB
274 }
275
f39a178a 276 /* Create the container cgroups for all requested controllers. */
308c8a3e
WB
277-bool cgroup_create(struct lxc_handler *handler)
278+bool cgroup_create(struct lxc_handler *handler, bool inner)
279 {
280 if (ops)
281- return ops->create(handler->cgroup_data);
282+ return ops->create(handler->cgroup_data, inner);
f39a178a 283
308c8a3e
WB
284 return false;
285 }
286
f39a178a 287 /* Enter the container init into its new cgroups for all requested controllers. */
308c8a3e
WB
288-bool cgroup_enter(struct lxc_handler *handler)
289+bool cgroup_enter(struct lxc_handler *handler, bool inner)
290 {
291 if (ops)
292- return ops->enter(handler->cgroup_data, handler->pid);
293+ return ops->enter(handler->cgroup_data, handler->pid, inner);
f39a178a 294
308c8a3e
WB
295 return false;
296 }
1513a0b5 297@@ -99,10 +99,11 @@ bool cgroup_create_legacy(struct lxc_handler *handler)
07288e64
WB
298 }
299
f39a178a
WB
300 const char *cgroup_get_cgroup(struct lxc_handler *handler,
301- const char *subsystem)
302+ const char *subsystem,
303+ bool inner)
07288e64
WB
304 {
305 if (ops)
306- return ops->get_cgroup(handler->cgroup_data, subsystem);
307+ return ops->get_cgroup(handler->cgroup_data, subsystem, inner);
f39a178a 308
07288e64
WB
309 return NULL;
310 }
1513a0b5 311@@ -148,10 +149,10 @@ bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
308c8a3e
WB
312 return false;
313 }
314
315-bool cgroup_chown(struct lxc_handler *handler)
316+bool cgroup_chown(struct lxc_handler *handler, bool inner)
317 {
318 if (ops && ops->chown)
319- return ops->chown(handler->cgroup_data, handler->conf);
320+ return ops->chown(handler->cgroup_data, handler->conf, inner);
f39a178a 321
308c8a3e
WB
322 return true;
323 }
308c8a3e 324diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
1513a0b5 325index 0f04e8b7..3a63133d 100644
308c8a3e
WB
326--- a/src/lxc/cgroups/cgroup.h
327+++ b/src/lxc/cgroups/cgroup.h
02a2999d
WB
328@@ -28,6 +28,12 @@
329 #include <stddef.h>
330 #include <sys/types.h>
331
332+/* When lxc.cgroup.protect_limits is in effect, the container's cgroup namespace
333+ * will be moved into an additional subdirectory "cgns/" inside the cgroup in
334+ * order to prevent it from accessing the outer limiting cgroup.
335+ */
336+#define CGROUP_NAMESPACE_SUBDIR "cgns"
337+
338 struct lxc_handler;
339 struct lxc_conf;
340 struct lxc_list;
1513a0b5 341@@ -45,10 +51,10 @@ struct cgroup_ops {
308c8a3e 342
f39a178a 343 void *(*init)(struct lxc_handler *handler);
308c8a3e
WB
344 void (*destroy)(void *hdata, struct lxc_conf *conf);
345- bool (*create)(void *hdata);
346- bool (*enter)(void *hdata, pid_t pid);
347+ bool (*create)(void *hdata, bool inner);
348+ bool (*enter)(void *hdata, pid_t pid, bool inner);
349 bool (*create_legacy)(void *hdata, pid_t pid);
07288e64
WB
350- const char *(*get_cgroup)(void *hdata, const char *subsystem);
351+ const char *(*get_cgroup)(void *hdata, const char *subsystem, bool inner);
308c8a3e 352 bool (*escape)();
07288e64
WB
353 int (*num_hierarchies)();
354 bool (*get_hierarchies)(int n, char ***out);
1513a0b5 355@@ -56,7 +62,7 @@ struct cgroup_ops {
308c8a3e
WB
356 int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
357 bool (*unfreeze)(void *hdata);
1513a0b5 358 bool (*setup_limits)(void *hdata, struct lxc_conf *conf, bool with_devices);
308c8a3e
WB
359- bool (*chown)(void *hdata, struct lxc_conf *conf);
360+ bool (*chown)(void *hdata, struct lxc_conf *conf, bool inner);
361 bool (*attach)(const char *name, const char *lxcpath, pid_t pid);
362 bool (*mount_cgroup)(void *hdata, const char *root, int type);
363 int (*nrtasks)(void *hdata);
1513a0b5 364@@ -67,15 +73,16 @@ extern bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid);
308c8a3e
WB
365 extern bool cgroup_mount(const char *root, struct lxc_handler *handler, int type);
366 extern void cgroup_destroy(struct lxc_handler *handler);
367 extern bool cgroup_init(struct lxc_handler *handler);
368-extern bool cgroup_create(struct lxc_handler *handler);
369+extern bool cgroup_create(struct lxc_handler *handler, bool inner);
370 extern bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices);
371-extern bool cgroup_chown(struct lxc_handler *handler);
372-extern bool cgroup_enter(struct lxc_handler *handler);
373+extern bool cgroup_chown(struct lxc_handler *handler, bool inner);
374+extern bool cgroup_enter(struct lxc_handler *handler, bool inner);
375 extern void cgroup_cleanup(struct lxc_handler *handler);
376 extern bool cgroup_create_legacy(struct lxc_handler *handler);
377 extern int cgroup_nrtasks(struct lxc_handler *handler);
1513a0b5
WB
378 extern const char *cgroup_get_cgroup(struct lxc_handler *handler,
379- const char *subsystem);
380+ const char *subsystem,
381+ bool inner);
07288e64
WB
382 extern bool cgroup_escape();
383 extern int cgroup_num_hierarchies();
384 extern bool cgroup_get_hierarchies(int i, char ***out);
385diff --git a/src/lxc/commands.c b/src/lxc/commands.c
1513a0b5 386index 54e9f75c..df5a9907 100644
07288e64
WB
387--- a/src/lxc/commands.c
388+++ b/src/lxc/commands.c
1513a0b5 389@@ -426,20 +426,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
02a2999d
WB
390 return lxc_cmd_rsp_send(fd, &rsp);
391 }
07288e64 392
02a2999d
WB
393-/*
394- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
395- * particular subsystem. This is the cgroup path relative to the root
396- * of the cgroup filesystem.
397- *
398- * @name : name of container to connect to
399- * @lxcpath : the lxcpath in which the container is running
400- * @subsystem : the subsystem being asked about
401- *
402- * Returns the path on success, NULL on failure. The caller must free() the
403- * returned path.
404- */
405-char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
1513a0b5
WB
406- const char *subsystem)
407+char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
408+ const char *subsystem, bool inner)
02a2999d
WB
409 {
410 int ret, stopped;
02a2999d 411 struct lxc_cmd_rr cmd = {
1513a0b5
WB
412@@ -452,8 +440,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
413
414 cmd.req.data = subsystem;
415 cmd.req.datalen = 0;
416- if (subsystem)
417- cmd.req.datalen = strlen(subsystem) + 1;
418+ if (subsystem) {
419+ size_t subsyslen = strlen(subsystem);
420+ if (inner) {
421+ char *data = alloca(subsyslen+2);
422+ memcpy(data, subsystem, subsyslen+1);
423+ data[subsyslen+1] = 1;
424+ cmd.req.datalen = subsyslen+2,
425+ cmd.req.data = data;
426+ } else {
427+ cmd.req.datalen = subsyslen+1;
428+ }
02a2999d 429+ }
1513a0b5 430
02a2999d 431 ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL);
1513a0b5
WB
432 if (ret < 0)
433@@ -468,16 +466,63 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
02a2999d
WB
434 return cmd.rsp.data;
435 }
436
07288e64 437+/*
02a2999d
WB
438+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
439+ * particular subsystem. This is the cgroup path relative to the root
07288e64
WB
440+ * of the cgroup filesystem.
441+ *
442+ * @name : name of container to connect to
443+ * @lxcpath : the lxcpath in which the container is running
444+ * @subsystem : the subsystem being asked about
445+ *
446+ * Returns the path on success, NULL on failure. The caller must free() the
447+ * returned path.
448+ */
02a2999d 449+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
07288e64
WB
450+ const char *subsystem)
451+{
02a2999d 452+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false);
07288e64 453+}
f39a178a
WB
454+
455+/*
456+ * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path
457+ * for a particular subsystem. This is the cgroup path relative to the root
458+ * of the cgroup filesystem.
459+ *
460+ * @name : name of container to connect to
461+ * @lxcpath : the lxcpath in which the container is running
462+ * @subsystem : the subsystem being asked about
463+ *
464+ * Returns the path on success, NULL on failure. The caller must free() the
465+ * returned path.
466+ */
467+char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath,
468+ const char *subsystem)
469+{
470+ return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true);
471+}
07288e64 472+
02a2999d
WB
473 static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
474 struct lxc_handler *handler)
475 {
02a2999d 476 const char *path;
1513a0b5 477 struct lxc_cmd_rsp rsp;
02a2999d 478
1513a0b5
WB
479- if (req->datalen > 0)
480- path = cgroup_get_cgroup(handler, req->data);
481- else
482- path = cgroup_get_cgroup(handler, NULL);
483+ if (req->datalen > 0) {
484+ const char *subsystem;
485+ size_t subsyslen;
486+ bool inner = false;
487+ subsystem = req->data;
488+ subsyslen = strlen(subsystem);
489+ if (req->datalen == subsyslen+2)
490+ inner = (subsystem[subsyslen+1] == 1);
07288e64 491+
1513a0b5
WB
492+ path = cgroup_get_cgroup(handler, req->data, inner);
493+ } else {
494+ // FIXME: cgroup separation for cgroup v2 cannot be handled
495+ // like we used to do v1 here... need to figure this out...
496+ path = cgroup_get_cgroup(handler, NULL, false);
497+ }
07288e64
WB
498 if (!path)
499 return -1;
1513a0b5 500
07288e64 501diff --git a/src/lxc/commands.h b/src/lxc/commands.h
1513a0b5 502index 816cd748..e16c0d79 100644
07288e64
WB
503--- a/src/lxc/commands.h
504+++ b/src/lxc/commands.h
1513a0b5 505@@ -93,6 +93,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
07288e64
WB
506 */
507 extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
508 const char *subsystem);
509+extern char *lxc_cmd_get_attach_cgroup_path(const char *name,
510+ const char *lxcpath, const char *subsystem);
511 extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
512 extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
513 extern char *lxc_cmd_get_name(const char *hashed_sock);
308c8a3e 514diff --git a/src/lxc/criu.c b/src/lxc/criu.c
1513a0b5 515index f60a6e15..7c8a8aee 100644
308c8a3e
WB
516--- a/src/lxc/criu.c
517+++ b/src/lxc/criu.c
f39a178a 518@@ -324,7 +324,7 @@ static void exec_criu(struct criu_opts *opts)
07288e64
WB
519 } else {
520 const char *p;
521
522- p = cgroup_get_cgroup(opts->handler, controllers[0]);
523+ p = cgroup_get_cgroup(opts->handler, controllers[0], false);
524 if (!p) {
525 ERROR("failed to get cgroup path for %s", controllers[0]);
526 goto err;
1513a0b5 527@@ -958,7 +958,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
308c8a3e
WB
528 goto out_fini_handler;
529 }
530
531- if (!cgroup_create(handler)) {
532+ if (!cgroup_create(handler, false)) {
533 ERROR("failed creating groups");
534 goto out_fini_handler;
535 }
536diff --git a/src/lxc/start.c b/src/lxc/start.c
1513a0b5 537index f66f50a7..772eacc2 100644
308c8a3e
WB
538--- a/src/lxc/start.c
539+++ b/src/lxc/start.c
1513a0b5 540@@ -1556,7 +1556,7 @@ static int lxc_spawn(struct lxc_handler *handler)
308c8a3e
WB
541
542 cgroups_connected = true;
543
544- if (!cgroup_create(handler)) {
545+ if (!cgroup_create(handler, false)) {
1513a0b5 546 ERROR("Failed creating cgroups");
308c8a3e
WB
547 goto out_delete_net;
548 }
1513a0b5 549@@ -1650,10 +1650,10 @@ static int lxc_spawn(struct lxc_handler *handler)
308c8a3e
WB
550 goto out_delete_net;
551 }
552
553- if (!cgroup_enter(handler))
554+ if (!cgroup_enter(handler, false))
555 goto out_delete_net;
556
557- if (!cgroup_chown(handler))
558+ if (!cgroup_chown(handler, false))
559 goto out_delete_net;
560
1513a0b5
WB
561 /* Now we're ready to preserve the network namespace */
562@@ -1714,16 +1714,30 @@ static int lxc_spawn(struct lxc_handler *handler)
563 }
bc7e56ac
WB
564 }
565
1513a0b5
WB
566- ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
567- if (ret < 0)
bc7e56ac
WB
568- goto out_delete_net;
569-
570 if (!cgroup_setup_limits(handler, true)) {
1513a0b5 571 ERROR("Failed to setup legacy device cgroup controller limits");
bc7e56ac 572 goto out_delete_net;
308c8a3e 573 }
1513a0b5 574 TRACE("Set up legacy device cgroup controller limits");
308c8a3e
WB
575
576+ if (cgns_supported()) {
577+ if (!cgroup_create(handler, true)) {
578+ ERROR("failed to create inner cgroup separation layer");
579+ goto out_delete_net;
580+ }
581+ if (!cgroup_enter(handler, true)) {
582+ ERROR("failed to enter inner cgroup separation layer");
583+ goto out_delete_net;
584+ }
585+ if (!cgroup_chown(handler, true)) {
586+ ERROR("failed chown inner cgroup separation layer");
587+ goto out_delete_net;
588+ }
589+ }
bc7e56ac
WB
590+
591+ if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE))
592+ goto out_delete_net;
308c8a3e
WB
593+
594 cgroup_disconnect();
595 cgroups_connected = false;
596
597--
7395ab25 5982.11.0
308c8a3e 599