]>
Commit | Line | Data |
---|---|---|
1513a0b5 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
308c8a3e | 2 | From: Wolfgang Bumiller <w.bumiller@proxmox.com> |
1513a0b5 | 3 | Date: Wed, 28 Mar 2018 13:37:28 +0200 |
2d8021b3 WB |
4 | Subject: [PATCH] PVE: [Up] separate the limiting from the namespaced cgroup |
5 | root | |
308c8a3e WB |
6 | |
7 | When cgroup namespaces are enabled, a privileged container | |
8 | with mixed cgroups has full write access to its own root | |
9 | cgroup, effectively allowing it to overwrite values written | |
10 | from the outside or configured via lxc.cgroup.*. | |
11 | ||
12 | This patch causes an additional 'cgns/' directory to be | |
13 | created in all cgroups if cgroup namespaces and cgfsng are | |
14 | being used in order to combat this. | |
15 | ||
16 | Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com> | |
17 | --- | |
8a25e884 WB |
18 | src/lxc/cgroups/cgfsng.c | 94 +++++++++++++++++++++++++++++++++------- |
19 | src/lxc/cgroups/cgroup.h | 18 ++++++-- | |
20 | src/lxc/commands.c | 87 ++++++++++++++++++++++++++++--------- | |
21 | src/lxc/commands.h | 2 + | |
22 | src/lxc/criu.c | 4 +- | |
23 | src/lxc/start.c | 28 +++++++++--- | |
834bb4d7 | 24 | 6 files changed, 183 insertions(+), 50 deletions(-) |
308c8a3e | 25 | |
308c8a3e | 26 | diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c |
8a25e884 | 27 | index ab99b47c5..ac8f469bb 100644 |
308c8a3e WB |
28 | --- a/src/lxc/cgroups/cgfsng.c |
29 | +++ b/src/lxc/cgroups/cgfsng.c | |
834bb4d7 | 30 | @@ -818,6 +818,7 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char |
07288e64 | 31 | new->mountpoint = mountpoint; |
834bb4d7 WB |
32 | new->container_base_path = container_base_path; |
33 | new->container_full_path = NULL; | |
34 | + new->container_inner_path = NULL; | |
35 | new->monitor_full_path = NULL; | |
1513a0b5 | 36 | new->version = type; |
07288e64 | 37 | |
834bb4d7 | 38 | @@ -1059,6 +1060,9 @@ static int cgroup_rmdir(struct hierarchy **hierarchies, |
1513a0b5 | 39 | |
834bb4d7 WB |
40 | free(h->container_full_path); |
41 | h->container_full_path = NULL; | |
2d8021b3 | 42 | + |
834bb4d7 WB |
43 | + free(h->container_inner_path); |
44 | + h->container_inner_path = NULL; | |
07288e64 WB |
45 | } |
46 | ||
1513a0b5 | 47 | return 0; |
834bb4d7 | 48 | @@ -1070,6 +1074,7 @@ struct generic_userns_exec_data { |
1513a0b5 WB |
49 | struct lxc_conf *conf; |
50 | uid_t origuid; /* target uid in parent namespace */ | |
51 | char *path; | |
52 | + bool inner; | |
53 | }; | |
54 | ||
55 | static int cgroup_rmdir_wrapper(void *data) | |
834bb4d7 | 56 | @@ -1112,6 +1117,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, |
2d8021b3 WB |
57 | wrap.container_cgroup = ops->container_cgroup; |
58 | wrap.hierarchies = ops->hierarchies; | |
59 | wrap.conf = handler->conf; | |
1513a0b5 WB |
60 | + wrap.inner = false; |
61 | ||
2d8021b3 WB |
62 | if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) |
63 | ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap, | |
834bb4d7 WB |
64 | @@ -1323,17 +1329,26 @@ static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname) |
65 | return cg_unified_create_cgroup(h, cgname); | |
308c8a3e WB |
66 | } |
67 | ||
834bb4d7 WB |
68 | -static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname) |
69 | +static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname, bool inner) | |
308c8a3e | 70 | { |
1513a0b5 | 71 | int ret; |
2d8021b3 | 72 | + char *path; |
1513a0b5 | 73 | |
1513a0b5 WB |
74 | - if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) { |
75 | + if (!inner && !cg_legacy_handle_cpuset_hierarchy(h, cgname)) { | |
76 | ERROR("Failed to handle legacy cpuset controller"); | |
07288e64 | 77 | return false; |
da73bbc6 | 78 | } |
308c8a3e | 79 | |
834bb4d7 WB |
80 | - h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); |
81 | - ret = mkdir_eexist_on_last(h->container_full_path, 0755); | |
82 | + if (inner) { | |
83 | + path = must_make_path(h->container_full_path, CGROUP_NAMESPACE_SUBDIR, NULL); | |
84 | + h->container_inner_path = path; | |
85 | + ret = mkdir(path, 0755); | |
86 | + } else { | |
87 | + path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); | |
88 | + h->container_full_path = path; | |
89 | + ret = mkdir_eexist_on_last(path, 0755); | |
90 | + } | |
91 | + | |
1513a0b5 | 92 | if (ret < 0) { |
834bb4d7 | 93 | ERROR("Failed to create cgroup \"%s\"", h->container_full_path); |
1513a0b5 | 94 | return false; |
834bb4d7 WB |
95 | @@ -1425,11 +1440,29 @@ on_error: |
96 | return bret; | |
f39a178a WB |
97 | } |
98 | ||
2d8021b3 | 99 | +static inline bool cgfsng_create_inner(struct cgroup_ops *ops) |
f39a178a WB |
100 | +{ |
101 | + size_t i; | |
102 | + bool ret = true; | |
2d8021b3 WB |
103 | + char *cgname = must_make_path(ops->container_cgroup, CGROUP_NAMESPACE_SUBDIR, NULL); |
104 | + for (i = 0; ops->hierarchies[i]; i++) { | |
834bb4d7 | 105 | + if (!container_create_path_for_hierarchy(ops->hierarchies[i], cgname, true)) { |
2d8021b3 | 106 | + SYSERROR("Failed to create %s namespace subdirectory: %s", |
834bb4d7 | 107 | + ops->hierarchies[i]->container_full_path, strerror(errno)); |
f39a178a WB |
108 | + ret = false; |
109 | + break; | |
110 | + } | |
111 | + } | |
112 | + free(cgname); | |
113 | + return ret; | |
114 | +} | |
115 | + | |
1513a0b5 WB |
116 | /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; |
117 | * next cgroup_pattern-1, -2, ..., -999. | |
308c8a3e | 118 | */ |
834bb4d7 WB |
119 | __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, |
120 | - struct lxc_handler *handler) | |
121 | + struct lxc_handler *handler, | |
122 | + bool inner) | |
308c8a3e | 123 | { |
f39a178a WB |
124 | int i; |
125 | size_t len; | |
834bb4d7 | 126 | @@ -1438,10 +1471,17 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, |
2d8021b3 | 127 | struct lxc_conf *conf = handler->conf; |
f39a178a | 128 | |
2d8021b3 | 129 | if (ops->container_cgroup) { |
308c8a3e | 130 | + if (inner) |
2d8021b3 WB |
131 | + return cgfsng_create_inner(ops); |
132 | WARN("cgfsng_create called a second time: %s", ops->container_cgroup); | |
308c8a3e WB |
133 | return false; |
134 | } | |
1513a0b5 | 135 | |
308c8a3e | 136 | + if (inner) { |
2d8021b3 | 137 | + ERROR("cgfsng_create called twice for inner cgroup"); |
308c8a3e WB |
138 | + return false; |
139 | + } | |
1513a0b5 | 140 | + |
2d8021b3 WB |
141 | if (!conf) |
142 | return false; | |
143 | ||
834bb4d7 | 144 | @@ -1482,7 +1522,7 @@ again: |
f39a178a | 145 | } |
1513a0b5 | 146 | |
2d8021b3 | 147 | for (i = 0; ops->hierarchies[i]; i++) { |
834bb4d7 WB |
148 | - if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) { |
149 | + if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, false)) { | |
150 | ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path); | |
151 | free(ops->hierarchies[i]->container_full_path); | |
152 | ops->hierarchies[i]->container_full_path = NULL; | |
153 | @@ -1505,7 +1545,8 @@ out_free: | |
308c8a3e WB |
154 | } |
155 | ||
834bb4d7 WB |
156 | __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, |
157 | - bool monitor) | |
158 | + bool monitor, | |
159 | + bool inner) | |
308c8a3e | 160 | { |
834bb4d7 WB |
161 | int len; |
162 | char pidstr[INTTYPE_TO_STRLEN(pid_t)]; | |
163 | @@ -1521,6 +1562,9 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, | |
164 | if (monitor) | |
165 | path = must_make_path(ops->hierarchies[i]->monitor_full_path, | |
166 | "cgroup.procs", NULL); | |
167 | + else if (inner) | |
168 | + path = must_make_path(ops->hierarchies[i]->container_inner_path, | |
169 | + "cgroup.procs", NULL); | |
170 | else | |
171 | path = must_make_path(ops->hierarchies[i]->container_full_path, | |
172 | "cgroup.procs", NULL); | |
173 | @@ -1538,12 +1582,12 @@ __cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid, | |
174 | ||
175 | __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid) | |
176 | { | |
177 | - return __do_cgroup_enter(ops, pid, true); | |
178 | + return __do_cgroup_enter(ops, pid, true, false); | |
179 | } | |
308c8a3e | 180 | |
834bb4d7 WB |
181 | -static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid) |
182 | +static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid, bool inner) | |
183 | { | |
184 | - return __do_cgroup_enter(ops, pid, false); | |
185 | + return __do_cgroup_enter(ops, pid, false, inner); | |
186 | } | |
187 | ||
188 | static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid, | |
189 | @@ -1609,9 +1653,15 @@ static int chown_cgroup_wrapper(void *data) | |
1513a0b5 | 190 | char *fullpath; |
834bb4d7 | 191 | char *path = arg->hierarchies[i]->container_full_path; |
308c8a3e WB |
192 | |
193 | + if (arg->inner) | |
02a2999d | 194 | + path = must_make_path(path, CGROUP_NAMESPACE_SUBDIR, NULL); |
308c8a3e | 195 | + |
1513a0b5 WB |
196 | ret = chowmod(path, destuid, nsgid, 0775); |
197 | - if (ret < 0) | |
198 | + if (ret < 0) { | |
308c8a3e WB |
199 | + if (arg->inner) |
200 | + free(path); | |
201 | return -1; | |
1513a0b5 WB |
202 | + } |
203 | ||
204 | /* Failures to chown() these are inconvenient but not | |
205 | * detrimental We leave these owned by the container launcher, | |
834bb4d7 | 206 | @@ -1630,8 +1680,11 @@ static int chown_cgroup_wrapper(void *data) |
2d8021b3 | 207 | (void)chowmod(fullpath, destuid, nsgid, 0664); |
1513a0b5 | 208 | free(fullpath); |
308c8a3e | 209 | |
2d8021b3 WB |
210 | - if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) |
211 | + if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) { | |
308c8a3e WB |
212 | + if (arg->inner) |
213 | + free(path); | |
1513a0b5 WB |
214 | continue; |
215 | + } | |
308c8a3e | 216 | |
1513a0b5 WB |
217 | fullpath = must_make_path(path, "cgroup.subtree_control", NULL); |
218 | (void)chowmod(fullpath, destuid, nsgid, 0664); | |
834bb4d7 | 219 | @@ -1640,13 +1693,17 @@ static int chown_cgroup_wrapper(void *data) |
1513a0b5 WB |
220 | fullpath = must_make_path(path, "cgroup.threads", NULL); |
221 | (void)chowmod(fullpath, destuid, nsgid, 0664); | |
308c8a3e | 222 | free(fullpath); |
2d8021b3 | 223 | + |
f39a178a WB |
224 | + if (arg->inner) |
225 | + free(path); | |
308c8a3e WB |
226 | } |
227 | ||
228 | return 0; | |
229 | } | |
230 | ||
834bb4d7 WB |
231 | __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops, |
232 | - struct lxc_conf *conf) | |
233 | + struct lxc_conf *conf, | |
234 | + bool inner) | |
308c8a3e | 235 | { |
1513a0b5 | 236 | struct generic_userns_exec_data wrap; |
2d8021b3 | 237 | |
834bb4d7 | 238 | @@ -1657,6 +1714,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops, |
1513a0b5 | 239 | wrap.path = NULL; |
2d8021b3 | 240 | wrap.hierarchies = ops->hierarchies; |
1513a0b5 | 241 | wrap.conf = conf; |
308c8a3e WB |
242 | + wrap.inner = inner; |
243 | ||
f39a178a WB |
244 | if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, |
245 | "chown_cgroup_wrapper") < 0) { | |
834bb4d7 | 246 | @@ -2038,7 +2096,8 @@ __cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops) |
07288e64 WB |
247 | } |
248 | ||
834bb4d7 WB |
249 | __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, |
250 | - const char *controller) | |
251 | + const char *controller, | |
252 | + bool inner) | |
07288e64 | 253 | { |
1513a0b5 WB |
254 | struct hierarchy *h; |
255 | ||
834bb4d7 | 256 | @@ -2049,6 +2108,9 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, |
07288e64 | 257 | return NULL; |
1513a0b5 | 258 | } |
07288e64 | 259 | |
2d8021b3 | 260 | + if (inner) |
834bb4d7 | 261 | + return h->container_inner_path ? h->container_inner_path + strlen(h->mountpoint) : NULL; |
07288e64 | 262 | + |
834bb4d7 | 263 | return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL; |
07288e64 WB |
264 | } |
265 | ||
834bb4d7 | 266 | @@ -2080,7 +2142,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name, |
1513a0b5 WB |
267 | int fret = -1, idx = 0; |
268 | char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL; | |
07288e64 | 269 | |
1513a0b5 WB |
270 | - container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller); |
271 | + container_cgroup = lxc_cmd_get_attach_cgroup_path(name, lxcpath, controller); | |
272 | /* not running */ | |
273 | if (!container_cgroup) | |
274 | return 0; | |
834bb4d7 | 275 | @@ -2161,7 +2223,7 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, |
2d8021b3 WB |
276 | continue; |
277 | } | |
f39a178a | 278 | |
2d8021b3 WB |
279 | - path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]); |
280 | + path = lxc_cmd_get_attach_cgroup_path(name, lxcpath, h->controllers[0]); | |
281 | /* not running */ | |
282 | if (!path) | |
283 | continue; | |
308c8a3e | 284 | diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h |
8a25e884 | 285 | index d4dcd506b..59445b5a5 100644 |
308c8a3e WB |
286 | --- a/src/lxc/cgroups/cgroup.h |
287 | +++ b/src/lxc/cgroups/cgroup.h | |
834bb4d7 WB |
288 | @@ -32,6 +32,12 @@ |
289 | #define MONITOR_CGROUP "lxc.monitor" | |
290 | #define PIVOT_CGROUP "lxc.pivot" | |
02a2999d WB |
291 | |
292 | +/* When lxc.cgroup.protect_limits is in effect the container's cgroup namespace | |
293 | + * will be moved into an additional subdirectory "cgns/" inside the cgroup in | |
294 | + * order to prevent it from accessing the outer limiting cgroup. | |
295 | + */ | |
296 | +#define CGROUP_NAMESPACE_SUBDIR "cgns" | |
297 | + | |
298 | struct lxc_handler; | |
299 | struct lxc_conf; | |
300 | struct lxc_list; | |
834bb4d7 WB |
301 | @@ -72,6 +78,9 @@ typedef enum { |
302 | * @monitor_full_path | |
303 | * - The full path to the monitor's cgroup. | |
2d8021b3 | 304 | * |
834bb4d7 | 305 | + * @container_inner_path |
2d8021b3 WB |
306 | + * - The full path to the container's inner cgroup when protect_limits is used. |
307 | + * | |
308 | * @version | |
309 | * - legacy hierarchy | |
310 | * If the hierarchy is a legacy hierarchy this will be set to | |
834bb4d7 | 311 | @@ -85,6 +94,7 @@ struct hierarchy { |
2d8021b3 | 312 | char *mountpoint; |
834bb4d7 WB |
313 | char *container_base_path; |
314 | char *container_full_path; | |
315 | + char *container_inner_path; | |
316 | char *monitor_full_path; | |
2d8021b3 WB |
317 | int version; |
318 | }; | |
834bb4d7 WB |
319 | @@ -139,9 +149,9 @@ struct cgroup_ops { |
320 | void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); | |
321 | bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); | |
322 | bool (*monitor_enter)(struct cgroup_ops *ops, pid_t pid); | |
323 | - bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); | |
324 | - bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid); | |
2d8021b3 | 325 | - const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); |
834bb4d7 WB |
326 | + bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler, bool inner); |
327 | + bool (*payload_enter)(struct cgroup_ops *ops, pid_t pid, bool inner); | |
2d8021b3 | 328 | + const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller, bool inner); |
834bb4d7 | 329 | bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); |
2d8021b3 WB |
330 | int (*num_hierarchies)(struct cgroup_ops *ops); |
331 | bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); | |
834bb4d7 | 332 | @@ -152,7 +162,7 @@ struct cgroup_ops { |
2d8021b3 WB |
333 | bool (*unfreeze)(struct cgroup_ops *ops); |
334 | bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf, | |
335 | bool with_devices); | |
336 | - bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf); | |
337 | + bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf, bool inner); | |
338 | bool (*attach)(struct cgroup_ops *ops, const char *name, | |
339 | const char *lxcpath, pid_t pid); | |
340 | bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler, | |
07288e64 | 341 | diff --git a/src/lxc/commands.c b/src/lxc/commands.c |
8a25e884 | 342 | index 133384d72..b41a76000 100644 |
07288e64 WB |
343 | --- a/src/lxc/commands.c |
344 | +++ b/src/lxc/commands.c | |
834bb4d7 | 345 | @@ -427,20 +427,8 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, |
02a2999d WB |
346 | return lxc_cmd_rsp_send(fd, &rsp); |
347 | } | |
07288e64 | 348 | |
02a2999d WB |
349 | -/* |
350 | - * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a | |
351 | - * particular subsystem. This is the cgroup path relative to the root | |
352 | - * of the cgroup filesystem. | |
353 | - * | |
354 | - * @name : name of container to connect to | |
355 | - * @lxcpath : the lxcpath in which the container is running | |
356 | - * @subsystem : the subsystem being asked about | |
357 | - * | |
358 | - * Returns the path on success, NULL on failure. The caller must free() the | |
359 | - * returned path. | |
360 | - */ | |
361 | -char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, | |
1513a0b5 WB |
362 | - const char *subsystem) |
363 | +char *do_lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, | |
364 | + const char *subsystem, bool inner) | |
02a2999d WB |
365 | { |
366 | int ret, stopped; | |
02a2999d | 367 | struct lxc_cmd_rr cmd = { |
834bb4d7 | 368 | @@ -453,8 +441,18 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, |
1513a0b5 WB |
369 | |
370 | cmd.req.data = subsystem; | |
371 | cmd.req.datalen = 0; | |
372 | - if (subsystem) | |
373 | - cmd.req.datalen = strlen(subsystem) + 1; | |
374 | + if (subsystem) { | |
375 | + size_t subsyslen = strlen(subsystem); | |
376 | + if (inner) { | |
377 | + char *data = alloca(subsyslen+2); | |
378 | + memcpy(data, subsystem, subsyslen+1); | |
379 | + data[subsyslen+1] = 1; | |
380 | + cmd.req.datalen = subsyslen+2, | |
381 | + cmd.req.data = data; | |
382 | + } else { | |
383 | + cmd.req.datalen = subsyslen+1; | |
384 | + } | |
02a2999d | 385 | + } |
1513a0b5 | 386 | |
02a2999d | 387 | ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); |
1513a0b5 | 388 | if (ret < 0) |
834bb4d7 | 389 | @@ -469,6 +467,42 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, |
02a2999d WB |
390 | return cmd.rsp.data; |
391 | } | |
392 | ||
07288e64 | 393 | +/* |
02a2999d WB |
394 | + * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a |
395 | + * particular subsystem. This is the cgroup path relative to the root | |
07288e64 WB |
396 | + * of the cgroup filesystem. |
397 | + * | |
398 | + * @name : name of container to connect to | |
399 | + * @lxcpath : the lxcpath in which the container is running | |
400 | + * @subsystem : the subsystem being asked about | |
401 | + * | |
402 | + * Returns the path on success, NULL on failure. The caller must free() the | |
403 | + * returned path. | |
404 | + */ | |
02a2999d | 405 | +char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, |
07288e64 WB |
406 | + const char *subsystem) |
407 | +{ | |
02a2999d | 408 | + return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, false); |
07288e64 | 409 | +} |
f39a178a WB |
410 | + |
411 | +/* | |
412 | + * lxc_cmd_get_attach_cgroup_path: Calculate a container's inner cgroup path | |
413 | + * for a particular subsystem. This is the cgroup path relative to the root | |
414 | + * of the cgroup filesystem. | |
415 | + * | |
416 | + * @name : name of container to connect to | |
417 | + * @lxcpath : the lxcpath in which the container is running | |
418 | + * @subsystem : the subsystem being asked about | |
419 | + * | |
420 | + * Returns the path on success, NULL on failure. The caller must free() the | |
421 | + * returned path. | |
422 | + */ | |
423 | +char *lxc_cmd_get_attach_cgroup_path(const char *name, const char *lxcpath, | |
424 | + const char *subsystem) | |
425 | +{ | |
426 | + return do_lxc_cmd_get_cgroup_path(name, lxcpath, subsystem, true); | |
427 | +} | |
07288e64 | 428 | + |
02a2999d WB |
429 | static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, |
430 | struct lxc_handler *handler) | |
431 | { | |
834bb4d7 | 432 | @@ -476,10 +510,21 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, |
1513a0b5 | 433 | struct lxc_cmd_rsp rsp; |
2d8021b3 | 434 | struct cgroup_ops *cgroup_ops = handler->cgroup_ops; |
02a2999d | 435 | |
1513a0b5 | 436 | - if (req->datalen > 0) |
2d8021b3 | 437 | - path = cgroup_ops->get_cgroup(cgroup_ops, req->data); |
1513a0b5 | 438 | - else |
2d8021b3 | 439 | - path = cgroup_ops->get_cgroup(cgroup_ops, NULL); |
1513a0b5 WB |
440 | + if (req->datalen > 0) { |
441 | + const char *subsystem; | |
442 | + size_t subsyslen; | |
443 | + bool inner = false; | |
444 | + subsystem = req->data; | |
445 | + subsyslen = strlen(subsystem); | |
446 | + if (req->datalen == subsyslen+2) | |
447 | + inner = (subsystem[subsyslen+1] == 1); | |
07288e64 | 448 | + |
2d8021b3 | 449 | + path = cgroup_ops->get_cgroup(cgroup_ops, req->data, inner); |
1513a0b5 WB |
450 | + } else { |
451 | + // FIXME: cgroup separation for cgroup v2 cannot be handled | |
452 | + // like we used to do v1 here... need to figure this out... | |
2d8021b3 | 453 | + path = cgroup_ops->get_cgroup(cgroup_ops, NULL, false); |
1513a0b5 | 454 | + } |
07288e64 WB |
455 | if (!path) |
456 | return -1; | |
1513a0b5 | 457 | |
834bb4d7 WB |
458 | @@ -651,7 +696,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, |
459 | * lxc_unfreeze() would do another cmd (GET_CGROUP) which would | |
460 | * deadlock us. | |
461 | */ | |
462 | - if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer")) | |
463 | + if (!cgroup_ops->get_cgroup(cgroup_ops, "freezer", false)) | |
464 | return 0; | |
465 | ||
466 | if (cgroup_ops->unfreeze(cgroup_ops)) | |
07288e64 | 467 | diff --git a/src/lxc/commands.h b/src/lxc/commands.h |
8a25e884 | 468 | index 2c024b65d..7c4c00b1e 100644 |
07288e64 WB |
469 | --- a/src/lxc/commands.h |
470 | +++ b/src/lxc/commands.h | |
834bb4d7 | 471 | @@ -88,6 +88,8 @@ extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, |
07288e64 WB |
472 | */ |
473 | extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, | |
474 | const char *subsystem); | |
475 | +extern char *lxc_cmd_get_attach_cgroup_path(const char *name, | |
476 | + const char *lxcpath, const char *subsystem); | |
477 | extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath); | |
478 | extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath); | |
479 | extern char *lxc_cmd_get_name(const char *hashed_sock); | |
308c8a3e | 480 | diff --git a/src/lxc/criu.c b/src/lxc/criu.c |
8a25e884 | 481 | index 3d857b541..ec9bcb7e4 100644 |
308c8a3e WB |
482 | --- a/src/lxc/criu.c |
483 | +++ b/src/lxc/criu.c | |
834bb4d7 | 484 | @@ -332,7 +332,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, |
07288e64 WB |
485 | } else { |
486 | const char *p; | |
487 | ||
2d8021b3 WB |
488 | - p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]); |
489 | + p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0], false); | |
07288e64 WB |
490 | if (!p) { |
491 | ERROR("failed to get cgroup path for %s", controllers[0]); | |
492 | goto err; | |
834bb4d7 | 493 | @@ -976,7 +976,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ |
308c8a3e | 494 | goto out_fini_handler; |
2d8021b3 | 495 | handler->cgroup_ops = cgroup_ops; |
308c8a3e | 496 | |
834bb4d7 WB |
497 | - if (!cgroup_ops->payload_create(cgroup_ops, handler)) { |
498 | + if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) { | |
308c8a3e WB |
499 | ERROR("failed creating groups"); |
500 | goto out_fini_handler; | |
501 | } | |
502 | diff --git a/src/lxc/start.c b/src/lxc/start.c | |
8a25e884 | 503 | index dae3bcfe5..f3b29d6cd 100644 |
308c8a3e WB |
504 | --- a/src/lxc/start.c |
505 | +++ b/src/lxc/start.c | |
834bb4d7 | 506 | @@ -1649,7 +1649,7 @@ static int lxc_spawn(struct lxc_handler *handler) |
2d8021b3 WB |
507 | } |
508 | } | |
308c8a3e | 509 | |
834bb4d7 WB |
510 | - if (!cgroup_ops->payload_create(cgroup_ops, handler)) { |
511 | + if (!cgroup_ops->payload_create(cgroup_ops, handler, false)) { | |
1513a0b5 | 512 | ERROR("Failed creating cgroups"); |
308c8a3e WB |
513 | goto out_delete_net; |
514 | } | |
834bb4d7 | 515 | @@ -1743,10 +1743,10 @@ static int lxc_spawn(struct lxc_handler *handler) |
308c8a3e WB |
516 | goto out_delete_net; |
517 | } | |
518 | ||
834bb4d7 WB |
519 | - if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid)) |
520 | + if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, false)) | |
308c8a3e WB |
521 | goto out_delete_net; |
522 | ||
2d8021b3 WB |
523 | - if (!cgroup_ops->chown(cgroup_ops, handler->conf)) |
524 | + if (!cgroup_ops->chown(cgroup_ops, handler->conf, false)) | |
308c8a3e WB |
525 | goto out_delete_net; |
526 | ||
1513a0b5 | 527 | /* Now we're ready to preserve the network namespace */ |
834bb4d7 | 528 | @@ -1813,16 +1813,30 @@ static int lxc_spawn(struct lxc_handler *handler) |
1513a0b5 | 529 | } |
bc7e56ac WB |
530 | } |
531 | ||
1513a0b5 WB |
532 | - ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE); |
533 | - if (ret < 0) | |
bc7e56ac WB |
534 | - goto out_delete_net; |
535 | - | |
2d8021b3 | 536 | if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) { |
1513a0b5 | 537 | ERROR("Failed to setup legacy device cgroup controller limits"); |
bc7e56ac | 538 | goto out_delete_net; |
308c8a3e | 539 | } |
1513a0b5 | 540 | TRACE("Set up legacy device cgroup controller limits"); |
308c8a3e WB |
541 | |
542 | + if (cgns_supported()) { | |
834bb4d7 | 543 | + if (!cgroup_ops->payload_create(cgroup_ops, handler, true)) { |
308c8a3e WB |
544 | + ERROR("failed to create inner cgroup separation layer"); |
545 | + goto out_delete_net; | |
546 | + } | |
834bb4d7 | 547 | + if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid, true)) { |
308c8a3e WB |
548 | + ERROR("failed to enter inner cgroup separation layer"); |
549 | + goto out_delete_net; | |
550 | + } | |
2d8021b3 | 551 | + if (!cgroup_ops->chown(cgroup_ops, handler->conf, true)) { |
308c8a3e WB |
552 | + ERROR("failed chown inner cgroup separation layer"); |
553 | + goto out_delete_net; | |
554 | + } | |
555 | + } | |
bc7e56ac WB |
556 | + |
557 | + if (lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE)) | |
558 | + goto out_delete_net; | |
308c8a3e | 559 | + |
2d8021b3 WB |
560 | if (handler->ns_clone_flags & CLONE_NEWCGROUP) { |
561 | /* Now we're ready to preserve the cgroup namespace */ | |
562 | ret = lxc_try_preserve_ns(handler->pid, "cgroup"); | |
308c8a3e | 563 | -- |
8a25e884 | 564 | 2.20.1 |
308c8a3e | 565 |