]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroups/cgfsng.c
utils: add lxc_iterate_parts()
[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
CommitLineData
ccb4cabe
SH
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2016 Canonical Ltd.
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@ubuntu.com>
3fd0de4d 8 * Christian Brauner <christian.brauner@ubuntu.com>
ccb4cabe
SH
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25/*
26 * cgfs-ng.c: this is a new, simplified implementation of a filesystem
27 * cgroup backend. The original cgfs.c was designed to be as flexible
28 * as possible. It would try to find cgroup filesystems no matter where
29 * or how you had them mounted, and deduce the most usable mount for
0e7ff52c 30 * each controller.
ccb4cabe
SH
31 *
32 * This new implementation assumes that cgroup filesystems are mounted
33 * under /sys/fs/cgroup/clist where clist is either the controller, or
34 * a comma-separated list of controllers.
35 */
a54694f8 36
ccb4cabe 37#include "config.h"
a54694f8
CB
38
39#include <ctype.h>
40#include <dirent.h>
41#include <errno.h>
42#include <grp.h>
43#include <stdint.h>
ccb4cabe
SH
44#include <stdio.h>
45#include <stdlib.h>
a54694f8 46#include <string.h>
ccb4cabe 47#include <unistd.h>
c8bf519d 48#include <linux/kdev_t.h>
438c4581
CB
49#include <linux/types.h>
50#include <sys/types.h>
c8bf519d 51
b635e92d 52#include "caps.h"
ccb4cabe 53#include "cgroup.h"
6328fd9c 54#include "cgroup_utils.h"
ccb4cabe 55#include "commands.h"
43654d34 56#include "conf.h"
a54694f8 57#include "log.h"
43654d34 58#include "storage/storage.h"
a54694f8 59#include "utils.h"
ccb4cabe 60
64e82f8b
DJ
61#ifndef HAVE_STRLCPY
62#include "include/strlcpy.h"
63#endif
64
3ebe2fbd
DJ
65#ifndef HAVE_STRLCAT
66#include "include/strlcat.h"
67#endif
68
ac2cecc4 69lxc_log_define(cgfsng, cgroup);
ccb4cabe 70
ccb4cabe
SH
/* Release every string of the NULL-terminated array @clist and then the array
 * itself. A NULL @clist is accepted and ignored.
 */
static void free_string_list(char **clist)
{
	char **cur;

	if (clist) {
		for (cur = clist; *cur; cur++)
			free(*cur);

		free(clist);
	}
}
83
/* Allocate @sz bytes by handing a NULL pointer to must_realloc(). Do not
 * fail.
 */
static void *must_alloc(size_t sz)
{
	void *fresh = must_realloc(NULL, sz);

	return fresh;
}
89
8b8db2f6
CB
/* Given a pointer to a null-terminated array of pointers, realloc to add one
 * entry, and point the new last entry to NULL. Do not fail.
 *
 * @list may point to a NULL array pointer, in which case a fresh two-slot
 * array is created.
 *
 * Returns the index of the second-to-last entry - that is, the one which is now
 * available for use (keeping the list null-terminated). The caller must store a
 * value at that index: for a fresh array the slot is uninitialized.
 */
static int append_null_to_list(void ***list)
{
	int newentry = 0;

	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* One slot for the new entry plus one for the terminating NULL. The
	 * element size is taken from the array's own element type.
	 */
	*list = must_realloc(*list, (newentry + 2) * sizeof(**list));
	(*list)[newentry + 1] = NULL;
	return newentry;
}
107
8073018d
CB
/* Check whether @entry equals one of the strings in the null-terminated array
 * @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	char **cur;

	for (cur = list; cur && *cur; cur++) {
		if (!strcmp(*cur, entry))
			return true;
	}

	return false;
}
124
ac010944
CB
/* Build a newly allocated string consisting of "name=" followed by @entry,
 * i.e. turn "systemd" into "name=systemd". Do not fail.
 */
static char *cg_legacy_must_prefix_named(char *entry)
{
	static const char prefix[] = "name=";
	const size_t prefix_len = sizeof(prefix) - 1;
	size_t entry_len = strlen(entry);
	char *result;

	/* prefix + entry + terminating NUL byte */
	result = must_alloc(prefix_len + entry_len + 1);
	memcpy(result, prefix, prefix_len);
	memcpy(result + prefix_len, entry, entry_len);
	result[prefix_len + entry_len] = '\0';
	return result;
}
141
42a993b4
CB
/* Append a copy of @entry to the controller list @clist. Do not fail. @clist
 * must be NULL the first time we are called. The last entry of @clist will
 * always be NULL.
 *
 * Named subsystems are handled here as well:
 * - an entry already starting with "name=" or listed in the kernel subsystem
 *   list @klist is copied verbatim;
 * - any other entry is treated as a named subsystem and gets "name=" prefixed;
 * - an entry found in both @klist and the named subsystem list @nlist is
 *   refused (nothing is appended) because we cannot tell which one we have.
 *   (TODO: We could work around this in some cases by just remounting to be
 *   unambiguous, or by comparing mountpoint contents with current cgroup.)
 */
static void must_append_controller(char **klist, char **nlist, char ***clist,
				   char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;

	if (is_kernel && string_in_list(nlist, entry)) {
		ERROR("Refusing to use ambiguous controller \"%s\"", entry);
		ERROR("It is both a named and kernel subsystem");
		return;
	}

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		(*clist)[slot] = must_copy_string(entry);
	else
		(*clist)[slot] = cg_legacy_must_prefix_named(entry);
}
176
5ae0207c
CB
177/* Given a handler's cgroup data, return the struct hierarchy for the controller
178 * @c, or NULL if there is none.
ccb4cabe 179 */
2202afc9 180struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *c)
ccb4cabe
SH
181{
182 int i;
183
2202afc9 184 if (!ops->hierarchies)
ccb4cabe 185 return NULL;
d6337a5f 186
2202afc9 187 for (i = 0; ops->hierarchies[i]; i++) {
d6337a5f
CB
188 if (!c) {
189 /* This is the empty unified hierarchy. */
2202afc9
CB
190 if (ops->hierarchies[i]->controllers &&
191 !ops->hierarchies[i]->controllers[0])
192 return ops->hierarchies[i];
d6337a5f 193
106f1f38 194 continue;
d6337a5f
CB
195 }
196
2202afc9
CB
197 if (string_in_list(ops->hierarchies[i]->controllers, c))
198 return ops->hierarchies[i];
ccb4cabe 199 }
d6337a5f 200
ccb4cabe
SH
201 return NULL;
202}
203
a54694f8
CB
#define BATCH_SIZE 50
/* Make sure *@mem can hold @newlen bytes, growing it in multiples of
 * BATCH_SIZE. The buffer is only reallocated when it does not exist yet or when
 * @newlen falls into a later batch than @oldlen. Do not fail.
 */
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int want = (newlen / BATCH_SIZE) + 1;

	if (*mem && want <= have)
		return;

	*mem = must_realloc(*mem, want * BATCH_SIZE);
}
214
/* Append the string @new of length @newlen to the buffer *@dest, which
 * currently holds @oldlen bytes of text. The buffer is grown as needed.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Room for the old text, the new text and the terminating NUL byte. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	/* Copy @newlen + 1 bytes so the NUL terminator of @new comes along. */
	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
223
/* Slurp in a whole file.
 *
 * Returns a newly allocated, NUL-terminated buffer holding the contents of
 * @fnam which the caller must free. Returns NULL if the file cannot be opened
 * or if no line could be read from it.
 */
static char *read_file(const char *fnam)
{
	FILE *f;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;
	/* getline() returns ssize_t; an int could truncate long lines. */
	ssize_t linelen;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, linelen);
		fulllen += linelen;
	}

	fclose(f);
	free(line);
	return buf;
}
243
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)

/* The helpers below address bit @bit in an array of 32-bit words: word
 * @bit / NBITS, position @bit % NBITS. The caller must make sure @bitarr is
 * large enough. The mask is built from an unsigned 32-bit one so that
 * shifting into bit 31 is well defined.
 */

/* Set bit @bit in @bitarr. */
static void set_bit(unsigned bit, uint32_t *bitarr)
{
	bitarr[bit / NBITS] |= (UINT32_C(1) << (bit % NBITS));
}

/* Clear bit @bit in @bitarr. */
static void clear_bit(unsigned bit, uint32_t *bitarr)
{
	bitarr[bit / NBITS] &= ~(UINT32_C(1) << (bit % NBITS));
}

/* Return true if bit @bit is set in @bitarr. */
static bool is_set(unsigned bit, uint32_t *bitarr)
{
	return (bitarr[bit / NBITS] & (UINT32_C(1) << (bit % NBITS))) != 0;
}
263
/* Create cpumask from cpulist aka turn:
 *
 *	0,2-3
 *
 * into bit array
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place and therefore modified. @nbits is the number of
 * bits the mask must be able to hold.
 *
 * Returns a calloc()ed bit array the caller must free, or NULL if the
 * allocation fails, a range is reversed, or a cpu number is >= @nbits.
 */
static uint32_t *lxc_cpumask(char *buf, size_t nbits)
{
	char *saveptr = NULL;
	char *token;
	uint32_t *bitarr;

	bitarr = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!bitarr)
		return NULL;

	token = strtok_r(buf, ",", &saveptr);
	while (token) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(token, NULL, 0);

		/* A token is either a single cpu "N" or a range "N-M". */
		dash = strchr(token, '-');
		last = dash ? strtoul(dash + 1, NULL, 0) : first;

		if (first > last || last >= nbits) {
			free(bitarr);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, bitarr);

		token = strtok_r(NULL, ",", &saveptr);
	}

	return bitarr;
}
311
a54694f8
CB
312/* Turn cpumask into simple, comma-separated cpulist. */
313static char *lxc_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
314{
a54694f8 315 int ret;
414c6719 316 size_t i;
a54694f8 317 char **cpulist = NULL;
414c6719 318 char numstr[LXC_NUMSTRLEN64] = {0};
a54694f8
CB
319
320 for (i = 0; i <= nbits; i++) {
414c6719
CB
321 if (!is_set(i, bitarr))
322 continue;
323
324 ret = snprintf(numstr, LXC_NUMSTRLEN64, "%zu", i);
325 if (ret < 0 || (size_t)ret >= LXC_NUMSTRLEN64) {
326 lxc_free_array((void **)cpulist, free);
327 return NULL;
328 }
329
330 ret = lxc_append_string(&cpulist, numstr);
331 if (ret < 0) {
332 lxc_free_array((void **)cpulist, free);
333 return NULL;
a54694f8
CB
334 }
335 }
414c6719
CB
336
337 if (!cpulist)
338 return NULL;
339
a54694f8
CB
340 return lxc_string_join(",", (const char **)cpulist, false);
341}
342
/* Return the number that follows the last ',' or '-' in @cpulist, or the
 * number at the start of @cpulist when it contains neither separator. For a
 * list such as "0,2-7" this is 7.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t get_max_cpus(char *cpulist)
{
	char *last_comma, *last_dash;
	char *num = cpulist;
	size_t cpus = 0;

	last_comma = strrchr(cpulist, ',');
	last_dash = strrchr(cpulist, '-');

	/* Pick whichever separator comes last. Pointers are only compared
	 * with each other once both are known to be non-NULL.
	 */
	if (last_comma && (!last_dash || last_comma > last_dash))
		num = last_comma + 1;
	else if (last_dash)
		num = last_dash + 1;

	errno = 0;
	cpus = strtoul(num, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
373
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Initialize "cpuset.cpus" of the cgroup directory @path from the
 * "cpuset.cpus" of its parent directory, leaving out isolated cpus.
 *
 * @path           cgroup directory; it must contain a '/'. It is briefly
 *                 truncated at its last '/' to build the parent's file name
 *                 and is restored before any other work is done.
 * @am_initialized whether the cgroup was already set up by someone else. In
 *                 that case nothing is written unless isolated cpus have to be
 *                 removed.
 *
 * The parent's cpu list is read and compared with the list of isolated cpus in
 * __ISOL_CPUS:
 * - If there are no isolated cpus (file missing or not starting with a digit)
 *   the parent's list is copied verbatim, but only if !@am_initialized.
 * - If isolated cpus are part of the parent's list they are removed and the
 *   remaining cpus are written to @path/cpuset.cpus.
 * - If isolated cpus exist but none of them is in the parent's list nothing is
 *   written.
 *   NOTE(review): in that last case nothing is written even when
 *   !@am_initialized - confirm this is intended.
 *
 * Returns true on success, false on any failure.
 */
static bool cg_legacy_filter_and_set_cpus(char *path, bool am_initialized)
{
	int ret;
	ssize_t i;
	char *lastslash, *fpath, oldv;
	ssize_t maxisol = 0, maxposs = 0;
	char *cpulist = NULL, *isolcpus = NULL, *posscpus = NULL;
	uint32_t *isolmask = NULL, *possmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) {
		ERROR("Failed to detect \"/\" in \"%s\"", path);
		return bret;
	}

	/* Cut off the last path component just long enough to build the path
	 * of the parent's cpuset.cpus file. Restoring @path right away
	 * guarantees the caller gets it back intact on every exit path.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	*lastslash = oldv;

	posscpus = read_file(fpath);
	if (!posscpus) {
		SYSERROR("Failed to read file \"%s\"", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = get_max_cpus(posscpus);
	if (maxposs < 0)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		DEBUG("The path \""__ISOL_CPUS"\" to read isolated cpus from does not exist");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			DEBUG("Copying cpu settings of parent cgroup");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		SYSERROR("Failed to read file \""__ISOL_CPUS"\"");
		goto on_error;
	}

	/* <ctype.h> functions require an unsigned char value. */
	if (!isdigit((unsigned char)isolcpus[0])) {
		TRACE("No isolated cpus detected");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			DEBUG("Copying cpu settings of parent cgroup");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = get_max_cpus(isolcpus);
	if (maxisol < 0)
		goto on_error;

	/* Both masks are sized for the highest cpu number seen in either
	 * list; + 1 turns that number into a bit count.
	 */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = lxc_cpumask(posscpus, maxposs);
	if (!possmask) {
		ERROR("Failed to create cpumask for possible cpus");
		goto on_error;
	}

	isolmask = lxc_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		ERROR("Failed to create cpumask for isolated cpus");
		goto on_error;
	}

	/* The masks hold bits 0 .. maxposs - 1; do not touch bit maxposs. */
	for (i = 0; i < maxposs; i++) {
		if (!is_set(i, isolmask) || !is_set(i, possmask))
			continue;

		flipped_bit = true;
		clear_bit(i, possmask);
	}

	if (!flipped_bit) {
		DEBUG("No isolated cpus present in cpuset");
		goto on_success;
	}
	DEBUG("Removed isolated cpus from cpuset");

	cpulist = lxc_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		ERROR("Failed to create cpu list");
		goto on_error;
	}

copy_parent:
	free(fpath);
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0666);
	if (ret < 0) {
		SYSERROR("Failed to write cpu list to \"%s\"", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	free(fpath);

	free(isolcpus);
	free(isolmask);

	/* @cpulist may alias @posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
511
e3a3fecf
SH
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path must contain a '/'. It is briefly truncated at its last '/' to build
 * the parent's file name and restored immediately afterwards, so the caller
 * gets it back unchanged on success and on failure.
 *
 * Returns true if the value was read from the parent and written to
 * @path/@file, false otherwise (including an empty parent file).
 */
static bool copy_parent_file(char *path, char *file)
{
	int ret;
	char *fpath, *lastslash, oldv;
	int len = 0;
	char *value = NULL;

	lastslash = strrchr(path, '/');
	if (!lastslash) {
		ERROR("Failed to detect \"/\" in \"%s\"", path);
		return false;
	}

	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* First call only determines the length of the parent's value. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto on_error;

	value = must_alloc(len + 1);
	ret = lxc_read_from_file(fpath, value, len);
	if (ret != len)
		goto on_error;
	/* Terminate the buffer: it is printed with "%s" below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0666);
	if (ret < 0)
		SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

on_error:
	SYSERROR("Failed to read file \"%s\"", fpath);
	free(fpath);
	free(value);
	return false;
}
553
7793add3
CB
554/* Initialize the cpuset hierarchy in first directory of @gname and set
555 * cgroup.clone_children so that children inherit settings. Since the
556 * h->base_path is populated by init or ourselves, we know it is already
557 * initialized.
e3a3fecf 558 */
a3926f6a 559static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
e3a3fecf 560{
7793add3
CB
561 int ret;
562 char v;
563 char *cgpath, *clonechildrenpath, *slash;
e3a3fecf
SH
564
565 if (!string_in_list(h->controllers, "cpuset"))
566 return true;
567
568 if (*cgname == '/')
569 cgname++;
570 slash = strchr(cgname, '/');
571 if (slash)
572 *slash = '\0';
573
574 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
575 if (slash)
576 *slash = '/';
7793add3
CB
577
578 ret = mkdir(cgpath, 0755);
579 if (ret < 0) {
580 if (errno != EEXIST) {
581 SYSERROR("Failed to create directory \"%s\"", cgpath);
582 free(cgpath);
583 return false;
584 }
e3a3fecf 585 }
6f9584d8 586
7793add3
CB
587 clonechildrenpath =
588 must_make_path(cgpath, "cgroup.clone_children", NULL);
6328fd9c
CB
589 /* unified hierarchy doesn't have clone_children */
590 if (!file_exists(clonechildrenpath)) {
e3a3fecf
SH
591 free(clonechildrenpath);
592 free(cgpath);
593 return true;
594 }
7793add3
CB
595
596 ret = lxc_read_from_file(clonechildrenpath, &v, 1);
597 if (ret < 0) {
598 SYSERROR("Failed to read file \"%s\"", clonechildrenpath);
e3a3fecf
SH
599 free(clonechildrenpath);
600 free(cgpath);
601 return false;
602 }
603
a54694f8 604 /* Make sure any isolated cpus are removed from cpuset.cpus. */
a3926f6a 605 if (!cg_legacy_filter_and_set_cpus(cgpath, v == '1')) {
7793add3 606 SYSERROR("Failed to remove isolated cpus");
6f9584d8
CB
607 free(clonechildrenpath);
608 free(cgpath);
a54694f8 609 return false;
6f9584d8 610 }
a54694f8 611
7793add3
CB
612 /* Already set for us by someone else. */
613 if (v == '1') {
614 DEBUG("\"cgroup.clone_children\" was already set to \"1\"");
e3a3fecf
SH
615 free(clonechildrenpath);
616 free(cgpath);
617 return true;
618 }
619
620 /* copy parent's settings */
a54694f8 621 if (!copy_parent_file(cgpath, "cpuset.mems")) {
7793add3 622 SYSERROR("Failed to copy \"cpuset.mems\" settings");
e3a3fecf
SH
623 free(cgpath);
624 free(clonechildrenpath);
625 return false;
626 }
627 free(cgpath);
628
7cea5905 629 ret = lxc_write_to_file(clonechildrenpath, "1", 1, false, 0666);
7793add3 630 if (ret < 0) {
e3a3fecf 631 /* Set clone_children so children inherit our settings */
7793add3 632 SYSERROR("Failed to write 1 to \"%s\"", clonechildrenpath);
e3a3fecf
SH
633 free(clonechildrenpath);
634 return false;
635 }
636 free(clonechildrenpath);
637 return true;
638}
639
5c0089ae
CB
/* Return true if the null-terminated string lists @l1 and @l2 have at least
 * one string in common. A NULL list intersects with nothing.
 */
static bool controller_lists_intersect(char **l1, char **l2)
{
	char **cur;

	if (l1 && l2) {
		for (cur = l1; *cur; cur++) {
			if (string_in_list(l2, *cur))
				return true;
		}
	}

	return false;
}
657
258449e5
CB
658/* For a null-terminated list of controllers @clist, return true if any of those
659 * controllers is already listed the null-terminated list of hierarchies @hlist.
660 * Realistically, if one is present, all must be present.
ccb4cabe
SH
661 */
662static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
663{
664 int i;
665
666 if (!hlist)
667 return false;
258449e5 668
ccb4cabe
SH
669 for (i = 0; hlist[i]; i++)
670 if (controller_lists_intersect(hlist[i]->controllers, clist))
671 return true;
ccb4cabe 672
258449e5 673 return false;
ccb4cabe
SH
674}
675
f57ac67f
CB
676/* Return true if the controller @entry is found in the null-terminated list of
677 * hierarchies @hlist.
ccb4cabe
SH
678 */
679static bool controller_found(struct hierarchy **hlist, char *entry)
680{
681 int i;
d6337a5f 682
ccb4cabe
SH
683 if (!hlist)
684 return false;
685
686 for (i = 0; hlist[i]; i++)
687 if (string_in_list(hlist[i]->controllers, entry))
688 return true;
d6337a5f 689
ccb4cabe
SH
690 return false;
691}
692
e1c27ab0
CB
693/* Return true if all of the controllers which we require have been found. The
694 * required list is freezer and anything in lxc.cgroup.use.
ccb4cabe 695 */
2202afc9 696static bool all_controllers_found(struct cgroup_ops *ops)
ccb4cabe 697{
b7b18fc5 698 char **cur;
2202afc9 699 struct hierarchy **hlist = ops->hierarchies;
ccb4cabe 700
ccb4cabe 701 if (!controller_found(hlist, "freezer")) {
2202afc9 702 ERROR("No freezer controller mountpoint found");
ccb4cabe
SH
703 return false;
704 }
705
2202afc9 706 if (!ops->cgroup_use)
ccb4cabe 707 return true;
c2712f64 708
b7b18fc5
CB
709 for (cur = ops->cgroup_use; cur && *cur; cur++)
710 if (!controller_found(hlist, *cur)) {
711 ERROR("No %s controller mountpoint found", *cur);
ccb4cabe
SH
712 return false;
713 }
c2712f64 714
ccb4cabe
SH
715 return true;
716}
717
f205f10c
CB
718/* Get the controllers from a mountinfo line There are other ways we could get
719 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
720 * could parse the mount options. But we simply assume that the mountpoint must
721 * be /sys/fs/cgroup/controller-list
ccb4cabe 722 */
a3926f6a
CB
723static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
724 int type)
ccb4cabe 725{
f205f10c
CB
726 /* The fourth field is /sys/fs/cgroup/comma-delimited-controller-list
727 * for legacy hierarchies.
728 */
ccb4cabe 729 int i;
411ac6d8 730 char *dup, *p2, *tok;
d6337a5f 731 char *p = line, *saveptr = NULL, *sep = ",";
411ac6d8 732 char **aret = NULL;
6328fd9c 733
ccb4cabe 734 for (i = 0; i < 4; i++) {
235f1815 735 p = strchr(p, ' ');
ccb4cabe
SH
736 if (!p)
737 return NULL;
738 p++;
739 }
a55f31bd 740
f205f10c
CB
741 /* Note, if we change how mountinfo works, then our caller will need to
742 * verify /sys/fs/cgroup/ in this field.
743 */
744 if (strncmp(p, "/sys/fs/cgroup/", 15) != 0) {
2202afc9 745 ERROR("Found hierarchy not under /sys/fs/cgroup: \"%s\"", p);
ccb4cabe 746 return NULL;
5059aae9 747 }
d6337a5f 748
ccb4cabe 749 p += 15;
235f1815 750 p2 = strchr(p, ' ');
ccb4cabe 751 if (!p2) {
2202afc9 752 ERROR("Corrupt mountinfo");
ccb4cabe
SH
753 return NULL;
754 }
755 *p2 = '\0';
6328fd9c 756
d6337a5f
CB
757 if (type == CGROUP_SUPER_MAGIC) {
758 /* strdup() here for v1 hierarchies. Otherwise strtok_r() will
759 * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
760 */
761 dup = strdup(p);
762 if (!dup)
763 return NULL;
764
765 for (tok = strtok_r(dup, sep, &saveptr); tok;
766 tok = strtok_r(NULL, sep, &saveptr))
767 must_append_controller(klist, nlist, &aret, tok);
768
769 free(dup);
411ac6d8 770 }
d6337a5f 771 *p2 = ' ';
f205f10c 772
d6337a5f
CB
773 return aret;
774}
411ac6d8 775
d6337a5f
CB
/* Create a controller list without any controllers: a newly allocated array
 * whose first entry is already NULL.
 */
static char **cg_unified_make_empty_controller(void)
{
	char **empty = NULL;
	int slot = append_null_to_list((void ***)&empty);

	empty[slot] = NULL;
	return empty;
}
785
/* Read @file and split its contents at spaces, tabs and newlines into a newly
 * allocated, null-terminated list of controller names.
 *
 * Returns NULL if @file cannot be read or contains no token.
 */
static char **cg_unified_get_controllers(const char *file)
{
	char *buf, *tok;
	char *saveptr = NULL, *sep = " \t\n";
	char **aret = NULL;

	buf = read_file(file);
	if (!buf)
		return NULL;

	tok = strtok_r(buf, sep, &saveptr);
	while (tok) {
		int slot = append_null_to_list((void ***)&aret);

		aret[slot] = must_copy_string(tok);
		tok = strtok_r(NULL, sep, &saveptr);
	}

	free(buf);
	return aret;
}
809
2202afc9 810static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
d6337a5f 811 char *base_cgroup, int type)
ccb4cabe
SH
812{
813 struct hierarchy *new;
814 int newentry;
815
816 new = must_alloc(sizeof(*new));
817 new->controllers = clist;
818 new->mountpoint = mountpoint;
819 new->base_cgroup = base_cgroup;
820 new->fullcgpath = NULL;
d6337a5f 821 new->version = type;
6328fd9c 822
2202afc9
CB
823 newentry = append_null_to_list((void ***)h);
824 (*h)[newentry] = new;
d6337a5f 825 return new;
ccb4cabe
SH
826}
827
798c3b33
CB
/* Get a copy of the mountpoint from @line, which is a line from
 * /proc/self/mountinfo.
 *
 * The mountpoint is the field following the fourth space and must start with
 * "/sys/fs/cgroup/". Note that @line is modified: the space terminating the
 * mountpoint field is overwritten with a NUL byte and not restored.
 *
 * Returns a newly allocated string, or NULL if @line does not match.
 */
static char *cg_hybrid_get_mountpoint(char *line)
{
	char *field = line, *end, *copy;
	size_t n;
	int skipped;

	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	if (strncmp(field, "/sys/fs/cgroup/", 15) != 0)
		return NULL;

	end = strchr(field + 15, ' ');
	if (!end)
		return NULL;
	*end = '\0';

	/* The field now ends at @end, so its length is the distance to it. */
	n = end - field;
	copy = must_alloc(n + 1);
	memcpy(copy, field, n);
	copy[n] = '\0';
	return copy;
}
859
/* Given a multi-line string @p, return a newly allocated, null-terminated copy
 * of everything up to (not including) the next '\n'. Returns NULL if @p
 * contains no '\n'.
 */
static char *copy_to_eol(char *p)
{
	char *newline, *copy;
	size_t n;

	newline = strchr(p, '\n');
	if (!newline)
		return NULL;

	n = newline - p;
	copy = must_alloc(n + 1);
	memcpy(copy, p, n);
	copy[n] = '\0';
	return copy;
}
875
bced39de
CB
/* cgline: pointer to character after the first ':' in a line in a \n-terminated
 * /proc/self/cgroup file. Check whether controller c is present.
 *
 * The controller list is the comma-separated text between @cgline and the next
 * ':'. Empty segments never match. Returns false if there is no ':'.
 */
static bool controller_in_clist(char *cgline, char *c)
{
	const char *seg, *stop, *end;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);

	/* Walk the comma-separated segments in [cgline, end) in place. */
	for (seg = cgline; seg < end; seg = stop + 1) {
		size_t seglen;

		stop = memchr(seg, ',', end - seg);
		if (!stop)
			stop = end;

		seglen = stop - seg;
		if (seglen > 0 && seglen == clen && memcmp(seg, c, clen) == 0)
			return true;
	}

	return false;
}
901
c3ef912e
CB
902/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
903 * @controller.
ccb4cabe 904 */
c3ef912e
CB
905static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller,
906 int type)
ccb4cabe
SH
907{
908 char *p = basecginfo;
6328fd9c 909
d6337a5f
CB
910 for (;;) {
911 bool is_cgv2_base_cgroup = false;
912
6328fd9c 913 /* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
d6337a5f
CB
914 if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
915 is_cgv2_base_cgroup = true;
ccb4cabe 916
235f1815 917 p = strchr(p, ':');
ccb4cabe
SH
918 if (!p)
919 return NULL;
920 p++;
d6337a5f
CB
921
922 if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
235f1815 923 p = strchr(p, ':');
ccb4cabe
SH
924 if (!p)
925 return NULL;
926 p++;
927 return copy_to_eol(p);
928 }
929
235f1815 930 p = strchr(p, '\n');
ccb4cabe
SH
931 if (!p)
932 return NULL;
933 p++;
934 }
935}
936
ccb4cabe
SH
/* Append a copy of @entry to the null-terminated string list *@list, keeping
 * the list null-terminated. Do not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
946
/* Parse /proc/self/cgroup and collect the subsystems found there.
 *
 * Controllers starting with "name=" are appended to @nlist, all others to
 * @klist. Lines without two ':' separators are skipped.
 *
 * Returns 0 on success, -1 if /proc/self/cgroup cannot be opened.
 */
static int get_existing_subsystems(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (!f)
		return -1;

	while (getline(&line, &len, f) != -1) {
		char *ctrls, *ctrls_end, *tok, *saveptr = NULL;

		/* The controller list sits between the first two ':'. */
		ctrls = strchr(line, ':');
		if (!ctrls)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		if (!ctrls_end)
			continue;
		*ctrls_end = '\0';

		/* If the kernel has cgroup v2 support, then /proc/self/cgroup
		 * contains an entry of the form:
		 *
		 *	0::/some/path
		 *
		 * In this case we use "cgroup2" as controller name.
		 */
		if (ctrls == ctrls_end) {
			must_append_string(klist, "cgroup2");
			continue;
		}

		tok = strtok_r(ctrls, ",", &saveptr);
		while (tok) {
			bool named = strncmp(tok, "name=", 5) == 0;

			must_append_string(named ? nlist : klist, tok);
			tok = strtok_r(NULL, ",", &saveptr);
		}
	}

	free(line);
	fclose(f);
	return 0;
}
993
/* Strip trailing newlines from @s in place. The first character of @s is never
 * removed, so a string consisting only of newlines is reduced to "\n".
 */
static void trim(char *s)
{
	size_t len = strlen(s);

	for (; len > 1 && s[len - 1] == '\n'; len--)
		s[len - 1] = '\0';
}
1002
2202afc9 1003static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
ccb4cabe
SH
1004{
1005 int i;
27d84737 1006 struct hierarchy **it;
41c33dbe 1007
2202afc9
CB
1008 if (!ops->hierarchies) {
1009 TRACE(" No hierarchies found");
ccb4cabe
SH
1010 return;
1011 }
27d84737 1012
2202afc9
CB
1013 TRACE(" Hierarchies:");
1014 for (i = 0, it = ops->hierarchies; it && *it; it++, i++) {
ccb4cabe 1015 int j;
27d84737
CB
1016 char **cit;
1017
2202afc9
CB
1018 TRACE(" %d: base_cgroup: %s", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
1019 TRACE(" mountpoint: %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
1020 TRACE(" controllers:");
a7b0cc4c 1021 for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
2202afc9 1022 TRACE(" %d: %s", j, *cit);
ccb4cabe
SH
1023 }
1024}
41c33dbe 1025
a3926f6a
CB
/* Log @basecginfo followed by the numbered kernel subsystems in @klist and the
 * numbered named subsystems in @nlist at TRACE level. Either list may be NULL.
 */
static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
					      char **nlist)
{
	int idx;

	TRACE("basecginfo is:");
	TRACE("%s", basecginfo);

	if (klist) {
		for (idx = 0; klist[idx]; idx++)
			TRACE("kernel subsystem %d: %s", idx, klist[idx]);
	}

	if (nlist) {
		for (idx = 0; nlist[idx]; idx++)
			TRACE("named subsystem %d: %s", idx, nlist[idx]);
	}
}
ccb4cabe 1041
2202afc9
CB
1042static int cgroup_rmdir(struct hierarchy **hierarchies,
1043 const char *container_cgroup)
c71d83e1 1044{
2202afc9 1045 int i;
d6337a5f 1046
2202afc9
CB
1047 if (!container_cgroup || !hierarchies)
1048 return 0;
d6337a5f 1049
2202afc9
CB
1050 for (i = 0; hierarchies[i]; i++) {
1051 int ret;
1052 struct hierarchy *h = hierarchies[i];
d6337a5f 1053
2202afc9
CB
1054 if (!h->fullcgpath)
1055 continue;
1056
1057 ret = recursive_destroy(h->fullcgpath);
1058 if (ret < 0)
1059 WARN("Failed to destroy \"%s\"", h->fullcgpath);
1060
1061 free(h->fullcgpath);
1062 h->fullcgpath = NULL;
1063 }
d6337a5f 1064
c71d83e1 1065 return 0;
d6337a5f
CB
1066}
1067
2202afc9
CB
/* Argument bundle handed to the callbacks that run via userns_exec_1(). */
struct generic_userns_exec_data {
	/* null-terminated list of hierarchies to operate on */
	struct hierarchy **hierarchies;
	/* name of the container's cgroup */
	const char *container_cgroup;
	/* container configuration; supplies the uid/gid to switch to */
	struct lxc_conf *conf;
	uid_t origuid; /* target uid in parent namespace */
	/* NOTE(review): no user of this member is visible in this part of the
	 * file - confirm its meaning against the callers.
	 */
	char *path;
};
d6337a5f 1075
2202afc9
CB
1076static int cgroup_rmdir_wrapper(void *data)
1077{
1078 int ret;
1079 struct generic_userns_exec_data *arg = data;
1080 uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
1081 gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
d6337a5f 1082
2202afc9
CB
1083 ret = setresgid(nsgid, nsgid, nsgid);
1084 if (ret < 0) {
1085 SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
1086 (int)nsgid, (int)nsgid);
1087 return -1;
1088 }
d6337a5f 1089
2202afc9
CB
1090 ret = setresuid(nsuid, nsuid, nsuid);
1091 if (ret < 0) {
1092 SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
1093 (int)nsuid, (int)nsuid);
1094 return -1;
1095 }
d6337a5f 1096
2202afc9
CB
1097 ret = setgroups(0, NULL);
1098 if (ret < 0 && errno != EPERM) {
1099 SYSERROR("Failed to setgroups(0, NULL)");
1100 return -1;
1101 }
d6337a5f 1102
2202afc9 1103 return cgroup_rmdir(arg->hierarchies, arg->container_cgroup);
d6337a5f
CB
1104}
1105
2202afc9 1106static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler)
d6337a5f
CB
1107{
1108 int ret;
2202afc9 1109 struct generic_userns_exec_data wrap;
bd8ef4e4 1110
4160c3a0 1111 wrap.origuid = 0;
2202afc9
CB
1112 wrap.container_cgroup = ops->container_cgroup;
1113 wrap.hierarchies = ops->hierarchies;
1114 wrap.conf = handler->conf;
4160c3a0 1115
2202afc9
CB
1116 if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
1117 ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
bd8ef4e4 1118 "cgroup_rmdir_wrapper");
ccb4cabe 1119 else
2202afc9 1120 ret = cgroup_rmdir(ops->hierarchies, ops->container_cgroup);
bd8ef4e4
CB
1121 if (ret < 0) {
1122 WARN("Failed to destroy cgroups");
ccb4cabe 1123 return;
ccb4cabe 1124 }
ccb4cabe
SH
1125}
1126
a3926f6a 1127static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
0c3deb94 1128{
0c3deb94 1129 size_t i, parts_len;
389d44ec 1130 char **it;
0c3deb94
CB
1131 size_t full_len = 0;
1132 char *add_controllers = NULL, *cgroup = NULL;
1133 char **parts = NULL;
1134 bool bret = false;
1135
1136 if (h->version != CGROUP2_SUPER_MAGIC)
1137 return true;
1138
1139 if (!h->controllers)
1140 return true;
1141
1142 /* For now we simply enable all controllers that we have detected by
1143 * creating a string like "+memory +pids +cpu +io".
1144 * TODO: In the near future we might want to support "-<controller>"
1145 * etc. but whether supporting semantics like this make sense will need
1146 * some thinking.
1147 */
1148 for (it = h->controllers; it && *it; it++) {
64e82f8b
DJ
1149 full_len += strlen(*it) + 2;
1150 add_controllers = must_realloc(add_controllers, full_len + 1);
1151
1152 if (h->controllers[0] == *it)
1153 add_controllers[0] = '\0';
1154
3ebe2fbd
DJ
1155 (void)strlcat(add_controllers, "+", full_len + 1);
1156 (void)strlcat(add_controllers, *it, full_len + 1);
64e82f8b
DJ
1157
1158 if ((it + 1) && *(it + 1))
3ebe2fbd 1159 (void)strlcat(add_controllers, " ", full_len + 1);
0c3deb94
CB
1160 }
1161
1162 parts = lxc_string_split(cgname, '/');
1163 if (!parts)
1164 goto on_error;
64e82f8b 1165
0c3deb94
CB
1166 parts_len = lxc_array_len((void **)parts);
1167 if (parts_len > 0)
1168 parts_len--;
1169
1170 cgroup = must_make_path(h->mountpoint, h->base_cgroup, NULL);
1171 for (i = 0; i < parts_len; i++) {
1172 int ret;
1173 char *target;
1174
1175 cgroup = must_append_path(cgroup, parts[i], NULL);
1176 target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
7cea5905 1177 ret = lxc_write_to_file(target, add_controllers, full_len, false, 0666);
0c3deb94
CB
1178 free(target);
1179 if (ret < 0) {
1180 SYSERROR("Could not enable \"%s\" controllers in the "
1181 "unified cgroup \"%s\"", add_controllers, cgroup);
1182 goto on_error;
1183 }
1184 }
1185
1186 bret = true;
1187
1188on_error:
1189 lxc_free_array((void **)parts, free);
1190 free(add_controllers);
1191 free(cgroup);
1192 return bret;
1193}
1194
ccb4cabe
SH
1195static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
1196{
0c3deb94
CB
1197 int ret;
1198
e3a3fecf 1199 h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
4b4205e3
CB
1200 if (dir_exists(h->fullcgpath)) {
1201 ERROR("The cgroup \"%s\" already existed", h->fullcgpath);
d8da679e 1202 return false;
6f9584d8 1203 }
0c3deb94 1204
a3926f6a 1205 if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
4b4205e3 1206 ERROR("Failed to handle legacy cpuset controller");
0c3deb94
CB
1207 return false;
1208 }
1209
1210 ret = mkdir_p(h->fullcgpath, 0755);
1211 if (ret < 0) {
1212 ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
e3a3fecf 1213 return false;
6f9584d8 1214 }
0c3deb94 1215
a3926f6a 1216 return cg_unified_create_cgroup(h, cgname);
ccb4cabe
SH
1217}
1218
1219static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
1220{
e56639fb
CB
1221 int ret;
1222
1223 ret = rmdir(h->fullcgpath);
1224 if (ret < 0)
1225 SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", h->fullcgpath);
1226
ccb4cabe
SH
1227 free(h->fullcgpath);
1228 h->fullcgpath = NULL;
1229}
1230
cecad0c1
CB
1231/* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
1232 * next cgroup_pattern-1, -2, ..., -999.
ccb4cabe 1233 */
2202afc9
CB
1234static inline bool cgfsng_create(struct cgroup_ops *ops,
1235 struct lxc_handler *handler)
ccb4cabe 1236{
bb30b52a 1237 int i;
ccb4cabe 1238 size_t len;
0c3deb94 1239 char *container_cgroup, *offset, *tmp;
7d531e9b 1240 int idx = 0;
2202afc9 1241 struct lxc_conf *conf = handler->conf;
ccb4cabe 1242
2202afc9
CB
1243 if (ops->container_cgroup) {
1244 WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
ccb4cabe 1245 return false;
2202afc9 1246 }
43654d34 1247
2202afc9 1248 if (!conf)
ccb4cabe 1249 return false;
ccb4cabe 1250
2202afc9 1251 if (conf->cgroup_meta.dir)
3ec12d39 1252 tmp = lxc_string_join("/", (const char *[]){conf->cgroup_meta.dir, handler->name, NULL}, false);
43654d34 1253 else
2202afc9 1254 tmp = lxc_string_replace("%n", handler->name, ops->cgroup_pattern);
ccb4cabe
SH
1255 if (!tmp) {
1256 ERROR("Failed expanding cgroup name pattern");
1257 return false;
1258 }
64e82f8b 1259
1a0e70ac 1260 len = strlen(tmp) + 5; /* leave room for -NNN\0 */
0c3deb94 1261 container_cgroup = must_alloc(len);
64e82f8b 1262 (void)strlcpy(container_cgroup, tmp, len);
ccb4cabe 1263 free(tmp);
0c3deb94 1264 offset = container_cgroup + len - 5;
ccb4cabe
SH
1265
1266again:
95adfe93
SH
1267 if (idx == 1000) {
1268 ERROR("Too many conflicting cgroup names");
ccb4cabe 1269 goto out_free;
95adfe93 1270 }
cecad0c1 1271
66b66624 1272 if (idx) {
bb30b52a
CB
1273 int ret;
1274
66b66624
CB
1275 ret = snprintf(offset, 5, "-%d", idx);
1276 if (ret < 0 || (size_t)ret >= 5) {
1277 FILE *f = fopen("/dev/null", "w");
97ebced3 1278 if (f) {
66b66624
CB
1279 fprintf(f, "Workaround for GCC7 bug: "
1280 "https://gcc.gnu.org/bugzilla/"
1281 "show_bug.cgi?id=78969");
1282 fclose(f);
1283 }
1284 }
1285 }
cecad0c1 1286
2202afc9
CB
1287 for (i = 0; ops->hierarchies[i]; i++) {
1288 if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
ccb4cabe 1289 int j;
2202afc9
CB
1290 ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath);
1291 free(ops->hierarchies[i]->fullcgpath);
1292 ops->hierarchies[i]->fullcgpath = NULL;
ccb4cabe 1293 for (j = 0; j < i; j++)
2202afc9 1294 remove_path_for_hierarchy(ops->hierarchies[j], container_cgroup);
ccb4cabe
SH
1295 idx++;
1296 goto again;
1297 }
1298 }
cecad0c1 1299
2202afc9 1300 ops->container_cgroup = container_cgroup;
cecad0c1 1301
ccb4cabe
SH
1302 return true;
1303
1304out_free:
0c3deb94 1305 free(container_cgroup);
cecad0c1 1306
ccb4cabe
SH
1307 return false;
1308}
1309
2202afc9 1310static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid)
ccb4cabe 1311{
ccb4cabe 1312 int i, len;
08768001 1313 char pidstr[25];
ccb4cabe
SH
1314
1315 len = snprintf(pidstr, 25, "%d", pid);
08768001 1316 if (len < 0 || len >= 25)
ccb4cabe
SH
1317 return false;
1318
2202afc9 1319 for (i = 0; ops->hierarchies[i]; i++) {
08768001
CB
1320 int ret;
1321 char *fullpath;
1322
2202afc9 1323 fullpath = must_make_path(ops->hierarchies[i]->fullcgpath,
08768001 1324 "cgroup.procs", NULL);
7cea5905 1325 ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
08768001
CB
1326 if (ret != 0) {
1327 SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
ccb4cabe
SH
1328 free(fullpath);
1329 return false;
1330 }
1331 free(fullpath);
1332 }
1333
1334 return true;
1335}
1336
/* chown() @path to @chown_uid:@chown_gid and then chmod() it to
 * @chmod_mode. The chmod() is skipped if the chown() fails.
 *
 * Returns 0 on success and -1 (after logging a warning) on failure.
 */
static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
		   mode_t chmod_mode)
{
	if (chown(path, chown_uid, chown_gid) < 0) {
		SYSWARN("Failed to chown(%s, %d, %d)", path, (int)chown_uid, (int)chown_gid);
		return -1;
	}

	if (chmod(path, chmod_mode) < 0) {
		SYSWARN("Failed to chmod(%s, %d)", path, (int)chmod_mode);
		return -1;
	}

	return 0;
}
1356
1357/* chgrp the container cgroups to container group. We leave
c0888dfe
SH
1358 * the container owner as cgroup owner. So we must make the
1359 * directories 775 so that the container can create sub-cgroups.
43647298
SH
1360 *
1361 * Also chown the tasks and cgroup.procs files. Those may not
1362 * exist depending on kernel version.
c0888dfe 1363 */
ccb4cabe
SH
1364static int chown_cgroup_wrapper(void *data)
1365{
6efacf80 1366 int i, ret;
4160c3a0
CB
1367 uid_t destuid;
1368 struct generic_userns_exec_data *arg = data;
1369 uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
1370 gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
ccb4cabe 1371
6efacf80
CB
1372 ret = setresgid(nsgid, nsgid, nsgid);
1373 if (ret < 0) {
1374 SYSERROR("Failed to setresgid(%d, %d, %d)",
1375 (int)nsgid, (int)nsgid, (int)nsgid);
1376 return -1;
1377 }
1378
1379 ret = setresuid(nsuid, nsuid, nsuid);
1380 if (ret < 0) {
1381 SYSERROR("Failed to setresuid(%d, %d, %d)",
1382 (int)nsuid, (int)nsuid, (int)nsuid);
1383 return -1;
1384 }
1385
1386 ret = setgroups(0, NULL);
1387 if (ret < 0 && errno != EPERM) {
1388 SYSERROR("Failed to setgroups(0, NULL)");
1389 return -1;
1390 }
ccb4cabe
SH
1391
1392 destuid = get_ns_uid(arg->origuid);
1393
2202afc9 1394 for (i = 0; arg->hierarchies[i]; i++) {
6efacf80 1395 char *fullpath;
2202afc9 1396 char *path = arg->hierarchies[i]->fullcgpath;
43647298 1397
63e42fee 1398 ret = chowmod(path, destuid, nsgid, 0775);
6efacf80 1399 if (ret < 0)
ccb4cabe 1400 return -1;
c0888dfe 1401
6efacf80
CB
1402 /* Failures to chown() these are inconvenient but not
1403 * detrimental We leave these owned by the container launcher,
1404 * so that container root can write to the files to attach. We
1405 * chmod() them 664 so that container systemd can write to the
1406 * files (which systemd in wily insists on doing).
ab8f5424 1407 */
6efacf80 1408
2202afc9 1409 if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
6efacf80
CB
1410 fullpath = must_make_path(path, "tasks", NULL);
1411 (void)chowmod(fullpath, destuid, nsgid, 0664);
1412 free(fullpath);
1413 }
43647298
SH
1414
1415 fullpath = must_make_path(path, "cgroup.procs", NULL);
2202afc9 1416 (void)chowmod(fullpath, destuid, nsgid, 0664);
ccb4cabe 1417 free(fullpath);
0e17357c 1418
2202afc9 1419 if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
0e17357c
CB
1420 continue;
1421
1422 fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
6efacf80 1423 (void)chowmod(fullpath, destuid, nsgid, 0664);
0e17357c
CB
1424 free(fullpath);
1425
1426 fullpath = must_make_path(path, "cgroup.threads", NULL);
6efacf80 1427 (void)chowmod(fullpath, destuid, nsgid, 0664);
0e17357c 1428 free(fullpath);
ccb4cabe
SH
1429 }
1430
1431 return 0;
1432}
1433
2202afc9 1434static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf)
ccb4cabe 1435{
4160c3a0 1436 struct generic_userns_exec_data wrap;
ccb4cabe 1437
ccb4cabe
SH
1438 if (lxc_list_empty(&conf->id_map))
1439 return true;
1440
ccb4cabe 1441 wrap.origuid = geteuid();
4160c3a0 1442 wrap.path = NULL;
2202afc9 1443 wrap.hierarchies = ops->hierarchies;
4160c3a0 1444 wrap.conf = conf;
ccb4cabe 1445
c9b7c33e
CB
1446 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
1447 "chown_cgroup_wrapper") < 0) {
f7faba6c 1448 ERROR("Error requesting cgroup chown in new user namespace");
ccb4cabe
SH
1449 return false;
1450 }
1451
1452 return true;
1453}
1454
8aa1044f
SH
1455/* cgroup-full:* is done, no need to create subdirs */
1456static bool cg_mount_needs_subdirs(int type)
1457{
1458 if (type >= LXC_AUTO_CGROUP_FULL_RO)
1459 return false;
a3926f6a 1460
8aa1044f
SH
1461 return true;
1462}
1463
886cac86
CB
1464/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
1465 * remount controller ro if needed and bindmount the cgroupfs onto
1466 * controll/the/cg/path.
8aa1044f 1467 */
6812d833
CB
1468static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
1469 char *controllerpath, char *cgpath,
1470 const char *container_cgroup)
8aa1044f 1471{
5285689c 1472 int ret, remount_flags;
886cac86
CB
1473 char *sourcepath;
1474 int flags = MS_BIND;
1475
8aa1044f 1476 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
886cac86
CB
1477 ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
1478 if (ret < 0) {
1479 SYSERROR("Failed to bind mount \"%s\" onto \"%s\"",
1480 controllerpath, controllerpath);
8aa1044f
SH
1481 return -1;
1482 }
886cac86 1483
5285689c
CB
1484 remount_flags = add_required_remount_flags(controllerpath,
1485 controllerpath,
1486 flags | MS_REMOUNT);
886cac86 1487 ret = mount(controllerpath, controllerpath, "cgroup",
8186c5c7
CB
1488 remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY,
1489 NULL);
886cac86
CB
1490 if (ret < 0) {
1491 SYSERROR("Failed to remount \"%s\" ro", controllerpath);
8aa1044f
SH
1492 return -1;
1493 }
886cac86 1494
8aa1044f
SH
1495 INFO("Remounted %s read-only", controllerpath);
1496 }
886cac86
CB
1497
1498 sourcepath = must_make_path(h->mountpoint, h->base_cgroup,
1499 container_cgroup, NULL);
8aa1044f
SH
1500 if (type == LXC_AUTO_CGROUP_RO)
1501 flags |= MS_RDONLY;
886cac86
CB
1502
1503 ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
1504 if (ret < 0) {
1505 SYSERROR("Failed to mount \"%s\" onto \"%s\"", h->controllers[0], cgpath);
8aa1044f 1506 free(sourcepath);
8aa1044f
SH
1507 return -1;
1508 }
886cac86 1509 INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
f8c40ffa
L
1510
1511 if (flags & MS_RDONLY) {
5285689c
CB
1512 remount_flags = add_required_remount_flags(sourcepath, cgpath,
1513 flags | MS_REMOUNT);
1514 ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
886cac86
CB
1515 if (ret < 0) {
1516 SYSERROR("Failed to remount \"%s\" ro", cgpath);
f8c40ffa 1517 free(sourcepath);
f8c40ffa
L
1518 return -1;
1519 }
5285689c 1520 INFO("Remounted %s read-only", cgpath);
f8c40ffa
L
1521 }
1522
8aa1044f 1523 free(sourcepath);
886cac86 1524 INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
8aa1044f
SH
1525 return 0;
1526}
1527
6812d833
CB
1528/* __cg_mount_direct
1529 *
1530 * Mount cgroup hierarchies directly without using bind-mounts. The main
1531 * uses-cases are mounting cgroup hierarchies in cgroup namespaces and mounting
1532 * cgroups for the LXC_AUTO_CGROUP_FULL option.
1533 */
1534static int __cg_mount_direct(int type, struct hierarchy *h,
1535 const char *controllerpath)
b635e92d
CB
1536{
1537 int ret;
1538 char *controllers = NULL;
a760603e
CB
1539 char *fstype = "cgroup2";
1540 unsigned long flags = 0;
b635e92d 1541
a760603e
CB
1542 flags |= MS_NOSUID;
1543 flags |= MS_NOEXEC;
1544 flags |= MS_NODEV;
1545 flags |= MS_RELATIME;
1546
1547 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
1548 flags |= MS_RDONLY;
1549
d6337a5f 1550 if (h->version != CGROUP2_SUPER_MAGIC) {
a760603e
CB
1551 controllers = lxc_string_join(",", (const char **)h->controllers, false);
1552 if (!controllers)
1553 return -ENOMEM;
1554 fstype = "cgroup";
b635e92d
CB
1555 }
1556
a760603e 1557 ret = mount("cgroup", controllerpath, fstype, flags, controllers);
b635e92d
CB
1558 free(controllers);
1559 if (ret < 0) {
6812d833 1560 SYSERROR("Failed to mount \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
b635e92d
CB
1561 return -1;
1562 }
1563
6812d833 1564 DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
b635e92d
CB
1565 return 0;
1566}
1567
/* Mount hierarchy @h onto @controllerpath for a container that lives in a
 * cgroup namespace. This is a plain direct mount for every @type.
 */
static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
					       const char *controllerpath)
{
	int ret = __cg_mount_direct(type, h, controllerpath);

	return ret;
}
1573
1574static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
1575 const char *controllerpath)
1576{
1577 if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
1578 return 0;
1579
1580 return __cg_mount_direct(type, h, controllerpath);
1581}
1582
2202afc9
CB
1583static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler,
1584 const char *root, int type)
ccb4cabe 1585{
3f69fb12 1586 int i, ret;
8aa1044f 1587 char *tmpfspath = NULL;
affd10fa 1588 bool has_cgns = false, retval = false, wants_force_mount = false;
8aa1044f
SH
1589
1590 if ((type & LXC_AUTO_CGROUP_MASK) == 0)
1591 return true;
1592
3f69fb12
SY
1593 if (type & LXC_AUTO_CGROUP_FORCE) {
1594 type &= ~LXC_AUTO_CGROUP_FORCE;
1595 wants_force_mount = true;
1596 }
b635e92d 1597
3f69fb12
SY
1598 if (!wants_force_mount){
1599 if (!lxc_list_empty(&handler->conf->keepcaps))
1600 wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
1601 else
1602 wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
1603 }
8aa1044f 1604
3f69fb12
SY
1605 has_cgns = cgns_supported();
1606 if (has_cgns && !wants_force_mount)
1607 return true;
8aa1044f
SH
1608
1609 if (type == LXC_AUTO_CGROUP_NOSPEC)
1610 type = LXC_AUTO_CGROUP_MIXED;
1611 else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
1612 type = LXC_AUTO_CGROUP_FULL_MIXED;
1613
1614 /* Mount tmpfs */
3f69fb12 1615 tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
6812d833 1616 ret = safe_mount(NULL, tmpfspath, "tmpfs",
3f69fb12
SY
1617 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
1618 "size=10240k,mode=755", root);
1619 if (ret < 0)
1620 goto on_error;
8aa1044f 1621
2202afc9 1622 for (i = 0; ops->hierarchies[i]; i++) {
8aa1044f 1623 char *controllerpath, *path2;
2202afc9 1624 struct hierarchy *h = ops->hierarchies[i];
8aa1044f 1625 char *controller = strrchr(h->mountpoint, '/');
8aa1044f
SH
1626
1627 if (!controller)
1628 continue;
1629 controller++;
affd10fa 1630
8aa1044f
SH
1631 controllerpath = must_make_path(tmpfspath, controller, NULL);
1632 if (dir_exists(controllerpath)) {
1633 free(controllerpath);
1634 continue;
1635 }
affd10fa 1636
3f69fb12
SY
1637 ret = mkdir(controllerpath, 0755);
1638 if (ret < 0) {
8aa1044f
SH
1639 SYSERROR("Error creating cgroup path: %s", controllerpath);
1640 free(controllerpath);
3f69fb12 1641 goto on_error;
8aa1044f 1642 }
b635e92d 1643
3f69fb12 1644 if (has_cgns && wants_force_mount) {
b635e92d
CB
1645 /* If cgroup namespaces are supported but the container
1646 * will not have CAP_SYS_ADMIN after it has started we
1647 * need to mount the cgroups manually.
1648 */
3f69fb12 1649 ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
b635e92d 1650 free(controllerpath);
3f69fb12
SY
1651 if (ret < 0)
1652 goto on_error;
1653
b635e92d
CB
1654 continue;
1655 }
1656
6812d833 1657 ret = cg_mount_cgroup_full(type, h, controllerpath);
3f69fb12 1658 if (ret < 0) {
8aa1044f 1659 free(controllerpath);
3f69fb12 1660 goto on_error;
8aa1044f 1661 }
3f69fb12 1662
8aa1044f
SH
1663 if (!cg_mount_needs_subdirs(type)) {
1664 free(controllerpath);
1665 continue;
1666 }
3f69fb12
SY
1667
1668 path2 = must_make_path(controllerpath, h->base_cgroup,
2202afc9 1669 ops->container_cgroup, NULL);
3f69fb12
SY
1670 ret = mkdir_p(path2, 0755);
1671 if (ret < 0) {
8aa1044f 1672 free(controllerpath);
8e0c6620 1673 free(path2);
3f69fb12 1674 goto on_error;
8aa1044f 1675 }
2f62fb00 1676
6812d833 1677 ret = cg_legacy_mount_controllers(type, h, controllerpath,
2202afc9 1678 path2, ops->container_cgroup);
8aa1044f
SH
1679 free(controllerpath);
1680 free(path2);
3f69fb12
SY
1681 if (ret < 0)
1682 goto on_error;
8aa1044f
SH
1683 }
1684 retval = true;
1685
3f69fb12 1686on_error:
8aa1044f
SH
1687 free(tmpfspath);
1688 return retval;
ccb4cabe
SH
1689}
1690
/* Count the lines of the "cgroup.procs" files in @dirname and all of its
 * subdirectories.
 *
 * A directory that cannot be opened contributes 0. Entries that cannot be
 * lstat()ed or are not directories are skipped.
 */
static int recursive_count_nrtasks(char *dirname)
{
	struct dirent *entry;
	char *procs;
	int nlines, total = 0;
	DIR *dir;

	dir = opendir(dirname);
	if (!dir)
		return 0;

	while ((entry = readdir(dir))) {
		struct stat st;
		char *child;

		if (strcmp(entry->d_name, ".") == 0 ||
		    strcmp(entry->d_name, "..") == 0)
			continue;

		child = must_make_path(dirname, entry->d_name, NULL);
		if (lstat(child, &st) == 0 && S_ISDIR(st.st_mode))
			total += recursive_count_nrtasks(child);
		free(child);
	}

	procs = must_make_path(dirname, "cgroup.procs", NULL);
	nlines = lxc_count_file_lines(procs);
	if (nlines != -1)
		total += nlines;
	free(procs);

	(void)closedir(dir);

	return total;
}
1732
2202afc9 1733static int cgfsng_nrtasks(struct cgroup_ops *ops)
3135c5d4 1734{
ccb4cabe 1735 int count;
3135c5d4 1736 char *path;
ccb4cabe 1737
2202afc9 1738 if (!ops->container_cgroup || !ops->hierarchies)
ccb4cabe 1739 return -1;
a3926f6a 1740
2202afc9 1741 path = must_make_path(ops->hierarchies[0]->fullcgpath, NULL);
ccb4cabe
SH
1742 count = recursive_count_nrtasks(path);
1743 free(path);
1744 return count;
1745}
1746
11c23867 1747/* Only root needs to escape to the cgroup of its init. */
2202afc9 1748static bool cgfsng_escape(const struct cgroup_ops *ops)
ccb4cabe 1749{
ccb4cabe
SH
1750 int i;
1751
1752 if (geteuid())
1753 return true;
1754
2202afc9 1755 for (i = 0; ops->hierarchies[i]; i++) {
11c23867
CB
1756 int ret;
1757 char *fullpath;
1758
2202afc9
CB
1759 fullpath = must_make_path(ops->hierarchies[i]->mountpoint,
1760 ops->hierarchies[i]->base_cgroup,
11c23867 1761 "cgroup.procs", NULL);
7cea5905 1762 ret = lxc_write_to_file(fullpath, "0", 2, false, 0666);
11c23867
CB
1763 if (ret != 0) {
1764 SYSERROR("Failed to escape to cgroup \"%s\"", fullpath);
ccb4cabe 1765 free(fullpath);
6df334d1 1766 return false;
ccb4cabe
SH
1767 }
1768 free(fullpath);
1769 }
1770
6df334d1 1771 return true;
ccb4cabe
SH
1772}
1773
2202afc9 1774static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
36662416
TA
1775{
1776 int i;
1777
2202afc9 1778 for (i = 0; ops->hierarchies[i]; i++)
36662416
TA
1779 ;
1780
1781 return i;
1782}
1783
2202afc9 1784static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
36662416
TA
1785{
1786 int i;
1787
1788 /* sanity check n */
6b38e644 1789 for (i = 0; i < n; i++)
2202afc9 1790 if (!ops->hierarchies[i])
36662416 1791 return false;
36662416 1792
2202afc9 1793 *out = ops->hierarchies[i]->controllers;
36662416
TA
1794
1795 return true;
1796}
1797
ccb4cabe
SH
1798#define THAWED "THAWED"
1799#define THAWED_LEN (strlen(THAWED))
1800
d6337a5f
CB
1801/* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
1802 * to be adapted.
1803 */
2202afc9 1804static bool cgfsng_unfreeze(struct cgroup_ops *ops)
ccb4cabe 1805{
d6337a5f 1806 int ret;
ccb4cabe 1807 char *fullpath;
d6337a5f 1808 struct hierarchy *h;
ccb4cabe 1809
2202afc9 1810 h = get_hierarchy(ops, "freezer");
457ca9aa 1811 if (!h)
ccb4cabe 1812 return false;
d6337a5f 1813
ccb4cabe 1814 fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
7cea5905 1815 ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false, 0666);
ccb4cabe 1816 free(fullpath);
d6337a5f
CB
1817 if (ret < 0)
1818 return false;
1819
ccb4cabe
SH
1820 return true;
1821}
1822
2202afc9
CB
1823static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
1824 const char *controller)
ccb4cabe 1825{
d6337a5f
CB
1826 struct hierarchy *h;
1827
2202afc9 1828 h = get_hierarchy(ops, controller);
106f1f38 1829 if (!h) {
2202afc9
CB
1830 WARN("Failed to find hierarchy for controller \"%s\"",
1831 controller ? controller : "(null)");
ccb4cabe 1832 return NULL;
106f1f38 1833 }
ccb4cabe 1834
371f834d
SH
1835 return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
1836}
1837
c40c8209
CB
1838/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
1839 * which must be freed by the caller.
371f834d 1840 */
c40c8209
CB
1841static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
1842 const char *inpath,
1843 const char *filename)
371f834d 1844{
371f834d 1845 return must_make_path(h->mountpoint, inpath, filename, NULL);
ccb4cabe
SH
1846}
1847
25f66a8f
CB
1848/* Technically, we're always at a delegation boundary here (This is especially
1849 * true when cgroup namespaces are available.). The reasoning is that in order
c2aed66d 1850 * for us to have been able to start a container in the first place the root
25f66a8f 1851 * cgroup must have been a leaf node. Now, either the container's init system
c2aed66d
CB
1852 * has populated the cgroup and kept it as a leaf node or it has created
1853 * subtrees. In the former case we will simply attach to the leaf node we
1854 * created when we started the container in the latter case we create our own
1855 * cgroup for the attaching process.
1856 */
a3926f6a
CB
1857static int __cg_unified_attach(const struct hierarchy *h, const char *name,
1858 const char *lxcpath, const char *pidstr,
1859 size_t pidstr_len, const char *controller)
c2aed66d
CB
1860{
1861 int ret;
1862 size_t len;
1863 int fret = -1, idx = 0;
1864 char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
1865
1866 container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
1867 /* not running */
1868 if (!container_cgroup)
1869 return 0;
1870
1871 base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
1872 full_path = must_make_path(base_path, "cgroup.procs", NULL);
1873 /* cgroup is populated */
7cea5905 1874 ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false, 0666);
c2aed66d
CB
1875 if (ret < 0 && errno != EBUSY)
1876 goto on_error;
1877
1878 if (ret == 0)
1879 goto on_success;
1880
1881 free(full_path);
1882
1883 len = strlen(base_path) + sizeof("/lxc-1000") - 1 +
1884 sizeof("/cgroup-procs") - 1;
1885 full_path = must_alloc(len + 1);
1886 do {
1887 if (idx)
1888 ret = snprintf(full_path, len + 1, "%s/lxc-%d",
1889 base_path, idx);
1890 else
1891 ret = snprintf(full_path, len + 1, "%s/lxc", base_path);
1892 if (ret < 0 || (size_t)ret >= len + 1)
1893 goto on_error;
1894
1895 ret = mkdir_p(full_path, 0755);
1896 if (ret < 0 && errno != EEXIST)
1897 goto on_error;
1898
3ebe2fbd 1899 (void)strlcat(full_path, "/cgroup.procs", len + 1);
7cea5905 1900 ret = lxc_write_to_file(full_path, pidstr, len, false, 0666);
c2aed66d
CB
1901 if (ret == 0)
1902 goto on_success;
1903
1904 /* this is a non-leaf node */
1905 if (errno != EBUSY)
1906 goto on_error;
1907
1908 } while (++idx > 0 && idx < 1000);
1909
1910on_success:
1911 if (idx < 1000)
1912 fret = 0;
1913
1914on_error:
1915 free(base_path);
1916 free(container_cgroup);
1917 free(full_path);
1918
1919 return fret;
1920}
1921
2202afc9
CB
1922static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
1923 const char *lxcpath, pid_t pid)
ccb4cabe 1924{
c2aed66d 1925 int i, len, ret;
ccb4cabe 1926 char pidstr[25];
ccb4cabe
SH
1927
1928 len = snprintf(pidstr, 25, "%d", pid);
0cb10e11 1929 if (len < 0 || len >= 25)
ccb4cabe
SH
1930 return false;
1931
2202afc9 1932 for (i = 0; ops->hierarchies[i]; i++) {
c2aed66d
CB
1933 char *path;
1934 char *fullpath = NULL;
2202afc9 1935 struct hierarchy *h = ops->hierarchies[i];
ccb4cabe 1936
c2aed66d 1937 if (h->version == CGROUP2_SUPER_MAGIC) {
a3926f6a
CB
1938 ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
1939 h->controllers[0]);
c2aed66d
CB
1940 if (ret < 0)
1941 return false;
1942
1943 continue;
1944 }
1945
ccb4cabe 1946 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
c2aed66d
CB
1947 /* not running */
1948 if (!path)
ccb4cabe
SH
1949 continue;
1950
371f834d 1951 fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
71cb9afb 1952 free(path);
7cea5905 1953 ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
c2aed66d 1954 if (ret < 0) {
ccb4cabe
SH
1955 SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
1956 free(fullpath);
ccb4cabe
SH
1957 return false;
1958 }
ccb4cabe
SH
1959 free(fullpath);
1960 }
1961
ccb4cabe
SH
1962 return true;
1963}
1964
/* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits. Here we
 * don't have a cgroup_data set up, so we ask the running container through
 * the commands API for the cgroup path.
 *
 * The controller is the part of @filename in front of the first '.'. The
 * file's content is read into @value (at most @len bytes are requested).
 *
 * Returns the result of lxc_read_from_file(), or -1 when the container is
 * not running or no hierarchy carries the controller.
 */
static int cgfsng_get(struct cgroup_ops *ops, const char *filename, char *value,
		      size_t len, const char *name, const char *lxcpath)
{
	struct hierarchy *h;
	char *ctrl, *dot, *cgpath, *target;
	size_t bufsz = strlen(filename) + 1;
	int ret;

	/* work on a private copy so the controller name can be cut off */
	ctrl = alloca(bufsz);
	(void)strlcpy(ctrl, filename, bufsz);

	dot = strchr(ctrl, '.');
	if (dot)
		*dot = '\0';

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, ctrl);
	/* not running */
	if (!cgpath)
		return -1;

	h = get_hierarchy(ops, ctrl);
	if (!h) {
		free(cgpath);
		return -1;
	}

	target = build_full_cgpath_from_monitorpath(h, cgpath, filename);
	ret = lxc_read_from_file(target, value, len);
	free(target);
	free(cgpath);

	return ret;
}
2002
/* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here
 * we don't have a cgroup_data set up, so we ask the running container
 * through the commands API for the cgroup path.
 *
 * The controller is the part of @filename in front of the first '.'. The
 * string @value is written to the file without its terminating NUL byte.
 *
 * Returns the result of lxc_write_to_file(), or -1 when the container is
 * not running or no hierarchy carries the controller.
 */
static int cgfsng_set(struct cgroup_ops *ops, const char *filename,
		      const char *value, const char *name, const char *lxcpath)
{
	struct hierarchy *h;
	char *ctrl, *dot, *cgpath, *target;
	size_t bufsz = strlen(filename) + 1;
	int ret;

	/* work on a private copy so the controller name can be cut off */
	ctrl = alloca(bufsz);
	(void)strlcpy(ctrl, filename, bufsz);

	dot = strchr(ctrl, '.');
	if (dot)
		*dot = '\0';

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, ctrl);
	/* not running */
	if (!cgpath)
		return -1;

	h = get_hierarchy(ops, ctrl);
	if (!h) {
		free(cgpath);
		return -1;
	}

	target = build_full_cgpath_from_monitorpath(h, cgpath, filename);
	ret = lxc_write_to_file(target, value, strlen(value), false, 0666);
	free(target);
	free(cgpath);

	return ret;
}
2040
91d1a13a 2041/* take devices cgroup line
72add155
SH
2042 * /dev/foo rwx
2043 * and convert it to a valid
2044 * type major:minor mode
91d1a13a
CB
2045 * line. Return <0 on error. Dest is a preallocated buffer long enough to hold
2046 * the output.
72add155
SH
2047 */
2048static int convert_devpath(const char *invalue, char *dest)
2049{
2a06d041
CB
2050 int n_parts;
2051 char *p, *path, type;
72add155 2052 unsigned long minor, major;
91d1a13a 2053 struct stat sb;
2a06d041
CB
2054 int ret = -EINVAL;
2055 char *mode = NULL;
72add155
SH
2056
2057 path = must_copy_string(invalue);
2058
91d1a13a
CB
2059 /* Read path followed by mode. Ignore any trailing text.
2060 * A ' # comment' would be legal. Technically other text is not
2061 * legal, we could check for that if we cared to.
72add155
SH
2062 */
2063 for (n_parts = 1, p = path; *p && n_parts < 3; p++) {
2c2d6c49
SH
2064 if (*p != ' ')
2065 continue;
2066 *p = '\0';
91d1a13a 2067
2c2d6c49
SH
2068 if (n_parts != 1)
2069 break;
2070 p++;
2071 n_parts++;
91d1a13a 2072
2c2d6c49
SH
2073 while (*p == ' ')
2074 p++;
91d1a13a 2075
2c2d6c49 2076 mode = p;
91d1a13a 2077
2c2d6c49
SH
2078 if (*p == '\0')
2079 goto out;
72add155 2080 }
2c2d6c49
SH
2081
2082 if (n_parts == 1)
72add155 2083 goto out;
72add155
SH
2084
2085 ret = stat(path, &sb);
2086 if (ret < 0)
2087 goto out;
2088
72add155
SH
2089 mode_t m = sb.st_mode & S_IFMT;
2090 switch (m) {
2091 case S_IFBLK:
2092 type = 'b';
2093 break;
2094 case S_IFCHR:
2095 type = 'c';
2096 break;
2c2d6c49 2097 default:
91d1a13a 2098 ERROR("Unsupported device type %i for \"%s\"", m, path);
72add155
SH
2099 ret = -EINVAL;
2100 goto out;
2101 }
2c2d6c49
SH
2102
2103 major = MAJOR(sb.st_rdev);
2104 minor = MINOR(sb.st_rdev);
2105 ret = snprintf(dest, 50, "%c %lu:%lu %s", type, major, minor, mode);
72add155 2106 if (ret < 0 || ret >= 50) {
2a06d041
CB
2107 ERROR("Error on configuration value \"%c %lu:%lu %s\" (max 50 "
2108 "chars)", type, major, minor, mode);
72add155
SH
2109 ret = -ENAMETOOLONG;
2110 goto out;
2111 }
2112 ret = 0;
2113
2114out:
2115 free(path);
2116 return ret;
2117}
2118
90e97284
CB
2119/* Called from setup_limits - here we have the container's cgroup_data because
2120 * we created the cgroups.
ccb4cabe 2121 */
2202afc9
CB
2122static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
2123 const char *value)
ccb4cabe 2124{
ab1a6cac 2125 size_t len;
90e97284 2126 char *fullpath, *p;
1a0e70ac
CB
2127 /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
2128 char converted_value[50];
b3646d7e
CB
2129 struct hierarchy *h;
2130 int ret = 0;
2131 char *controller = NULL;
ccb4cabe 2132
ab1a6cac
CB
2133 len = strlen(filename);
2134 controller = alloca(len + 1);
64e82f8b
DJ
2135 (void)strlcpy(controller, filename, len + 1);
2136
ab1a6cac
CB
2137 p = strchr(controller, '.');
2138 if (p)
ccb4cabe
SH
2139 *p = '\0';
2140
c8bf519d 2141 if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
72add155
SH
2142 ret = convert_devpath(value, converted_value);
2143 if (ret < 0)
c8bf519d 2144 return ret;
72add155 2145 value = converted_value;
c8bf519d 2146 }
2147
2202afc9 2148 h = get_hierarchy(ops, controller);
b3646d7e
CB
2149 if (!h) {
2150 ERROR("Failed to setup limits for the \"%s\" controller. "
2151 "The controller seems to be unused by \"cgfsng\" cgroup "
2152 "driver or not enabled on the cgroup hierarchy",
2153 controller);
d1953b26 2154 errno = ENOENT;
ab1a6cac 2155 return -ENOENT;
ccb4cabe 2156 }
b3646d7e
CB
2157
2158 fullpath = must_make_path(h->fullcgpath, filename, NULL);
7cea5905 2159 ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
b3646d7e 2160 free(fullpath);
ccb4cabe
SH
2161 return ret;
2162}
2163
2202afc9 2164static bool __cg_legacy_setup_limits(struct cgroup_ops *ops,
a3926f6a
CB
2165 struct lxc_list *cgroup_settings,
2166 bool do_devices)
ccb4cabe 2167{
c347df58 2168 struct lxc_list *iterator, *next, *sorted_cgroup_settings;
ccb4cabe 2169 struct lxc_cgroup *cg;
ccb4cabe
SH
2170 bool ret = false;
2171
2172 if (lxc_list_empty(cgroup_settings))
2173 return true;
2174
2175 sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
6b38e644 2176 if (!sorted_cgroup_settings)
ccb4cabe 2177 return false;
ccb4cabe 2178
ccb4cabe
SH
2179 lxc_list_for_each(iterator, sorted_cgroup_settings) {
2180 cg = iterator->elem;
2181
2182 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
2202afc9 2183 if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
ccb4cabe 2184 if (do_devices && (errno == EACCES || errno == EPERM)) {
c347df58
CB
2185 WARN("Failed to set \"%s\" to \"%s\"",
2186 cg->subsystem, cg->value);
ccb4cabe
SH
2187 continue;
2188 }
c347df58
CB
2189 WARN("Failed to set \"%s\" to \"%s\"",
2190 cg->subsystem, cg->value);
ccb4cabe
SH
2191 goto out;
2192 }
c347df58
CB
2193 DEBUG("Set controller \"%s\" set to \"%s\"",
2194 cg->subsystem, cg->value);
ccb4cabe 2195 }
ccb4cabe
SH
2196 }
2197
2198 ret = true;
6b38e644 2199 INFO("Limits for the legacy cgroup hierarchies have been setup");
ccb4cabe 2200out:
ccb4cabe
SH
2201 lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
2202 lxc_list_del(iterator);
2203 free(iterator);
2204 }
2205 free(sorted_cgroup_settings);
2206 return ret;
2207}
2208
2202afc9 2209static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
a3926f6a 2210 struct lxc_list *cgroup_settings)
6b38e644
CB
2211{
2212 struct lxc_list *iterator;
2202afc9 2213 struct hierarchy *h = ops->unified;
6b38e644
CB
2214
2215 if (lxc_list_empty(cgroup_settings))
2216 return true;
2217
2218 if (!h)
2219 return false;
2220
2221 lxc_list_for_each(iterator, cgroup_settings) {
2222 int ret;
2223 char *fullpath;
2224 struct lxc_cgroup *cg = iterator->elem;
2225
2226 fullpath = must_make_path(h->fullcgpath, cg->subsystem, NULL);
7cea5905 2227 ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false, 0666);
6b38e644
CB
2228 free(fullpath);
2229 if (ret < 0) {
b2ac2cb7
CB
2230 SYSERROR("Failed to set \"%s\" to \"%s\"",
2231 cg->subsystem, cg->value);
6b38e644
CB
2232 return false;
2233 }
2234 TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
2235 }
2236
2237 INFO("Limits for the unified cgroup hierarchy have been setup");
2238 return true;
2239}
2240
2202afc9 2241static bool cgfsng_setup_limits(struct cgroup_ops *ops, struct lxc_conf *conf,
6b38e644
CB
2242 bool do_devices)
2243{
2244 bool bret;
2245
2202afc9 2246 bret = __cg_legacy_setup_limits(ops, &conf->cgroup, do_devices);
6b38e644
CB
2247 if (!bret)
2248 return false;
2249
2202afc9
CB
2250 return __cg_unified_setup_limits(ops, &conf->cgroup2);
2251}
2252
b7b18fc5
CB
2253static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
2254 char **controllers)
2255{
2256 char **cur_ctrl, **cur_use;
2257
2258 if (!ops->cgroup_use)
2259 return true;
2260
2261 for (cur_ctrl = controllers; cur_ctrl && *cur_ctrl; cur_ctrl++) {
2262 bool found = false;
2263
2264 for (cur_use = ops->cgroup_use; cur_use && *cur_use; cur_use++) {
2265 if (strcmp(*cur_use, *cur_ctrl) != 0)
2266 continue;
2267
2268 found = true;
2269 break;
2270 }
2271
2272 if (found)
2273 continue;
2274
2275 return false;
2276 }
2277
2278 return true;
2279}
2280
2202afc9
CB
2281/* At startup, parse_hierarchies finds all the info we need about cgroup
2282 * mountpoints and current cgroups, and stores it in @d.
2283 */
2284static bool cg_hybrid_init(struct cgroup_ops *ops)
2285{
2286 int ret;
2287 char *basecginfo;
2288 bool will_escape;
2289 FILE *f;
2290 size_t len = 0;
2291 char *line = NULL;
2292 char **klist = NULL, **nlist = NULL;
2293
2294 /* Root spawned containers escape the current cgroup, so use init's
2295 * cgroups as our base in that case.
2296 */
2297 will_escape = (geteuid() == 0);
2298 if (will_escape)
2299 basecginfo = read_file("/proc/1/cgroup");
2300 else
2301 basecginfo = read_file("/proc/self/cgroup");
2302 if (!basecginfo)
2303 return false;
2304
2305 ret = get_existing_subsystems(&klist, &nlist);
2306 if (ret < 0) {
2307 ERROR("Failed to retrieve available legacy cgroup controllers");
2308 free(basecginfo);
2309 return false;
2310 }
2311
2312 f = fopen("/proc/self/mountinfo", "r");
2313 if (!f) {
2314 ERROR("Failed to open \"/proc/self/mountinfo\"");
2315 free(basecginfo);
2316 return false;
2317 }
2318
2319 lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
2320
2321 while (getline(&line, &len, f) != -1) {
2322 int type;
2323 bool writeable;
2324 struct hierarchy *new;
2325 char *base_cgroup = NULL, *mountpoint = NULL;
2326 char **controller_list = NULL;
2327
2328 type = get_cgroup_version(line);
2329 if (type == 0)
2330 continue;
2331
2332 if (type == CGROUP2_SUPER_MAGIC && ops->unified)
2333 continue;
2334
2335 if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
2336 if (type == CGROUP2_SUPER_MAGIC)
2337 ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
2338 else if (type == CGROUP_SUPER_MAGIC)
2339 ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
2340 } else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
2341 if (type == CGROUP_SUPER_MAGIC)
2342 ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
2343 } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
2344 if (type == CGROUP2_SUPER_MAGIC)
2345 ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
2346 }
2347
2348 controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
2349 if (!controller_list && type == CGROUP_SUPER_MAGIC)
2350 continue;
2351
2352 if (type == CGROUP_SUPER_MAGIC)
2353 if (controller_list_is_dup(ops->hierarchies, controller_list))
2354 goto next;
2355
2356 mountpoint = cg_hybrid_get_mountpoint(line);
2357 if (!mountpoint) {
2358 ERROR("Failed parsing mountpoint from \"%s\"", line);
2359 goto next;
2360 }
2361
2362 if (type == CGROUP_SUPER_MAGIC)
2363 base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
2364 else
2365 base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
2366 if (!base_cgroup) {
2367 ERROR("Failed to find current cgroup");
2368 goto next;
2369 }
2370
2371 trim(base_cgroup);
2372 prune_init_scope(base_cgroup);
2373 if (type == CGROUP2_SUPER_MAGIC)
2374 writeable = test_writeable_v2(mountpoint, base_cgroup);
2375 else
2376 writeable = test_writeable_v1(mountpoint, base_cgroup);
2377 if (!writeable)
2378 goto next;
2379
2380 if (type == CGROUP2_SUPER_MAGIC) {
2381 char *cgv2_ctrl_path;
2382
2383 cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
2384 "cgroup.controllers",
2385 NULL);
2386
2387 controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
2388 free(cgv2_ctrl_path);
2389 if (!controller_list) {
2390 controller_list = cg_unified_make_empty_controller();
2391 TRACE("No controllers are enabled for "
2392 "delegation in the unified hierarchy");
2393 }
2394 }
2395
b7b18fc5
CB
2396 /* Exclude all controllers that cgroup use does not want. */
2397 if (!cgroup_use_wants_controllers(ops, controller_list))
2398 goto next;
2399
2202afc9
CB
2400 new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type);
2401 if (type == CGROUP2_SUPER_MAGIC && !ops->unified)
2402 ops->unified = new;
2403
2404 continue;
2405
2406 next:
2407 free_string_list(controller_list);
2408 free(mountpoint);
2409 free(base_cgroup);
2410 }
2411
2412 free_string_list(klist);
2413 free_string_list(nlist);
2414
2415 free(basecginfo);
2416
2417 fclose(f);
2418 free(line);
2419
2420 TRACE("Writable cgroup hierarchies:");
2421 lxc_cgfsng_print_hierarchies(ops);
2422
2423 /* verify that all controllers in cgroup.use and all crucial
2424 * controllers are accounted for
2425 */
2426 if (!all_controllers_found(ops))
2427 return false;
2428
2429 return true;
2430}
2431
2432static int cg_is_pure_unified(void)
2433{
2434
2435 int ret;
2436 struct statfs fs;
2437
2438 ret = statfs("/sys/fs/cgroup", &fs);
2439 if (ret < 0)
2440 return -ENOMEDIUM;
2441
2442 if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
2443 return CGROUP2_SUPER_MAGIC;
2444
2445 return 0;
2446}
2447
/* Get the current cgroup for the cgroupfs v2 hierarchy, i.e. the path that
 * follows "0::" in /proc/1/cgroup when running with effective uid 0 and in
 * /proc/self/cgroup otherwise.
 *
 * Returns a trimmed copy of the path that the caller must free, or NULL if
 * the file cannot be read, has no "0::/" entry, or the copy fails.
 */
static char *cg_unified_get_current_cgroup(void)
{
	char *cginfo, *entry;
	char *result = NULL;

	if (geteuid() == 0)
		cginfo = read_file("/proc/1/cgroup");
	else
		cginfo = read_file("/proc/self/cgroup");
	if (!cginfo)
		return NULL;

	/* Skip the "0::" prefix so the copy starts at the leading '/'. */
	entry = strstr(cginfo, "0::/");
	if (entry)
		result = copy_to_eol(entry + 3);

	free(cginfo);
	if (result)
		trim(result);

	return result;
}
2479
2480static int cg_unified_init(struct cgroup_ops *ops)
2481{
2482 int ret;
2483 char *mountpoint, *subtree_path;
2484 char **delegatable;
2485 char *base_cgroup = NULL;
2486
2487 ret = cg_is_pure_unified();
2488 if (ret == -ENOMEDIUM)
2489 return -ENOMEDIUM;
2490
2491 if (ret != CGROUP2_SUPER_MAGIC)
2492 return 0;
2493
2494 base_cgroup = cg_unified_get_current_cgroup();
2495 if (!base_cgroup)
2496 return -EINVAL;
2497 prune_init_scope(base_cgroup);
2498
2499 /* We assume that we have already been given controllers to delegate
2500 * further down the hierarchy. If not it is up to the user to delegate
2501 * them to us.
2502 */
2503 mountpoint = must_copy_string("/sys/fs/cgroup");
2504 subtree_path = must_make_path(mountpoint, base_cgroup,
2505 "cgroup.subtree_control", NULL);
2506 delegatable = cg_unified_get_controllers(subtree_path);
2507 free(subtree_path);
2508 if (!delegatable)
2509 delegatable = cg_unified_make_empty_controller();
2510 if (!delegatable[0])
2511 TRACE("No controllers are enabled for delegation");
2512
2513 /* TODO: If the user requested specific controllers via lxc.cgroup.use
2514 * we should verify here. The reason I'm not doing it right is that I'm
2515 * not convinced that lxc.cgroup.use will be the future since it is a
2516 * global property. I much rather have an option that lets you request
2517 * controllers per container.
2518 */
2519
2520 add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
2521
2522 ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
2523 return CGROUP2_SUPER_MAGIC;
2524}
2525
2526static bool cg_init(struct cgroup_ops *ops)
2527{
2528 int ret;
2529 const char *tmp;
2530
2531 tmp = lxc_global_config_value("lxc.cgroup.use");
b7b18fc5
CB
2532 if (tmp) {
2533 char *chop, *cur, *pin;
2534 char *saveptr = NULL;
2535
2536 pin = must_copy_string(tmp);
2537 chop = pin;
2538
2539 for (; (cur = strtok_r(chop, ",", &saveptr)); chop = NULL)
2540 must_append_string(&ops->cgroup_use, cur);
2541
2542 free(pin);
2543 }
2202afc9
CB
2544
2545 ret = cg_unified_init(ops);
2546 if (ret < 0)
2547 return false;
2548
2549 if (ret == CGROUP2_SUPER_MAGIC)
2550 return true;
2551
2552 return cg_hybrid_init(ops);
2553}
2554
2555static bool cgfsng_data_init(struct cgroup_ops *ops)
2556{
2557 const char *cgroup_pattern;
2558
2559 /* copy system-wide cgroup information */
2560 cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
2561 if (!cgroup_pattern) {
2562 /* lxc.cgroup.pattern is only NULL on error. */
2563 ERROR("Failed to retrieve cgroup pattern");
2564 return false;
2565 }
2566 ops->cgroup_pattern = must_copy_string(cgroup_pattern);
2567
2568 return true;
2569}
2570
2571struct cgroup_ops *cgfsng_ops_init(void)
2572{
2573 struct cgroup_ops *cgfsng_ops;
2574
2575 cgfsng_ops = malloc(sizeof(struct cgroup_ops));
2576 if (!cgfsng_ops)
2577 return NULL;
2578
2579 memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
2580 cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
2581
2582 if (!cg_init(cgfsng_ops)) {
2583 free(cgfsng_ops);
2584 return NULL;
2585 }
2586
2587 cgfsng_ops->data_init = cgfsng_data_init;
2588 cgfsng_ops->destroy = cgfsng_destroy;
2589 cgfsng_ops->create = cgfsng_create;
2590 cgfsng_ops->enter = cgfsng_enter;
2591 cgfsng_ops->escape = cgfsng_escape;
2592 cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
2593 cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
2594 cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
2595 cgfsng_ops->get = cgfsng_get;
2596 cgfsng_ops->set = cgfsng_set;
2597 cgfsng_ops->unfreeze = cgfsng_unfreeze;
2598 cgfsng_ops->setup_limits = cgfsng_setup_limits;
2599 cgfsng_ops->driver = "cgfsng";
2600 cgfsng_ops->version = "1.0.0";
2601 cgfsng_ops->attach = cgfsng_attach;
2602 cgfsng_ops->chown = cgfsng_chown;
2603 cgfsng_ops->mount = cgfsng_mount;
2604 cgfsng_ops->nrtasks = cgfsng_nrtasks;
2605
2606 return cgfsng_ops;
2607}