]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroups/cgfsng.c
cgroups: non-functional changes
[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
CommitLineData
ccb4cabe
SH
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2016 Canonical Ltd.
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@ubuntu.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/*
25 * cgfs-ng.c: this is a new, simplified implementation of a filesystem
26 * cgroup backend. The original cgfs.c was designed to be as flexible
27 * as possible. It would try to find cgroup filesystems no matter where
28 * or how you had them mounted, and deduce the most usable mount for
29 * each controller. It also was not designed for unprivileged use, as
30 * that was reserved for cgmanager.
31 *
32 * This new implementation assumes that cgroup filesystems are mounted
33 * under /sys/fs/cgroup/clist where clist is either the controller, or
34 * a comma-separated list of controllers.
35 */
a54694f8 36
ccb4cabe 37#include "config.h"
a54694f8
CB
38
39#include <ctype.h>
40#include <dirent.h>
41#include <errno.h>
42#include <grp.h>
43#include <stdint.h>
ccb4cabe
SH
44#include <stdio.h>
45#include <stdlib.h>
a54694f8 46#include <string.h>
ccb4cabe 47#include <unistd.h>
a54694f8 48#include <sys/types.h>
ccb4cabe 49
c8bf519d 50#include <linux/types.h>
51#include <linux/kdev_t.h>
52
ccb4cabe 53#include "cgroup.h"
6328fd9c 54#include "cgroup_utils.h"
ccb4cabe 55#include "commands.h"
a54694f8 56#include "log.h"
28d832c4 57#include "storage.h"
a54694f8 58#include "utils.h"
ccb4cabe
SH
59
60lxc_log_define(lxc_cgfsng, lxc);
61
62static struct cgroup_ops cgfsng_ops;
63
ccb4cabe
SH
/*
 * Descriptor for one mounted cgroup hierarchy.
 */
struct hierarchy {
	/* Either NULL, or a NULL-terminated list of all co-mounted controllers. */
	char **controllers;

	/*
	 * The mountpoint we will use: either /sys/fs/cgroup/controller or
	 * /sys/fs/cgroup/controllerlist.
	 */
	char *mountpoint;

	/*
	 * The cgroup under which the container cgroup path is created. This is
	 * either the caller's cgroup (if not root), or init's cgroup (if root).
	 */
	char *base_cgroup;

	/*
	 * Initialized to NULL by add_controller().
	 * NOTE(review): presumably the full path of the container's cgroup in
	 * this hierarchy once it has been created - confirm against the users
	 * of this field.
	 */
	char *fullcgpath;

	/* Set by add_controller(): marks the cgroup v2 hierarchy. */
	bool is_cgroup_v2;
};
81
/*
 * Per-container cgroup data which is attached to the lxc_handler.
 */
struct cgfsng_handler_data {
	/* A copy of lxc.cgroup.pattern. */
	char *cgroup_pattern;

	/*
	 * If not NULL, the cgroup which was created for the container. For
	 * each hierarchy it is created under that hierarchy's base_cgroup
	 * directory; relative to base_cgroup it is the same for all
	 * hierarchies.
	 */
	char *container_cgroup;

	/* The container name. */
	char *name;
};
96
457ca9aa
SH
97/*
98 * @hierarchies - a NULL-terminated array of struct hierarchy, one per
99 * hierarchy. No duplicates. First sufficient, writeable mounted
100 * hierarchy wins
101 */
102struct hierarchy **hierarchies;
103
104/*
105 * @cgroup_use - a copy of the lxc.cgroup.use
106 */
107char *cgroup_use;
108
e4aeecf5
CB
109/*
110 * @lxc_cgfsng_debug - whether to print debug info to stdout for the cgfsng
111 * driver
112 */
113static bool lxc_cgfsng_debug;
114
ccb4cabe
SH
/*
 * Free a NULL-terminated array of heap-allocated strings together with the
 * array itself. A NULL @clist is accepted and ignored.
 */
static void free_string_list(char **clist)
{
	char **it;

	if (!clist)
		return;

	for (it = clist; *it; it++)
		free(*it);

	free(clist);
}
125
ccb4cabe
SH
/*
 * Allocate @sz bytes by handing a NULL pointer to must_realloc().
 * Do not fail.
 */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
131
ccb4cabe
SH
/*
 * Special case: return a freshly allocated copy of @entry with 'name='
 * prepended, i.e. turn "systemd" into "name=systemd". Do not fail.
 * The caller owns the returned string.
 */
static char *must_prefix_named(char *entry)
{
	/* strlen("name=") == 5, plus one byte for the terminating NUL. */
	size_t bufsz = strlen(entry) + 6;
	char *prefixed = must_alloc(bufsz);

	snprintf(prefixed, bufsz, "name=%s", entry);
	return prefixed;
}
146
/*
 * Grow the NULL-terminated pointer array *@list by one slot. Do not fail.
 *
 * The new last slot is set to NULL. The returned index is the slot just
 * before it, which the caller is expected to fill in; this function does not
 * assign it. *@list may be NULL, in which case a two-slot array is created
 * and index 0 is returned.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list) {
		while ((*list)[count])
			count++;
	}

	/* Room for the existing entries, the new entry and the terminator. */
	*list = must_realloc(*list, (count + 2) * sizeof(**list));
	(*list)[count + 1] = NULL;

	return count;
}
164
/*
 * Return true if @entry compares equal to one of the strings in the
 * NULL-terminated array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	char **it;

	for (it = list; it && *it; it++) {
		if (!strcmp(*it, entry))
			return true;
	}

	return false;
}
181
/*
 * Append a copy of controller @entry to *@clist. Do not fail.
 * *@clist must be NULL the first time we are called, and stays
 * NULL-terminated.
 *
 * Named subsystems are handled here as well:
 *  - an entry already starting with "name=" or listed in the kernel
 *    subsystem list @klist is copied verbatim;
 *  - any other entry gets "name=" prepended;
 *  - an entry present in both @klist and the named list @nlist is refused
 *    (nothing is appended) because we cannot tell which one is meant.
 *    (TODO - this could be worked around in some cases by remounting to be
 *    unambiguous, or by comparing mountpoint contents with the current
 *    cgroup.)
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;

	if (is_kernel && string_in_list(nlist, entry)) {
		ERROR("Refusing to use ambiguous controller '%s'", entry);
		ERROR("It is both a named and kernel subsystem");
		return;
	}

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		(*clist)[slot] = must_copy_string(entry);
	else
		(*clist)[slot] = must_prefix_named(entry);
}
217
ccb4cabe
SH
218static void free_handler_data(struct cgfsng_handler_data *d)
219{
ccb4cabe
SH
220 free(d->cgroup_pattern);
221 free(d->container_cgroup);
222 free(d->name);
223 free(d);
224}
225
226/*
227 * Given a handler's cgroup data, return the struct hierarchy for the
228 * controller @c, or NULL if there is none.
229 */
457ca9aa 230struct hierarchy *get_hierarchy(const char *c)
ccb4cabe
SH
231{
232 int i;
233
457ca9aa 234 if (!hierarchies)
ccb4cabe 235 return NULL;
457ca9aa
SH
236 for (i = 0; hierarchies[i]; i++) {
237 if (string_in_list(hierarchies[i]->controllers, c))
238 return hierarchies[i];
ccb4cabe
SH
239 }
240 return NULL;
241}
242
a54694f8
CB
#define BATCH_SIZE 50
/*
 * Make sure *@mem can hold @newlen bytes, growing it in multiples of
 * BATCH_SIZE. The buffer is only reallocated when it does not exist yet or
 * when @newlen falls into a later batch than @oldlen did.
 */
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int want = (newlen / BATCH_SIZE) + 1;

	if (*mem && want <= have)
		return;

	*mem = must_realloc(*mem, want * BATCH_SIZE);
}
253
/*
 * Append the @newlen bytes of @new, plus its terminating NUL, to *@dest at
 * offset @oldlen, growing *@dest as required.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* One extra byte for the NUL terminator copied along with @new. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(*dest + oldlen, new, newlen + 1);
}
262
/*
 * Slurp in a whole file, line by line.
 *
 * Returns a heap-allocated, NUL-terminated buffer owned by the caller, or
 * NULL if the file cannot be opened. NULL is also returned when not a single
 * line could be read.
 */
static char *read_file(char *fnam)
{
	FILE *fp;
	char *content = NULL, *cur = NULL;
	size_t cap = 0, total = 0;
	int n;

	fp = fopen(fnam, "r");
	if (!fp)
		return NULL;

	for (;;) {
		n = getline(&cur, &cap, fp);
		if (n == -1)
			break;

		append_line(&content, total, cur, n);
		total += n;
	}

	free(cur);
	fclose(fp);

	return content;
}
282
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)

/*
 * The bit helpers below address bit @bit in an array of uint32_t words:
 * word index is @bit / NBITS, bit position inside the word is @bit % NBITS.
 * The caller must make sure the array is large enough.
 *
 * The shifted constant is an unsigned 32-bit one: shifting a plain (signed)
 * int 1 into bit 31 is undefined behaviour.
 */

/* Set bit @bit in @bitarr. */
static void set_bit(unsigned bit, uint32_t *bitarr)
{
	bitarr[bit / NBITS] |= (UINT32_C(1) << (bit % NBITS));
}

/* Clear bit @bit in @bitarr. */
static void clear_bit(unsigned bit, uint32_t *bitarr)
{
	bitarr[bit / NBITS] &= ~(UINT32_C(1) << (bit % NBITS));
}

/* Return true if bit @bit is set in @bitarr. */
static bool is_set(unsigned bit, uint32_t *bitarr)
{
	return (bitarr[bit / NBITS] & (UINT32_C(1) << (bit % NBITS))) != 0;
}
302
/* Create cpumask from cpulist aka turn:
 *
 *	0,2-3
 *
 * into bit array
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place (the ',' separators are overwritten), so it
 * cannot be reused as a cpulist string afterwards.
 * @nbits is the number of bits the mask must be able to hold; every cpu
 * number in @buf must be smaller than @nbits.
 *
 * Returns a calloc()ed array owned by the caller, or NULL on allocation
 * failure, on a malformed or out-of-range entry, or on a reversed range.
 * Tokens consisting only of whitespace (e.g. the lone newline of an empty
 * list) are skipped instead of being mistaken for cpu 0.
 */
static uint32_t *lxc_cpumask(char *buf, size_t nbits)
{
	char *token;
	char *saveptr = NULL;
	size_t arrlen = BITS_TO_LONGS(nbits);
	uint32_t *bitarr = calloc(arrlen, sizeof(uint32_t));
	if (!bitarr)
		return NULL;

	for (; (token = strtok_r(buf, ",", &saveptr)); buf = NULL) {
		char *endptr, *range;
		unsigned long start, end;

		errno = 0;
		start = strtoul(token, &endptr, 0);
		if (errno != 0)
			goto on_error;

		if (endptr == token) {
			/* No digits at all: tolerate blank tokens only. */
			while (isspace((unsigned char)*endptr))
				endptr++;
			if (*endptr == '\0')
				continue;
			goto on_error;
		}
		end = start;

		range = strchr(token, '-');
		if (range) {
			errno = 0;
			end = strtoul(range + 1, &endptr, 0);
			if (errno != 0 || endptr == range + 1)
				goto on_error;
		}

		if (start > end || end >= nbits)
			goto on_error;

		for (; start <= end; start++)
			set_bit((unsigned)start, bitarr);
	}

	return bitarr;

on_error:
	free(bitarr);
	return NULL;
}
344
a54694f8
CB
345/* Turn cpumask into simple, comma-separated cpulist. */
346static char *lxc_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
347{
348 size_t i;
349 int ret;
eab15c1e 350 char numstr[LXC_NUMSTRLEN64] = {0};
a54694f8
CB
351 char **cpulist = NULL;
352
353 for (i = 0; i <= nbits; i++) {
354 if (is_set(i, bitarr)) {
eab15c1e
CB
355 ret = snprintf(numstr, LXC_NUMSTRLEN64, "%zu", i);
356 if (ret < 0 || (size_t)ret >= LXC_NUMSTRLEN64) {
a54694f8
CB
357 lxc_free_array((void **)cpulist, free);
358 return NULL;
359 }
360 if (lxc_append_string(&cpulist, numstr) < 0) {
361 lxc_free_array((void **)cpulist, free);
362 return NULL;
363 }
364 }
365 }
366 return lxc_string_join(",", (const char **)cpulist, false);
367}
368
/*
 * Return the last cpu number that appears in @cpulist, a string such as
 * "0,2-3". The last number is whatever follows the final ',' or '-'; if the
 * list contains neither, the whole string is parsed.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t get_max_cpus(char *cpulist)
{
	char *last = cpulist;
	char *p;
	size_t cpus = 0;

	/*
	 * Remember the position right after the last separator. This avoids
	 * ordering comparisons between possibly-NULL strrchr() results.
	 */
	for (p = cpulist; *p; p++) {
		if (*p == ',' || *p == '-')
			last = p + 1;
	}

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
403
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/*
 * Make sure cpuset.cpus of the cgroup directory @path is populated and does
 * not contain isolated cpus.
 *
 * The parent's cpuset.cpus (parent == @path cut at its last '/') is read as
 * the set of possible cpus. Cpus listed in __ISOL_CPUS are removed from it.
 *
 * @am_initialized: true if someone already initialized the cpuset of @path
 *                  (the caller passes whether cgroup.clone_children is '1').
 *                  If false, cpuset.cpus of @path is always written; if true
 *                  it is only rewritten when isolated cpus had to be removed.
 *
 * @path is temporarily modified while the parent path is built, but is
 * restored before any other work is done, so it is intact on every return.
 *
 * Returns true on success, false on error.
 */
static bool filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		ERROR("Invalid path: %s.", path);
		return bret;
	}

	/* Cut off the last component just long enough to build the parent path. */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	*lastslash = oldv;

	posscpus = read_file(fpath);
	if (!posscpus) {
		SYSERROR("Could not read file: %s.\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = get_max_cpus(posscpus);
	if (maxposs < 0)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		DEBUG("Path: "__ISOL_CPUS" to read isolated cpus from does not exist.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			DEBUG("Copying cpuset of parent cgroup.");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		SYSERROR("Could not read file "__ISOL_CPUS);
		goto on_error;
	}
	if (!isdigit((unsigned char)isolcpus[0])) {
		DEBUG("No isolated cpus detected.");
		cpulist = posscpus;
		/* Same reasoning as above: copy the parent's values unless we
		 * were already initialized by someone.
		 */
		if (!am_initialized) {
			DEBUG("Copying cpuset of parent cgroup.");
			goto copy_parent;
		}
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = get_max_cpus(isolcpus);
	if (maxisol < 0)
		goto on_error;

	/* Number of bits needed: highest cpu number seen, plus one. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	/* Note: lxc_cpumask() tokenizes its input string in place. */
	possmask = lxc_cpumask(posscpus, maxposs);
	if (!possmask) {
		ERROR("Could not create cpumask for all possible cpus.\n");
		goto on_error;
	}

	isolmask = lxc_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		ERROR("Could not create cpumask for all isolated cpus.\n");
		goto on_error;
	}

	/* The masks hold bits 0 .. maxposs - 1. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		DEBUG("No isolated cpus present in cpuset.");
		/* Already initialized and nothing to remove: leave it alone. */
		if (am_initialized)
			goto on_success;
	} else {
		DEBUG("Removed isolated cpus from cpuset.");
	}

	/* posscpus was tokenized above, so rebuild the list from the mask. */
	cpulist = lxc_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		ERROR("Could not create cpu list.\n");
		goto on_error;
	}

copy_parent:
	/* Done with the parent's file; switch to the file inside @path. */
	free(fpath);
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false);
	if (ret < 0) {
		SYSERROR("Could not write cpu list to: %s.\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	free(fpath);

	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
540
e3a3fecf
SH
/*
 * Copy contents of parent(@path)/@file to @path/@file.
 *
 * parent(@path) is @path cut at its last '/'. @path is modified only while
 * the parent file name is built and is restored on every return path.
 *
 * Returns true if the value was read and written successfully.
 */
static bool copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		ERROR("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* First call determines the length, second call reads the data. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;

	value = must_alloc(len + 1);
	if (lxc_read_from_file(fpath, value, len) != len)
		goto bad;
	/* Terminate so the value can safely be logged with %s below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false);
	if (ret < 0)
		SYSERROR("Unable to write %s to %s", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

bad:
	SYSERROR("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
578
579/*
580 * Initialize the cpuset hierarchy in first directory of @gname and
581 * set cgroup.clone_children so that children inherit settings.
582 * Since the h->base_path is populated by init or ourselves, we know
583 * it is already initialized.
584 */
a54694f8 585static bool handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
e3a3fecf
SH
586{
587 char *cgpath, *clonechildrenpath, v, *slash;
588
589 if (!string_in_list(h->controllers, "cpuset"))
590 return true;
591
592 if (*cgname == '/')
593 cgname++;
594 slash = strchr(cgname, '/');
595 if (slash)
596 *slash = '\0';
597
598 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
599 if (slash)
600 *slash = '/';
601 if (mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
602 SYSERROR("Failed to create '%s'", cgpath);
603 free(cgpath);
604 return false;
605 }
6f9584d8 606
e3a3fecf 607 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
6328fd9c
CB
608 /* unified hierarchy doesn't have clone_children */
609 if (!file_exists(clonechildrenpath)) {
e3a3fecf
SH
610 free(clonechildrenpath);
611 free(cgpath);
612 return true;
613 }
614 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
615 SYSERROR("Failed to read '%s'", clonechildrenpath);
616 free(clonechildrenpath);
617 free(cgpath);
618 return false;
619 }
620
a54694f8 621 /* Make sure any isolated cpus are removed from cpuset.cpus. */
6f9584d8
CB
622 if (!filter_and_set_cpus(cgpath, v == '1')) {
623 SYSERROR("Failed to remove isolated cpus.");
624 free(clonechildrenpath);
625 free(cgpath);
a54694f8 626 return false;
6f9584d8 627 }
a54694f8 628
e3a3fecf 629 if (v == '1') { /* already set for us by someone else */
6f9584d8 630 DEBUG("\"cgroup.clone_children\" was already set to \"1\".");
e3a3fecf
SH
631 free(clonechildrenpath);
632 free(cgpath);
633 return true;
634 }
635
636 /* copy parent's settings */
a54694f8 637 if (!copy_parent_file(cgpath, "cpuset.mems")) {
6f9584d8 638 SYSERROR("Failed to copy \"cpuset.mems\" settings.");
e3a3fecf
SH
639 free(cgpath);
640 free(clonechildrenpath);
641 return false;
642 }
643 free(cgpath);
644
645 if (lxc_write_to_file(clonechildrenpath, "1", 1, false) < 0) {
646 /* Set clone_children so children inherit our settings */
647 SYSERROR("Failed to write 1 to %s", clonechildrenpath);
648 free(clonechildrenpath);
649 return false;
650 }
651 free(clonechildrenpath);
652 return true;
653}
654
ccb4cabe
SH
/*
 * Given two NULL-terminated lists of strings, return true if at least one
 * string occurs in both. A NULL list intersects with nothing.
 */
static bool controller_lists_intersect(char **l1, char **l2)
{
	char **it;

	if (!l1 || !l2)
		return false;

	for (it = l1; *it; it++)
		if (string_in_list(l2, *it))
			return true;

	return false;
}
672
673/*
674 * For a null-terminated list of controllers @clist, return true if any of
675 * those controllers is already listed the null-terminated list of
676 * hierarchies @hlist. Realistically, if one is present, all must be present.
677 */
678static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
679{
680 int i;
681
682 if (!hlist)
683 return false;
684 for (i = 0; hlist[i]; i++)
685 if (controller_lists_intersect(hlist[i]->controllers, clist))
686 return true;
687 return false;
688
689}
690
691/*
692 * Return true if the controller @entry is found in the null-terminated
693 * list of hierarchies @hlist
694 */
695static bool controller_found(struct hierarchy **hlist, char *entry)
696{
697 int i;
698 if (!hlist)
699 return false;
700
701 for (i = 0; hlist[i]; i++)
702 if (string_in_list(hlist[i]->controllers, entry))
703 return true;
704 return false;
705}
706
707/*
c30b61c3
SH
708 * Return true if all of the controllers which we require have been found.
709 * The required list is freezer and anything in * lxc.cgroup.use.
ccb4cabe 710 */
457ca9aa 711static bool all_controllers_found(void)
ccb4cabe
SH
712{
713 char *p, *saveptr = NULL;
457ca9aa 714 struct hierarchy ** hlist = hierarchies;
ccb4cabe 715
ccb4cabe
SH
716 if (!controller_found(hlist, "freezer")) {
717 ERROR("no freezer controller mountpoint found");
718 return false;
719 }
720
457ca9aa 721 if (!cgroup_use)
ccb4cabe 722 return true;
457ca9aa 723 for (p = strtok_r(cgroup_use, ",", &saveptr); p;
ccb4cabe
SH
724 p = strtok_r(NULL, ",", &saveptr)) {
725 if (!controller_found(hlist, p)) {
726 ERROR("no %s controller mountpoint found", p);
727 return false;
728 }
729 }
730 return true;
731}
732
/*
 * Return true if the fs type is fuse.lxcfs, i.e. if the first " - "
 * separator found in @line is immediately followed by "fuse.lxcfs ".
 */
static bool is_lxcfs(const char *line)
{
	const char *sep = strstr(line, " - ");

	if (sep == NULL)
		return false;

	if (strncmp(sep, " - fuse.lxcfs ", 14) != 0)
		return false;

	return true;
}
741
/*
 * Get the controllers from a mountinfo line.
 *
 * There are other ways we could get this info. For lxcfs, field 3 is
 * /cgroup/controller-list. For cgroupfs, we could parse the mount options.
 * But we simply assume that the mountpoint must be
 * /sys/fs/cgroup/controller-list.
 *
 * The mountpoint is the field reached after skipping four spaces. @line is
 * modified: the space terminating the mountpoint field is overwritten with a
 * NUL byte (get_mountpoint() relies on that).
 *
 * Returns a NULL-terminated controller list built through
 * must_append_controller() (which may refuse entries, so the result can be
 * NULL), or NULL if the line is malformed or the mountpoint is not under
 * /sys/fs/cgroup/. For a cgroup v2 mount the single pseudo controller
 * "cgroup2" is used.
 */
static char **get_controllers(char **klist, char **nlist, char *line)
{
	int field;
	char *cur = line, *end, *tok, *saveptr = NULL;
	char **controllers = NULL;
	/* Must be determined before @line gets cut below. */
	bool v2 = is_cgroupfs_v2(line);

	for (field = 0; field < 4; field++) {
		cur = strchr(cur, ' ');
		if (!cur)
			return NULL;
		cur++;
	}

	/* note - if we change how mountinfo works, then our caller
	 * will need to verify /sys/fs/cgroup/ in this field */
	if (strncmp(cur, "/sys/fs/cgroup/", 15) != 0) {
		INFO("cgfsng: found hierarchy not under /sys/fs/cgroup: \"%s\"", cur);
		return NULL;
	}
	cur += 15;

	end = strchr(cur, ' ');
	if (!end) {
		ERROR("corrupt mountinfo");
		return NULL;
	}
	*end = '\0';

	/* cgroup v2 does not have separate mountpoints for controllers */
	if (v2) {
		must_append_controller(klist, nlist, &controllers, "cgroup2");
		return controllers;
	}

	tok = strtok_r(cur, ",", &saveptr);
	while (tok) {
		must_append_controller(klist, nlist, &controllers, tok);
		tok = strtok_r(NULL, ",", &saveptr);
	}

	return controllers;
}
795
ccb4cabe 796/* Add a controller to our list of hierarchies */
457ca9aa 797static void add_controller(char **clist, char *mountpoint, char *base_cgroup)
ccb4cabe
SH
798{
799 struct hierarchy *new;
800 int newentry;
801
802 new = must_alloc(sizeof(*new));
803 new->controllers = clist;
804 new->mountpoint = mountpoint;
805 new->base_cgroup = base_cgroup;
806 new->fullcgpath = NULL;
807
6328fd9c
CB
808 /* record if this is the cgroup v2 hierarchy */
809 if (!strcmp(base_cgroup, "cgroup2"))
810 new->is_cgroup_v2 = true;
811 else
812 new->is_cgroup_v2 = false;
813
457ca9aa
SH
814 newentry = append_null_to_list((void ***)&hierarchies);
815 hierarchies[newentry] = new;
ccb4cabe
SH
816}
817
/*
 * Get a copy of the mountpoint from @line, which is a line from
 * /proc/self/mountinfo. The mountpoint is the field reached after skipping
 * four spaces.
 *
 * This expects that a NUL byte has already been stuck after the mountpoint
 * (get_controllers() does that), so everything from the start of the field
 * to the end of the string is copied.
 *
 * Returns a newly allocated string, or NULL if @line has too few fields.
 */
static char *get_mountpoint(char *line)
{
	int skipped;
	char *cur = line, *copy;
	size_t n;

	for (skipped = 0; skipped < 4; skipped++) {
		cur = strchr(cur, ' ');
		if (!cur)
			return NULL;
		cur++;
	}

	n = strlen(cur);
	copy = must_alloc(n + 1);
	/* n + 1 bytes: the terminating NUL is copied as well. */
	memcpy(copy, cur, n + 1);

	return copy;
}
841
/*
 * Given a multi-line string, return a NUL-terminated copy of the current
 * line, i.e. of everything from @p up to but excluding the next '\n'.
 * Returns NULL if there is no '\n' after @p.
 */
static char *copy_to_eol(char *p)
{
	char *newline, *copy;
	size_t n;

	newline = strchr(p, '\n');
	if (!newline)
		return NULL;

	n = newline - p;
	copy = must_alloc(n + 1);
	memcpy(copy, p, n);
	copy[n] = '\0';

	return copy;
}
860
/*
 * cgline: pointer to character after the first ':' in a line in a
 * \n-terminated /proc/self/cgroup file. Check whether controller @c is
 * present in the comma-separated controller list that runs up to the next
 * ':'.
 *
 * The list is compared in place; neither @cgline nor @c is modified and no
 * temporary copy of the list is made. Empty list entries never match.
 *
 * Returns false if @cgline contains no ':'.
 */
static bool controller_in_clist(char *cgline, char *c)
{
	char *eol, *tok, *next;
	size_t clen;

	eol = strchr(cgline, ':');
	if (!eol)
		return false;

	clen = strlen(c);
	if (clen == 0)
		return false;

	for (tok = cgline; tok < eol; tok = next + 1) {
		/* End of this entry: next ',' before @eol, or @eol itself. */
		next = memchr(tok, ',', eol - tok);
		if (!next)
			next = eol;

		if ((size_t)(next - tok) == clen && memcmp(tok, c, clen) == 0)
			return true;
	}

	return false;
}
887
/*
 * @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
 * @controller as a newly allocated string, or NULL if no matching line is
 * found or the data is malformed.
 *
 * For the pseudo controller "cgroup2" the line starting with '0' is used
 * (cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path").
 */
static char *get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;
	bool want_v2 = strcmp(controller, "cgroup2") == 0;

	for (;;) {
		/* Checked at the start of the line, before skipping the id. */
		bool v2_line = want_v2 && *cur == '0';

		/* Skip the hierarchy id. */
		cur = strchr(cur, ':');
		if (!cur)
			return NULL;
		cur++;

		if (v2_line || controller_in_clist(cur, controller)) {
			/* Skip the controller list; the path follows. */
			cur = strchr(cur, ':');
			if (!cur)
				return NULL;
			return copy_to_eol(cur + 1);
		}

		/* Not this line, move on to the next one. */
		cur = strchr(cur, '\n');
		if (!cur)
			return NULL;
		cur++;
	}
}
923
ccb4cabe
SH
/*
 * Append a copy of @entry to the NULL-terminated string array *@list,
 * growing the array by one slot. Do not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot;

	slot = append_null_to_list((void ***)list);
	(*list)[slot] = must_copy_string(entry);
}
932
/*
 * Collect the subsystems listed in /proc/self/cgroup.
 *
 * Entries of the controller field starting with "name=" are appended to
 * @nlist (named subsystems), all others to @klist (kernel subsystems).
 * Lines without two ':' separators are skipped. If the file cannot be
 * opened both lists are left untouched.
 */
static void get_existing_subsystems(char ***klist, char ***nlist)
{
	FILE *fp;
	char *buf = NULL;
	size_t bufsz = 0;

	fp = fopen("/proc/self/cgroup", "r");
	if (!fp)
		return;

	while (getline(&buf, &bufsz, fp) != -1) {
		char *ctrl_start, *ctrl_end, *tok, *saveptr = NULL;

		ctrl_start = strchr(buf, ':');
		if (!ctrl_start)
			continue;
		ctrl_start++;

		ctrl_end = strchr(ctrl_start, ':');
		if (!ctrl_end)
			continue;
		*ctrl_end = '\0';

		/* If the kernel has cgroup v2 support, then /proc/self/cgroup
		 * contains an entry of the form:
		 *
		 *	0::/some/path
		 *
		 * i.e. the controller field is empty. In this case we use
		 * "cgroup2" as controller name.
		 */
		if (ctrl_end == ctrl_start) {
			must_append_string(klist, "cgroup2");
			continue;
		}

		tok = strtok_r(ctrl_start, ",", &saveptr);
		while (tok) {
			if (strncmp(tok, "name=", 5) == 0)
				must_append_string(nlist, tok);
			else
				must_append_string(klist, tok);

			tok = strtok_r(NULL, ",", &saveptr);
		}
	}

	fclose(fp);
	free(buf);
}
976
/*
 * Strip trailing newlines from @s in place. The first character is never
 * removed, so a string consisting of a single "\n" is left as it is.
 */
static void trim(char *s)
{
	size_t end;

	for (end = strlen(s); end > 1 && s[end - 1] == '\n'; end--)
		s[end - 1] = '\0';
}
983
e4aeecf5
CB
984static void lxc_cgfsng_print_handler_data(const struct cgfsng_handler_data *d)
985{
986 printf("Cgroup information:\n");
987 printf(" container name: %s\n", d->name ? d->name : "(null)");
988 printf(" lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)");
989 printf(" lxc.cgroup.pattern: %s\n", d->cgroup_pattern ? d->cgroup_pattern : "(null)");
990 printf(" cgroup: %s\n", d->container_cgroup ? d->container_cgroup : "(null)");
991}
992
993static void lxc_cgfsng_print_hierarchies()
ccb4cabe 994{
a7b0cc4c 995 struct hierarchy **it;
ccb4cabe 996 int i;
41c33dbe 997
457ca9aa 998 if (!hierarchies) {
e4aeecf5 999 printf(" No hierarchies found.");
ccb4cabe
SH
1000 return;
1001 }
e4aeecf5 1002 printf(" Hierarchies:\n");
a7b0cc4c
CB
1003 for (i = 0, it = hierarchies; it && *it; it++, i++) {
1004 char **cit;
ccb4cabe 1005 int j;
e4aeecf5
CB
1006 printf(" %d: base_cgroup %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
1007 printf(" mountpoint %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
1008 printf(" controllers:\n");
a7b0cc4c 1009 for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
e4aeecf5 1010 printf(" %d: %s\n", j, *cit);
ccb4cabe
SH
1011 }
1012}
41c33dbe 1013
/*
 * Print @basecginfo followed by the numbered entries of the kernel subsystem
 * list @klist and the named subsystem list @nlist to stdout. Either list may
 * be NULL.
 */
static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist, char **nlist)
{
	int idx;

	printf("basecginfo is:\n");
	printf("%s\n", basecginfo);

	if (klist) {
		for (idx = 0; klist[idx]; idx++)
			printf("kernel subsystem %d: %s\n", idx, klist[idx]);
	}

	if (nlist) {
		for (idx = 0; nlist[idx]; idx++)
			printf("named subsystem %d: %s\n", idx, nlist[idx]);
	}
}
ccb4cabe 1027
e4aeecf5
CB
/*
 * Dump all cgfsng debug information to stdout: first the handler data @d,
 * then the list of hierarchies.
 */
static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d)
{
	lxc_cgfsng_print_handler_data(d);

	lxc_cgfsng_print_hierarchies();
}
1033
ccb4cabe
SH
1034/*
1035 * At startup, parse_hierarchies finds all the info we need about
1036 * cgroup mountpoints and current cgroups, and stores it in @d.
1037 */
457ca9aa 1038static bool parse_hierarchies(void)
ccb4cabe
SH
1039{
1040 FILE *f;
1041 char * line = NULL, *basecginfo;
1042 char **klist = NULL, **nlist = NULL;
1043 size_t len = 0;
1044
d30ec4cb
SH
1045 /*
1046 * Root spawned containers escape the current cgroup, so use init's
1047 * cgroups as our base in that case.
1048 */
ccb4cabe
SH
1049 if (geteuid())
1050 basecginfo = read_file("/proc/self/cgroup");
1051 else
1052 basecginfo = read_file("/proc/1/cgroup");
1053 if (!basecginfo)
1054 return false;
1055
1056 if ((f = fopen("/proc/self/mountinfo", "r")) == NULL) {
d3b00a8f 1057 SYSERROR("Failed opening /proc/self/mountinfo");
ccb4cabe
SH
1058 return false;
1059 }
1060
1061 get_existing_subsystems(&klist, &nlist);
41c33dbe 1062
e4aeecf5
CB
1063 if (lxc_cgfsng_debug)
1064 lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
ccb4cabe
SH
1065
1066 /* we support simple cgroup mounts and lxcfs mounts */
1067 while (getline(&line, &len, f) != -1) {
1068 char **controller_list = NULL;
1069 char *mountpoint, *base_cgroup;
6328fd9c 1070 bool is_cgroup_v2, writeable;
ccb4cabe 1071
6328fd9c
CB
1072 is_cgroup_v2 = is_cgroupfs_v2(line);
1073 if (!is_lxcfs(line) && !is_cgroupfs_v1(line) && !is_cgroup_v2)
ccb4cabe
SH
1074 continue;
1075
1076 controller_list = get_controllers(klist, nlist, line);
1077 if (!controller_list)
1078 continue;
1079
457ca9aa 1080 if (controller_list_is_dup(hierarchies, controller_list)) {
ccb4cabe
SH
1081 free(controller_list);
1082 continue;
1083 }
1084
1085 mountpoint = get_mountpoint(line);
1086 if (!mountpoint) {
1087 ERROR("Error reading mountinfo: bad line '%s'", line);
1088 free_string_list(controller_list);
1089 continue;
1090 }
1091
1092 base_cgroup = get_current_cgroup(basecginfo, controller_list[0]);
1093 if (!base_cgroup) {
1094 ERROR("Failed to find current cgroup for controller '%s'", controller_list[0]);
1095 free_string_list(controller_list);
1096 free(mountpoint);
1097 continue;
1098 }
6328fd9c 1099
ccb4cabe
SH
1100 trim(base_cgroup);
1101 prune_init_scope(base_cgroup);
6328fd9c
CB
1102 if (is_cgroup_v2)
1103 writeable = test_writeable_v2(mountpoint, base_cgroup);
1104 else
1105 writeable = test_writeable_v1(mountpoint, base_cgroup);
1106 if (!writeable) {
ccb4cabe
SH
1107 free_string_list(controller_list);
1108 free(mountpoint);
1109 free(base_cgroup);
1110 continue;
1111 }
457ca9aa 1112 add_controller(controller_list, mountpoint, base_cgroup);
ccb4cabe
SH
1113 }
1114
1115 free_string_list(klist);
1116 free_string_list(nlist);
1117
1118 free(basecginfo);
1119
1120 fclose(f);
1121 free(line);
1122
e4aeecf5
CB
1123 if (lxc_cgfsng_debug) {
1124 printf("writeable subsystems:\n");
1125 lxc_cgfsng_print_hierarchies();
1126 }
1127
ccb4cabe
SH
1128 /* verify that all controllers in cgroup.use and all crucial
1129 * controllers are accounted for
1130 */
5059aae9
SH
1131 if (!all_controllers_found()) {
1132 INFO("cgfsng: not all controllers were find, deferring to cgfs driver");
ccb4cabe 1133 return false;
5059aae9 1134 }
ccb4cabe
SH
1135
1136 return true;
1137}
1138
457ca9aa
SH
1139static bool collect_hierarchy_info(void)
1140{
1141 const char *tmp;
1142 errno = 0;
1143 tmp = lxc_global_config_value("lxc.cgroup.use");
1a0e70ac 1144 if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */
457ca9aa
SH
1145 SYSERROR("cgfsng: error reading list of cgroups to use");
1146 return false;
1147 }
1148 cgroup_use = must_copy_string(tmp);
1149
1150 return parse_hierarchies();
1151}
1152
ccb4cabe
SH
1153static void *cgfsng_init(const char *name)
1154{
1155 struct cgfsng_handler_data *d;
457ca9aa 1156 const char *cgroup_pattern;
ccb4cabe
SH
1157
1158 d = must_alloc(sizeof(*d));
1159 memset(d, 0, sizeof(*d));
1160
1161 d->name = must_copy_string(name);
1162
ccb4cabe 1163 cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
1a0e70ac 1164 if (!cgroup_pattern) { /* lxc.cgroup.pattern is only NULL on error */
ccb4cabe
SH
1165 ERROR("Error getting cgroup pattern");
1166 goto out_free;
1167 }
1168 d->cgroup_pattern = must_copy_string(cgroup_pattern);
1169
e4aeecf5
CB
1170 if (lxc_cgfsng_debug)
1171 lxc_cgfsng_print_debuginfo(d);
ccb4cabe
SH
1172
1173 return d;
1174
1175out_free:
1176 free_handler_data(d);
1177 return NULL;
1178}
1179
ccb4cabe
SH
/*
 * Recursively remove the cgroup directory @dirname.
 *
 * Every sub-directory is removed depth-first before @dirname itself;
 * non-directory entries are skipped. Processing continues after a failure
 * so that as much as possible is cleaned up.
 *
 * Returns 0 on success and -1 if the directory could not be opened or if
 * any step failed. Only the first failure is logged.
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -1;

	while ((direntp = readdir(dir))) {
		struct stat mystat;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &mystat)) {
			if (!r)
				WARN("failed to stat %s", pathname);
			r = -1;
			goto next;
		}

		/* only directories (child cgroups) need explicit removal */
		if (!S_ISDIR(mystat.st_mode))
			goto next;
		if (cgroup_rmdir(pathname) < 0)
			r = -1;
next:
		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			WARN("failed to delete %s: %s", dirname, strerror(errno));
		r = -1;
	}

	if (closedir(dir) < 0) {
		if (!r)
			WARN("failed to close directory %s: %s", dirname, strerror(errno));
		r = -1;
	}
	return r;
}
1231
/*
 * Callback form of cgroup_rmdir(): @data is the path to remove.
 *
 * Tries to switch to uid/gid 0 and to drop supplementary groups first.
 * Failures of those calls are logged but do not abort the removal.
 * Returns the result of cgroup_rmdir().
 */
static int rmdir_wrapper(void *data)
{
	char *target = data;

	if (setresgid(0, 0, 0) < 0)
		SYSERROR("Failed to setgid to 0");

	if (setresuid(0, 0, 0) < 0)
		SYSERROR("Failed to setuid to 0");

	if (setgroups(0, NULL) < 0)
		SYSERROR("Failed to clear groups");

	return cgroup_rmdir(target);
}
1245
1246void recursive_destroy(char *path, struct lxc_conf *conf)
1247{
1248 int r;
1249 if (conf && !lxc_list_empty(&conf->id_map))
c9b7c33e 1250 r = userns_exec_1(conf, rmdir_wrapper, path, "rmdir_wrapper");
ccb4cabe
SH
1251 else
1252 r = cgroup_rmdir(path);
1253
1254 if (r < 0)
1c9da8da 1255 ERROR("Error destroying %s", path);
ccb4cabe
SH
1256}
1257
1258static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
1259{
1260 struct cgfsng_handler_data *d = hdata;
1261
1262 if (!d)
1263 return;
1264
457ca9aa 1265 if (d->container_cgroup && hierarchies) {
ccb4cabe 1266 int i;
457ca9aa
SH
1267 for (i = 0; hierarchies[i]; i++) {
1268 struct hierarchy *h = hierarchies[i];
e2db2a89 1269 if (h->fullcgpath) {
ccb4cabe
SH
1270 recursive_destroy(h->fullcgpath, conf);
1271 free(h->fullcgpath);
1272 h->fullcgpath = NULL;
1273 }
1274 }
1275 }
1276
1277 free_handler_data(d);
1278}
1279
1280struct cgroup_ops *cgfsng_ops_init(void)
1281{
e4aeecf5
CB
1282 if (getenv("LXC_DEBUG_CGFSNG"))
1283 lxc_cgfsng_debug = true;
1284
457ca9aa
SH
1285 if (!collect_hierarchy_info())
1286 return NULL;
e4aeecf5 1287
ccb4cabe
SH
1288 return &cgfsng_ops;
1289}
1290
1291static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
1292{
e3a3fecf 1293 h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
1a0e70ac 1294 if (dir_exists(h->fullcgpath)) { /* it must not already exist */
6f9584d8 1295 ERROR("Path \"%s\" already existed.", h->fullcgpath);
d8da679e 1296 return false;
6f9584d8
CB
1297 }
1298 if (!handle_cpuset_hierarchy(h, cgname)) {
1299 ERROR("Failed to handle cgroupfs v1 cpuset controller.");
e3a3fecf 1300 return false;
6f9584d8 1301 }
e3a3fecf 1302 return mkdir_p(h->fullcgpath, 0755) == 0;
ccb4cabe
SH
1303}
1304
1305static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
1306{
1307 if (rmdir(h->fullcgpath) < 0)
1308 SYSERROR("Failed to clean up cgroup %s from failed creation attempt", h->fullcgpath);
1309 free(h->fullcgpath);
1310 h->fullcgpath = NULL;
1311}
1312
1313/*
d30ec4cb 1314 * Try to create the same cgroup in all hierarchies.
ccb4cabe
SH
1315 * Start with cgroup_pattern; next cgroup_pattern-1, -2, ..., -999
1316 */
1317static inline bool cgfsng_create(void *hdata)
1318{
1319 struct cgfsng_handler_data *d = hdata;
1320 char *tmp, *cgname, *offset;
bb30b52a 1321 int i;
66b66624 1322 int idx = 0;
ccb4cabe
SH
1323 size_t len;
1324
1325 if (!d)
1326 return false;
1327 if (d->container_cgroup) {
1328 WARN("cgfsng_create called a second time");
1329 return false;
1330 }
1331
1332 tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
1333 if (!tmp) {
1334 ERROR("Failed expanding cgroup name pattern");
1335 return false;
1336 }
1a0e70ac 1337 len = strlen(tmp) + 5; /* leave room for -NNN\0 */
ccb4cabe
SH
1338 cgname = must_alloc(len);
1339 strcpy(cgname, tmp);
1340 free(tmp);
1341 offset = cgname + len - 5;
1342
1343again:
95adfe93
SH
1344 if (idx == 1000) {
1345 ERROR("Too many conflicting cgroup names");
ccb4cabe 1346 goto out_free;
95adfe93 1347 }
66b66624 1348 if (idx) {
bb30b52a
CB
1349 int ret;
1350
66b66624
CB
1351 ret = snprintf(offset, 5, "-%d", idx);
1352 if (ret < 0 || (size_t)ret >= 5) {
1353 FILE *f = fopen("/dev/null", "w");
1354 if (f >= 0) {
1355 fprintf(f, "Workaround for GCC7 bug: "
1356 "https://gcc.gnu.org/bugzilla/"
1357 "show_bug.cgi?id=78969");
1358 fclose(f);
1359 }
1360 }
1361 }
457ca9aa
SH
1362 for (i = 0; hierarchies[i]; i++) {
1363 if (!create_path_for_hierarchy(hierarchies[i], cgname)) {
ccb4cabe 1364 int j;
1a0e70ac 1365 ERROR("Failed to create \"%s\"", hierarchies[i]->fullcgpath);
457ca9aa
SH
1366 free(hierarchies[i]->fullcgpath);
1367 hierarchies[i]->fullcgpath = NULL;
ccb4cabe 1368 for (j = 0; j < i; j++)
457ca9aa 1369 remove_path_for_hierarchy(hierarchies[j], cgname);
ccb4cabe
SH
1370 idx++;
1371 goto again;
1372 }
1373 }
1374 /* Done */
1375 d->container_cgroup = cgname;
1376 return true;
1377
1378out_free:
1379 free(cgname);
1380 return false;
1381}
1382
ccb4cabe
SH
1383static bool cgfsng_enter(void *hdata, pid_t pid)
1384{
ccb4cabe
SH
1385 char pidstr[25];
1386 int i, len;
1387
1388 len = snprintf(pidstr, 25, "%d", pid);
1389 if (len < 0 || len > 25)
1390 return false;
1391
457ca9aa
SH
1392 for (i = 0; hierarchies[i]; i++) {
1393 char *fullpath = must_make_path(hierarchies[i]->fullcgpath,
ccb4cabe
SH
1394 "cgroup.procs", NULL);
1395 if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
d3b00a8f 1396 SYSERROR("Failed to enter %s", fullpath);
ccb4cabe
SH
1397 free(fullpath);
1398 return false;
1399 }
1400 free(fullpath);
1401 }
1402
1403 return true;
1404}
1405
1406struct chown_data {
1407 struct cgfsng_handler_data *d;
1a0e70ac 1408 uid_t origuid; /* target uid in parent namespace */
ccb4cabe
SH
1409};
1410
c0888dfe
SH
1411/*
1412 * chgrp the container cgroups to container group. We leave
1413 * the container owner as cgroup owner. So we must make the
1414 * directories 775 so that the container can create sub-cgroups.
43647298
SH
1415 *
1416 * Also chown the tasks and cgroup.procs files. Those may not
1417 * exist depending on kernel version.
c0888dfe 1418 */
ccb4cabe
SH
1419static int chown_cgroup_wrapper(void *data)
1420{
1421 struct chown_data *arg = data;
ccb4cabe
SH
1422 uid_t destuid;
1423 int i;
1424
1425 if (setresgid(0,0,0) < 0)
1426 SYSERROR("Failed to setgid to 0");
1427 if (setresuid(0,0,0) < 0)
1428 SYSERROR("Failed to setuid to 0");
1429 if (setgroups(0, NULL) < 0)
1430 SYSERROR("Failed to clear groups");
1431
1432 destuid = get_ns_uid(arg->origuid);
1433
457ca9aa
SH
1434 for (i = 0; hierarchies[i]; i++) {
1435 char *fullpath, *path = hierarchies[i]->fullcgpath;
43647298
SH
1436
1437 if (chown(path, destuid, 0) < 0) {
ab8f5424 1438 SYSERROR("Error chowning %s to %d", path, (int) destuid);
ccb4cabe
SH
1439 return -1;
1440 }
c0888dfe 1441
43647298 1442 if (chmod(path, 0775) < 0) {
ab8f5424 1443 SYSERROR("Error chmoding %s", path);
c0888dfe
SH
1444 return -1;
1445 }
ccb4cabe 1446
ab8f5424
SH
1447 /*
1448 * Failures to chown these are inconvenient but not detrimental
1449 * We leave these owned by the container launcher, so that container
1450 * root can write to the files to attach. We chmod them 664 so that
1451 * container systemd can write to the files (which systemd in wily
1452 * insists on doing)
1453 */
43647298
SH
1454 fullpath = must_make_path(path, "tasks", NULL);
1455 if (chown(fullpath, destuid, 0) < 0 && errno != ENOENT)
13277ec4 1456 WARN("Failed chowning %s to %d: %s", fullpath, (int) destuid,
1457 strerror(errno));
ab8f5424 1458 if (chmod(fullpath, 0664) < 0)
13277ec4 1459 WARN("Error chmoding %s: %s", path, strerror(errno));
43647298
SH
1460 free(fullpath);
1461
1462 fullpath = must_make_path(path, "cgroup.procs", NULL);
1463 if (chown(fullpath, destuid, 0) < 0 && errno != ENOENT)
13277ec4 1464 WARN("Failed chowning %s to %d: %s", fullpath, (int) destuid,
1465 strerror(errno));
ab8f5424 1466 if (chmod(fullpath, 0664) < 0)
13277ec4 1467 WARN("Error chmoding %s: %s", path, strerror(errno));
ccb4cabe
SH
1468 free(fullpath);
1469 }
1470
1471 return 0;
1472}
1473
1474static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
1475{
1476 struct cgfsng_handler_data *d = hdata;
1477 struct chown_data wrap;
1478
1479 if (!d)
1480 return false;
1481
1482 if (lxc_list_empty(&conf->id_map))
1483 return true;
1484
1485 wrap.d = d;
1486 wrap.origuid = geteuid();
1487
c9b7c33e
CB
1488 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
1489 "chown_cgroup_wrapper") < 0) {
ccb4cabe
SH
1490 ERROR("Error requesting cgroup chown in new namespace");
1491 return false;
1492 }
1493
1494 return true;
1495}
1496
8aa1044f
SH
1497/*
1498 * We've safe-mounted a tmpfs as parent, so we don't need to protect against
1499 * symlinks any more - just use mount
1500 */
1501
1502/* mount cgroup-full if requested */
1503static int mount_cgroup_full(int type, struct hierarchy *h, char *dest,
1504 char *container_cgroup)
1505{
1506 if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
1507 return 0;
1508 if (mount(h->mountpoint, dest, "cgroup", MS_BIND, NULL) < 0) {
1509 SYSERROR("Error bind-mounting %s cgroup onto %s", h->mountpoint,
1510 dest);
1511 return -1;
1512 }
1513 if (type != LXC_AUTO_CGROUP_FULL_RW) {
5b6f9369
SH
1514 unsigned long flags = MS_BIND | MS_NOSUID | MS_NOEXEC | MS_NODEV |
1515 MS_REMOUNT | MS_RDONLY;
1516 if (mount(NULL, dest, "cgroup", flags, NULL) < 0) {
8aa1044f
SH
1517 SYSERROR("Error remounting %s readonly", dest);
1518 return -1;
1519 }
1520 }
1521
1522 INFO("Bind mounted %s onto %s", h->mountpoint, dest);
1523 if (type != LXC_AUTO_CGROUP_FULL_MIXED)
1524 return 0;
1525
1526 /* mount just the container path rw */
1527 char *source = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
5b6f9369 1528 char *rwpath = must_make_path(dest, h->base_cgroup, container_cgroup, NULL);
8aa1044f 1529 if (mount(source, rwpath, "cgroup", MS_BIND, NULL) < 0)
13277ec4 1530 WARN("Failed to mount %s read-write: %s", rwpath,
1531 strerror(errno));
8aa1044f
SH
1532 INFO("Made %s read-write", rwpath);
1533 free(rwpath);
1534 free(source);
1535 return 0;
1536}
1537
1538/* cgroup-full:* is done, no need to create subdirs */
1539static bool cg_mount_needs_subdirs(int type)
1540{
1541 if (type >= LXC_AUTO_CGROUP_FULL_RO)
1542 return false;
1543 return true;
1544}
1545
1546/*
1547 * After $rootfs/sys/fs/container/controller/the/cg/path has been
1548 * created, remount controller ro if needed and bindmount the
1549 * cgroupfs onto controll/the/cg/path
1550 */
1551static int
1552do_secondstage_mounts_if_needed(int type, struct hierarchy *h,
1553 char *controllerpath, char *cgpath,
1554 const char *container_cgroup)
1555{
1556 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
1557 if (mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL) < 0) {
1558 SYSERROR("Error bind-mounting %s", controllerpath);
1559 return -1;
1560 }
1561 if (mount(controllerpath, controllerpath, "cgroup",
1562 MS_REMOUNT | MS_BIND | MS_RDONLY, NULL) < 0) {
1563 SYSERROR("Error remounting %s read-only", controllerpath);
1564 return -1;
1565 }
1566 INFO("Remounted %s read-only", controllerpath);
1567 }
1568 char *sourcepath = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
1569 int flags = MS_BIND;
1570 if (type == LXC_AUTO_CGROUP_RO)
1571 flags |= MS_RDONLY;
1572 INFO("Mounting %s onto %s", sourcepath, cgpath);
1573 if (mount(sourcepath, cgpath, "cgroup", flags, NULL) < 0) {
1574 free(sourcepath);
1575 SYSERROR("Error mounting cgroup %s onto %s", h->controllers[0],
1576 cgpath);
1577 return -1;
1578 }
1579 free(sourcepath);
1580 INFO("Completed second stage cgroup automounts for %s", cgpath);
1581 return 0;
1582}
1583
ccb4cabe
SH
1584static bool cgfsng_mount(void *hdata, const char *root, int type)
1585{
8aa1044f
SH
1586 struct cgfsng_handler_data *d = hdata;
1587 char *tmpfspath = NULL;
1588 bool retval = false;
a8de4c49 1589 int i;
8aa1044f
SH
1590
1591 if ((type & LXC_AUTO_CGROUP_MASK) == 0)
1592 return true;
1593
ccb4cabe
SH
1594 if (cgns_supported())
1595 return true;
8aa1044f
SH
1596
1597 tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
1598
1599 if (type == LXC_AUTO_CGROUP_NOSPEC)
1600 type = LXC_AUTO_CGROUP_MIXED;
1601 else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
1602 type = LXC_AUTO_CGROUP_FULL_MIXED;
1603
1604 /* Mount tmpfs */
1605 if (safe_mount("cgroup_root", tmpfspath, "tmpfs",
1606 MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME,
1607 "size=10240k,mode=755",
1608 root) < 0)
1609 goto bad;
1610
457ca9aa 1611 for (i = 0; hierarchies[i]; i++) {
8aa1044f 1612 char *controllerpath, *path2;
457ca9aa 1613 struct hierarchy *h = hierarchies[i];
8aa1044f
SH
1614 char *controller = strrchr(h->mountpoint, '/');
1615 int r;
1616
1617 if (!controller)
1618 continue;
1619 controller++;
1620 controllerpath = must_make_path(tmpfspath, controller, NULL);
1621 if (dir_exists(controllerpath)) {
1622 free(controllerpath);
1623 continue;
1624 }
1625 if (mkdir(controllerpath, 0755) < 0) {
1626 SYSERROR("Error creating cgroup path: %s", controllerpath);
1627 free(controllerpath);
1628 goto bad;
1629 }
1630 if (mount_cgroup_full(type, h, controllerpath, d->container_cgroup) < 0) {
1631 free(controllerpath);
1632 goto bad;
1633 }
1634 if (!cg_mount_needs_subdirs(type)) {
1635 free(controllerpath);
1636 continue;
1637 }
ef4413fa 1638 path2 = must_make_path(controllerpath, h->base_cgroup, d->container_cgroup, NULL);
8aa1044f
SH
1639 if (mkdir_p(path2, 0755) < 0) {
1640 free(controllerpath);
1641 goto bad;
1642 }
2f62fb00 1643
8aa1044f
SH
1644 r = do_secondstage_mounts_if_needed(type, h, controllerpath, path2,
1645 d->container_cgroup);
1646 free(controllerpath);
1647 free(path2);
1648 if (r < 0)
1649 goto bad;
1650 }
1651 retval = true;
1652
1653bad:
1654 free(tmpfspath);
1655 return retval;
ccb4cabe
SH
1656}
1657
/*
 * Count the lines of the "cgroup.procs" files in @dirname and in all
 * directories below it.
 *
 * Returns 0 if @dirname cannot be opened. Entries that cannot be stat'ed
 * and cgroup.procs files whose line count is reported as -1 are ignored.
 */
static int recursive_count_nrtasks(char *dirname)
{
	struct dirent *entry;
	DIR *dir;
	char *procs;
	int total = 0, lines;

	dir = opendir(dirname);
	if (!dir)
		return 0;

	while ((entry = readdir(dir)) != NULL) {
		struct stat sb;
		char *child;

		if (strcmp(entry->d_name, ".") == 0 ||
		    strcmp(entry->d_name, "..") == 0)
			continue;

		child = must_make_path(dirname, entry->d_name, NULL);

		/* descend into sub-directories only */
		if (lstat(child, &sb) == 0 && S_ISDIR(sb.st_mode))
			total += recursive_count_nrtasks(child);

		free(child);
	}

	procs = must_make_path(dirname, "cgroup.procs", NULL);
	lines = lxc_count_file_lines(procs);
	if (lines != -1)
		total += lines;
	free(procs);

	(void) closedir(dir);

	return total;
}
1702
1703static int cgfsng_nrtasks(void *hdata) {
1704 struct cgfsng_handler_data *d = hdata;
1705 char *path;
1706 int count;
1707
457ca9aa 1708 if (!d || !d->container_cgroup || !hierarchies)
ccb4cabe 1709 return -1;
457ca9aa 1710 path = must_make_path(hierarchies[0]->fullcgpath, NULL);
ccb4cabe
SH
1711 count = recursive_count_nrtasks(path);
1712 free(path);
1713 return count;
1714}
1715
1716/* Only root needs to escape to the cgroup of its init */
7103fe6f 1717static bool cgfsng_escape()
ccb4cabe 1718{
ccb4cabe
SH
1719 int i;
1720
1721 if (geteuid())
1722 return true;
1723
457ca9aa
SH
1724 for (i = 0; hierarchies[i]; i++) {
1725 char *fullpath = must_make_path(hierarchies[i]->mountpoint,
1726 hierarchies[i]->base_cgroup,
ccb4cabe
SH
1727 "cgroup.procs", NULL);
1728 if (lxc_write_to_file(fullpath, "0", 2, false) != 0) {
d3b00a8f 1729 SYSERROR("Failed to escape to %s", fullpath);
ccb4cabe 1730 free(fullpath);
6df334d1 1731 return false;
ccb4cabe
SH
1732 }
1733 free(fullpath);
1734 }
1735
6df334d1 1736 return true;
ccb4cabe
SH
1737}
1738
36662416
TA
1739static int cgfsng_num_hierarchies(void)
1740{
1741 int i;
1742
1743 for (i = 0; hierarchies[i]; i++)
1744 ;
1745
1746 return i;
1747}
1748
1749static bool cgfsng_get_hierarchies(int n, char ***out)
1750{
1751 int i;
1752
1753 /* sanity check n */
1754 for (i = 0; i < n; i++) {
1755 if (!hierarchies[i])
1756 return false;
1757 }
1758
1759 *out = hierarchies[i]->controllers;
1760
1761 return true;
1762}
1763
ccb4cabe
SH
1764#define THAWED "THAWED"
1765#define THAWED_LEN (strlen(THAWED))
1766
1767static bool cgfsng_unfreeze(void *hdata)
1768{
ccb4cabe 1769 char *fullpath;
457ca9aa 1770 struct hierarchy *h = get_hierarchy("freezer");
ccb4cabe 1771
457ca9aa 1772 if (!h)
ccb4cabe
SH
1773 return false;
1774 fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
1775 if (lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false) != 0) {
1776 free(fullpath);
1777 return false;
1778 }
1779 free(fullpath);
1780 return true;
1781}
1782
1783static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
1784{
457ca9aa 1785 struct hierarchy *h = get_hierarchy(subsystem);
ccb4cabe
SH
1786 if (!h)
1787 return NULL;
1788
371f834d
SH
1789 return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
1790}
1791
1792/*
1793 * Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a
1794 * full path, which must be freed by the caller.
1795 */
1796static char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
1797 const char *inpath,
1798 const char *filename)
1799{
371f834d 1800 return must_make_path(h->mountpoint, inpath, filename, NULL);
ccb4cabe
SH
1801}
1802
1803static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
1804{
ccb4cabe
SH
1805 char pidstr[25];
1806 int i, len;
1807
1808 len = snprintf(pidstr, 25, "%d", pid);
1809 if (len < 0 || len > 25)
1810 return false;
1811
457ca9aa 1812 for (i = 0; hierarchies[i]; i++) {
ccb4cabe 1813 char *path, *fullpath;
457ca9aa 1814 struct hierarchy *h = hierarchies[i];
ccb4cabe
SH
1815
1816 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
1a0e70ac 1817 if (!path) /* not running */
ccb4cabe
SH
1818 continue;
1819
371f834d
SH
1820 fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
1821 free(path);
ccb4cabe
SH
1822 if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
1823 SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
1824 free(fullpath);
ccb4cabe
SH
1825 return false;
1826 }
ccb4cabe
SH
1827 free(fullpath);
1828 }
1829
ccb4cabe
SH
1830 return true;
1831}
1832
/*
 * Called externally (i.e. from 'lxc-cgroup') to query cgroup limits.
 * Here we don't have a cgroup_data set up, so we ask the running
 * container through the commands API for the cgroup path
 *
 * The subsystem is the part of @filename before the first '.'. Up to @len
 * bytes of the file are read into @value. Returns the result of
 * lxc_read_from_file(), or -1 when no cgroup path is returned for the
 * container or no hierarchy provides the subsystem.
 */
static int cgfsng_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t namelen = strlen(filename) + 1;
	char *controller, *dot, *cgpath, *fullpath;
	struct hierarchy *h;
	int ret = -1;

	/* controller = filename truncated at the first '.' */
	controller = alloca(namelen);
	memcpy(controller, filename, namelen);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
	if (!cgpath) /* not running */
		return -1;

	h = get_hierarchy(controller);
	if (h) {
		fullpath = build_full_cgpath_from_monitorpath(h, cgpath, filename);
		ret = lxc_read_from_file(fullpath, value, len);
		free(fullpath);
	}

	free(cgpath);

	return ret;
}
1864
/*
 * Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.
 * Here we don't have a cgroup_data set up, so we ask the running
 * container through the commands API for the cgroup path
 *
 * The subsystem is the part of @filename before the first '.'. Returns the
 * result of lxc_write_to_file(), or -1 when no cgroup path is returned for
 * the container or no hierarchy provides the subsystem.
 */
static int cgfsng_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t namelen = strlen(filename) + 1;
	char *controller, *dot, *cgpath, *fullpath;
	struct hierarchy *h;
	int ret = -1;

	/* controller = filename truncated at the first '.' */
	controller = alloca(namelen);
	memcpy(controller, filename, namelen);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
	if (!cgpath) /* not running */
		return -1;

	h = get_hierarchy(controller);
	if (h) {
		fullpath = build_full_cgpath_from_monitorpath(h, cgpath, filename);
		ret = lxc_write_to_file(fullpath, value, strlen(value), false);
		free(fullpath);
	}

	free(cgpath);

	return ret;
}
1896
72add155
SH
1897/*
1898 * take devices cgroup line
1899 * /dev/foo rwx
1900 * and convert it to a valid
1901 * type major:minor mode
1902 * line. Return <0 on error. Dest is a preallocated buffer
1903 * long enough to hold the output.
1904 */
1905static int convert_devpath(const char *invalue, char *dest)
1906{
2a06d041
CB
1907 int n_parts;
1908 char *p, *path, type;
72add155
SH
1909 struct stat sb;
1910 unsigned long minor, major;
2a06d041
CB
1911 int ret = -EINVAL;
1912 char *mode = NULL;
72add155
SH
1913
1914 path = must_copy_string(invalue);
1915
1916 /*
1917 * read path followed by mode; ignore any trailing text.
1918 * A ' # comment' would be legal. Technically other text
1919 * is not legal, we could check for that if we cared to
1920 */
1921 for (n_parts = 1, p = path; *p && n_parts < 3; p++) {
2c2d6c49
SH
1922 if (*p != ' ')
1923 continue;
1924 *p = '\0';
1925 if (n_parts != 1)
1926 break;
1927 p++;
1928 n_parts++;
1929 while (*p == ' ')
1930 p++;
1931 mode = p;
1932 if (*p == '\0')
1933 goto out;
72add155 1934 }
2c2d6c49
SH
1935
1936 if (n_parts == 1)
72add155 1937 goto out;
72add155
SH
1938
1939 ret = stat(path, &sb);
1940 if (ret < 0)
1941 goto out;
1942
72add155
SH
1943 mode_t m = sb.st_mode & S_IFMT;
1944 switch (m) {
1945 case S_IFBLK:
1946 type = 'b';
1947 break;
1948 case S_IFCHR:
1949 type = 'c';
1950 break;
2c2d6c49 1951 default:
72add155
SH
1952 ERROR("Unsupported device type %i for %s", m, path);
1953 ret = -EINVAL;
1954 goto out;
1955 }
2c2d6c49
SH
1956
1957 major = MAJOR(sb.st_rdev);
1958 minor = MINOR(sb.st_rdev);
1959 ret = snprintf(dest, 50, "%c %lu:%lu %s", type, major, minor, mode);
72add155 1960 if (ret < 0 || ret >= 50) {
2a06d041
CB
1961 ERROR("Error on configuration value \"%c %lu:%lu %s\" (max 50 "
1962 "chars)", type, major, minor, mode);
72add155
SH
1963 ret = -ENAMETOOLONG;
1964 goto out;
1965 }
1966 ret = 0;
1967
1968out:
1969 free(path);
1970 return ret;
1971}
1972
ccb4cabe
SH
1973/*
1974 * Called from setup_limits - here we have the container's cgroup_data because
1975 * we created the cgroups
1976 */
1977static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfsng_handler_data *d)
1978{
1979 char *subsystem = NULL, *p;
72add155 1980 int ret = 0;
ccb4cabe 1981 struct hierarchy *h;
1a0e70ac
CB
1982 /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
1983 char converted_value[50];
ccb4cabe
SH
1984
1985 subsystem = alloca(strlen(filename) + 1);
1986 strcpy(subsystem, filename);
1987 if ((p = strchr(subsystem, '.')) != NULL)
1988 *p = '\0';
1989
c8bf519d 1990 if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
72add155
SH
1991 ret = convert_devpath(value, converted_value);
1992 if (ret < 0)
c8bf519d 1993 return ret;
72add155
SH
1994 value = converted_value;
1995
c8bf519d 1996 }
1997
457ca9aa 1998 h = get_hierarchy(subsystem);
ccb4cabe
SH
1999 if (h) {
2000 char *fullpath = must_make_path(h->fullcgpath, filename, NULL);
72add155 2001 ret = lxc_write_to_file(fullpath, value, strlen(value), false);
ccb4cabe
SH
2002 free(fullpath);
2003 }
2004 return ret;
2005}
2006
2007static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
2008 bool do_devices)
2009{
2010 struct cgfsng_handler_data *d = hdata;
2011 struct lxc_list *iterator, *sorted_cgroup_settings, *next;
2012 struct lxc_cgroup *cg;
ccb4cabe
SH
2013 bool ret = false;
2014
2015 if (lxc_list_empty(cgroup_settings))
2016 return true;
2017
2018 sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
2019 if (!sorted_cgroup_settings) {
2020 return false;
2021 }
2022
ccb4cabe
SH
2023 lxc_list_for_each(iterator, sorted_cgroup_settings) {
2024 cg = iterator->elem;
2025
2026 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
ccb4cabe
SH
2027 if (lxc_cgroup_set_data(cg->subsystem, cg->value, d)) {
2028 if (do_devices && (errno == EACCES || errno == EPERM)) {
2029 WARN("Error setting %s to %s for %s",
2030 cg->subsystem, cg->value, d->name);
2031 continue;
2032 }
2033 SYSERROR("Error setting %s to %s for %s",
2034 cg->subsystem, cg->value, d->name);
2035 goto out;
2036 }
6a628f4a 2037 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
ccb4cabe 2038 }
ccb4cabe
SH
2039 }
2040
2041 ret = true;
2042 INFO("cgroup has been setup");
2043out:
ccb4cabe
SH
2044 lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
2045 lxc_list_del(iterator);
2046 free(iterator);
2047 }
2048 free(sorted_cgroup_settings);
2049 return ret;
2050}
2051
2052static struct cgroup_ops cgfsng_ops = {
2053 .init = cgfsng_init,
2054 .destroy = cgfsng_destroy,
2055 .create = cgfsng_create,
2056 .enter = cgfsng_enter,
ccb4cabe 2057 .escape = cgfsng_escape,
36662416
TA
2058 .num_hierarchies = cgfsng_num_hierarchies,
2059 .get_hierarchies = cgfsng_get_hierarchies,
ccb4cabe
SH
2060 .get_cgroup = cgfsng_get_cgroup,
2061 .get = cgfsng_get,
2062 .set = cgfsng_set,
2063 .unfreeze = cgfsng_unfreeze,
2064 .setup_limits = cgfsng_setup_limits,
2065 .name = "cgroupfs-ng",
2066 .attach = cgfsng_attach,
2067 .chown = cgfsns_chown,
2068 .mount_cgroup = cgfsng_mount,
2069 .nrtasks = cgfsng_nrtasks,
2070 .driver = CGFSNG,
2071
2072 /* unsupported */
2073 .create_legacy = NULL,
2074};