]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgfsng.c
Merge pull request #1097 from jirutka/patch-1
[mirror_lxc.git] / src / lxc / cgfsng.c
CommitLineData
ccb4cabe
SH
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2016 Canonical Ltd.
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@ubuntu.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/*
25 * cgfs-ng.c: this is a new, simplified implementation of a filesystem
26 * cgroup backend. The original cgfs.c was designed to be as flexible
27 * as possible. It would try to find cgroup filesystems no matter where
28 * or how you had them mounted, and deduce the most usable mount for
29 * each controller. It also was not designed for unprivileged use, as
30 * that was reserved for cgmanager.
31 *
32 * This new implementation assumes that cgroup filesystems are mounted
33 * under /sys/fs/cgroup/clist where clist is either the controller, or
34 * a comma-separated list of controllers.
35 */
36#include "config.h"
37#include <stdio.h>
38#include <stdio.h>
39#include <stdlib.h>
40#include <errno.h>
41#include <sys/types.h>
42#include <unistd.h>
43#include <dirent.h>
44#include <grp.h>
45
46#include "log.h"
47#include "cgroup.h"
48#include "utils.h"
49#include "commands.h"
3e32591c 50#include "bdev/bdev.h"
ccb4cabe
SH
51
52lxc_log_define(lxc_cgfsng, lxc);
53
54static struct cgroup_ops cgfsng_ops;
55
ccb4cabe
SH
/*
 * Descriptor for one mounted cgroup hierarchy.
 *
 * @controllers: NULL, or a NULL-terminated list of all the controllers
 *               co-mounted in this hierarchy
 * @mountpoint:  the mountpoint we use; either /sys/fs/cgroup/controller
 *               or /sys/fs/cgroup/controllerlist
 * @base_cgroup: the cgroup under which the container cgroup is created.
 *               Either the caller's cgroup (if not root) or init's
 *               cgroup (if root).
 * @fullcgpath:  mountpoint + base_cgroup + container cgroup name, set
 *               once the container's cgroup has been created in this
 *               hierarchy; NULL otherwise
 */
struct hierarchy {
	char **controllers;
	char *mountpoint;
	char *base_cgroup;
	char *fullcgpath;
};
72
/*
 * Per-container cgroup data attached to the lxc_handler.
 *
 * @cgroup_pattern:   a copy of lxc.cgroup.pattern
 * @container_cgroup: if not NULL, the cgroup which was created for the
 *                    container. It is created under each hierarchy's
 *                    base_cgroup and, relative to that base, is the
 *                    same for all hierarchies.
 * @name:             the container name
 */
struct cgfsng_handler_data {
	char *cgroup_pattern;
	char *container_cgroup; /* cgroup we created for the container */
	char *name;             /* container name */
};
87
457ca9aa
SH
/*
 * NULL-terminated array of struct hierarchy pointers, one per usable
 * hierarchy, with no duplicates. The first sufficient, writeable
 * mounted hierarchy wins.
 */
struct hierarchy **hierarchies;

/* A copy of the lxc.cgroup.use configuration value; may be NULL. */
char *cgroup_use;
99
ccb4cabe
SH
/*
 * Free every string of the NULL-terminated array @clist and then the
 * array itself. A NULL @clist is accepted and ignored.
 */
static void free_string_list(char **clist)
{
	char **cur;

	if (!clist)
		return;

	for (cur = clist; *cur; cur++)
		free(*cur);
	free(clist);
}
110
/*
 * Reallocate @orig to @sz bytes. Never returns NULL: the allocation is
 * retried until it succeeds.
 */
static void *must_realloc(void *orig, size_t sz)
{
	void *ret = realloc(orig, sz);

	while (!ret)
		ret = realloc(orig, sz);
	return ret;
}
121
/* Allocate @sz bytes via must_realloc(); does not fail. */
static void *must_alloc(size_t sz)
{
	void *fresh = must_realloc(NULL, sz);

	return fresh;
}
127
/*
 * Return a heap-allocated copy of @entry, or NULL when @entry is NULL.
 * The allocation is retried until it succeeds, so a non-NULL @entry
 * always yields a copy. The caller owns the result.
 */
static char *must_copy_string(const char *entry)
{
	size_t nbytes;
	char *dup;

	if (entry == NULL)
		return NULL;

	nbytes = strlen(entry) + 1; /* include the terminating NUL */
	for (;;) {
		dup = malloc(nbytes);
		if (dup)
			break;
	}
	memcpy(dup, entry, nbytes);
	return dup;
}
140
/*
 * Special case: return a newly allocated copy of @entry with 'name='
 * prepended, i.e. turn "systemd" into "name=systemd". Does not fail.
 */
static char *must_prefix_named(char *entry)
{
	static const char prefix[] = "name=";
	/* sizeof(prefix) already accounts for the terminating NUL */
	size_t bufsz = strlen(entry) + sizeof(prefix);
	char *named = must_alloc(bufsz);

	snprintf(named, bufsz, "%s%s", prefix, entry);
	return named;
}
155
/*
 * Grow the NULL-terminated pointer array *@list by one slot (allocating
 * it if *@list is NULL) and write the new NULL terminator. Does not fail.
 *
 * Returns the index of the slot just before the terminator, which the
 * caller is expected to fill in.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list) {
		while ((*list)[count])
			count++;
	}

	/* room for the existing entries, the new one, and the terminator */
	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;
	return count;
}
173
/*
 * Return true if @entry equals one of the strings in the
 * NULL-terminated array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	char **cur;

	for (cur = list; cur && *cur; cur++) {
		if (!strcmp(*cur, entry))
			return true;
	}

	return false;
}
190
/*
 * Append a copy of controller @entry to the NULL-terminated list
 * *@clist, which must be NULL on the first call. Does not fail; the
 * list stays NULL-terminated.
 *
 * Named subsystems are handled here as well:
 *  - an entry already starting with "name=" or found in the kernel
 *    subsystem list @klist is copied verbatim;
 *  - any other entry gets "name=" prefixed;
 *  - an entry found in both @klist and the named list @nlist is
 *    ambiguous and is refused (logged, nothing appended).
 *    (TODO - this could be worked around in some cases by remounting
 *    to be unambiguous, or by comparing mountpoint contents with the
 *    current cgroup.)
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;

	if (is_kernel && string_in_list(nlist, entry)) {
		ERROR("Refusing to use ambiguous controller '%s'", entry);
		ERROR("It is both a named and kernel subsystem");
		return;
	}

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		(*clist)[slot] = must_copy_string(entry);
	else
		(*clist)[slot] = must_prefix_named(entry);
}
226
ccb4cabe
SH
227static void free_handler_data(struct cgfsng_handler_data *d)
228{
ccb4cabe
SH
229 free(d->cgroup_pattern);
230 free(d->container_cgroup);
231 free(d->name);
232 free(d);
233}
234
235/*
236 * Given a handler's cgroup data, return the struct hierarchy for the
237 * controller @c, or NULL if there is none.
238 */
457ca9aa 239struct hierarchy *get_hierarchy(const char *c)
ccb4cabe
SH
240{
241 int i;
242
457ca9aa 243 if (!hierarchies)
ccb4cabe 244 return NULL;
457ca9aa
SH
245 for (i = 0; hierarchies[i]; i++) {
246 if (string_in_list(hierarchies[i]->controllers, c))
247 return hierarchies[i];
ccb4cabe
SH
248 }
249 return NULL;
250}
251
e3a3fecf
SH
252static char *must_make_path(const char *first, ...) __attribute__((sentinel));
253
/*
 * Copy the contents of parent(@path)/@file to @path/@file.
 *
 * @path is briefly cut at its last '/' to build the parent's file name
 * and is restored before any further work, so it is unchanged on every
 * return path.
 *
 * Returns true on success, false if @path has no '/', if the parent
 * file cannot be read (or is empty), or if the write fails.
 */
static bool copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath;
	int len, ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		ERROR("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* name the parent's copy of @file, then undo the cut immediately */
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = '/';

	/* first call is made with no buffer to learn the length -- presumably
	 * lxc_read_from_file() returns the file size then; confirm in utils */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;
	value = must_alloc(len + 1);
	if (lxc_read_from_file(fpath, value, len) != len)
		goto bad;
	/* terminate so the value can safely be logged with %s below */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false);
	if (ret < 0)
		SYSERROR("Unable to write %s to %s", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

bad:
	SYSERROR("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
291
292/*
293 * Initialize the cpuset hierarchy in first directory of @gname and
294 * set cgroup.clone_children so that children inherit settings.
295 * Since the h->base_path is populated by init or ourselves, we know
296 * it is already initialized.
297 */
298bool handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
299{
300 char *cgpath, *clonechildrenpath, v, *slash;
301
302 if (!string_in_list(h->controllers, "cpuset"))
303 return true;
304
305 if (*cgname == '/')
306 cgname++;
307 slash = strchr(cgname, '/');
308 if (slash)
309 *slash = '\0';
310
311 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
312 if (slash)
313 *slash = '/';
314 if (mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
315 SYSERROR("Failed to create '%s'", cgpath);
316 free(cgpath);
317 return false;
318 }
319 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
320 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
321 free(clonechildrenpath);
322 free(cgpath);
323 return true;
324 }
325 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
326 SYSERROR("Failed to read '%s'", clonechildrenpath);
327 free(clonechildrenpath);
328 free(cgpath);
329 return false;
330 }
331
332 if (v == '1') { /* already set for us by someone else */
333 free(clonechildrenpath);
334 free(cgpath);
335 return true;
336 }
337
338 /* copy parent's settings */
339 if (!copy_parent_file(cgpath, "cpuset.cpus") ||
340 !copy_parent_file(cgpath, "cpuset.mems")) {
341 free(cgpath);
342 free(clonechildrenpath);
343 return false;
344 }
345 free(cgpath);
346
347 if (lxc_write_to_file(clonechildrenpath, "1", 1, false) < 0) {
348 /* Set clone_children so children inherit our settings */
349 SYSERROR("Failed to write 1 to %s", clonechildrenpath);
350 free(clonechildrenpath);
351 return false;
352 }
353 free(clonechildrenpath);
354 return true;
355}
356
ccb4cabe
SH
/*
 * Return true if the two NULL-terminated string lists @l1 and @l2 have
 * at least one string in common. A NULL list intersects with nothing.
 */
static bool controller_lists_intersect(char **l1, char **l2)
{
	char **cur;

	if (l1 == NULL || l2 == NULL)
		return false;

	for (cur = l1; *cur; cur++)
		if (string_in_list(l2, *cur))
			return true;

	return false;
}
374
375/*
376 * For a null-terminated list of controllers @clist, return true if any of
377 * those controllers is already listed the null-terminated list of
378 * hierarchies @hlist. Realistically, if one is present, all must be present.
379 */
380static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
381{
382 int i;
383
384 if (!hlist)
385 return false;
386 for (i = 0; hlist[i]; i++)
387 if (controller_lists_intersect(hlist[i]->controllers, clist))
388 return true;
389 return false;
390
391}
392
393/*
394 * Return true if the controller @entry is found in the null-terminated
395 * list of hierarchies @hlist
396 */
397static bool controller_found(struct hierarchy **hlist, char *entry)
398{
399 int i;
400 if (!hlist)
401 return false;
402
403 for (i = 0; hlist[i]; i++)
404 if (string_in_list(hlist[i]->controllers, entry))
405 return true;
406 return false;
407}
408
409/*
c30b61c3
SH
410 * Return true if all of the controllers which we require have been found.
411 * The required list is freezer and anything in * lxc.cgroup.use.
ccb4cabe 412 */
457ca9aa 413static bool all_controllers_found(void)
ccb4cabe
SH
414{
415 char *p, *saveptr = NULL;
457ca9aa 416 struct hierarchy ** hlist = hierarchies;
ccb4cabe 417
ccb4cabe
SH
418 if (!controller_found(hlist, "freezer")) {
419 ERROR("no freezer controller mountpoint found");
420 return false;
421 }
422
457ca9aa 423 if (!cgroup_use)
ccb4cabe 424 return true;
457ca9aa 425 for (p = strtok_r(cgroup_use, ",", &saveptr); p;
ccb4cabe
SH
426 p = strtok_r(NULL, ",", &saveptr)) {
427 if (!controller_found(hlist, p)) {
428 ERROR("no %s controller mountpoint found", p);
429 return false;
430 }
431 }
432 return true;
433}
434
/*
 * Return true if the filesystem type recorded in the mountinfo @line
 * is fuse.lxcfs. The type is the word following the " - " separator.
 */
static bool is_lxcfs(const char *line)
{
	const char *p = strstr(line, " - ");

	if (!p)
		return false;
	/* strncmp() returns 0 on a match, so compare explicitly */
	return strncmp(p, " - fuse.lxcfs ", 14) == 0;
}

/*
 * Get the controllers from a mountinfo line.
 * There are other ways we could get this info. For lxcfs, field 3
 * is /cgroup/controller-list. For cgroupfs, we could parse the mount
 * options. But we simply assume that the mountpoint must be
 * /sys/fs/cgroup/controller-list.
 *
 * Side effect: the space following the mountpoint field in @line is
 * overwritten with '\0' (get_mountpoint() relies on this), and the
 * controller list inside @line is split in place by strtok_r().
 *
 * Returns a newly allocated NULL-terminated controller list, or NULL
 * if the line has too few fields, the mountpoint is not below
 * /sys/fs/cgroup/, or no controller was accepted.
 */
static char **get_controllers(char **klist, char **nlist, char *line)
{
	// the fifth field is /sys/fs/cgroup/comma-delimited-controller-list
	int i;
	char *p = line, *p2, *tok, *saveptr = NULL;
	char **aret = NULL;

	/* skip the first four space-separated fields */
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	/* note - if we change how mountinfo works, then our caller
	 * will need to verify /sys/fs/cgroup/ in this field */
	if (strncmp(p, "/sys/fs/cgroup/", 15) != 0)
		return NULL;
	p += 15;

	p2 = strchr(p, ' ');
	if (!p2) {
		ERROR("corrupt mountinfo");
		return NULL;
	}
	*p2 = '\0';

	for (tok = strtok_r(p, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr)) {
		must_append_controller(klist, nlist, &aret, tok);
	}

	return aret;
}

/*
 * Return true if the filesystem type recorded in the mountinfo @line
 * is cgroup. The type is the word following the " - " separator.
 */
static bool is_cgroupfs(char *line)
{
	const char *p = strstr(line, " - ");

	if (!p)
		return false;
	/* strncmp() returns 0 on a match, so compare explicitly */
	return strncmp(p, " - cgroup ", 10) == 0;
}
493
494/* Add a controller to our list of hierarchies */
457ca9aa 495static void add_controller(char **clist, char *mountpoint, char *base_cgroup)
ccb4cabe
SH
496{
497 struct hierarchy *new;
498 int newentry;
499
500 new = must_alloc(sizeof(*new));
501 new->controllers = clist;
502 new->mountpoint = mountpoint;
503 new->base_cgroup = base_cgroup;
504 new->fullcgpath = NULL;
505
457ca9aa
SH
506 newentry = append_null_to_list((void ***)&hierarchies);
507 hierarchies[newentry] = new;
ccb4cabe
SH
508}
509
/*
 * Return a newly allocated copy of the mountpoint (fifth field) from
 * @line, a line of /proc/self/mountinfo, or NULL if the line has fewer
 * than five fields.
 *
 * Expects that a '\0' has already been written right after the
 * mountpoint, so everything from the fifth field to the end of the
 * string is copied.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	int skipped;

	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	/* we've already stuck a \0 after the mountpoint */
	return must_copy_string(field);
}
533
/*
 * Given @p pointing into a multi-line string, return a newly allocated,
 * NUL-terminated copy of the text from @p up to (not including) the
 * next '\n'. Returns NULL if there is no '\n' after @p.
 */
static char *copy_to_eol(char *p)
{
	char *nl = strchr(p, '\n');
	char *copy;
	size_t n;

	if (nl == NULL)
		return NULL;

	n = (size_t)(nl - p);
	copy = must_alloc(n + 1);
	memcpy(copy, p, n);
	copy[n] = '\0';
	return copy;
}
552
/*
 * @cgline points to the character after the first ':' of a line in a
 * \n-terminated /proc/self/cgroup file, i.e. at the comma-separated
 * controller list, which ends at the next ':'.
 *
 * Return true if controller @c is one of the (non-empty) entries of
 * that list; false otherwise or if no closing ':' is found. @cgline is
 * only read, never modified.
 */
static bool controller_in_clist(char *cgline, char *c)
{
	const char *end = strchr(cgline, ':');
	const char *seg = cgline;
	size_t want;

	if (!end)
		return false;

	want = strlen(c);
	while (seg < end) {
		const char *comma = memchr(seg, ',', (size_t)(end - seg));
		const char *seg_end = comma ? comma : end;
		size_t seg_len = (size_t)(seg_end - seg);

		/* empty segments never count as a controller */
		if (seg_len > 0 && seg_len == want &&
		    memcmp(seg, c, seg_len) == 0)
			return true;

		seg = seg_end + 1;
	}

	return false;
}
579
/*
 * @basecginfo is a copy of /proc/$$/cgroup. Return a newly allocated
 * copy of the cgroup path recorded for @controller, i.e. the third
 * ':'-separated field of the first line whose controller list contains
 * @controller. Returns NULL if no such line exists or the data is
 * malformed.
 */
static char *get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;

	for (;;) {
		char *clist, *cgpath;

		/* step over the hierarchy id to the controller list */
		clist = strchr(cur, ':');
		if (!clist)
			return NULL;
		clist++;

		if (controller_in_clist(clist, controller)) {
			cgpath = strchr(clist, ':');
			if (!cgpath)
				return NULL;
			return copy_to_eol(cgpath + 1);
		}

		/* not this line; move on to the next one */
		cur = strchr(clist, '\n');
		if (!cur)
			return NULL;
		cur++;
	}
}
607
b4ffcca8
SH
#define BATCH_SIZE 50
/*
 * Make sure *@mem can hold @newlen bytes, growing it in multiples of
 * BATCH_SIZE. The buffer is (re)allocated when it is still NULL or when
 * @newlen needs more batches than @oldlen did.
 */
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int need = (newlen / BATCH_SIZE) + 1;

	if (*mem && need <= have)
		return;

	*mem = must_realloc(*mem, need * BATCH_SIZE);
}
618
ccb4cabe
SH
/*
 * Append the @newlen-byte string @new, together with its terminating
 * NUL, to *@dest, which currently holds @oldlen bytes. *@dest is grown
 * as needed.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* +1: the NUL terminator is copied along with the line */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);
	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
627
/*
 * Slurp in the whole file @fnam, line by line. Returns a newly
 * allocated NUL-terminated buffer, or NULL if the file cannot be opened
 * or no line could be read from it.
 */
static char *read_file(char *fnam)
{
	FILE *f = fopen(fnam, "r");
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;
	int linelen;

	if (!f)
		return NULL;

	for (;;) {
		linelen = getline(&line, &len, f);
		if (linelen == -1)
			break;
		append_line(&buf, fulllen, line, linelen);
		fulllen += linelen;
	}

	fclose(f);
	free(line);
	return buf;
}
647
ccb4cabe
SH
/*
 * Given a hierarchy @mountpoint and base @path, check with
 * access(W_OK) that @mountpoint/@path is writeable, i.e. that we can
 * create directories underneath it.
 */
static bool test_writeable(char *mountpoint, char *path)
{
	char *fullpath = must_make_path(mountpoint, path, NULL);
	bool writeable = access(fullpath, W_OK) == 0;

	free(fullpath);
	return writeable;
}
661
/*
 * Append a copy of @entry to the NULL-terminated string list *@list,
 * growing the list as needed. Does not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
670
/*
 * Parse /proc/self/cgroup and collect every controller named in the
 * second ':'-separated field of each line: entries starting with
 * "name=" are appended to *@nlist, all others to *@klist. Lines lacking
 * the two ':' separators are skipped. If the file cannot be opened both
 * lists are left untouched.
 */
static void get_existing_subsystems(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (f == NULL)
		return;

	while (getline(&line, &len, f) != -1) {
		char *start, *end, *tok, *saveptr = NULL;

		start = strchr(line, ':');
		if (!start)
			continue;
		start++;

		end = strchr(start, ':');
		if (!end)
			continue;
		*end = '\0';

		tok = strtok_r(start, ",", &saveptr);
		while (tok) {
			char ***target = strncmp(tok, "name=", 5) == 0 ? nlist : klist;

			must_append_string(target, tok);
			tok = strtok_r(NULL, ",", &saveptr);
		}
	}

	fclose(f);
	free(line);
}
701
/*
 * Strip all trailing '\n' characters from @s in place.
 *
 * The length is checked before indexing so that an empty string, or a
 * string consisting only of newlines, never reads or writes before the
 * start of the buffer.
 */
static void trim(char *s)
{
	size_t len = strlen(s);

	while (len > 0 && s[len - 1] == '\n')
		s[--len] = '\0';
}
708
ccb4cabe
SH
709static void print_init_debuginfo(struct cgfsng_handler_data *d)
710{
711 int i;
41c33dbe
SH
712
713 if (!getenv("LXC_DEBUG_CGFSNG"))
714 return;
715
ccb4cabe
SH
716 printf("Cgroup information:\n");
717 printf(" container name: %s\n", d->name);
457ca9aa 718 printf(" lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(none)");
ccb4cabe
SH
719 printf(" lxc.cgroup.pattern: %s\n", d->cgroup_pattern);
720 printf(" cgroup: %s\n", d->container_cgroup ? d->container_cgroup : "(none)");
457ca9aa 721 if (!hierarchies) {
ccb4cabe
SH
722 printf(" No hierarchies found.\n");
723 return;
724 }
725 printf(" Hierarchies:\n");
457ca9aa
SH
726 for (i = 0; hierarchies[i]; i++) {
727 struct hierarchy *h = hierarchies[i];
ccb4cabe
SH
728 int j;
729 printf(" %d: base_cgroup %s\n", i, h->base_cgroup);
730 printf(" mountpoint %s\n", h->mountpoint);
731 printf(" controllers:\n");
732 for (j = 0; h->controllers[j]; j++)
733 printf(" %d: %s\n", j, h->controllers[j]);
734 }
735}
41c33dbe
SH
736
/*
 * When the LXC_DEBUG_CGFSNG environment variable is set, print
 * @basecginfo and the kernel (@klist) and named (@nlist) subsystem
 * lists to stdout. Otherwise do nothing.
 *
 * Either list may be NULL: a list is only allocated once its first
 * entry is appended, so a system without e.g. named subsystems hands
 * in a NULL @nlist.
 */
static void print_basecg_debuginfo(char *basecginfo, char **klist, char **nlist)
{
	int k;

	if (!getenv("LXC_DEBUG_CGFSNG"))
		return;

	printf("basecginfo is %s\n", basecginfo);

	for (k = 0; klist && klist[k]; k++)
		printf("kernel subsystem %d: %s\n", k, klist[k]);
	for (k = 0; nlist && nlist[k]; k++)
		printf("named subsystem %d: %s\n", k, nlist[k]);
}
ccb4cabe
SH
750
751/*
752 * At startup, parse_hierarchies finds all the info we need about
753 * cgroup mountpoints and current cgroups, and stores it in @d.
754 */
457ca9aa 755static bool parse_hierarchies(void)
ccb4cabe
SH
756{
757 FILE *f;
758 char * line = NULL, *basecginfo;
759 char **klist = NULL, **nlist = NULL;
760 size_t len = 0;
761
d30ec4cb
SH
762 /*
763 * Root spawned containers escape the current cgroup, so use init's
764 * cgroups as our base in that case.
765 */
ccb4cabe
SH
766 if (geteuid())
767 basecginfo = read_file("/proc/self/cgroup");
768 else
769 basecginfo = read_file("/proc/1/cgroup");
770 if (!basecginfo)
771 return false;
772
773 if ((f = fopen("/proc/self/mountinfo", "r")) == NULL) {
d3b00a8f 774 SYSERROR("Failed opening /proc/self/mountinfo");
ccb4cabe
SH
775 return false;
776 }
777
778 get_existing_subsystems(&klist, &nlist);
41c33dbe
SH
779
780 print_basecg_debuginfo(basecginfo, klist, nlist);
ccb4cabe
SH
781
782 /* we support simple cgroup mounts and lxcfs mounts */
783 while (getline(&line, &len, f) != -1) {
784 char **controller_list = NULL;
785 char *mountpoint, *base_cgroup;
786
787 if (!is_lxcfs(line) && !is_cgroupfs(line))
788 continue;
789
790 controller_list = get_controllers(klist, nlist, line);
791 if (!controller_list)
792 continue;
793
457ca9aa 794 if (controller_list_is_dup(hierarchies, controller_list)) {
ccb4cabe
SH
795 free(controller_list);
796 continue;
797 }
798
799 mountpoint = get_mountpoint(line);
800 if (!mountpoint) {
801 ERROR("Error reading mountinfo: bad line '%s'", line);
802 free_string_list(controller_list);
803 continue;
804 }
805
806 base_cgroup = get_current_cgroup(basecginfo, controller_list[0]);
807 if (!base_cgroup) {
808 ERROR("Failed to find current cgroup for controller '%s'", controller_list[0]);
809 free_string_list(controller_list);
810 free(mountpoint);
811 continue;
812 }
813 trim(base_cgroup);
814 prune_init_scope(base_cgroup);
815 if (!test_writeable(mountpoint, base_cgroup)) {
816 free_string_list(controller_list);
817 free(mountpoint);
818 free(base_cgroup);
819 continue;
820 }
457ca9aa 821 add_controller(controller_list, mountpoint, base_cgroup);
ccb4cabe
SH
822 }
823
824 free_string_list(klist);
825 free_string_list(nlist);
826
827 free(basecginfo);
828
829 fclose(f);
830 free(line);
831
ccb4cabe
SH
832 /* verify that all controllers in cgroup.use and all crucial
833 * controllers are accounted for
834 */
457ca9aa 835 if (!all_controllers_found())
ccb4cabe
SH
836 return false;
837
838 return true;
839}
840
457ca9aa
SH
841static bool collect_hierarchy_info(void)
842{
843 const char *tmp;
844 errno = 0;
845 tmp = lxc_global_config_value("lxc.cgroup.use");
846 if (!cgroup_use && errno != 0) { // lxc.cgroup.use can be NULL
847 SYSERROR("cgfsng: error reading list of cgroups to use");
848 return false;
849 }
850 cgroup_use = must_copy_string(tmp);
851
852 return parse_hierarchies();
853}
854
ccb4cabe
SH
855static void *cgfsng_init(const char *name)
856{
857 struct cgfsng_handler_data *d;
457ca9aa 858 const char *cgroup_pattern;
ccb4cabe
SH
859
860 d = must_alloc(sizeof(*d));
861 memset(d, 0, sizeof(*d));
862
863 d->name = must_copy_string(name);
864
ccb4cabe
SH
865 cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
866 if (!cgroup_pattern) { // lxc.cgroup.pattern is only NULL on error
867 ERROR("Error getting cgroup pattern");
868 goto out_free;
869 }
870 d->cgroup_pattern = must_copy_string(cgroup_pattern);
871
ccb4cabe
SH
872 print_init_debuginfo(d);
873
874 return d;
875
876out_free:
877 free_handler_data(d);
878 return NULL;
879}
880
/*
 * Concatenate @first and all following strings (the argument list must
 * end with NULL) into one newly allocated path. Does not fail. A '/'
 * is inserted in front of every later piece that does not already
 * start with one. The caller owns the result.
 */
static char *must_make_path(const char *first, ...)
{
	va_list args;
	char *piece, *dest;
	size_t total = strlen(first);

	dest = must_copy_string(first);

	va_start(args, first);
	for (piece = va_arg(args, char *); piece != NULL;
	     piece = va_arg(args, char *)) {
		bool need_sep = piece[0] != '/';

		total += strlen(piece) + (need_sep ? 1 : 0);
		dest = must_realloc(dest, total + 1);
		if (need_sep)
			strcat(dest, "/");
		strcat(dest, piece);
	}
	va_end(args);

	return dest;
}
907
/*
 * Recursively remove the cgroup directory @dirname: descend into every
 * subdirectory first, then rmdir() @dirname itself. Non-directory
 * entries are left alone.
 *
 * readdir() is used rather than the deprecated readdir_r(); each
 * recursion level iterates its own DIR stream, so no stream is shared.
 *
 * Returns 0 on success, -1 if the directory cannot be opened or if any
 * step failed. Only the first failure is logged.
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -1;

	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &mystat)) {
			if (!r)
				WARN("failed to stat %s", pathname);
			r = -1;
			goto next;
		}

		if (!S_ISDIR(mystat.st_mode))
			goto next;
		if (cgroup_rmdir(pathname) < 0)
			r = -1;
next:
		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			WARN("%s: failed to delete %s: %m", __func__, dirname);
		r = -1;
	}

	if (closedir(dir) < 0) {
		if (!r)
			WARN("%s: failed to close directory %s: %m", __func__, dirname);
		r = -1;
	}
	return r;
}
959
/*
 * Callback for userns_exec_1(): switch gid and uid to 0 and clear the
 * supplementary groups (failures are logged but not fatal), then
 * recursively remove the cgroup directory passed as @data.
 */
static int rmdir_wrapper(void *data)
{
	char *target = data;

	if (setresgid(0, 0, 0) < 0)
		SYSERROR("Failed to setgid to 0");

	if (setresuid(0, 0, 0) < 0)
		SYSERROR("Failed to setuid to 0");

	if (setgroups(0, NULL) < 0)
		SYSERROR("Failed to clear groups");

	return cgroup_rmdir(target);
}
973
974void recursive_destroy(char *path, struct lxc_conf *conf)
975{
976 int r;
977 if (conf && !lxc_list_empty(&conf->id_map))
978 r = userns_exec_1(conf, rmdir_wrapper, path);
979 else
980 r = cgroup_rmdir(path);
981
982 if (r < 0)
1c9da8da 983 ERROR("Error destroying %s", path);
ccb4cabe
SH
984}
985
986static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
987{
988 struct cgfsng_handler_data *d = hdata;
989
990 if (!d)
991 return;
992
457ca9aa 993 if (d->container_cgroup && hierarchies) {
ccb4cabe 994 int i;
457ca9aa
SH
995 for (i = 0; hierarchies[i]; i++) {
996 struct hierarchy *h = hierarchies[i];
e2db2a89 997 if (h->fullcgpath) {
ccb4cabe
SH
998 recursive_destroy(h->fullcgpath, conf);
999 free(h->fullcgpath);
1000 h->fullcgpath = NULL;
1001 }
1002 }
1003 }
1004
1005 free_handler_data(d);
1006}
1007
1008struct cgroup_ops *cgfsng_ops_init(void)
1009{
457ca9aa
SH
1010 if (!collect_hierarchy_info())
1011 return NULL;
ccb4cabe
SH
1012 return &cgfsng_ops;
1013}
1014
1015static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
1016{
e3a3fecf 1017 h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
d8da679e
SH
1018 if (dir_exists(h->fullcgpath)) // it must not already exist
1019 return false;
e3a3fecf
SH
1020 if (!handle_cpuset_hierarchy(h, cgname))
1021 return false;
1022 return mkdir_p(h->fullcgpath, 0755) == 0;
ccb4cabe
SH
1023}
1024
1025static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
1026{
1027 if (rmdir(h->fullcgpath) < 0)
1028 SYSERROR("Failed to clean up cgroup %s from failed creation attempt", h->fullcgpath);
1029 free(h->fullcgpath);
1030 h->fullcgpath = NULL;
1031}
1032
1033/*
d30ec4cb 1034 * Try to create the same cgroup in all hierarchies.
ccb4cabe
SH
1035 * Start with cgroup_pattern; next cgroup_pattern-1, -2, ..., -999
1036 */
1037static inline bool cgfsng_create(void *hdata)
1038{
1039 struct cgfsng_handler_data *d = hdata;
1040 char *tmp, *cgname, *offset;
1041 int i, idx = 0;
1042 size_t len;
1043
1044 if (!d)
1045 return false;
1046 if (d->container_cgroup) {
1047 WARN("cgfsng_create called a second time");
1048 return false;
1049 }
1050
1051 tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
1052 if (!tmp) {
1053 ERROR("Failed expanding cgroup name pattern");
1054 return false;
1055 }
1056 len = strlen(tmp) + 5; // leave room for -NNN\0
1057 cgname = must_alloc(len);
1058 strcpy(cgname, tmp);
1059 free(tmp);
1060 offset = cgname + len - 5;
1061
1062again:
95adfe93
SH
1063 if (idx == 1000) {
1064 ERROR("Too many conflicting cgroup names");
ccb4cabe 1065 goto out_free;
95adfe93 1066 }
ccb4cabe
SH
1067 if (idx)
1068 snprintf(offset, 5, "-%d", idx);
457ca9aa
SH
1069 for (i = 0; hierarchies[i]; i++) {
1070 if (!create_path_for_hierarchy(hierarchies[i], cgname)) {
ccb4cabe 1071 int j;
457ca9aa
SH
1072 SYSERROR("Failed to create %s: %s", hierarchies[i]->fullcgpath, strerror(errno));
1073 free(hierarchies[i]->fullcgpath);
1074 hierarchies[i]->fullcgpath = NULL;
ccb4cabe 1075 for (j = 0; j < i; j++)
457ca9aa 1076 remove_path_for_hierarchy(hierarchies[j], cgname);
ccb4cabe
SH
1077 idx++;
1078 goto again;
1079 }
1080 }
1081 /* Done */
1082 d->container_cgroup = cgname;
1083 return true;
1084
1085out_free:
1086 free(cgname);
1087 return false;
1088}
1089
1090static const char *cgfsng_canonical_path(void *hdata)
1091{
1092 struct cgfsng_handler_data *d = hdata;
1093
1094 return d->container_cgroup;
1095}
1096
1097static bool cgfsng_enter(void *hdata, pid_t pid)
1098{
ccb4cabe
SH
1099 char pidstr[25];
1100 int i, len;
1101
1102 len = snprintf(pidstr, 25, "%d", pid);
1103 if (len < 0 || len > 25)
1104 return false;
1105
457ca9aa
SH
1106 for (i = 0; hierarchies[i]; i++) {
1107 char *fullpath = must_make_path(hierarchies[i]->fullcgpath,
ccb4cabe
SH
1108 "cgroup.procs", NULL);
1109 if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
d3b00a8f 1110 SYSERROR("Failed to enter %s", fullpath);
ccb4cabe
SH
1111 free(fullpath);
1112 return false;
1113 }
1114 free(fullpath);
1115 }
1116
1117 return true;
1118}
1119
/*
 * Argument bundle handed to chown_cgroup_wrapper().
 * @d:       the handler data of the container
 * @origuid: target uid in the parent namespace
 */
struct chown_data {
	struct cgfsng_handler_data *d;
	uid_t origuid;
};
1124
c0888dfe
SH
1125/*
1126 * chgrp the container cgroups to container group. We leave
1127 * the container owner as cgroup owner. So we must make the
1128 * directories 775 so that the container can create sub-cgroups.
43647298
SH
1129 *
1130 * Also chown the tasks and cgroup.procs files. Those may not
1131 * exist depending on kernel version.
c0888dfe 1132 */
ccb4cabe
SH
1133static int chown_cgroup_wrapper(void *data)
1134{
1135 struct chown_data *arg = data;
ccb4cabe
SH
1136 uid_t destuid;
1137 int i;
1138
1139 if (setresgid(0,0,0) < 0)
1140 SYSERROR("Failed to setgid to 0");
1141 if (setresuid(0,0,0) < 0)
1142 SYSERROR("Failed to setuid to 0");
1143 if (setgroups(0, NULL) < 0)
1144 SYSERROR("Failed to clear groups");
1145
1146 destuid = get_ns_uid(arg->origuid);
1147
457ca9aa
SH
1148 for (i = 0; hierarchies[i]; i++) {
1149 char *fullpath, *path = hierarchies[i]->fullcgpath;
43647298
SH
1150
1151 if (chown(path, destuid, 0) < 0) {
ab8f5424 1152 SYSERROR("Error chowning %s to %d", path, (int) destuid);
ccb4cabe
SH
1153 return -1;
1154 }
c0888dfe 1155
43647298 1156 if (chmod(path, 0775) < 0) {
ab8f5424 1157 SYSERROR("Error chmoding %s", path);
c0888dfe
SH
1158 return -1;
1159 }
ccb4cabe 1160
ab8f5424
SH
1161 /*
1162 * Failures to chown these are inconvenient but not detrimental
1163 * We leave these owned by the container launcher, so that container
1164 * root can write to the files to attach. We chmod them 664 so that
1165 * container systemd can write to the files (which systemd in wily
1166 * insists on doing)
1167 */
43647298
SH
1168 fullpath = must_make_path(path, "tasks", NULL);
1169 if (chown(fullpath, destuid, 0) < 0 && errno != ENOENT)
1170 WARN("Failed chowning %s to %d: %m", fullpath, (int) destuid);
ab8f5424
SH
1171 if (chmod(fullpath, 0664) < 0)
1172 WARN("Error chmoding %s: %m", path);
43647298
SH
1173 free(fullpath);
1174
1175 fullpath = must_make_path(path, "cgroup.procs", NULL);
1176 if (chown(fullpath, destuid, 0) < 0 && errno != ENOENT)
1177 WARN("Failed chowning %s to %d: %m", fullpath, (int) destuid);
ab8f5424
SH
1178 if (chmod(fullpath, 0664) < 0)
1179 WARN("Error chmoding %s: %m", path);
ccb4cabe
SH
1180 free(fullpath);
1181 }
1182
1183 return 0;
1184}
1185
1186static bool cgfsns_chown(void *hdata, struct lxc_conf *conf)
1187{
1188 struct cgfsng_handler_data *d = hdata;
1189 struct chown_data wrap;
1190
1191 if (!d)
1192 return false;
1193
1194 if (lxc_list_empty(&conf->id_map))
1195 return true;
1196
1197 wrap.d = d;
1198 wrap.origuid = geteuid();
1199
1200 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap) < 0) {
1201 ERROR("Error requesting cgroup chown in new namespace");
1202 return false;
1203 }
1204
1205 return true;
1206}
1207
8aa1044f
SH
1208/*
1209 * We've safe-mounted a tmpfs as parent, so we don't need to protect against
1210 * symlinks any more - just use mount
1211 */
1212
1213/* mount cgroup-full if requested */
1214static int mount_cgroup_full(int type, struct hierarchy *h, char *dest,
1215 char *container_cgroup)
1216{
1217 if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
1218 return 0;
1219 if (mount(h->mountpoint, dest, "cgroup", MS_BIND, NULL) < 0) {
1220 SYSERROR("Error bind-mounting %s cgroup onto %s", h->mountpoint,
1221 dest);
1222 return -1;
1223 }
1224 if (type != LXC_AUTO_CGROUP_FULL_RW) {
5b6f9369
SH
1225 unsigned long flags = MS_BIND | MS_NOSUID | MS_NOEXEC | MS_NODEV |
1226 MS_REMOUNT | MS_RDONLY;
1227 if (mount(NULL, dest, "cgroup", flags, NULL) < 0) {
8aa1044f
SH
1228 SYSERROR("Error remounting %s readonly", dest);
1229 return -1;
1230 }
1231 }
1232
1233 INFO("Bind mounted %s onto %s", h->mountpoint, dest);
1234 if (type != LXC_AUTO_CGROUP_FULL_MIXED)
1235 return 0;
1236
1237 /* mount just the container path rw */
1238 char *source = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
5b6f9369 1239 char *rwpath = must_make_path(dest, h->base_cgroup, container_cgroup, NULL);
8aa1044f
SH
1240 if (mount(source, rwpath, "cgroup", MS_BIND, NULL) < 0)
1241 WARN("Failed to mount %s read-write: %m", rwpath);
1242 INFO("Made %s read-write", rwpath);
1243 free(rwpath);
1244 free(source);
1245 return 0;
1246}
1247
1248/* cgroup-full:* is done, no need to create subdirs */
1249static bool cg_mount_needs_subdirs(int type)
1250{
1251 if (type >= LXC_AUTO_CGROUP_FULL_RO)
1252 return false;
1253 return true;
1254}
1255
1256/*
1257 * After $rootfs/sys/fs/container/controller/the/cg/path has been
1258 * created, remount controller ro if needed and bindmount the
1259 * cgroupfs onto controll/the/cg/path
1260 */
1261static int
1262do_secondstage_mounts_if_needed(int type, struct hierarchy *h,
1263 char *controllerpath, char *cgpath,
1264 const char *container_cgroup)
1265{
1266 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
1267 if (mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL) < 0) {
1268 SYSERROR("Error bind-mounting %s", controllerpath);
1269 return -1;
1270 }
1271 if (mount(controllerpath, controllerpath, "cgroup",
1272 MS_REMOUNT | MS_BIND | MS_RDONLY, NULL) < 0) {
1273 SYSERROR("Error remounting %s read-only", controllerpath);
1274 return -1;
1275 }
1276 INFO("Remounted %s read-only", controllerpath);
1277 }
1278 char *sourcepath = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
1279 int flags = MS_BIND;
1280 if (type == LXC_AUTO_CGROUP_RO)
1281 flags |= MS_RDONLY;
1282 INFO("Mounting %s onto %s", sourcepath, cgpath);
1283 if (mount(sourcepath, cgpath, "cgroup", flags, NULL) < 0) {
1284 free(sourcepath);
1285 SYSERROR("Error mounting cgroup %s onto %s", h->controllers[0],
1286 cgpath);
1287 return -1;
1288 }
1289 free(sourcepath);
1290 INFO("Completed second stage cgroup automounts for %s", cgpath);
1291 return 0;
1292}
1293
ccb4cabe
SH
1294static bool cgfsng_mount(void *hdata, const char *root, int type)
1295{
8aa1044f
SH
1296 struct cgfsng_handler_data *d = hdata;
1297 char *tmpfspath = NULL;
1298 bool retval = false;
a8de4c49 1299 int i;
8aa1044f
SH
1300
1301 if ((type & LXC_AUTO_CGROUP_MASK) == 0)
1302 return true;
1303
ccb4cabe
SH
1304 if (cgns_supported())
1305 return true;
8aa1044f
SH
1306
1307 tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
1308
1309 if (type == LXC_AUTO_CGROUP_NOSPEC)
1310 type = LXC_AUTO_CGROUP_MIXED;
1311 else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
1312 type = LXC_AUTO_CGROUP_FULL_MIXED;
1313
1314 /* Mount tmpfs */
1315 if (safe_mount("cgroup_root", tmpfspath, "tmpfs",
1316 MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME,
1317 "size=10240k,mode=755",
1318 root) < 0)
1319 goto bad;
1320
457ca9aa 1321 for (i = 0; hierarchies[i]; i++) {
8aa1044f 1322 char *controllerpath, *path2;
457ca9aa 1323 struct hierarchy *h = hierarchies[i];
8aa1044f
SH
1324 char *controller = strrchr(h->mountpoint, '/');
1325 int r;
1326
1327 if (!controller)
1328 continue;
1329 controller++;
1330 controllerpath = must_make_path(tmpfspath, controller, NULL);
1331 if (dir_exists(controllerpath)) {
1332 free(controllerpath);
1333 continue;
1334 }
1335 if (mkdir(controllerpath, 0755) < 0) {
1336 SYSERROR("Error creating cgroup path: %s", controllerpath);
1337 free(controllerpath);
1338 goto bad;
1339 }
1340 if (mount_cgroup_full(type, h, controllerpath, d->container_cgroup) < 0) {
1341 free(controllerpath);
1342 goto bad;
1343 }
1344 if (!cg_mount_needs_subdirs(type)) {
1345 free(controllerpath);
1346 continue;
1347 }
ef4413fa 1348 path2 = must_make_path(controllerpath, h->base_cgroup, d->container_cgroup, NULL);
8aa1044f
SH
1349 if (mkdir_p(path2, 0755) < 0) {
1350 free(controllerpath);
1351 goto bad;
1352 }
1353
1354 r = do_secondstage_mounts_if_needed(type, h, controllerpath, path2,
1355 d->container_cgroup);
1356 free(controllerpath);
1357 free(path2);
1358 if (r < 0)
1359 goto bad;
1360 }
1361 retval = true;
1362
1363bad:
1364 free(tmpfspath);
1365 return retval;
ccb4cabe
SH
1366}
1367
/*
 * Sum the line counts of the cgroup.procs files in @dirname and in all
 * directories below it.
 *
 * Returns 0 if @dirname cannot be opened.  Entries that cannot be lstat()ed
 * or are not directories are skipped, as is a cgroup.procs file for which
 * lxc_count_file_lines() returns -1.
 */
static int recursive_count_nrtasks(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int count = 0, ret;
	char *path;

	dir = opendir(dirname);
	if (!dir)
		return 0;

	/*
	 * readdir() rather than the deprecated readdir_r(): each call level
	 * owns its own DIR stream, and no fixed-size dirent buffer is needed.
	 */
	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		path = must_make_path(dirname, direntp->d_name, NULL);

		/* lstat(): symlinked directories are not descended into */
		if (lstat(path, &mystat) == 0 && S_ISDIR(mystat.st_mode))
			count += recursive_count_nrtasks(path);

		free(path);
	}

	path = must_make_path(dirname, "cgroup.procs", NULL);
	ret = lxc_count_file_lines(path);
	if (ret != -1)
		count += ret;
	free(path);

	(void) closedir(dir);

	return count;
}
1412
1413static int cgfsng_nrtasks(void *hdata) {
1414 struct cgfsng_handler_data *d = hdata;
1415 char *path;
1416 int count;
1417
457ca9aa 1418 if (!d || !d->container_cgroup || !hierarchies)
ccb4cabe 1419 return -1;
457ca9aa 1420 path = must_make_path(hierarchies[0]->fullcgpath, NULL);
ccb4cabe
SH
1421 count = recursive_count_nrtasks(path);
1422 free(path);
1423 return count;
1424}
1425
1426/* Only root needs to escape to the cgroup of its init */
7103fe6f 1427static bool cgfsng_escape()
ccb4cabe 1428{
7103fe6f 1429 struct cgfsng_handler_data *d;
ccb4cabe 1430 int i;
7103fe6f 1431 bool ret = false;
ccb4cabe
SH
1432
1433 if (geteuid())
1434 return true;
1435
7103fe6f
TA
1436 d = cgfsng_init("criu-temp-cgfsng");
1437 if (!d) {
1438 ERROR("cgfsng_init failed");
1439 return false;
1440 }
1441
457ca9aa
SH
1442 for (i = 0; hierarchies[i]; i++) {
1443 char *fullpath = must_make_path(hierarchies[i]->mountpoint,
1444 hierarchies[i]->base_cgroup,
ccb4cabe
SH
1445 "cgroup.procs", NULL);
1446 if (lxc_write_to_file(fullpath, "0", 2, false) != 0) {
d3b00a8f 1447 SYSERROR("Failed to escape to %s", fullpath);
ccb4cabe 1448 free(fullpath);
7103fe6f 1449 goto out;
ccb4cabe
SH
1450 }
1451 free(fullpath);
1452 }
1453
7103fe6f
TA
1454 ret = true;
1455out:
1456 free_handler_data(d);
1457 return ret;
ccb4cabe
SH
1458}
1459
1460#define THAWED "THAWED"
1461#define THAWED_LEN (strlen(THAWED))
1462
1463static bool cgfsng_unfreeze(void *hdata)
1464{
ccb4cabe 1465 char *fullpath;
457ca9aa 1466 struct hierarchy *h = get_hierarchy("freezer");
ccb4cabe 1467
457ca9aa 1468 if (!h)
ccb4cabe
SH
1469 return false;
1470 fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
1471 if (lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false) != 0) {
1472 free(fullpath);
1473 return false;
1474 }
1475 free(fullpath);
1476 return true;
1477}
1478
1479static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
1480{
457ca9aa 1481 struct hierarchy *h = get_hierarchy(subsystem);
ccb4cabe
SH
1482 if (!h)
1483 return NULL;
1484
371f834d
SH
1485 return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
1486}
1487
1488/*
1489 * Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a
1490 * full path, which must be freed by the caller.
1491 */
1492static char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
1493 const char *inpath,
1494 const char *filename)
1495{
1496 /*
1497 * XXX Remove this case after 2.0 release. It's for dealing with
1498 * containers spawned under the old buggy cgfsng which wasn't around
1499 * for long.
1500 */
1501 if (strncmp(inpath, "/sys/fs/cgroup/", 15) == 0)
1502 return must_make_path(inpath, filename, NULL);
1503 return must_make_path(h->mountpoint, inpath, filename, NULL);
ccb4cabe
SH
1504}
1505
1506static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
1507{
ccb4cabe
SH
1508 char pidstr[25];
1509 int i, len;
1510
1511 len = snprintf(pidstr, 25, "%d", pid);
1512 if (len < 0 || len > 25)
1513 return false;
1514
457ca9aa 1515 for (i = 0; hierarchies[i]; i++) {
ccb4cabe 1516 char *path, *fullpath;
457ca9aa 1517 struct hierarchy *h = hierarchies[i];
ccb4cabe
SH
1518
1519 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
1520 if (!path) // not running
1521 continue;
1522
371f834d
SH
1523 fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
1524 free(path);
ccb4cabe
SH
1525 if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
1526 SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
1527 free(fullpath);
ccb4cabe
SH
1528 return false;
1529 }
ccb4cabe
SH
1530 free(fullpath);
1531 }
1532
ccb4cabe
SH
1533 return true;
1534}
1535
/*
 * Called externally (i.e. from 'lxc-cgroup') to query cgroup limits.
 * Here we don't have a cgroup_data set up, so we ask the running
 * container through the commands API for the cgroup path
 *
 * The subsystem is the part of @filename before the first '.'.  Returns the
 * result of lxc_read_from_file(), or -1 if the container's cgroup path or
 * the subsystem's hierarchy cannot be found.
 */
static int cgfsng_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t sublen = strcspn(filename, ".");
	char *subsystem, *path, *fullpath;
	struct hierarchy *h;
	int ret;

	/* subsystem = filename up to (not including) the first dot */
	subsystem = alloca(sublen + 1);
	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
	if (!path) // not running
		return -1;

	h = get_hierarchy(subsystem);
	if (!h) {
		free(path);
		return -1;
	}

	fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
	ret = lxc_read_from_file(fullpath, value, len);
	free(fullpath);
	free(path);

	return ret;
}
1567
/*
 * Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.
 * Here we don't have a cgroup_data set up, so we ask the running
 * container through the commands API for the cgroup path
 *
 * The subsystem is the part of @filename before the first '.'.  Returns the
 * result of lxc_write_to_file(), or -1 if the container's cgroup path or
 * the subsystem's hierarchy cannot be found.
 */
static int cgfsng_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t sublen = strcspn(filename, ".");
	char *subsystem, *path, *fullpath;
	struct hierarchy *h;
	int ret;

	/* subsystem = filename up to (not including) the first dot */
	subsystem = alloca(sublen + 1);
	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
	if (!path) // not running
		return -1;

	h = get_hierarchy(subsystem);
	if (!h) {
		free(path);
		return -1;
	}

	fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
	ret = lxc_write_to_file(fullpath, value, strlen(value), false);
	free(fullpath);
	free(path);

	return ret;
}
1599
ccb4cabe
SH
1600/*
1601 * Called from setup_limits - here we have the container's cgroup_data because
1602 * we created the cgroups
1603 */
1604static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfsng_handler_data *d)
1605{
1606 char *subsystem = NULL, *p;
1607 int ret = -1;
1608 struct hierarchy *h;
1609
1610 subsystem = alloca(strlen(filename) + 1);
1611 strcpy(subsystem, filename);
1612 if ((p = strchr(subsystem, '.')) != NULL)
1613 *p = '\0';
1614
457ca9aa 1615 h = get_hierarchy(subsystem);
ccb4cabe
SH
1616 if (h) {
1617 char *fullpath = must_make_path(h->fullcgpath, filename, NULL);
1618 ret = lxc_write_to_file(fullpath, value, strlen(value), false);
1619 free(fullpath);
1620 }
1621 return ret;
1622}
1623
1624static bool cgfsng_setup_limits(void *hdata, struct lxc_list *cgroup_settings,
1625 bool do_devices)
1626{
1627 struct cgfsng_handler_data *d = hdata;
1628 struct lxc_list *iterator, *sorted_cgroup_settings, *next;
1629 struct lxc_cgroup *cg;
ccb4cabe
SH
1630 bool ret = false;
1631
1632 if (lxc_list_empty(cgroup_settings))
1633 return true;
1634
1635 sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
1636 if (!sorted_cgroup_settings) {
1637 return false;
1638 }
1639
ccb4cabe
SH
1640 lxc_list_for_each(iterator, sorted_cgroup_settings) {
1641 cg = iterator->elem;
1642
1643 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
ccb4cabe
SH
1644 if (lxc_cgroup_set_data(cg->subsystem, cg->value, d)) {
1645 if (do_devices && (errno == EACCES || errno == EPERM)) {
1646 WARN("Error setting %s to %s for %s",
1647 cg->subsystem, cg->value, d->name);
1648 continue;
1649 }
1650 SYSERROR("Error setting %s to %s for %s",
1651 cg->subsystem, cg->value, d->name);
1652 goto out;
1653 }
1654 }
1655
1656 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
1657 }
1658
1659 ret = true;
1660 INFO("cgroup has been setup");
1661out:
ccb4cabe
SH
1662 lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
1663 lxc_list_del(iterator);
1664 free(iterator);
1665 }
1666 free(sorted_cgroup_settings);
1667 return ret;
1668}
1669
1670static struct cgroup_ops cgfsng_ops = {
1671 .init = cgfsng_init,
1672 .destroy = cgfsng_destroy,
1673 .create = cgfsng_create,
1674 .enter = cgfsng_enter,
1675 .canonical_path = cgfsng_canonical_path,
1676 .escape = cgfsng_escape,
1677 .get_cgroup = cgfsng_get_cgroup,
1678 .get = cgfsng_get,
1679 .set = cgfsng_set,
1680 .unfreeze = cgfsng_unfreeze,
1681 .setup_limits = cgfsng_setup_limits,
1682 .name = "cgroupfs-ng",
1683 .attach = cgfsng_attach,
1684 .chown = cgfsns_chown,
1685 .mount_cgroup = cgfsng_mount,
1686 .nrtasks = cgfsng_nrtasks,
1687 .driver = CGFSNG,
1688
1689 /* unsupported */
1690 .create_legacy = NULL,
1691};