]> git.proxmox.com Git - mirror_lxcfs.git/blame - cgroups/cgfsng.c
bindings: port blkio to new cgroup getters
[mirror_lxcfs.git] / cgroups / cgfsng.c
CommitLineData
5fbea8a6
CB
1/* SPDX-License-Identifier: LGPL-2.1+ */
2
3/*
4 * cgfs-ng.c: this is a new, simplified implementation of a filesystem
5 * cgroup backend. The original cgfs.c was designed to be as flexible
6 * as possible. It would try to find cgroup filesystems no matter where
7 * or how you had them mounted, and deduce the most usable mount for
8 * each controller.
9 *
10 * This new implementation assumes that cgroup filesystems are mounted
11 * under /sys/fs/cgroup/clist where clist is either the controller, or
12 * a comma-separated list of controllers.
13 */
14
15#ifndef _GNU_SOURCE
16#define _GNU_SOURCE 1
17#endif
18#include <ctype.h>
19#include <dirent.h>
20#include <errno.h>
21#include <grp.h>
22#include <linux/kdev_t.h>
23#include <linux/types.h>
24#include <poll.h>
25#include <signal.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <unistd.h>
33
34#include "cgroup.h"
35#include "cgroup2_devices.h"
36#include "cgroup_utils.h"
37#include "macro.h"
38#include "memory_utils.h"
39
/* Release every string held in the NULL-terminated array @clist and then the
 * array itself. A NULL @clist is accepted and ignored.
 */
static void free_string_list(char **clist)
{
	char **cur;

	if (clist == NULL)
		return;

	for (cur = clist; *cur != NULL; cur++)
		free(*cur);

	free(clist);
}
52
/* Grow the NULL-terminated pointer array *@list by one slot. *@list may be
 * NULL, in which case a fresh two-slot array is obtained. Allocation goes
 * through must_realloc().
 *
 * Only the new last slot is written (set to NULL). The slot at the returned
 * index is left for the caller to fill in.
 *
 * Returns the index of the slot that is now available for use.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list) {
		while ((*list)[count])
			count++;
	}

	/* One extra slot for the new entry, one for the terminator. */
	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
70
/* Report whether @entry compares equal (strcmp) to any string in the
 * NULL-terminated array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	char **cur;

	if (!list)
		return false;

	for (cur = list; *cur; cur++) {
		if (!strcmp(*cur, entry))
			return true;
	}

	return false;
}
87
/* Build a newly allocated string consisting of "name=" followed by @entry,
 * e.g. "systemd" becomes "name=systemd". The buffer comes from
 * must_realloc().
 */
static char *cg_legacy_must_prefix_named(char *entry)
{
	size_t entry_len = strlen(entry);
	char *result;

	/* 5 bytes for "name=", entry_len for @entry, 1 for the terminator. */
	result = must_realloc(NULL, entry_len + 6);

	memcpy(result, "name=", STRLITERALLEN("name="));
	memcpy(result + STRLITERALLEN("name="), entry, entry_len);
	result[entry_len + 5] = '\0';

	return result;
}
105
/* Append a copy of @entry to the NULL-terminated list *@clist, which must be
 * NULL on the first call. The list stays NULL-terminated.
 *
 * @klist holds the kernel subsystems and @nlist the named ones:
 *  - an entry found in both lists is ambiguous and is silently skipped;
 *  - an entry that already starts with "name=" or that is a kernel subsystem
 *    is copied verbatim;
 *  - anything else is treated as a named subsystem and stored with a "name="
 *    prefix.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist,
				   char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		(*clist)[slot] = must_copy_string(entry);
	else
		(*clist)[slot] = cg_legacy_must_prefix_named(entry);
}
137
138/* Given a handler's cgroup data, return the struct hierarchy for the controller
139 * @c, or NULL if there is none.
140 */
141static struct hierarchy *cgfsng_get_hierarchy(struct cgroup_ops *ops,
142 const char *controller)
143{
144 int i;
145
146 errno = ENOENT;
147
148 if (!ops->hierarchies)
149 return NULL;
150
151 for (i = 0; ops->hierarchies[i]; i++) {
152 if (!controller) {
153 /* This is the empty unified hierarchy. */
154 if (ops->hierarchies[i]->controllers &&
155 !ops->hierarchies[i]->controllers[0])
156 return ops->hierarchies[i];
157 continue;
158 } else if (pure_unified_layout(ops) &&
159 strcmp(controller, "devices") == 0) {
160 if (ops->unified->bpf_device_controller)
161 return ops->unified;
162 break;
163 }
164
165 if (string_in_list(ops->hierarchies[i]->controllers, controller))
166 return ops->hierarchies[i];
167 }
168
169 return NULL;
170}
171
/* Forwarder to cgfsng_get_hierarchy(); installed as the ops->get_hierarchy
 * callback by cgfsng_ops_init().
 */
static inline struct hierarchy *get_hierarchy(struct cgroup_ops *ops,
					      const char *controller)
{
	struct hierarchy *h = cgfsng_get_hierarchy(ops, controller);

	return h;
}
177
/* Return true when the NULL-terminated string lists @l1 and @l2 share at
 * least one string. If either list is NULL the answer is false.
 */
static bool controller_lists_intersect(char **l1, char **l2)
{
	char **cur;

	if (l1 == NULL || l2 == NULL)
		return false;

	for (cur = l1; *cur; cur++)
		if (string_in_list(l2, *cur))
			return true;

	return false;
}
195
196/* For a null-terminated list of controllers @clist, return true if any of those
197 * controllers is already listed the null-terminated list of hierarchies @hlist.
198 * Realistically, if one is present, all must be present.
199 */
200static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
201{
202 int i;
203
204 if (!hlist)
205 return false;
206
207 for (i = 0; hlist[i]; i++)
208 if (controller_lists_intersect(hlist[i]->controllers, clist))
209 return true;
210
211 return false;
212}
213
214/* Get the controllers from a mountinfo line There are other ways we could get
215 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
216 * could parse the mount options. But we simply assume that the mountpoint must
217 * be /sys/fs/cgroup/controller-list
218 */
219static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
220 int type, char **controllers)
221{
222 /* The fourth field is /sys/fs/cgroup/comma-delimited-controller-list
223 * for legacy hierarchies.
224 */
225 int i;
226 char *p2, *tok;
227 char *p = line, *sep = ",";
228 char **aret = NULL;
229
230 for (i = 0; i < 4; i++) {
231 p = strchr(p, ' ');
232 if (!p)
233 return NULL;
234 p++;
235 }
236
237 /* Note, if we change how mountinfo works, then our caller will need to
238 * verify /sys/fs/cgroup/ in this field.
239 */
240 if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
241 return NULL;
242
243 p += 15;
244 p2 = strchr(p, ' ');
245 if (!p2)
246 return NULL;
247 *p2 = '\0';
248
249 if (type == CGROUP_SUPER_MAGIC) {
250 __do_free char *dup = NULL;
251
252 /* strdup() here for v1 hierarchies. Otherwise
253 * lxc_iterate_parts() will destroy mountpoints such as
254 * "/sys/fs/cgroup/cpu,cpuacct".
255 */
256 dup = must_copy_string(p);
257 if (!dup)
258 return NULL;
259
260 lxc_iterate_parts (tok, dup, sep)
261 must_append_controller(klist, nlist, &aret, tok);
262 *controllers = move_ptr(dup);
263 }
264 *p2 = ' ';
265
266 return aret;
267}
268
/* Allocate a controller list that contains no controllers: an array whose
 * first element is already the NULL terminator.
 */
static char **cg_unified_make_empty_controller(void)
{
	char **list = NULL;
	int slot = append_null_to_list((void ***)&list);

	list[slot] = NULL;

	return list;
}
278
279static char **cg_unified_get_controllers(const char *file)
280{
281 __do_free char *buf = NULL;
282 char *sep = " \t\n";
283 char **aret = NULL;
284 char *tok;
285
286 buf = read_file(file);
287 if (!buf)
288 return NULL;
289
290 lxc_iterate_parts(tok, buf, sep) {
291 int newentry;
292 char *copy;
293
294 newentry = append_null_to_list((void ***)&aret);
295 copy = must_copy_string(tok);
296 aret[newentry] = copy;
297 }
298
299 return aret;
300}
301
302static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
303 char *container_base_path, int type)
304{
305 struct hierarchy *new;
306 int newentry;
307
308 new = zalloc(sizeof(*new));
309 new->controllers = clist;
310 new->mountpoint = mountpoint;
311 new->container_base_path = container_base_path;
312 new->version = type;
313
314 newentry = append_null_to_list((void ***)h);
315 (*h)[newentry] = new;
316 return new;
317}
318
319/* Get a copy of the mountpoint from @line, which is a line from
320 * /proc/self/mountinfo.
321 */
322static char *cg_hybrid_get_mountpoint(char *line)
323{
324 int i;
325 size_t len;
326 char *p2;
327 char *p = line, *sret = NULL;
328
329 for (i = 0; i < 4; i++) {
330 p = strchr(p, ' ');
331 if (!p)
332 return NULL;
333 p++;
334 }
335
336 if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
337 return NULL;
338
339 p2 = strchr(p + 15, ' ');
340 if (!p2)
341 return NULL;
342 *p2 = '\0';
343
344 len = strlen(p);
345 sret = must_realloc(NULL, len + 1);
346 memcpy(sret, p, len);
347 sret[len] = '\0';
348 return sret;
349}
350
/* Append a copy of @entry (made with must_copy_string()) to the
 * NULL-terminated string list *@list, growing the list by one slot.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
360
361static int get_existing_subsystems(char ***klist, char ***nlist)
362{
363 __do_free char *line = NULL;
364 __do_fclose FILE *f = NULL;
365 size_t len = 0;
366
367 f = fopen("/proc/self/cgroup", "r");
368 if (!f)
369 return -1;
370
371 while (getline(&line, &len, f) != -1) {
372 char *p, *p2, *tok;
373 p = strchr(line, ':');
374 if (!p)
375 continue;
376 p++;
377 p2 = strchr(p, ':');
378 if (!p2)
379 continue;
380 *p2 = '\0';
381
382 /* If the kernel has cgroup v2 support, then /proc/self/cgroup
383 * contains an entry of the form:
384 *
385 * 0::/some/path
386 *
387 * In this case we use "cgroup2" as controller name.
388 */
389 if ((p2 - p) == 0) {
390 must_append_string(klist, "cgroup2");
391 continue;
392 }
393
394 lxc_iterate_parts(tok, p, ",") {
395 if (strncmp(tok, "name=", 5) == 0)
396 must_append_string(nlist, tok);
397 else
398 must_append_string(klist, tok);
399 }
400 }
401
402 return 0;
403}
404
/* Strip trailing newline characters from @s in place. The first character is
 * never removed, so a string consisting only of newlines is reduced to a
 * single "\n".
 */
static void trim(char *s)
{
	size_t len = strlen(s);

	for (; len > 1 && s[len - 1] == '\n'; len--)
		s[len - 1] = '\0';
}
413
414/* __cg_mount_direct
415 *
416 * Mount cgroup hierarchies directly without using bind-mounts. The main
417 * uses-cases are mounting cgroup hierarchies in cgroup namespaces and mounting
418 * cgroups for the LXC_AUTO_CGROUP_FULL option.
419 */
420static int __cg_mount_direct(struct hierarchy *h, const char *controllerpath)
421{
422 __do_free char *controllers = NULL;
423 char *fstype = "cgroup2";
424 unsigned long flags = 0;
425 int ret;
426
427 flags |= MS_NOSUID;
428 flags |= MS_NOEXEC;
429 flags |= MS_NODEV;
430 flags |= MS_RELATIME;
431
432 if (h->version != CGROUP2_SUPER_MAGIC) {
433 controllers = lxc_string_join(",", (const char **)h->controllers, false);
434 if (!controllers)
435 return -ENOMEM;
436 fstype = "cgroup";
437 }
438
439 ret = mount("cgroup", controllerpath, fstype, flags, controllers);
440 if (ret < 0)
441 return -1;
442
443 return 0;
444}
445
/* Mount hierarchy @h at @controllerpath by delegating to
 * __cg_mount_direct(); the return value is passed through unchanged.
 */
static inline int cg_mount_cgroup_full(struct hierarchy *h,
				       const char *controllerpath)
{
	int ret = __cg_mount_direct(h, controllerpath);

	return ret;
}
451
452static bool cgfsng_mount(struct cgroup_ops *ops, const char *root)
453{
454 __do_free char *cgroup_root = NULL;
455 int ret;
456 bool retval = false;
457
458 if (!ops)
459 return ret_set_errno(false, ENOENT);
460
461 if (!ops->hierarchies)
462 return true;
463
464 cgroup_root = must_make_path(root, DEFAULT_CGROUP_MOUNTPOINT, NULL);
465 if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
466 return cg_mount_cgroup_full(ops->unified, cgroup_root) == 0;
467
468 /* mount tmpfs */
469 ret = safe_mount(NULL, cgroup_root, "tmpfs",
470 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
471 "size=10240k,mode=755", root);
472 if (ret < 0)
473 goto on_error;
474
475 for (int i = 0; ops->hierarchies[i]; i++) {
476 __do_free char *controllerpath = NULL;
477 struct hierarchy *h = ops->hierarchies[i];
478 char *controller = strrchr(h->mountpoint, '/');
479
480 if (!controller)
481 continue;
482 controller++;
483
484 controllerpath = must_make_path(cgroup_root, controller, NULL);
485 if (dir_exists(controllerpath))
486 continue;
487
488 ret = mkdir(controllerpath, 0755);
489 if (ret < 0)
490 log_error_errno(goto on_error, errno,
491 "Error creating cgroup path: %s",
492 controllerpath);
493
494 ret = cg_mount_cgroup_full( h, controllerpath);
495 if (ret < 0)
496 goto on_error;
497 }
498 retval = true;
499
500on_error:
501 return retval;
502}
503
504static int recursive_count_nrtasks(char *dirname)
505{
506 __do_free char *path = NULL;
507 __do_closedir DIR *dir = NULL;
508 struct dirent *direntp;
509 int count = 0, ret;
510
511 dir = opendir(dirname);
512 if (!dir)
513 return 0;
514
515 while ((direntp = readdir(dir))) {
516 struct stat mystat;
517
518 if (!strcmp(direntp->d_name, ".") ||
519 !strcmp(direntp->d_name, ".."))
520 continue;
521
522 path = must_make_path(dirname, direntp->d_name, NULL);
523
524 if (lstat(path, &mystat))
525 continue;
526
527 if (!S_ISDIR(mystat.st_mode))
528 continue;
529
530 count += recursive_count_nrtasks(path);
531 }
532
533 path = must_make_path(dirname, "cgroup.procs", NULL);
534 ret = lxc_count_file_lines(path);
535 if (ret != -1)
536 count += ret;
537
538 return count;
539}
540
541static int cgfsng_nrtasks(struct cgroup_ops *ops)
542{
543 __do_free char *path = NULL;
544
545 if (!ops)
546 return ret_set_errno(-1, ENOENT);
547
548 if (!ops->container_cgroup || !ops->hierarchies)
549 return ret_set_errno(-1, EINVAL);
550
551 path = must_make_path(ops->hierarchies[0]->container_full_path, NULL);
552 return recursive_count_nrtasks(path);
553}
554
555static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
556{
557 int i = 0;
558
559 if (!ops)
560 return ret_set_errno(-1, ENOENT);
561
562 if (!ops->hierarchies)
563 return 0;
564
565 for (; ops->hierarchies[i]; i++)
566 ;
567
568 return i;
569}
570
571static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
572{
573 int i;
574
575 if (!ops)
576 return ret_set_errno(false, ENOENT);
577
578 if (!ops->hierarchies)
579 return false;
580
581 /* sanity check n */
582 for (i = 0; i < n; i++)
583 if (!ops->hierarchies[i])
584 return ret_set_errno(false, ENOENT);
585
586 *out = ops->hierarchies[i]->controllers;
587
588 return true;
589}
590
1ca6a467
CB
591static bool cgfsng_get(struct cgroup_ops *ops, const char *controller,
592 const char *cgroup, const char *file, char **value)
593{
594 __do_free char *path = NULL;
595 struct hierarchy *h;
596
597 h = ops->get_hierarchy(ops, controller);
598 if (!h)
599 return false;
600
075387cd 601 path = must_make_path(dot_or_empty(cgroup), cgroup, file, NULL);
1ca6a467
CB
602 *value = readat_file(h->fd, path);
603 return *value != NULL;
604}
605
66c5e848
CB
606static int cgfsng_get_memory(struct cgroup_ops *ops, const char *cgroup,
607 const char *file, char **value)
608{
609 __do_free char *path = NULL;
610 struct hierarchy *h;
611 int ret;
612
613 h = ops->get_hierarchy(ops, "memory");
614 if (!h)
615 return -1;
616
617 if (!is_unified_hierarchy(h)) {
618 if (strcmp(file, "memory.max") == 0)
619 file = "memory.limit_in_bytes";
620 else if (strcmp(file, "memory.swap.max") == 0)
621 file = "memory.memsw.limit_in_bytes";
622 else if (strcmp(file, "memory.swap.current") == 0)
623 file = "memory.memsw.usage_in_bytes";
624 else if (strcmp(file, "memory.current") == 0)
625 file = "memory.usage_in_bytes";
626 ret = CGROUP_SUPER_MAGIC;
627 } else {
628 ret = CGROUP2_SUPER_MAGIC;
629 }
630
075387cd 631 path = must_make_path(dot_or_empty(cgroup), cgroup, file, NULL);
66c5e848
CB
632 *value = readat_file(h->fd, path);
633 if (!*value)
634 ret = -1;
635
636 return ret;
637}
638
/* Read @cgroup's "memory.current" value through cgfsng_get_memory(). */
static int cgfsng_get_memory_current(struct cgroup_ops *ops, const char *cgroup,
				     char **value)
{
	int ret = cgfsng_get_memory(ops, cgroup, "memory.current", value);

	return ret;
}
644
/* Read @cgroup's "memory.swap.current" value through cgfsng_get_memory(). */
static int cgfsng_get_memory_swap_current(struct cgroup_ops *ops,
					  const char *cgroup, char **value)
{
	int ret = cgfsng_get_memory(ops, cgroup, "memory.swap.current", value);

	return ret;
}
650
/* Read @cgroup's "memory.max" value through cgfsng_get_memory(). */
static int cgfsng_get_memory_max(struct cgroup_ops *ops, const char *cgroup,
				 char **value)
{
	int ret = cgfsng_get_memory(ops, cgroup, "memory.max", value);

	return ret;
}
656
/* Read @cgroup's "memory.swap.max" value through cgfsng_get_memory(). */
static int cgfsng_get_memory_swap_max(struct cgroup_ops *ops,
				      const char *cgroup, char **value)
{
	int ret = cgfsng_get_memory(ops, cgroup, "memory.swap.max", value);

	return ret;
}
662
/* Read @cgroup's "memory.stat" content through cgfsng_get_memory(). */
static int cgfsng_get_memory_stats(struct cgroup_ops *ops, const char *cgroup,
				   char **value)
{
	int ret = cgfsng_get_memory(ops, cgroup, "memory.stat", value);

	return ret;
}
668
2c3bcd9e
CB
669static char *readat_cpuset(int cgroup_fd)
670{
671 __do_free char *val = NULL;
672
673 val = readat_file(cgroup_fd, "cpuset.cpus");
674 if (val && strcmp(val, "") != 0)
675 return move_ptr(val);
676
677 free_disarm(val);
678 val = readat_file(cgroup_fd, "cpuset.cpus.effective");
679 if (val && strcmp(val, "") != 0)
680 return move_ptr(val);
681
682 return NULL;
683}
684
685static int cgfsng_get_cpuset_cpus(struct cgroup_ops *ops, const char *cgroup,
686 char **value)
687{
688 __do_close_prot_errno int cgroup_fd = -EBADF;
689 __do_free char *path = NULL;
690 char *v;
691 struct hierarchy *h;
692 int ret;
693
694 h = ops->get_hierarchy(ops, "cpuset");
695 if (!h)
696 return -1;
697
698 if (!is_unified_hierarchy(h))
699 ret = CGROUP_SUPER_MAGIC;
700 else
701 ret = CGROUP2_SUPER_MAGIC;
702
703 *value = NULL;
075387cd 704 path = must_make_path(dot_or_empty(cgroup), cgroup, NULL);
2c3bcd9e
CB
705 cgroup_fd = openat_safe(h->fd, path);
706 if (cgroup_fd < 0) {
707 return -1;
708 }
709 v = readat_cpuset(cgroup_fd);
710 if (v) {
711 *value = v;
712 return ret;
713 }
714
715 /*
716 * cpuset.cpus and cpuset.cpus.effective are empty so we need to look
717 * the nearest ancestor with a non-empty cpuset.cpus{.effective} file.
718 */
719 for (;;) {
720 int fd;
721
722 fd = openat_safe(cgroup_fd, "../");
723 if (fd < 0 || !is_cgroup_fd(fd)) {
724 fprintf(stderr, "2222: %s\n", strerror(errno));
725 return -1;
726 }
727
728 close_prot_errno_replace(cgroup_fd, fd);
729
730 v = readat_cpuset(fd);
731 if (v) {
732 *value = v;
733 return ret;
734 }
735 }
736
737 return -1;
738}
739
9a9484ab
CB
740static int cgfsng_get_io(struct cgroup_ops *ops, const char *cgroup,
741 const char *file, char **value)
742{
743 __do_free char *path = NULL;
744 struct hierarchy *h;
745 int ret;
746
747 h = ops->get_hierarchy(ops, "blkio");
748 if (!h)
749 return -1;
750
751 if (!is_unified_hierarchy(h))
752 ret = CGROUP_SUPER_MAGIC;
753 else
754 ret = CGROUP2_SUPER_MAGIC;
755
756 path = must_make_path(dot_or_empty(cgroup), cgroup, file, NULL);
757 *value = readat_file(h->fd, path);
758 if (!*value) {
759 if (errno == ENOENT)
760 errno = EOPNOTSUPP;
761 return ret_errno(errno);
762 }
763
764 return ret;
765}
766
/* Read @cgroup's "blkio.io_service_bytes_recursive" through cgfsng_get_io(). */
static int cgfsng_get_io_service_bytes(struct cgroup_ops *ops,
				       const char *cgroup, char **value)
{
	int ret = cgfsng_get_io(ops, cgroup, "blkio.io_service_bytes_recursive", value);

	return ret;
}
772
/* Read @cgroup's "blkio.io_service_time_recursive" through cgfsng_get_io(). */
static int cgfsng_get_io_service_time(struct cgroup_ops *ops,
				      const char *cgroup, char **value)
{
	int ret = cgfsng_get_io(ops, cgroup, "blkio.io_service_time_recursive", value);

	return ret;
}
778
/* Read @cgroup's "blkio.io_serviced_recursive" through cgfsng_get_io(). */
static int cgfsng_get_io_serviced(struct cgroup_ops *ops, const char *cgroup,
				  char **value)
{
	int ret = cgfsng_get_io(ops, cgroup, "blkio.io_serviced_recursive", value);

	return ret;
}
784
/* Read @cgroup's "blkio.io_merged_recursive" through cgfsng_get_io(). */
static int cgfsng_get_io_merged(struct cgroup_ops *ops, const char *cgroup,
				char **value)
{
	int ret = cgfsng_get_io(ops, cgroup, "blkio.io_merged_recursive", value);

	return ret;
}
790
/* Read @cgroup's "blkio.io_wait_time_recursive" through cgfsng_get_io(). */
static int cgfsng_get_io_wait_time(struct cgroup_ops *ops, const char *cgroup,
				   char **value)
{
	int ret = cgfsng_get_io(ops, cgroup, "blkio.io_wait_time_recursive", value);

	return ret;
}
796
5fbea8a6
CB
797/* At startup, parse_hierarchies finds all the info we need about cgroup
798 * mountpoints and current cgroups, and stores it in @d.
799 */
800static int cg_hybrid_init(struct cgroup_ops *ops)
801{
802 __do_free char *basecginfo = NULL;
803 __do_free char *line = NULL;
804 __do_fclose FILE *f = NULL;
805 int ret;
806 size_t len = 0;
807 char **klist = NULL, **nlist = NULL;
808
809 /* Root spawned containers escape the current cgroup, so use init's
810 * cgroups as our base in that case.
811 */
812 basecginfo = read_file("/proc/1/cgroup");
813 if (!basecginfo)
814 return ret_set_errno(-1, ENOMEM);
815
816 ret = get_existing_subsystems(&klist, &nlist);
817 if (ret < 0)
818 return log_error_errno(-1, errno, "Failed to retrieve available legacy cgroup controllers");
819
820 f = fopen("/proc/self/mountinfo", "r");
821 if (!f)
822 return log_error_errno(-1, errno, "Failed to open \"/proc/self/mountinfo\"");
823
824 while (getline(&line, &len, f) != -1) {
825 int type;
826 struct hierarchy *new;
827 char *base_cgroup = NULL, *mountpoint = NULL;
828 char **controller_list = NULL;
829 __do_free char *controllers = NULL;
830
831 type = get_cgroup_version(line);
832 if (type == 0)
833 continue;
834
835 if (type == CGROUP2_SUPER_MAGIC && ops->unified)
836 continue;
837
838 if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
839 if (type == CGROUP2_SUPER_MAGIC)
840 ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
841 else if (type == CGROUP_SUPER_MAGIC)
842 ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
843 } else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
844 if (type == CGROUP_SUPER_MAGIC)
845 ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
846 } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
847 if (type == CGROUP2_SUPER_MAGIC)
848 ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
849 }
850
851 controller_list = cg_hybrid_get_controllers(klist, nlist, line,
852 type, &controllers);
853 if (!controller_list && type == CGROUP_SUPER_MAGIC)
854 continue;
855
856 if (type == CGROUP_SUPER_MAGIC)
857 if (controller_list_is_dup(ops->hierarchies, controller_list))
858 ret_set_errno(goto next, EEXIST);
859
860 mountpoint = cg_hybrid_get_mountpoint(line);
861 if (!mountpoint)
862 log_error_errno(goto next, EINVAL, "Failed parsing mountpoint from \"%s\"", line);
863
864 if (type == CGROUP_SUPER_MAGIC) {
865 base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
866 } else {
867 base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
868 }
869 if (!base_cgroup)
870 log_error_errno(goto next, EINVAL, "Failed to find current cgroup %s", mountpoint);
871
872 trim(base_cgroup);
873 prune_init_scope(base_cgroup);
874
875 if (type == CGROUP2_SUPER_MAGIC) {
876 char *cgv2_ctrl_path;
877
878 cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
879 "cgroup.controllers",
880 NULL);
881
882 controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
883 free(cgv2_ctrl_path);
884 if (!controller_list)
885 controller_list = cg_unified_make_empty_controller();
886 }
887
888 new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type);
889 new->__controllers = move_ptr(controllers);
890 if (type == CGROUP2_SUPER_MAGIC && !ops->unified)
891 ops->unified = new;
892
893 continue;
894
895 next:
896 free_string_list(controller_list);
897 free(mountpoint);
898 free(base_cgroup);
899 }
900
901 free_string_list(klist);
902 free_string_list(nlist);
903
904 return 0;
905}
906
907static int cg_unified_init(struct cgroup_ops *ops)
908{
909 __do_free char *subtree_path = NULL;
910 int ret;
911 char *mountpoint;
912 char **delegatable;
913 struct hierarchy *new;
914 char *base_cgroup = NULL;
915
916 ret = unified_cgroup_hierarchy();
917 if (ret == -ENOMEDIUM)
918 return ret_errno(ENOMEDIUM);
919
920 if (ret != CGROUP2_SUPER_MAGIC)
921 return 0;
922
923 base_cgroup = cg_unified_get_current_cgroup(1);
924 if (!base_cgroup)
925 return ret_errno(EINVAL);
926 prune_init_scope(base_cgroup);
927
928 /*
929 * We assume that the cgroup we're currently in has been delegated to
930 * us and we are free to further delege all of the controllers listed
931 * in cgroup.controllers further down the hierarchy.
932 */
933 mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
934 subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL);
935 delegatable = cg_unified_get_controllers(subtree_path);
936 if (!delegatable)
937 delegatable = cg_unified_make_empty_controller();
938
939 /* TODO: If the user requested specific controllers via lxc.cgroup.use
940 * we should verify here. The reason I'm not doing it right is that I'm
941 * not convinced that lxc.cgroup.use will be the future since it is a
942 * global property. I much rather have an option that lets you request
943 * controllers per container.
944 */
945
946 new = add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
947
948 if (bpf_devices_cgroup_supported())
949 new->bpf_device_controller = 1;
950
951 ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
952 ops->unified = new;
953 return CGROUP2_SUPER_MAGIC;
954}
955
956static int cg_init(struct cgroup_ops *ops)
957{
958 int ret;
959
960 ret = cg_unified_init(ops);
961 if (ret < 0)
962 return -1;
963
964 if (ret == CGROUP2_SUPER_MAGIC)
965 return 0;
966
967 return cg_hybrid_init(ops);
968}
969
970struct cgroup_ops *cgfsng_ops_init(void)
971{
972 __do_free struct cgroup_ops *cgfsng_ops = NULL;
973
974 cgfsng_ops = malloc(sizeof(struct cgroup_ops));
975 if (!cgfsng_ops)
976 return ret_set_errno(NULL, ENOMEM);
977
978 memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
979 cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
980
981 if (cg_init(cgfsng_ops))
982 return NULL;
983
984 cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
1ca6a467 985 cgfsng_ops->get = cgfsng_get;
5fbea8a6
CB
986 cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
987 cgfsng_ops->get_hierarchy = get_hierarchy;
988 cgfsng_ops->driver = "cgfsng";
989 cgfsng_ops->version = "1.0.0";
990 cgfsng_ops->mount = cgfsng_mount;
991 cgfsng_ops->nrtasks = cgfsng_nrtasks;
992
66c5e848
CB
993 /* memory */
994 cgfsng_ops->get_memory_stats = cgfsng_get_memory_stats;
995 cgfsng_ops->get_memory_max = cgfsng_get_memory_max;
996 cgfsng_ops->get_memory_swap_max = cgfsng_get_memory_swap_max;
997 cgfsng_ops->get_memory_current = cgfsng_get_memory_current;
998 cgfsng_ops->get_memory_swap_current = cgfsng_get_memory_swap_current;
999
2c3bcd9e
CB
1000 /* cpuset */
1001 cgfsng_ops->get_cpuset_cpus = cgfsng_get_cpuset_cpus;
1002
9a9484ab
CB
1003 /* blkio */
1004 cgfsng_ops->get_io_service_bytes = cgfsng_get_io_service_bytes;
1005 cgfsng_ops->get_io_service_time = cgfsng_get_io_service_time;
1006 cgfsng_ops->get_io_serviced = cgfsng_get_io_serviced;
1007 cgfsng_ops->get_io_merged = cgfsng_get_io_merged;
1008 cgfsng_ops->get_io_wait_time = cgfsng_get_io_wait_time;
1009
1010
5fbea8a6
CB
1011 return move_ptr(cgfsng_ops);
1012}