]> git.proxmox.com Git - mirror_lxcfs.git/blob - pam/pam_cgfs.c
b823cda1cfbea364c8ec816f633814717c61d89c
[mirror_lxcfs.git] / pam / pam_cgfs.c
1 /* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
6 *
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both cgroupfs v1 and cgroupfs v2, we
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2 where this assumption becomes crucial:
27 * We __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an xserver or similar will not work.
29 *
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
36 #include <dirent.h>
37 #include <errno.h>
38 #include <pwd.h>
39 #include <stdarg.h>
40 #include <stdbool.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <syslog.h>
45 #include <unistd.h>
46 #include <linux/unistd.h>
47 #include <sys/mount.h>
48 #include <sys/param.h>
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #include <sys/vfs.h>
52
53 #define PAM_SM_SESSION
54 #include <security/_pam_macros.h>
55 #include <security/pam_modules.h>
56
57 #include "macro.h"
58
/* Fallback definitions of the cgroupfs v1 and v2 super-block magic values for
 * systems whose headers do not already provide them.
 */
#if !defined(CGROUP_SUPER_MAGIC)
#define CGROUP_SUPER_MAGIC 0x27e0eb
#endif

#if !defined(CGROUP2_SUPER_MAGIC)
#define CGROUP2_SUPER_MAGIC 0x63677270
#endif
66
/* Layout of the cgroup mounts on this system: only cgroupfs v1, only
 * cgroupfs v2, or a mix of both.
 */
enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
};

/* Detected mount mode; starts out as CGROUP_UNINITIALIZED. */
static enum cg_mount_mode cg_mount_mode = CGROUP_UNINITIALIZED;
74
/* Common helper prototypes, grouped by purpose. */

/* Allocation helpers. */
static void *must_alloc(size_t sz);
static void *must_realloc(void *orig, size_t sz);
static void batch_realloc(char **mem, size_t oldlen, size_t newlen);

/* String helpers. */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
static char *copy_to_eol(char *s);
static char *must_copy_string(const char *entry);
static char *must_make_path(const char *first, ...) __attribute__((sentinel));
static void trim(char *s);

/* Helpers for null-terminated arrays of pointers. */
static int append_null_to_list(void ***list);
static void free_string_list(char **list);
static void must_add_to_list(char ***clist, char *entry);
static void must_append_controller(char **klist, char **nlist, char ***clist,
				   char *entry);
static void must_append_string(char ***list, char *entry);
static bool string_in_list(char **list, const char *entry);

/* File and filesystem helpers. */
static bool file_exists(const char *f);
static bool mkdir_p(const char *root, char *path);
static char *read_file(char *fnam);
static int recursive_rmdir(char *dirname);
static bool write_int(char *path, int v);
/* __typeof__ should be safe to use with all compilers. */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val);

/* Helpers working on /proc/<pid>/mountinfo lines. */
static char *get_mountpoint(char *line);
static bool is_lxcfs(const char *line);
static bool is_cgv1(char *line);
static bool is_cgv2(char *line);

/* Login handling and logging. */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
static int handle_login(const char *user, uid_t uid, gid_t gid);
static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
106
/* cgroupfs prototypes (shared between the v1 and v2 code paths). */

/* Detection of cgroups that systemd already created for the user. */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid);
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid);
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid,
					     bool systemd_user_slice);
static void cg_systemd_prune_init_scope(char *cg);

/* Initialization, entering/escaping and cleanup of cgroups. */
static bool cg_init(uid_t uid, gid_t gid);
static int cg_get_version_of_mntpt(const char *path);
static void cg_mark_to_make_rw(const char *cstring);
static bool cg_enter(const char *cgroup);
static void cg_escape(void);
static void cg_prune_empty_cgroups(const char *user);

static bool is_lxcfs(const char *line);
126
/* cgroupfs v1 data structures and prototypes. */

/* One mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	char **controllers;	 /* null-terminated list of controllers, e.g. cpu,cpuacct */
	char *mountpoint;	 /* mountpoint of the hierarchy */
	char *base_cgroup;	 /* cgroup of the current process in this hierarchy */
	char *fullcgpath;	 /* starts out as NULL; set outside the helpers shown here */
	char *init_cgroup;	 /* cgroup of PID 1 in this hierarchy */
	bool create_rw_cgroup;	 /* a writeable cgroup was requested for this hierarchy */
	bool systemd_user_slice; /* presumably: systemd already created a cgroup for us */
};

/* Null-terminated array of all detected cgroupfs v1 hierarchies. */
static struct cgv1_hierarchy **cgv1_hierarchies;

/* Detection and bookkeeping. */
static bool cgv1_init(uid_t uid, gid_t gid);
static void cgv1_add_controller(char **clist, char *mountpoint,
				char *base_cgroup, char *init_cgroup);
static bool cgv1_get_controllers(char ***klist, char ***nlist);
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
						  char *line);
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
static char *cgv1_must_prefix_named(char *entry);
static bool is_cgv1(char *line);

/* Controller list comparisons. */
static bool cgv1_controller_in_clist(char *cgline, char *c);
static bool cgv1_controller_lists_intersect(char **l1, char **l2);
static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
					char **clist);
static void cgv1_mark_to_make_rw(char **clist);

/* Creating, entering and removing cgroups. */
static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
			bool *existed);
static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
			    uid_t uid, gid_t gid, bool *existed);
static bool cgv1_enter(const char *cgroup);
static void cgv1_escape(void);
static bool cgv1_prune_empty_cgroups(const char *user);
static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
162
/* cgroupfs v2 data structures and prototypes. */

/* The mounted cgroupfs v2 hierarchy; same layout as struct cgv1_hierarchy. */
struct cgv2_hierarchy {
	char **controllers;	 /* controller list handed to cgv2_add_controller() */
	char *mountpoint;	 /* mountpoint of the hierarchy */
	char *base_cgroup;	 /* cgroup of the current process */
	char *fullcgpath;	 /* starts out as NULL; set outside the helpers shown here */
	char *init_cgroup;	 /* cgroup of PID 1 */
	bool create_rw_cgroup;	 /* a writeable cgroup was requested ("unified") */
	bool systemd_user_slice; /* presumably: systemd already created a cgroup for us */
};

/* Actually this should only be a single hierarchy. But for the sake of
 * parallelism and because the layout of the cgroupfs v2 is still somewhat
 * changing, we'll leave it as an array of structs.
 */
static struct cgv2_hierarchy **cgv2_hierarchies;

/* Detection and bookkeeping. */
static bool cgv2_init(uid_t uid, gid_t gid);
static void cgv2_add_controller(char **clist, char *mountpoint,
				char *base_cgroup, char *init_cgroup,
				bool systemd_user_slice);
static char *cgv2_get_current_cgroup(int pid);
static void cgv2_mark_to_make_rw(char **clist);
static bool is_cgv2(char *line);

/* Creating, entering and removing cgroups. */
static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
			bool *existed);
static bool cgv2_enter(const char *cgroup);
static void cgv2_escape(void);
static bool cgv2_prune_empty_cgroups(const char *user);
static bool cgv2_remove(const char *cgroup);
193
194 /* Common helper functions. */
/* Send one printf-style message to syslog.
 *
 * @err is handed to vsyslog() as the priority, @format and the variadic
 * arguments are forwarded unchanged. A log connection with the identity
 * "PAM-CGFS" is opened before and closed after every single message.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
205
/* realloc() pointer; do not fail.
 *
 * Retries realloc() until it succeeds, so the returned pointer is never NULL.
 * @orig may be NULL, in which case this behaves like an allocation.
 *
 * A request for zero bytes is rounded up to one byte: realloc(ptr, 0) is
 * allowed to free @orig and return NULL, and retrying with the freed pointer
 * would then be a use-after-free.
 */
static void *must_realloc(void *orig, size_t sz)
{
	void *ret;

	if (sz == 0)
		sz = 1;

	do {
		ret = realloc(orig, sz);
	} while (!ret);

	return ret;
}
217
/* realloc() pointer in batch sizes; do not fail.
 *
 * The buffer behind @mem is sized in multiples of BATCH_SIZE. It is only
 * reallocated when it does not exist yet or when @newlen needs more batches
 * than @oldlen did. The batch counts are kept in size_t so that large lengths
 * are not truncated.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
228
/* Append @new (of length @newlen) as is to the string in *@dest, which
 * currently holds @oldlen characters; do not fail.
 *
 * *@dest is grown as needed. @new must be NUL-terminated at new[newlen]
 * because the terminator is copied along with the text.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Old text + new text + terminating NUL. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
238
/* Read in whole file and return allocated pointer.
 *
 * Returns a NUL-terminated buffer the caller must free(). Returns NULL if
 * @fnam cannot be opened or if no line could be read from it (e.g. the file
 * is empty).
 */
static char *read_file(char *fnam)
{
	FILE *f;
	ssize_t linelen; /* getline() returns ssize_t, not int */
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
261
/* Grow the null-terminated pointer array *@list by one slot and terminate it
 * again with NULL. Do not fail. *@list may be NULL (empty list).
 *
 * Returns the index of the slot in front of the new terminator. The caller is
 * expected to store its entry there; for a previously NULL list that slot is
 * uninitialized until the caller does so.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	/* Count the entries in front of the current terminator. */
	if (*list)
		while ((*list)[count])
			count++;

	/* One slot for the caller's entry, one for the new terminator. */
	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
281
/* Return a heap-allocated copy of @entry; do not fail.
 *
 * A NULL @entry yields NULL. Otherwise strdup() is retried until it succeeds.
 * The caller owns the returned string.
 */
static char *must_copy_string(const char *entry)
{
	char *dup;

	if (!entry)
		return NULL;

	for (;;) {
		dup = strdup(entry);
		if (dup)
			return dup;
	}
}
296
/* Append a copy of @entry to the null-terminated pointer array *@list and keep
 * the array null-terminated. Neither the array growth nor the copy can fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
309
/* Remove trailing newlines from string @s in place.
 *
 * Only '\n' characters at the very end are cut off; newlines inside the
 * string are kept. Empty strings and strings made up of newlines only are
 * handled without reading in front of the buffer.
 */
static void trim(char *s)
{
	size_t len = strlen(s);

	while (len > 0 && s[len - 1] == '\n')
		s[--len] = '\0';
}
318
/* Allocate @sz bytes; do not fail. Implemented as must_realloc() of a NULL
 * pointer, so the memory is not initialized.
 */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
324
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL when @s contains no newline. The caller frees the result.
 */
static char *copy_to_eol(char *s)
{
	char *eol = strchr(s, '\n');
	char *line;
	size_t n;

	if (!eol)
		return NULL;

	n = eol - s;
	line = must_alloc(n + 1);
	memcpy(line, s, n);
	line[n] = '\0';

	return line;
}
342
/* Check if given entry under /proc/<pid>/mountinfo is a fuse.lxcfs mount.
 *
 * The filesystem type is looked up right behind the first " - " separator of
 * @line.
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
352
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v1 mount.
 *
 * The filesystem type is looked up right behind the first " - " separator of
 * @line.
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
362
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v2 mount.
 *
 * The filesystem type is looked up right behind the first " - " separator of
 * @line.
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
372
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list is treated as an empty list.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
386
/* Free every string of the null-terminated array @list and then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
396
/* Join all passed-in strings into one allocated path. Do not fail.
 *
 * The argument list must be terminated with NULL. Every piece after @first
 * that does not start with '/' gets a '/' put in front of it. Runs of
 * duplicate '/' in the result are not collapsed. The caller frees the result.
 */
static char *must_make_path(const char *first, ...)
{
	va_list ap;
	const char *piece;
	char *path;
	size_t used;

	used = strlen(first);
	path = must_copy_string(first);

	va_start(ap, first);
	for (piece = va_arg(ap, char *); piece; piece = va_arg(ap, char *)) {
		size_t piece_len = strlen(piece);
		bool need_sep = piece[0] != '/';

		path = must_realloc(path, used + piece_len + (need_sep ? 1 : 0) + 1);

		if (need_sep)
			path[used++] = '/';

		/* Copies the piece together with its terminating NUL. */
		memcpy(path + used, piece, piece_len + 1);
		used += piece_len;
	}
	va_end(ap);

	return path;
}
429
/* Write the integer @v followed by a newline to the file @path, truncating
 * the file first. Returns true only if opening, writing and closing all
 * succeeded.
 */
static bool write_int(char *path, int v)
{
	FILE *f = fopen(path, "w");
	bool printed, closed;

	if (!f)
		return false;

	printed = fprintf(f, "%d\n", v) >= 0;
	/* Always close, even after a failed write. */
	closed = fclose(f) == 0;

	return printed && closed;
}
448
/* Return true if stat() succeeds on @f, i.e. the path exists and is
 * reachable. Symlinks are followed.
 */
static bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
456
/* Create directory and (if necessary) its parents.
 *
 * @root is an existing prefix of @path; only the components of @path behind
 * @root are created (mode 0755). @path is modified temporarily while walking
 * its components but is restored before returning, also on failure.
 *
 * Returns false if @path is shorter than @root or if a component could not be
 * created. A component that appears between the existence check and mkdir()
 * (EEXIST, e.g. created by a concurrent login) is not treated as an error.
 */
static bool mkdir_p(const char *root, char *path)
{
	size_t rootlen = strlen(root);
	size_t pathlen = strlen(path);
	char *b, orig, *e;

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	b = path + rootlen + 1;
	while (true) {
		bool ok = true;

		/* Skip separators in front of the next component. */
		while (*b == '/')
			b++;
		if (!*b)
			return true;

		/* Find the end of the component. */
		e = b + 1;
		while (*e && *e != '/')
			e++;

		/* Cut @path off behind the current component. */
		orig = *e;
		if (orig)
			*e = '\0';

		if (!file_exists(path) && mkdir(path, 0755) < 0) {
			int saved_errno = errno;

			/* Somebody else may have created it in the meantime. */
			if (saved_errno != EEXIST || !file_exists(path)) {
				errno = saved_errno;
				lxcfs_debug("Failed to create %s: %m.\n", path);
				ok = false;
			}
		}

		/* Give the caller its full path back in every case. */
		if (orig)
			*e = orig;

		if (!ok)
			return false;

		if (!orig)
			return true;

		b = e + 1;
	}

	return false;
}
501
/* Recursively remove directory @dirname and all directories below it.
 *
 * Only directories are removed with rmdir(); entries that are not
 * directories are skipped.
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened and -1 if any
 * stat, rmdir or closedir call failed. Only the first failure is logged.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			if (!r)
				lxcfs_debug("Failed to stat %s.\n", pathname);
			r = -1;
		} else if (S_ISDIR(st.st_mode) && recursive_rmdir(pathname) < 0) {
			r = -1;
		}

		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			lxcfs_debug("Failed to delete %s: %m.\n", dirname);
		r = -1;
	}

	if (closedir(dir) < 0) {
		if (!r)
			lxcfs_debug("Failed to close directory %s: %m.\n", dirname);
		r = -1;
	}

	return r;
}
556
/* Add a copy of @entry to the null-terminated pointer array *@clist. The
 * array stays null-terminated. Neither step can fail.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	char *dup;
	int slot;

	slot = append_null_to_list((void ***)clist);
	dup = must_copy_string(entry);
	(*clist)[slot] = dup;
}
567
/* Get mountpoint from a /proc/<pid>/mountinfo line.
 *
 * The mountpoint is the fifth space-separated field. Note that @line is
 * modified: the space behind the mountpoint, if there is one, is overwritten
 * with '\0'. Returns an allocated copy of the mountpoint or NULL if @line has
 * fewer than five fields.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *mnt;
	size_t n;
	int skipped;

	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	end = strchr(field, ' ');
	if (end)
		*end = '\0';

	n = strlen(field);
	mnt = must_alloc(n + 1);
	/* n + 1 also copies the terminating NUL. */
	memcpy(mnt, field, n + 1);

	return mnt;
}
595
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * Every line has the form "<id>:<controller-list>:<path>". Lines with an
 * empty controller list (the "0::/some/path" cgroupfs v2 entry) are skipped.
 * Controllers starting with "name=" are appended to @nlist (named
 * controllers), all others to @klist (kernel controllers).
 *
 * Returns false only if /proc/self/cgroup cannot be opened.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *ctrls, *end, *tok;
		char *state = NULL;

		ctrls = strchr(line, ':');
		if (!ctrls)
			continue;
		ctrls++;

		end = strchr(ctrls, ':');
		if (!end)
			continue;
		*end = '\0';

		/* Skip the v2 hierarchy. */
		if (end == ctrls)
			continue;

		tok = strtok_r(ctrls, ",", &state);
		while (tok) {
			bool named = strncmp(tok, "name=", 5) == 0;

			must_append_string(named ? nlist : klist, tok);
			tok = strtok_r(NULL, ",", &state);
		}
	}

	free(line);
	fclose(f);

	return true;
}
642
643 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
644 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
645 static bool cgv2_get_controllers(char ***klist)
646 {
647 return -ENOSYS;
648 }
649 */
650
/* Get the cgroupfs v2 cgroup of process @pid from /proc/<pid>/cgroup.
 *
 * Looks for the "0::/" entry and returns an allocated copy of the path behind
 * "0::" with trailing newlines removed. Returns NULL if the file cannot be
 * read or holds no such entry.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	/* The largest integer that can fit into long int is 2^64. This is a
	 * 20-digit number. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];
	char *contents, *entry;
	char *cgroup = NULL;
	int n;

	n = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (n < 0 || n >= __PIDLEN)
		return NULL;

	contents = read_file(path);
	if (!contents)
		return NULL;

	entry = strstr(contents, "0::/");
	if (entry)
		cgroup = copy_to_eol(entry + 3); /* skip "0::" */

	free(contents);

	if (cgroup)
		trim(cgroup);

	return cgroup;
}
687
/* Return true if at least one string occurs in both null-terminated lists
 * @l1 and @l2. A NULL list never intersects with anything.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
704
705 /* For a null-terminated list of controllers @clist, return true if any of those
706 * controllers is already listed the null-terminated list of hierarchies @hlist.
707 * Realistically, if one is present, all must be present.
708 */
709 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
710 {
711 struct cgv1_hierarchy **it;
712
713 for (it = hlist; it && *it; it++)
714 if ((*it)->controllers)
715 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
716 return true;
717 return false;
718
719 }
720
721 /* Set boolean to mark controllers under which we are supposed create a
722 * writeable cgroup.
723 */
724 static void cgv1_mark_to_make_rw(char **clist)
725 {
726 struct cgv1_hierarchy **it;
727
728 for (it = cgv1_hierarchies; it && *it; it++)
729 if ((*it)->controllers)
730 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
731 (*it)->create_rw_cgroup = true;
732 }
733
734 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
735 * the cgroupfs v2 hierarchy.
736 */
737 static void cgv2_mark_to_make_rw(char **clist)
738 {
739 if (string_in_list(clist, "unified"))
740 if (cgv2_hierarchies)
741 (*cgv2_hierarchies)->create_rw_cgroup = true;
742 }
743
/* Wrapper around cgv{1,2}_mark_to_make_rw().
 *
 * @cstring is a comma-separated list of controllers. It is split into a
 * temporary list which is handed to both the v1 and the v2 marker and freed
 * again afterwards. @cstring itself is not modified.
 */
static void cg_mark_to_make_rw(const char *cstring)
{
	char *dup, *tok;
	char *state = NULL;
	char **requested = NULL;

	dup = must_copy_string(cstring);

	tok = strtok_r(dup, ",", &state);
	while (tok) {
		must_add_to_list(&requested, tok);
		tok = strtok_r(NULL, ",", &state);
	}

	free(dup);

	cgv1_mark_to_make_rw(requested);
	cgv2_mark_to_make_rw(requested);

	free_string_list(requested);
}
764
/* Prefix any named controllers with "name=", e.g. "name=systemd".
 *
 * Returns a newly allocated "name=<entry>" string the caller must free, or
 * NULL if formatting failed. The buffer is released on the failure path so
 * that it is not leaked.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	char *s;
	int ret;
	size_t sz;

	/* sizeof("name=") covers the prefix plus the terminating NUL. */
	sz = strlen(entry) + sizeof("name=");
	s = must_alloc(sz);

	ret = snprintf(s, sz, "name=%s", entry);
	if (ret < 0 || (size_t)ret >= sz) {
		free(s);
		return NULL;
	}

	return s;
}
781
/* Append controller @entry to the null-terminated list *@clist.
 *
 * Nothing is appended if @entry is listed in both @klist (kernel controllers)
 * and @nlist (named controllers). An @entry that already starts with "name="
 * or that is a kernel controller is copied verbatim; anything else is taken
 * to be a named controller and stored with a "name=" prefix.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (strncmp(entry, "name=", 5) == 0 || is_kernel)
		(*clist)[slot] = must_copy_string(entry);
	else
		(*clist)[slot] = cgv1_must_prefix_named(entry);
}
802
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list
 *
 * @line is modified: the space behind the mountpoint is overwritten with
 * '\0'. Returns a null-terminated, allocated list of controllers, or NULL if
 * the mountpoint (fifth field) is not of the form shown above.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char prefix[] = "/sys/fs/cgroup/";
	char *field = line;
	char *end, *tok;
	char *state = NULL;
	char **controllers = NULL;
	int skipped;

	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	if (strncmp(field, prefix, sizeof(prefix) - 1) != 0)
		return NULL;
	field += sizeof(prefix) - 1;

	end = strchr(field, ' ');
	if (!end)
		return NULL;
	*end = '\0';

	tok = strtok_r(field, ",", &state);
	while (tok) {
		must_append_controller(klist, nlist, &controllers, tok);
		tok = strtok_r(NULL, ",", &state);
	}

	return controllers;
}
842
/* Check if the cgroupfs v1 controller @c is listed in @cgline.
 *
 * @cgline points at the comma-separated controller list of a
 * /proc/<pid>/cgroup line; the list ends at the first ':'. Returns false if
 * @cgline contains no ':' at all. Empty list elements never match.
 *
 * The list is scanned in place, so @cgline is not modified and no temporary
 * copy (previously an alloca() sized by the input) is needed.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *seg, *end;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);
	if (clen == 0)
		return false;

	seg = cgline;
	while (seg < end) {
		const char *comma = memchr(seg, ',', (size_t)(end - seg));
		const char *seg_end = comma ? comma : end;

		if ((size_t)(seg_end - seg) == clen && memcmp(seg, c, clen) == 0)
			return true;

		if (!comma)
			break;
		seg = comma + 1;
	}

	return false;
}
866
/* Look up the cgroup of the cgroupfs v1 controller @controller in
 * @basecginfo, the contents of a /proc/<pid>/cgroup file.
 *
 * Every line has the form "<id>:<controller-list>:<path>". For the first line
 * whose controller list contains @controller an allocated copy of <path> (up
 * to the end of the line) is returned. Returns NULL if no line matches or a
 * line is malformed.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;

	for (;;) {
		char *ctrls, *cg;

		/* Step over the hierarchy id. */
		ctrls = strchr(cur, ':');
		if (!ctrls)
			return NULL;
		ctrls++;

		if (cgv1_controller_in_clist(ctrls, controller)) {
			cg = strchr(ctrls, ':');
			if (!cg)
				return NULL;

			return copy_to_eol(cg + 1);
		}

		/* Not on this line, go on with the next one. */
		cur = strchr(ctrls, '\n');
		if (!cur)
			return NULL;
		cur++;
	}
}
899
/* Remove a trailing "/init.scope" from string @cg in place. This will mostly
 * affect systemd-based systems.
 *
 * If @cg consists of nothing but "/init.scope" it is reduced to "/". A NULL
 * @cg and strings shorter than "/init.scope" are left alone; the lengths are
 * compared first so that no pointer in front of @cg is ever formed.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	size_t len, scope_len;
	char *point;

	if (!cg)
		return;

	len = strlen(cg);
	scope_len = strlen(INIT_SCOPE);
	if (len < scope_len)
		return;

	point = cg + (len - scope_len);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';
	else
		*point = '\0';
}
922
923 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
924 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
925 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
926 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
927 * from /proc/1/cgroup.
928 */
929 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
930 {
931 struct cgv1_hierarchy *new;
932 int newentry;
933
934 new = must_alloc(sizeof(*new));
935 new->controllers = clist;
936 new->mountpoint = mountpoint;
937 new->base_cgroup = base_cgroup;
938 new->fullcgpath = NULL;
939 new->create_rw_cgroup = false;
940 new->init_cgroup = init_cgroup;
941 new->systemd_user_slice = false;
942
943 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
944 cgv1_hierarchies[newentry] = new;
945 }
946
947 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
948 * currently) include the controllers mounted into the hierarchy (e.g. memory,
949 * pids, blkio), the mountpoint of that hierarchy (Should usually be
950 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
951 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
952 * base cgroup of the current process gathered from /proc/self/cgroup, and the
953 * init cgroup of PID1 gathered from /proc/1/cgroup.
954 */
955 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
956 {
957 struct cgv2_hierarchy *new;
958 int newentry;
959
960 new = must_alloc(sizeof(*new));
961 new->controllers = clist;
962 new->mountpoint = mountpoint;
963 new->base_cgroup = base_cgroup;
964 new->fullcgpath = NULL;
965 new->create_rw_cgroup = false;
966 new->init_cgroup = init_cgroup;
967 new->systemd_user_slice = systemd_user_slice;
968
969 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
970 cgv2_hierarchies[newentry] = new;
971 }
972
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This could be merged with
 * cg_systemd_created_user_slice(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last element of @in ends in ".session" and the element
 * in front of it starts with "<uid>.user/" where <uid> equals @uid. @in is not
 * modified; the checks run on a private copy.
 *
 * The path is walked with indices rather than with a pointer that is
 * decremented, so nothing in front of the copy is ever read.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	size_t end;   /* length of the path without trailing slashes */
	size_t start; /* index of the first character of the current element */
	int id;
	char *copy = NULL;
	bool bret = false;

	copy = must_copy_string(in);
	end = strlen(copy);
	if (end < strlen("/user/1.user/1.session"))
		goto cleanup;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	while (end > 0 && copy[end - 1] == '/')
		copy[--end] = '\0';
	if (end == 0)
		goto cleanup;

	/* Get last path element; it must be preceded by a '/'. */
	start = end;
	while (start > 0 && copy[start - 1] != '/')
		start--;
	if (start == 0)
		goto cleanup;

	/* make sure it is something.session */
	if (end - start < strlen("1.session") ||
	    strncmp(copy + end - 8, ".session", 8) != 0)
		goto cleanup;

	/* ok last path piece checks out, now check the second to last */
	copy[start] = '\0';
	start--; /* now the index of the '/' in front of the last element */
	while (start > 0 && copy[start - 1] != '/')
		start--;

	if (sscanf(copy + start, "%d.user/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
1024
/* So long as our path relative to init starts with /user.slice/user-$uid.slice,
 * assume it belongs to $uid and chown it.
 *
 * Returns true if @base_cgroup is longer than @init_cgroup, starts with
 * @init_cgroup, and continues with "/user.slice/user-<uid>.slice/" right
 * behind it. An @init_cgroup of length one (the root cgroup "/") is not
 * skipped, because its '/' is also the first character of the slice path.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char prefix[100];
	size_t base_len = strlen(base_cgroup);
	size_t init_len = strlen(init_cgroup);
	size_t skip;
	int n;

	if (base_len <= init_len)
		return false;

	if (strncmp(base_cgroup, init_cgroup, init_len) != 0)
		return false;

	n = snprintf(prefix, sizeof(prefix), "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || (size_t)n >= sizeof(prefix))
		return false;

	skip = (init_len == 1) ? 0 : init_len;

	return strncmp(base_cgroup + skip, prefix, (size_t)n) == 0;
}
1052
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 * lose ownership of their cgroups
 *
 * Returns true if one of the following holds:
 * - cg_systemd_under_user_slice_1() accepts @in (old systemd layout),
 * - cg_systemd_under_user_slice_2() accepts @base_cgroup relative to
 *   @init_cgroup (new systemd layout),
 * - @in ends in "user-<uid>.slice/session-<something>.scope" with <uid>
 *   equal to @uid.
 *
 * @in is not modified. The path is walked with indices so that nothing in
 * front of the private copy is ever read.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	size_t end;   /* length of the path without trailing slashes */
	size_t start; /* index of the first character of the current element */
	int id;
	char *copy = NULL;
	bool bret = false;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	copy = must_copy_string(in);
	end = strlen(copy);
	if (end < strlen("/user-0.slice/session-0.scope"))
		goto cleanup;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (end > 0 && copy[end - 1] == '/')
		copy[--end] = '\0';
	if (end == 0)
		goto cleanup;

	/* Get last path element; it must be preceded by a '/'. */
	start = end;
	while (start > 0 && copy[start - 1] != '/')
		start--;
	if (start == 0)
		goto cleanup;

	/* Make sure it is session-something.scope. */
	if (strncmp(copy + start, "session-", strlen("session-")) != 0 ||
	    strncmp(copy + end - 6, ".scope", 6) != 0)
		goto cleanup;

	/* Ok last path piece checks out, now check the second to last. */
	copy[start] = '\0';
	start--; /* now the index of the '/' in front of the last element */
	while (start > 0 && copy[start - 1] != '/')
		start--;

	if (sscanf(copy + start, "user-%d.slice/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
1125
/* Chown existing cgroup that systemd has already created for us.
 *
 * Does nothing and returns false unless @systemd_user_slice is set. Otherwise
 * the cgroup @base_cgroup below @mountpoint is chowned to @uid:@gid and true
 * is returned. A failing chown() is only logged as a warning; the return
 * value is still true.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *cgpath;
	int rc;

	if (!systemd_user_slice)
		return false;

	cgpath = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	rc = chown(cgpath, uid, gid);
	if (rc < 0)
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %m.\n",
			 cgpath, (int)uid, (int)gid, NULL);

	free(cgpath);

	return true;
}
1148
1149 /* Detect and store information about cgroupfs v1 hierarchies. */
1150 static bool cgv1_init(uid_t uid, gid_t gid)
1151 {
1152 FILE *f;
1153 struct cgv1_hierarchy **it;
1154 char *basecginfo;
1155 char *line = NULL;
1156 char **klist = NULL, **nlist = NULL;
1157 size_t len = 0;
1158
1159 basecginfo = read_file("/proc/self/cgroup");
1160 if (!basecginfo)
1161 return false;
1162
1163 f = fopen("/proc/self/mountinfo", "r");
1164 if (!f) {
1165 free(basecginfo);
1166 return false;
1167 }
1168
1169 cgv1_get_controllers(&klist, &nlist);
1170
1171 while (getline(&line, &len, f) != -1) {
1172 char **controller_list = NULL;
1173 char *mountpoint, *base_cgroup;
1174
1175 if (is_lxcfs(line) || !is_cgv1(line))
1176 continue;
1177
1178 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1179 if (!controller_list)
1180 continue;
1181
1182 if (cgv1_controller_list_is_dup(cgv1_hierarchies,
1183 controller_list)) {
1184 free(controller_list);
1185 continue;
1186 }
1187
1188 mountpoint = get_mountpoint(line);
1189 if (!mountpoint) {
1190 free_string_list(controller_list);
1191 continue;
1192 }
1193
1194 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1195 if (!base_cgroup) {
1196 free_string_list(controller_list);
1197 free(mountpoint);
1198 continue;
1199 }
1200 trim(base_cgroup);
1201 lxcfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1202 "mountpoint \"%s\" and cgroup \"%s\".\n",
1203 controller_list[0], mountpoint, base_cgroup);
1204 cgv1_add_controller(controller_list, mountpoint, base_cgroup,
1205 NULL);
1206 }
1207 free_string_list(klist);
1208 free_string_list(nlist);
1209 free(basecginfo);
1210 fclose(f);
1211 free(line);
1212
1213 /* Retrieve init cgroup path for all controllers. */
1214 basecginfo = read_file("/proc/1/cgroup");
1215 if (!basecginfo)
1216 return false;
1217
1218 for (it = cgv1_hierarchies; it && *it; it++) {
1219 if ((*it)->controllers) {
1220 char *init_cgroup, *user_slice;
1221 /* We've already stored the controller and received its
1222 * current cgroup. If we now fail to retrieve its init
1223 * cgroup, we should probably fail.
1224 */
1225 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1226 if (!init_cgroup) {
1227 free(basecginfo);
1228 return false;
1229 }
1230 cg_systemd_prune_init_scope(init_cgroup);
1231 (*it)->init_cgroup = init_cgroup;
1232 lxcfs_debug("cgroupfs v1 controller \"%s\" has init "
1233 "cgroup \"%s\".\n",
1234 (*(*it)->controllers), init_cgroup);
1235 /* Check whether systemd has already created a cgroup
1236 * for us.
1237 */
1238 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1239 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1240 (*it)->systemd_user_slice = true;
1241 }
1242 }
1243 free(basecginfo);
1244
1245 return true;
1246 }
1247
/* Type of the f_type member of struct statfs. __typeof__ should be safe to use
 * with all compilers.
 */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;

/* Report whether the filesystem described by @fs has the type @magic_val. */
static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
{
	fs_type_magic actual = fs->f_type;

	return actual == magic_val;
}
1255
1256 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1257 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1258 */
1259 static int cg_get_version_of_mntpt(const char *path)
1260 {
1261 int ret;
1262 struct statfs sb;
1263
1264 if (path)
1265 ret = statfs(path, &sb);
1266 else
1267 ret = statfs("/sys/fs/cgroup", &sb);
1268
1269 if (ret < 0)
1270 return -1;
1271
1272 if (has_fs_type(&sb, CGROUP_SUPER_MAGIC))
1273 return 1;
1274 else if (has_fs_type(&sb, CGROUP2_SUPER_MAGIC))
1275 return 2;
1276
1277 return 0;
1278 }
1279
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * The mountpoint is /sys/fs/cgroup if a cgroupfs v2 is mounted there,
 * otherwise the first cgroupfs v2 entry found in /proc/self/mountinfo. Although
 * the unified hierarchy can be mounted multiple times, each of those
 * mountpoints will expose identical information, so the first one wins.
 *
 * @uid: uid used for the systemd user slice check.
 * @gid: currently unused.
 *
 * Returns true when no v2 hierarchy is present or when detection ran through
 * (even if no v2 mountpoint was found). Returns false when init's v2 cgroup
 * cannot be determined or /proc/self/mountinfo cannot be opened.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	bool ret = false;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}
	cg_systemd_prune_init_scope(init_cgroup);

	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		/* Standard location: no need to parse mountinfo. */
		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;
	} else {
		f = fopen("/proc/self/mountinfo", "r");
		if (!f)
			goto cleanup;

		/* we support simple cgroup mounts and lxcfs mounts */
		while (getline(&line, &len, f) != -1) {
			if (!is_cgv2(line))
				continue;

			mountpoint = get_mountpoint(line);
			if (mountpoint)
				break;
		}
	}

	if (mountpoint) {
		char *user_slice;
		bool has_user_slice;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		has_user_slice = cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid);
		free(user_slice);

		lxcfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			    "current cgroup \"%s\" and init cgroup \"%s\".\n",
			    mountpoint, current_cgroup, init_cgroup);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* NOTE(review): cgv2_add_controller() presumably keeps the
		 * pointers it is given (cgv2_free_hierarchies() frees them
		 * later), so they must not be released below.
		 */
		current_cgroup = NULL;
		init_cgroup = NULL;
	}

	ret = true;

cleanup:
	if (f)
		fclose(f);
	free(line);
	/* Only non-NULL when the strings were not handed to the hierarchy. */
	free(current_cgroup);
	free(init_cgroup);

	return ret;
}
1371
1372 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1373 * cgroupfs v2 hierarchy.
1374 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1375 * where some controllers are mounted into their standard cgroupfs v1 locations
1376 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1377 * hierarchy (/sys/fs/cgroup).
1378 */
1379 static bool cg_init(uid_t uid, gid_t gid)
1380 {
1381 if (!cgv1_init(uid, gid))
1382 return false;
1383
1384 if (!cgv2_init(uid, gid))
1385 return false;
1386
1387 if (cgv1_hierarchies && cgv2_hierarchies) {
1388 cg_mount_mode = CGROUP_MIXED;
1389 lxcfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies.");
1390 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1391 cg_mount_mode = CGROUP_PURE_V1;
1392 lxcfs_debug("%s\n", "Detected cgroupfs v1 hierarchies.");
1393 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1394 cg_mount_mode = CGROUP_PURE_V2;
1395 lxcfs_debug("%s\n", "Detected cgroupfs v2 hierarchies.");
1396 } else {
1397 cg_mount_mode = CGROUP_UNKNOWN;
1398 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy.\n", NULL);
1399 }
1400
1401 if (cg_mount_mode == CGROUP_UNKNOWN)
1402 return false;
1403
1404 return true;
1405 }
1406
1407 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1408 static bool cgv1_enter(const char *cgroup)
1409 {
1410 struct cgv1_hierarchy **it;
1411
1412 for (it = cgv1_hierarchies; it && *it; it++) {
1413 char **controller;
1414 bool entered = false;
1415
1416 if (!(*it)->controllers || !(*it)->mountpoint ||
1417 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1418 continue;
1419
1420 for (controller = (*it)->controllers; controller && *controller;
1421 controller++) {
1422 char *path;
1423
1424 if ((*it)->systemd_user_slice)
1425 continue;
1426
1427 path = must_make_path((*it)->mountpoint,
1428 (*it)->init_cgroup,
1429 cgroup,
1430 "/cgroup.procs",
1431 NULL);
1432 if (!file_exists(path)) {
1433 free(path);
1434 path = must_make_path((*it)->mountpoint,
1435 (*it)->init_cgroup,
1436 cgroup,
1437 "/tasks",
1438 NULL);
1439 }
1440 lxcfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1441 entered = write_int(path, (int)getpid());
1442 if (entered) {
1443 free(path);
1444 break;
1445 }
1446 lxcfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1447 free(path);
1448 }
1449 if (!entered)
1450 return false;
1451 }
1452
1453 return true;
1454 }
1455
1456 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1457 static bool cgv2_enter(const char *cgroup)
1458 {
1459 struct cgv2_hierarchy *v2;
1460 char *path;
1461 bool entered = false;
1462
1463 if (!cgv2_hierarchies)
1464 return true;
1465
1466 v2 = *cgv2_hierarchies;
1467
1468 if (!v2->mountpoint || !v2->base_cgroup)
1469 return false;
1470
1471 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1472 return true;
1473
1474 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
1475 "/cgroup.procs", NULL);
1476 lxcfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1477 entered = write_int(path, (int)getpid());
1478 if (!entered) {
1479 lxcfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1480 free(path);
1481 return false;
1482 }
1483
1484 free(path);
1485
1486 return true;
1487 }
1488
/* Enter @cgroup in the cgroupfs v1 hierarchies first and in the cgroupfs v2
 * hierarchy second. The first failure is logged as a warning and makes the
 * function return false; the v2 step is not attempted if v1 failed.
 */
static bool cg_enter(const char *cgroup)
{
	bool v1_ok = cgv1_enter(cgroup);

	if (!v1_ok) {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups.\n", NULL);
		return false;
	}

	if (cgv2_enter(cgroup))
		return true;

	mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups.\n", NULL);
	return false;
}
1504
/* Enter the cgroup "/" in all detected cgroupfs v1 hierarchies via
 * cgv1_enter(). A failure is only logged as a warning.
 */
static void cgv1_escape(void)
{
	bool escaped = cgv1_enter("/");

	if (escaped)
		return;

	mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup.\n", NULL);
}
1511
/* Enter the cgroup "/" in the cgroupfs v2 hierarchy via cgv2_enter(). A
 * failure is only logged as a warning.
 */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (escaped)
		return;

	mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup.\n", NULL);
}
1518
/* Escape in the cgroupfs v1 hierarchies first, then in the cgroupfs v2
 * hierarchy. Neither step reports failure to the caller.
 */
static void cg_escape(void)
{
	cgv1_escape();
	cgv2_escape();
}
1525
/* Look up @user in the password database and store its uid and primary gid in
 * @uid and @gid.
 *
 * Returns false, leaving @uid and @gid untouched, if getpwnam() finds no
 * entry; returns true otherwise.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	struct passwd *entry = getpwnam(user);

	if (entry) {
		*uid = entry->pw_uid;
		*gid = entry->pw_gid;
		return true;
	}

	return false;
}
1540
1541 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
1542 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
1543 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
1544 * to the caller in @existed.
1545 */
1546 #define __PAM_CGFS_USER "/user/"
1547 #define __PAM_CGFS_USER_LEN 6
1548 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1549 {
1550 char *clean_base_cgroup, *path;
1551 char **controller;
1552 struct cgv1_hierarchy *it;
1553 bool created = false;
1554
1555 it = h;
1556 for (controller = it->controllers; controller && *controller;
1557 controller++) {
1558 created = false;
1559 /* If systemd has already created a cgroup for us, keep using
1560 * it.
1561 */
1562 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
1563 it->base_cgroup, uid, gid,
1564 it->systemd_user_slice)) {
1565 return true;
1566 }
1567
1568 /* We need to make sure that we do not create an endless chain
1569 * of sub-cgroups. So we check if we have already logged in
1570 * somehow (sudo -i, su, etc.) and have created a
1571 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
1572 * cgroups this is unnecessary since we use the init_cgroup
1573 * anyway, but for controllers which have an existing systemd
1574 * cgroup that does not match the current uid, this is pretty
1575 * useful.
1576 */
1577 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
1578 free(it->base_cgroup);
1579 it->base_cgroup = must_copy_string("/");
1580 } else {
1581 clean_base_cgroup =
1582 strstr(it->base_cgroup, __PAM_CGFS_USER);
1583 if (clean_base_cgroup)
1584 *clean_base_cgroup = '\0';
1585 }
1586
1587 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
1588 lxcfs_debug("Constructing path: %s.\n", path);
1589 if (file_exists(path)) {
1590 *existed = true;
1591 lxcfs_debug("%s existed.\n", path);
1592 free(path);
1593 return false;
1594 }
1595 created = mkdir_p(it->mountpoint, path);
1596 if (!created) {
1597 free(path);
1598 continue;
1599 }
1600 if (chown(path, uid, gid) < 0)
1601 lxcfs_debug("Failed to chown %s to %d:%d: %m.\n", path,
1602 (int)uid, (int)gid);
1603 free(path);
1604 break;
1605 }
1606
1607 if (!created)
1608 return false;
1609
1610 return true;
1611 }
1612
1613 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
1614 * (For example, try to remove @cgroup for the cpu and cpuacct controller
1615 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
1616 */
1617 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
1618 {
1619
1620 char *path;
1621
1622 /* Better safe than sorry. */
1623 if (!h->controllers)
1624 return true;
1625
1626 /* Cgroups created by systemd for us which we re-use won't be removed
1627 * here, since we're using init_cgroup + cgroup as path instead of
1628 * base_cgroup + cgroup.
1629 */
1630 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
1631 (void)recursive_rmdir(path);
1632 free(path);
1633
1634 return true;
1635 }
1636
1637 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
1638 static bool cgv2_remove(const char *cgroup)
1639 {
1640 struct cgv2_hierarchy *v2;
1641 char *path;
1642
1643 if (!cgv2_hierarchies)
1644 return true;
1645
1646 v2 = *cgv2_hierarchies;
1647
1648 /* If we reused an already existing cgroup, don't bother trying to
1649 * remove (a potentially wrong)/the path.
1650 * Cgroups created by systemd for us which we re-use would be removed
1651 * here, since we're using base_cgroup + cgroup as path.
1652 */
1653 if (v2->systemd_user_slice)
1654 return true;
1655
1656 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
1657 (void)recursive_rmdir(path);
1658 free(path);
1659
1660 return true;
1661 }
1662
1663 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
1664 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
1665 * back, to the caller if the creation failed due to @cgroup already existing
1666 * via @existed.
1667 */
1668 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1669 {
1670 struct cgv1_hierarchy **it, **rev_it;
1671 bool all_created = true;
1672
1673 for (it = cgv1_hierarchies; it && *it; it++) {
1674 if (!(*it)->controllers || !(*it)->mountpoint ||
1675 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1676 continue;
1677
1678 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
1679 all_created = false;
1680 break;
1681 }
1682 }
1683
1684 if (all_created)
1685 return true;
1686
1687 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
1688 rev_it++)
1689 cgv1_remove_one(*rev_it, cgroup);
1690
1691 return false;
1692 }
1693
1694 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
1695 * the creation failed due to @cgroup already existing via @existed.
1696 */
1697 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1698 {
1699 char *clean_base_cgroup;
1700 char *path;
1701 struct cgv2_hierarchy *v2;
1702 bool created = false;
1703
1704 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
1705 return true;
1706
1707 v2 = *cgv2_hierarchies;
1708
1709 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
1710 * to be placed under our current cgroup.
1711 */
1712 if (cg_systemd_chown_existing_cgroup(v2->mountpoint,
1713 v2->base_cgroup, uid, gid,
1714 v2->systemd_user_slice))
1715 return true;
1716
1717 /* We need to make sure that we do not create an endless chaing of
1718 * sub-cgroups. So we check if we have already logged in somehow (sudo
1719 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
1720 * skip that part.
1721 */
1722 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
1723 free(v2->base_cgroup);
1724 v2->base_cgroup = must_copy_string("/");
1725 } else {
1726 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
1727 if (clean_base_cgroup)
1728 *clean_base_cgroup = '\0';
1729 }
1730
1731 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
1732 lxcfs_debug("Constructing path \"%s\".\n", path);
1733 if (file_exists(path)) {
1734 *existed = true;
1735 lxcfs_debug("%s existed.\n", path);
1736 free(path);
1737 return false;
1738 }
1739
1740 created = mkdir_p(v2->mountpoint, path);
1741 if (!created) {
1742 free(path);
1743 return false;
1744 }
1745
1746 if (chown(path, uid, gid) < 0)
1747 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %m.\n",
1748 path, (int)uid, (int)gid, NULL);
1749 free(path);
1750
1751 return true;
1752 }
1753
1754 /* Create writeable cgroups for @user at login. Details can be found in the
1755 * preamble/license at the top of this file.
1756 */
1757 static int handle_login(const char *user, uid_t uid, gid_t gid)
1758 {
1759 int idx = 0, ret;
1760 bool existed;
1761 char cg[MAXPATHLEN];
1762
1763 cg_escape();
1764
1765 while (idx >= 0) {
1766 ret = snprintf(cg, MAXPATHLEN, "/user/%s/%d", user, idx);
1767 if (ret < 0 || ret >= MAXPATHLEN) {
1768 mysyslog(LOG_ERR, "Username too long.\n", NULL);
1769 return PAM_SESSION_ERR;
1770 }
1771
1772 existed = false;
1773 if (!cgv2_create(cg, uid, gid, &existed)) {
1774 if (existed) {
1775 cgv2_remove(cg);
1776 idx++;
1777 continue;
1778 }
1779 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
1780 return PAM_SESSION_ERR;
1781 }
1782
1783 existed = false;
1784 if (!cgv1_create(cg, uid, gid, &existed)) {
1785 if (existed) {
1786 cgv2_remove(cg);
1787 idx++;
1788 continue;
1789 }
1790 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
1791 return PAM_SESSION_ERR;
1792 }
1793
1794 if (!cg_enter(cg)) {
1795 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s.\n", cg, user, NULL);
1796 return PAM_SESSION_ERR;
1797 }
1798 break;
1799 }
1800
1801 return PAM_SUCCESS;
1802 }
1803
1804 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
1805 * hierarchies.
1806 */
1807 static bool cgv1_prune_empty_cgroups(const char *user)
1808 {
1809 bool controller_removed = true;
1810 bool all_removed = true;
1811 struct cgv1_hierarchy **it;
1812
1813 for (it = cgv1_hierarchies; it && *it; it++) {
1814 int ret;
1815 char *path_base, *path_init;
1816 char **controller;
1817
1818 if (!(*it)->controllers || !(*it)->mountpoint ||
1819 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1820 continue;
1821
1822 for (controller = (*it)->controllers; controller && *controller;
1823 controller++) {
1824 bool path_base_rm, path_init_rm;
1825
1826 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
1827 lxcfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_base);
1828 ret = recursive_rmdir(path_base);
1829 if (ret == -ENOENT || ret >= 0)
1830 path_base_rm = true;
1831 else
1832 path_base_rm = false;
1833 free(path_base);
1834
1835 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
1836 lxcfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_init);
1837 ret = recursive_rmdir(path_init);
1838 if (ret == -ENOENT || ret >= 0)
1839 path_init_rm = true;
1840 else
1841 path_init_rm = false;
1842 free(path_init);
1843
1844 if (!path_base_rm && !path_init_rm) {
1845 controller_removed = false;
1846 continue;
1847 }
1848
1849 controller_removed = true;
1850 break;
1851 }
1852 if (!controller_removed)
1853 all_removed = false;
1854 }
1855
1856 return all_removed;
1857 }
1858
1859 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
1860 * hierarchy.
1861 */
1862 static bool cgv2_prune_empty_cgroups(const char *user)
1863 {
1864 int ret;
1865 struct cgv2_hierarchy *v2;
1866 char *path_base, *path_init;
1867 bool path_base_rm, path_init_rm;
1868
1869 if (!cgv2_hierarchies)
1870 return true;
1871
1872 v2 = *cgv2_hierarchies;
1873
1874 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
1875 lxcfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_base);
1876 ret = recursive_rmdir(path_base);
1877 if (ret == -ENOENT || ret >= 0)
1878 path_base_rm = true;
1879 else
1880 path_base_rm = false;
1881 free(path_base);
1882
1883 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
1884 lxcfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_init);
1885 ret = recursive_rmdir(path_init);
1886 if (ret == -ENOENT || ret >= 0)
1887 path_init_rm = true;
1888 else
1889 path_init_rm = false;
1890 free(path_init);
1891
1892 if (!path_base_rm && !path_init_rm)
1893 return false;
1894
1895 return true;
1896 }
1897
/* Prune @user's cgroups from the cgroupfs v1 hierarchies and then from the
 * cgroupfs v2 hierarchy. Both results are deliberately ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);
	(void)cgv2_prune_empty_cgroups(user);
}
1904
1905 /* Free allocated information for detected cgroupfs v1 hierarchies. */
1906 static void cgv1_free_hierarchies(void)
1907 {
1908 struct cgv1_hierarchy **it;
1909
1910 if (!cgv1_hierarchies)
1911 return;
1912
1913 for (it = cgv1_hierarchies; it && *it; it++) {
1914 if ((*it)->controllers) {
1915 char **tmp;
1916 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
1917 free(*tmp);
1918
1919 free((*it)->controllers);
1920 }
1921 free((*it)->mountpoint);
1922 free((*it)->base_cgroup);
1923 free((*it)->fullcgpath);
1924 free((*it)->init_cgroup);
1925 }
1926 free(cgv1_hierarchies);
1927 }
1928
1929 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
1930 static void cgv2_free_hierarchies(void)
1931 {
1932 struct cgv2_hierarchy **it;
1933
1934 if (!cgv2_hierarchies)
1935 return;
1936
1937 for (it = cgv2_hierarchies; it && *it; it++) {
1938 if ((*it)->controllers) {
1939 char **tmp;
1940 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
1941 free(*tmp);
1942
1943 free((*it)->controllers);
1944 }
1945 free((*it)->mountpoint);
1946 free((*it)->base_cgroup);
1947 free((*it)->fullcgpath);
1948 free((*it)->init_cgroup);
1949 }
1950 free(cgv2_hierarchies);
1951 }
1952
1953 /* Wrapper around cgv{1,2}_free_hierarchies(). */
1954 static void cg_exit(void)
1955 {
1956 cgv1_free_hierarchies();
1957 cgv2_free_hierarchies();
1958 }
1959
1960 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
1961 const char **argv)
1962 {
1963 int ret;
1964 uid_t uid = 0;
1965 gid_t gid = 0;
1966 const char *PAM_user = NULL;
1967
1968 ret = pam_get_user(pamh, &PAM_user, NULL);
1969 if (ret != PAM_SUCCESS) {
1970 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
1971 return PAM_SESSION_ERR;
1972 }
1973
1974 if (!get_uid_gid(PAM_user, &uid, &gid)) {
1975 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
1976 return PAM_SESSION_ERR;
1977 }
1978
1979 if (!cg_init(uid, gid)) {
1980 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
1981 return PAM_SESSION_ERR;
1982 }
1983
1984 /* Try to prune cgroups, that are actually empty but were still marked
1985 * as busy by the kernel so we couldn't remove them on session close.
1986 */
1987 cg_prune_empty_cgroups(PAM_user);
1988
1989 if (cg_mount_mode == CGROUP_UNKNOWN)
1990 return PAM_SESSION_ERR;
1991
1992 if (argc > 1 && strcmp(argv[0], "-c") == 0)
1993 cg_mark_to_make_rw(argv[1]);
1994
1995 return handle_login(PAM_user, uid, gid);
1996 }
1997
1998 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
1999 const char **argv)
2000 {
2001 int ret;
2002 uid_t uid = 0;
2003 gid_t gid = 0;
2004 const char *PAM_user = NULL;
2005
2006 ret = pam_get_user(pamh, &PAM_user, NULL);
2007 if (ret != PAM_SUCCESS) {
2008 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2009 return PAM_SESSION_ERR;
2010 }
2011
2012 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2013 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2014 return PAM_SESSION_ERR;
2015 }
2016
2017 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2018 if (!cg_init(uid, gid))
2019 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2020
2021 if (argc > 1 && strcmp(argv[0], "-c") == 0)
2022 cg_mark_to_make_rw(argv[1]);
2023 }
2024
2025 cg_prune_empty_cgroups(PAM_user);
2026 cg_exit();
2027
2028 return PAM_SUCCESS;
2029 }