]> git.proxmox.com Git - mirror_lxcfs.git/blob - pam/pam_cgfs.c
pam_cgfs: re-use cgroups that already belong to us
[mirror_lxcfs.git] / pam / pam_cgfs.c
1 /* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
6 *
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both, cgroupfs v1 and cgroupfs v2, We
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2 where this assumption becomes crucial:
27 * We __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an xserver or similar will not work.
29 *
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
36 #include <dirent.h>
37 #include <errno.h>
38 #include <pwd.h>
39 #include <stdarg.h>
40 #include <stdbool.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <syslog.h>
45 #include <unistd.h>
46 #include <linux/unistd.h>
47 #include <sys/mount.h>
48 #include <sys/param.h>
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #include <sys/vfs.h>
52
53 #define PAM_SM_SESSION
54 #include <security/_pam_macros.h>
55 #include <security/pam_modules.h>
56
57 #include "macro.h"
58
59 #ifndef CGROUP_SUPER_MAGIC
60 #define CGROUP_SUPER_MAGIC 0x27e0eb
61 #endif
62
63 #ifndef CGROUP2_SUPER_MAGIC
64 #define CGROUP2_SUPER_MAGIC 0x63677270
65 #endif
66
/* Kind of cgroup mount layout found on this system. The file header describes
 * three supported layouts: pure cgroupfs v1, pure cgroupfs v2 and mixed mounts.
 * The file-scope variable of the same name starts out as CGROUP_UNINITIALIZED.
 */
67 static enum cg_mount_mode {
68 CGROUP_UNKNOWN = -1, /* presumably: layout could not be determined - TODO confirm */
69 CGROUP_MIXED = 0, /* some controllers on v1, others on v2 */
70 CGROUP_PURE_V1 = 1, /* only cgroupfs v1 hierarchies */
71 CGROUP_PURE_V2 = 2, /* only the cgroupfs v2 hierarchy */
72 CGROUP_UNINITIALIZED = 3, /* initial value of the variable below */
73 } cg_mount_mode = CGROUP_UNINITIALIZED;
74
75 /* Common helper prototypes. */
76 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
77 static int append_null_to_list(void ***list);
78 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
79 static char *copy_to_eol(char *s);
80 static bool file_exists(const char *f);
81 static void free_string_list(char **list);
82 static char *get_mountpoint(char *line);
83 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
84 static int handle_login(const char *user, uid_t uid, gid_t gid);
85 /* __typeof__ should be safe to use with all compilers. */
86 typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
87 static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val);
88 static bool is_lxcfs(const char *line);
89 static bool is_cgv1(char *line);
90 static bool is_cgv2(char *line);
91 static bool mkdir_p(const char *root, char *path);
92 static void *must_alloc(size_t sz);
93 static void must_add_to_list(char ***clist, char *entry);
94 static void must_append_controller(char **klist, char **nlist, char ***clist,
95 char *entry);
96 static void must_append_string(char ***list, char *entry);
97 static char *must_copy_string(const char *entry);
98 static char *must_make_path(const char *first, ...) __attribute__((sentinel));
99 static void *must_realloc(void *orig, size_t sz);
100 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
101 static char *read_file(char *fnam);
102 static int recursive_rmdir(char *dirname);
103 static bool string_in_list(char **list, const char *entry);
104 static void trim(char *s);
105 static bool write_int(char *path, int v);
106
107 /* cgroupfs prototypes. */
108 static void cg_mark_to_make_rw(const char *cstring);
109 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
110 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
111 const char *init_cgroup, uid_t uid);
112 static bool cg_systemd_created_user_slice(const char *base_cgroup,
113 const char *init_cgroup,
114 const char *in, uid_t uid);
115 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
116 const char *base_cgroup, uid_t uid,
117 gid_t gid,
118 bool systemd_user_slice);
119 static int cg_get_version_of_mntpt(const char *path);
120 static bool cg_enter(const char *cgroup);
121 static void cg_escape(void);
122 static bool cg_init(uid_t uid, gid_t gid);
123 static void cg_systemd_prune_init_scope(char *cg);
124 static void cg_prune_empty_cgroups(const char *user);
125 static bool is_lxcfs(const char *line);
126 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
127
128 /* cgroupfs v1 prototypes. */
/* Information recorded about one mounted cgroupfs v1 hierarchy. Instances are
 * allocated by cgv1_add_controller() and stored in the null-terminated
 * cgv1_hierarchies array.
 */
129 struct cgv1_hierarchy {
130 char **controllers; /* null-terminated list of controllers mounted into this hierarchy */
131 char *mountpoint; /* mountpoint of the hierarchy */
132 char *base_cgroup; /* cgroup of the current process, taken from /proc/self/cgroup */
133 char *fullcgpath; /* set to NULL on creation; presumably the path of the cgroup we end up using - TODO confirm */
134 char *init_cgroup; /* cgroup of PID 1, taken from /proc/1/cgroup */
135 bool create_rw_cgroup; /* set by cgv1_mark_to_make_rw() when a writeable cgroup is requested here */
136 bool systemd_user_slice; /* false on creation; presumably set when systemd already placed us in a user slice - TODO confirm */
137 };
138
139 static struct cgv1_hierarchy **cgv1_hierarchies;
140
141 static void cgv1_add_controller(char **clist, char *mountpoint,
142 char *base_cgroup, char *init_cgroup);
143 static bool cgv1_controller_in_clist(char *cgline, char *c);
144 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
145 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
146 char **clist);
147 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
148 bool *existed);
149 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
150 uid_t uid, gid_t gid, bool *existed);
151 static bool cgv1_enter(const char *cgroup);
152 static void cgv1_escape(void);
153 static bool cgv1_get_controllers(char ***klist, char ***nlist);
154 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
155 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
156 char *line);
157 static bool cgv1_init(uid_t uid, gid_t gid);
158 static void cgv1_mark_to_make_rw(char **clist);
159 static char *cgv1_must_prefix_named(char *entry);
160 static bool cgv1_prune_empty_cgroups(const char *user);
161 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
162 static bool is_cgv1(char *line);
163
164 /* cgroupfs v2 prototypes. */
/* Information recorded about the mounted cgroupfs v2 hierarchy. Instances are
 * allocated by cgv2_add_controller() and stored in the null-terminated
 * cgv2_hierarchies array.
 */
165 struct cgv2_hierarchy {
166 char **controllers; /* controller list handed to cgv2_add_controller() */
167 char *mountpoint; /* mountpoint of the hierarchy */
168 char *base_cgroup; /* cgroup of the current process, taken from /proc/self/cgroup */
169 char *fullcgpath; /* set to NULL on creation; presumably the path of the cgroup we end up using - TODO confirm */
170 char *init_cgroup; /* cgroup of PID 1, taken from /proc/1/cgroup */
171 bool create_rw_cgroup; /* set by cgv2_mark_to_make_rw() when "unified" is requested */
172 bool systemd_user_slice; /* value handed to cgv2_add_controller() by its caller */
173 };
174
175 /* Actually this should only be a single hierarchy. But for the sake of
176 * parallelism and because the layout of the cgroupfs v2 is still somewhat
177 * changing, we'll leave it as an array of structs.
178 */
179 static struct cgv2_hierarchy **cgv2_hierarchies;
180
181 static void cgv2_add_controller(char **clist, char *mountpoint,
182 char *base_cgroup, char *init_cgroup,
183 bool systemd_user_slice);
184 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
185 bool *existed);
186 static bool cgv2_enter(const char *cgroup);
187 static void cgv2_escape(void);
188 static char *cgv2_get_current_cgroup(int pid);
189 static bool cgv2_init(uid_t uid, gid_t gid);
190 static void cgv2_mark_to_make_rw(char **clist);
191 static bool cgv2_prune_empty_cgroups(const char *user);
192 static bool cgv2_remove(const char *cgroup);
193 static bool is_cgv2(char *line);
194
195 /* Common helper functions. */
/* Send a printf-style message to syslog.
 *
 * @err is passed to vsyslog() as the priority. Every message is tagged
 * "PAM-CGFS" and logged on the LOG_AUTH facility; the log connection is opened
 * and closed again around each single message.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
206
/* realloc() pointer; do not fail.
 *
 * Retries the allocation until it succeeds and returns the (possibly moved)
 * block of at least @sz bytes. @orig may be NULL, in which case this behaves
 * like a malloc() that cannot fail.
 *
 * A request for zero bytes is rounded up to one byte: realloc(ptr, 0) is
 * allowed to free @orig and return NULL, and retrying with the already freed
 * pointer would be undefined behaviour.
 */
static void *must_realloc(void *orig, size_t sz)
{
	void *ret;

	if (sz == 0)
		sz = 1;

	do {
		ret = realloc(orig, sz);
	} while (!ret);

	return ret;
}
218
/* Make sure *@mem can hold @newlen bytes, growing it in units of BATCH_SIZE
 * bytes; do not fail.
 *
 * The buffer is only reallocated when it has not been allocated yet or when
 * @newlen falls into a later batch than @oldlen.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int want = (newlen / BATCH_SIZE) + 1;

	/* Already allocated and large enough: nothing to do. */
	if (*mem && want <= have)
		return;

	*mem = must_realloc(*mem, want * BATCH_SIZE);
}
229
/* Append @new (@newlen bytes plus its terminating '\0') to *@dest, which
 * currently holds @oldlen bytes. The buffer is grown as needed; do not fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	char *tail;

	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	/* Copy including the terminator so *dest stays a valid string. */
	tail = *dest + oldlen;
	memcpy(tail, new, newlen + 1);
}
239
/* Read in whole file and return allocated pointer.
 *
 * Returns a '\0'-terminated buffer holding the complete contents of @fnam, or
 * NULL when the file cannot be opened or yields no lines at all. The caller
 * owns the returned buffer and must free() it.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	/* getline() returns ssize_t; an int could truncate very long lines. */
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
262
/* Grow a null-terminated array of pointers by one slot. Do not fail.
 *
 * *@list may be NULL (empty list). On return the array has room for one more
 * entry and its last slot is NULL. The returned index is the slot in front of
 * that terminator; it is NOT initialized here and must be filled in by the
 * caller.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list) {
		while ((*list)[count])
			count++;
	}

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
282
/* Return a freshly allocated duplicate of @entry; do not fail.
 *
 * A NULL @entry yields NULL. Otherwise the allocation is retried until it
 * succeeds. The caller owns the returned string.
 */
static char *must_copy_string(const char *entry)
{
	size_t size;
	char *dup;

	if (entry == NULL)
		return NULL;

	size = strlen(entry) + 1;
	for (;;) {
		dup = malloc(size);
		if (dup != NULL)
			break;
	}

	memcpy(dup, entry, size);
	return dup;
}
297
/* Append an allocated copy of @entry to the null-terminated array *@list and
 * keep the array null-terminated. Do not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
310
/* Remove all trailing newlines from string @s, in place.
 *
 * A NULL pointer, an empty string and a string that consists of newlines only
 * are all handled: the length is checked before every access so that s[-1] is
 * never read or written.
 */
static void trim(char *s)
{
	size_t len;

	if (!s)
		return;

	len = strlen(s);
	while (len > 0 && s[len - 1] == '\n')
		s[--len] = '\0';
}
319
/* Allocate @sz bytes; do not fail. Implemented on top of must_realloc(). */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
325
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL when @s contains no newline. The caller owns the result.
 */
static char *copy_to_eol(char *s)
{
	char *eol = strchr(s, '\n');
	char *out;
	size_t n;

	if (eol == NULL)
		return NULL;

	n = (size_t)(eol - s);
	out = must_alloc(n + 1);
	out[n] = '\0';
	memcpy(out, s, n);

	return out;
}
343
/* Tell whether a /proc/<pid>/mountinfo line describes a fuse.lxcfs mount.
 * Only the first " - " separator found in @line is examined.
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	if (sep == NULL)
		return false;

	return strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
353
/* Tell whether a /proc/<pid>/mountinfo line describes a cgroupfs v1 mount,
 * i.e. whether the filesystem type after the first " - " is exactly "cgroup".
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	if (sep == NULL)
		return false;

	return strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
363
/* Tell whether a /proc/<pid>/mountinfo line describes a cgroupfs v2 mount,
 * i.e. whether the filesystem type after the first " - " is exactly "cgroup2".
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	if (sep == NULL)
		return false;

	return strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
373
/* Return true if @entry equals one of the strings in the null-terminated array
 * @list. A NULL @list is treated as an empty list.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i] != NULL; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
387
/* Free every string of the null-terminated array @list and then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
397
/* Join all passed-in strings into one allocated path. Do not fail.
 *
 * The argument list must be terminated with NULL. Every piece after @first
 * that does not start with '/' gets a '/' inserted in front of it. Repeated
 * slashes ("///") are not collapsed. The caller owns the returned string.
 */
static char *must_make_path(const char *first, ...)
{
	va_list ap;
	const char *piece;
	size_t used = strlen(first);
	char *path = must_copy_string(first);

	va_start(ap, first);
	for (piece = va_arg(ap, char *); piece != NULL;
	     piece = va_arg(ap, char *)) {
		size_t piece_len = strlen(piece);
		bool need_sep = piece[0] != '/';

		/* Room for separator (if any), the piece and the terminator. */
		path = must_realloc(path, used + piece_len + (need_sep ? 1 : 0) + 1);

		if (need_sep)
			path[used++] = '/';

		memcpy(path + used, piece, piece_len + 1);
		used += piece_len;
	}
	va_end(ap);

	return path;
}
430
/* Write integer @v followed by a newline to the file @path, truncating it.
 * Returns true only if opening, writing and closing all succeeded.
 */
static bool write_int(char *path, int v)
{
	FILE *out = fopen(path, "w");
	bool printed, closed;

	if (out == NULL)
		return false;

	printed = fprintf(out, "%d\n", v) >= 0;
	/* Always close, even after a failed write. */
	closed = fclose(out) == 0;

	return printed && closed;
}
449
/* Return true if stat() succeeds on @f, i.e. the path exists and is reachable
 * (symlinks are followed).
 */
static bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
457
/* Create directory @path and (if necessary) its parents below @root.
 *
 * @root is only used for its length: the first strlen(root) + 1 bytes of
 * @path are skipped and every path component after that is created in turn
 * with mode 0755. @path is modified temporarily while walking it but is
 * always restored before returning, also on failure.
 *
 * Returns true if the whole path exists afterwards, false if @path is shorter
 * than @root or a component could not be created. A component that appears
 * between the existence check and mkdir() (EEXIST, e.g. a concurrent login)
 * is not treated as an error.
 */
static bool mkdir_p(const char *root, char *path)
{
	char *b, orig, *e;
	size_t rootlen = strlen(root);
	size_t pathlen = strlen(path);

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	b = path + rootlen + 1;
	while (true) {
		while (*b == '/')
			b++;
		if (!*b)
			return true;

		e = b + 1;
		while (*e && *e != '/')
			e++;

		/* Temporarily cut the path after the current component. */
		orig = *e;
		if (orig)
			*e = '\0';

		if (!file_exists(path) && mkdir(path, 0755) < 0 &&
		    errno != EEXIST) {
			lxcfs_debug("Failed to create %s: %m.\n", path);
			/* Hand the caller back an unmodified path. */
			*e = orig;
			return false;
		}

		if (!orig)
			return true;

		*e = orig;
		b = e + 1;
	}
}
502
/* Recursively remove directory @dirname and all directories below it.
 *
 * Only directories are removed; other entries are skipped (rmdir() of the
 * parent then decides whether they are in the way). Processing continues after
 * an error so that as much as possible is removed.
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened, and -1 if any
 * stat, removal or close failed. Only the first failure is logged.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			if (!r)
				lxcfs_debug("Failed to stat %s.\n", pathname);
			r = -1;
		} else if (S_ISDIR(st.st_mode)) {
			if (recursive_rmdir(pathname) < 0)
				r = -1;
		}

		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			lxcfs_debug("Failed to delete %s: %m.\n", dirname);
		r = -1;
	}

	if (closedir(dir) < 0) {
		if (!r)
			lxcfs_debug("Failed to close directory %s: %m.\n", dirname);
		r = -1;
	}

	return r;
}
557
/* Store an allocated copy of @entry at the end of the null-terminated array
 * *@clist, keeping the array null-terminated. Do not fail.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	char *dup = must_copy_string(entry);
	int slot = append_null_to_list((void ***)clist);

	(*clist)[slot] = dup;
}
568
/* Extract the mountpoint (fifth space-separated field) from a
 * /proc/<pid>/mountinfo line and return it as an allocated string.
 *
 * Returns NULL if @line has fewer than four spaces. Note that @line is
 * modified: the space that ends the mountpoint field is overwritten with '\0'.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *out;
	size_t n;
	int skipped;

	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (field == NULL)
			return NULL;
		field++;
	}

	end = strchr(field, ' ');
	if (end != NULL)
		*end = '\0';

	n = strlen(field);
	out = must_alloc(n + 1);
	memcpy(out, field, n + 1);

	return out;
}
596
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * Each line has the form "id:controller-list:path". Lines with an empty
 * controller list (the cgroupfs v2 "0::/path" entry) are skipped. Controllers
 * starting with "name=" are appended to @nlist (named controllers), all others
 * to @klist (kernel controllers).
 *
 * Returns false only if /proc/self/cgroup cannot be opened.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *cgfile;
	char *buf = NULL;
	size_t bufsz = 0;

	cgfile = fopen("/proc/self/cgroup", "r");
	if (cgfile == NULL)
		return false;

	while (getline(&buf, &bufsz, cgfile) != -1) {
		char *ctrls, *ctrls_end, *name;
		char *state = NULL;

		ctrls = strchr(buf, ':');
		if (ctrls == NULL)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		if (ctrls_end == NULL)
			continue;
		*ctrls_end = '\0';

		/* Skip the v2 hierarchy. */
		if (ctrls_end == ctrls)
			continue;

		name = strtok_r(ctrls, ",", &state);
		while (name != NULL) {
			char ***target;

			target = strncmp(name, "name=", 5) == 0 ? nlist : klist;
			must_append_string(target, name);

			name = strtok_r(NULL, ",", &state);
		}
	}

	free(buf);
	fclose(cgfile);

	return true;
}
643
644 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
645 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
646 static bool cgv2_get_controllers(char ***klist)
647 {
648 return -ENOSYS;
649 }
650 */
651
/* Return the cgroupfs v2 cgroup of process @pid as an allocated string.
 *
 * Reads /proc/<pid>/cgroup, locates the "0::/" entry and returns the path that
 * follows "0::" up to the end of that line. Returns NULL if the file cannot be
 * read, has no such entry, or the entry is not newline-terminated.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	int ret;
	char *contents, *entry;
	char *result = NULL;
	/* Sized generously: a 64-bit integer has at most 20 decimal digits. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];

	ret = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (ret < 0 || ret >= __PIDLEN)
		return NULL;

	contents = read_file(path);
	if (!contents)
		return NULL;

	entry = strstr(contents, "0::/");
	if (entry) {
		/* Skip "0::" so the copy starts at the leading '/'. */
		result = copy_to_eol(entry + 3);
	}

	free(contents);
	if (result)
		trim(result);

	return result;
}
688
/* Return true if at least one string occurs in both null-terminated lists
 * @l1 and @l2. A NULL list is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	char **a, **b;

	if (!l1 || !l2)
		return false;

	for (a = l1; *a; a++) {
		for (b = l2; *b; b++) {
			if (strcmp(*b, *a) == 0)
				return true;
		}
	}

	return false;
}
705
706 /* For a null-terminated list of controllers @clist, return true if any of those
707 * controllers is already listed the null-terminated list of hierarchies @hlist.
708 * Realistically, if one is present, all must be present.
709 */
710 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
711 {
712 struct cgv1_hierarchy **it;
713
714 for (it = hlist; it && *it; it++)
715 if ((*it)->controllers)
716 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
717 return true;
718 return false;
719
720 }
721
722 /* Set boolean to mark controllers under which we are supposed create a
723 * writeable cgroup.
724 */
725 static void cgv1_mark_to_make_rw(char **clist)
726 {
727 struct cgv1_hierarchy **it;
728
729 for (it = cgv1_hierarchies; it && *it; it++)
730 if ((*it)->controllers)
731 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
732 (*it)->create_rw_cgroup = true;
733 }
734
735 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
736 * the cgroupfs v2 hierarchy.
737 */
738 static void cgv2_mark_to_make_rw(char **clist)
739 {
740 if (string_in_list(clist, "unified"))
741 if (cgv2_hierarchies)
742 (*cgv2_hierarchies)->create_rw_cgroup = true;
743 }
744
/* Split the comma-separated controller string @cstring into a list and hand
 * it to cgv1_mark_to_make_rw() and cgv2_mark_to_make_rw().
 */
static void cg_mark_to_make_rw(const char *cstring)
{
	char **wanted = NULL;
	char *state = NULL;
	char *scratch, *name;

	/* strtok_r() modifies its input, so work on a private copy. */
	scratch = must_copy_string(cstring);

	name = strtok_r(scratch, ",", &state);
	while (name) {
		must_add_to_list(&wanted, name);
		name = strtok_r(NULL, ",", &state);
	}

	free(scratch);

	cgv1_mark_to_make_rw(wanted);
	cgv2_mark_to_make_rw(wanted);

	free_string_list(wanted);
}
765
/* Prefix any named controllers with "name=", e.g. "name=systemd".
 *
 * Returns a newly allocated string owned by the caller, or NULL if formatting
 * failed. The buffer is released on the failure path so it is not leaked.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	char *s;
	int ret;
	size_t len;

	len = strlen(entry);
	/* "name=" is 5 bytes, plus 1 for the terminating '\0'. */
	s = must_alloc(len + 6);

	ret = snprintf(s, len + 6, "name=%s", entry);
	if (ret < 0 || (size_t)ret >= (len + 6)) {
		free(s);
		return NULL;
	}

	return s;
}
782
/* Append controller @entry to the null-terminated list *@clist.
 *
 * An entry that is listed in both @klist (kernel controllers) and @nlist
 * (named controllers) is ignored. An entry that already starts with "name="
 * or that is a kernel controller is copied verbatim; anything else is taken to
 * be a named controller and stored with a "name=" prefix.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;
	char *stored;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		stored = must_copy_string(entry);
	else
		stored = cgv1_must_prefix_named(entry);

	(*clist)[slot] = stored;
}
803
/* Derive the controller list of a cgroupfs v1 mount from its mountinfo line.
 *
 * There are other ways to get this information (field 3 for lxcfs, the mount
 * options for cgroupfs), but we simply assume the mountpoint is
 * /sys/fs/cgroup/<controller-list> and split the last path component at ','.
 *
 * Returns an allocated null-terminated list, or NULL if the line has too few
 * fields, the mountpoint is not below /sys/fs/cgroup/, or no controller was
 * found. @line is modified in the process.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char cgroot[] = "/sys/fs/cgroup/";
	const size_t cgroot_len = sizeof(cgroot) - 1;
	char *field = line;
	char *field_end, *name;
	char *state = NULL;
	char **found = NULL;
	int skipped = 0;

	/* The mountpoint is the fifth space-separated field. */
	while (skipped < 4) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
		skipped++;
	}

	if (strncmp(field, cgroot, cgroot_len) != 0)
		return NULL;
	field += cgroot_len;

	field_end = strchr(field, ' ');
	if (!field_end)
		return NULL;
	*field_end = '\0';

	name = strtok_r(field, ",", &state);
	while (name) {
		must_append_controller(klist, nlist, &found, name);
		name = strtok_r(NULL, ",", &state);
	}

	return found;
}
843
/* Check if the cgroupfs v1 controller @c is present in the string @cgline.
 *
 * @cgline points at a comma-separated controller list that is terminated by
 * ':' (the second field of a /proc/<pid>/cgroup line). Returns true if one of
 * the comma-separated names before that ':' is exactly @c, false otherwise or
 * if @cgline contains no ':'.
 *
 * The list is scanned in place; no copy is made, so an arbitrarily long line
 * cannot exhaust the stack. Neither argument is modified.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *tok, *end;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	/* Empty names never match; empty list elements are skipped as well. */
	clen = strlen(c);
	if (clen == 0)
		return false;

	tok = cgline;
	while (tok < end) {
		const char *sep = memchr(tok, ',', (size_t)(end - tok));
		size_t toklen;

		if (!sep)
			sep = end;

		toklen = (size_t)(sep - tok);
		if (toklen == clen && memcmp(tok, c, clen) == 0)
			return true;

		tok = sep + 1;
	}

	return false;
}
867
/* Look up the cgroup of the cgroupfs v1 controller @controller in
 * @basecginfo, the contents of a /proc/<pid>/cgroup file.
 *
 * Walks the "id:controller-list:path" lines one by one and returns an
 * allocated copy of the path of the first line whose controller list contains
 * @controller. Returns NULL if no such line exists or the data is malformed.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cursor = basecginfo;

	for (;;) {
		/* Step over the hierarchy id to the controller list. */
		char *ctrls = strchr(cursor, ':');

		if (!ctrls)
			return NULL;
		ctrls++;

		if (!cgv1_controller_in_clist(ctrls, controller)) {
			/* Not this line; continue with the next one. */
			cursor = strchr(ctrls, '\n');
			if (!cursor)
				return NULL;
			cursor++;
			continue;
		}

		cursor = strchr(ctrls, ':');
		if (!cursor)
			return NULL;

		return copy_to_eol(cursor + 1);
	}
}
900
/* Remove a trailing "/init.scope" from string @cg, in place. This will mostly
 * affect systemd-based systems.
 *
 * If @cg is exactly "/init.scope" it is reduced to "/". A NULL pointer or a
 * string that does not end in "/init.scope" is left untouched. Lengths are
 * compared before any pointer arithmetic so that no pointer in front of the
 * buffer is ever formed.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scopelen = strlen(INIT_SCOPE);
	size_t cglen;
	char *point;

	if (!cg)
		return;

	cglen = strlen(cg);
	if (cglen < scopelen)
		return;

	point = cg + (cglen - scopelen);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';
	else
		*point = '\0';
}
923
924 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
925 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
926 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
927 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
928 * from /proc/1/cgroup.
929 */
930 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
931 {
932 struct cgv1_hierarchy *new;
933 int newentry;
934
935 new = must_alloc(sizeof(*new));
936 new->controllers = clist;
937 new->mountpoint = mountpoint;
938 new->base_cgroup = base_cgroup;
939 new->fullcgpath = NULL;
940 new->create_rw_cgroup = false;
941 new->init_cgroup = init_cgroup;
942 new->systemd_user_slice = false;
943
944 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
945 cgv1_hierarchies[newentry] = new;
946 }
947
948 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
949 * currently) include the controllers mounted into the hierarchy (e.g. memory,
950 * pids, blkio), the mountpoint of that hierarchy (Should usually be
951 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
952 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
953 * base cgroup of the current process gathered from /proc/self/cgroup, and the
954 * init cgroup of PID1 gathered from /proc/1/cgroup.
955 */
956 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
957 {
958 struct cgv2_hierarchy *new;
959 int newentry;
960
961 new = must_alloc(sizeof(*new));
962 new->controllers = clist;
963 new->mountpoint = mountpoint;
964 new->base_cgroup = base_cgroup;
965 new->fullcgpath = NULL;
966 new->create_rw_cgroup = false;
967 new->init_cgroup = init_cgroup;
968 new->systemd_user_slice = systemd_user_slice;
969
970 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
971 cgv2_hierarchies[newentry] = new;
972 }
973
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if, ignoring trailing slashes, the last element of @in ends in
 * ".session" and the element in front of it starts with the decimal number
 * @uid. Returns false otherwise, including for paths shorter than
 * "/user/1.user/1.session".
 *
 * @in is inspected in place, without copying, and all scanning is bounded by
 * the start of the string, so a path whose only '/' is its first character is
 * rejected without reading in front of the buffer.
 *
 * NOTE(review): sscanf() reports one conversion as soon as the integer has
 * been read, so the ".user" suffix of the second-to-last element is not
 * actually enforced. Behaviour kept as it was.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	static const char suffix[] = ".session";
	const size_t suffix_len = sizeof(suffix) - 1;
	size_t end, last, prev;
	int id;

	end = strlen(in);
	if (end < strlen("/user/1.user/1.session"))
		return false;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	while (end > 0 && in[end - 1] == '/')
		end--;
	if (end == 0)
		return false;

	/* Get last path element: in[last .. end) */
	last = end;
	while (last > 0 && in[last - 1] != '/')
		last--;
	if (last == 0)
		return false;

	/* make sure it is something.session */
	if (end - last < strlen("1.session") ||
	    strncmp(in + end - suffix_len, suffix, suffix_len) != 0)
		return false;

	/* ok last path piece checks out, now check the second to last */
	prev = last - 1;
	while (prev > 0 && in[prev - 1] != '/')
		prev--;

	if (sscanf(in + prev, "%d.user/", &id) != 1)
		return false;

	return id == (int)uid;
}
1025
/* Return true if @base_cgroup, taken relative to @init_cgroup, starts with
 * "/user.slice/user-$uid.slice/". In that case the cgroup is assumed to belong
 * to $uid and may be chowned.
 *
 * @base_cgroup must be strictly longer than @init_cgroup and start with it.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char slice[100];
	size_t base_len = strlen(base_cgroup);
	size_t init_len = strlen(init_cgroup);
	int n;

	if (base_len <= init_len)
		return false;

	if (strncmp(base_cgroup, init_cgroup, init_len) != 0)
		return false;

	n = snprintf(slice, sizeof(slice), "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || n >= (int)sizeof(slice))
		return false;

	/* An init cgroup of length one is "/"; keep it as the leading slash. */
	if (init_len == 1)
		init_len = 0;

	return strncmp(base_cgroup + init_len, slice, (size_t)n) == 0;
}
1053
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 * lose ownership of their cgroups
 *
 * Returns true if @in matches the old systemd layout (see
 * cg_systemd_under_user_slice_1()), if @base_cgroup relative to @init_cgroup
 * matches the new layout (see cg_systemd_under_user_slice_2()), or if @in ends
 * in "user-$uid.slice/session-*.scope" for the given @uid.
 *
 * The backwards search for the second-to-last path element stops at the start
 * of the string, so a path whose only '/' is its first character is rejected
 * without reading in front of the buffer.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	char *p, *elem;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	/* Work on a private copy; the checks below modify the string. */
	copy = must_copy_string(in);
	if (strlen(copy) < strlen("/user-0.slice/session-0.scope"))
		goto cleanup;

	p = copy + strlen(copy) - 1;
	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (p > copy && *p == '/')
		*(p--) = '\0';

	if (*p == '/')
		goto cleanup;

	/* Get last path element */
	while (p > copy && *p != '/')
		p--;

	if (*p != '/')
		goto cleanup;

	/* Make sure it is session-something.scope. */
	len = strlen(p + 1);
	if (strncmp(p + 1, "session-", strlen("session-")) != 0 ||
	    strncmp(p + 1 + len - 6, ".scope", 6) != 0)
		goto cleanup;

	/* Ok last path piece checks out, now check the second to last. */
	*(p + 1) = '\0';
	elem = p;
	while (elem > copy && *(elem - 1) != '/')
		elem--;

	if (sscanf(elem, "user-%d.slice/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;
cleanup:
	free(copy);
	return bret;
}
1126
/* Hand the cgroup that systemd has already created for us to @uid:@gid.
 *
 * Does nothing and returns false unless @systemd_user_slice is set. Otherwise
 * the cgroup at @mountpoint/@base_cgroup is chowned and true is returned; a
 * failing chown() is only logged as a warning and does not change the result.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *cgpath;
	int rc;

	if (systemd_user_slice == false)
		return false;

	cgpath = must_make_path(mountpoint, base_cgroup, NULL);

	/* The cgroup within name=systemd already exists, so changing its
	 * ownership is all that is left to do.
	 */
	rc = chown(cgpath, uid, gid);
	if (rc < 0)
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %m.\n",
			 cgpath, (int)uid, (int)gid, NULL);

	free(cgpath);

	return true;
}
1149
1150 /* Detect and store information about cgroupfs v1 hierarchies. */
1151 static bool cgv1_init(uid_t uid, gid_t gid)
1152 {
1153 FILE *f;
1154 struct cgv1_hierarchy **it;
1155 char *basecginfo;
1156 char *line = NULL;
1157 char **klist = NULL, **nlist = NULL;
1158 size_t len = 0;
1159
1160 basecginfo = read_file("/proc/self/cgroup");
1161 if (!basecginfo)
1162 return false;
1163
1164 f = fopen("/proc/self/mountinfo", "r");
1165 if (!f) {
1166 free(basecginfo);
1167 return false;
1168 }
1169
1170 cgv1_get_controllers(&klist, &nlist);
1171
1172 while (getline(&line, &len, f) != -1) {
1173 char **controller_list = NULL;
1174 char *mountpoint, *base_cgroup;
1175
1176 if (is_lxcfs(line) || !is_cgv1(line))
1177 continue;
1178
1179 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1180 if (!controller_list)
1181 continue;
1182
1183 if (cgv1_controller_list_is_dup(cgv1_hierarchies,
1184 controller_list)) {
1185 free(controller_list);
1186 continue;
1187 }
1188
1189 mountpoint = get_mountpoint(line);
1190 if (!mountpoint) {
1191 free_string_list(controller_list);
1192 continue;
1193 }
1194
1195 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1196 if (!base_cgroup) {
1197 free_string_list(controller_list);
1198 free(mountpoint);
1199 continue;
1200 }
1201 trim(base_cgroup);
1202 lxcfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1203 "mountpoint \"%s\" and cgroup \"%s\".\n",
1204 controller_list[0], mountpoint, base_cgroup);
1205 cgv1_add_controller(controller_list, mountpoint, base_cgroup,
1206 NULL);
1207 }
1208 free_string_list(klist);
1209 free_string_list(nlist);
1210 free(basecginfo);
1211 fclose(f);
1212 free(line);
1213
1214 /* Retrieve init cgroup path for all controllers. */
1215 basecginfo = read_file("/proc/1/cgroup");
1216 if (!basecginfo)
1217 return false;
1218
1219 for (it = cgv1_hierarchies; it && *it; it++) {
1220 if ((*it)->controllers) {
1221 char *init_cgroup, *user_slice;
1222 /* We've already stored the controller and received its
1223 * current cgroup. If we now fail to retrieve its init
1224 * cgroup, we should probably fail.
1225 */
1226 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1227 if (!init_cgroup) {
1228 free(basecginfo);
1229 return false;
1230 }
1231 cg_systemd_prune_init_scope(init_cgroup);
1232 (*it)->init_cgroup = init_cgroup;
1233 lxcfs_debug("cgroupfs v1 controller \"%s\" has init "
1234 "cgroup \"%s\".\n",
1235 (*(*it)->controllers), init_cgroup);
1236 /* Check whether systemd has already created a cgroup
1237 * for us.
1238 */
1239 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1240 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1241 (*it)->systemd_user_slice = true;
1242 }
1243 }
1244 free(basecginfo);
1245
1246 return true;
1247 }
1248
/* Integer type of statfs.f_type, derived from the struct itself.
 * __typeof__ should be safe to use with all compilers.
 */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;

/* Return true if the filesystem described by @fs has the filesystem type
 * magic @magic_val, false otherwise.
 */
static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
{
	const fs_type_magic actual = fs->f_type;

	return actual == magic_val;
}
1256
1257 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1258 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1259 */
1260 static int cg_get_version_of_mntpt(const char *path)
1261 {
1262 int ret;
1263 struct statfs sb;
1264
1265 if (path)
1266 ret = statfs(path, &sb);
1267 else
1268 ret = statfs("/sys/fs/cgroup", &sb);
1269
1270 if (ret < 0)
1271 return -1;
1272
1273 if (has_fs_type(&sb, CGROUP_SUPER_MAGIC))
1274 return 1;
1275 else if (has_fs_type(&sb, CGROUP2_SUPER_MAGIC))
1276 return 2;
1277
1278 return 0;
1279 }
1280
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * The mountpoint is /sys/fs/cgroup if a cgroupfs v2 filesystem is mounted
 * there, otherwise the first cgroupfs v2 entry found in /proc/self/mountinfo.
 * If a mountpoint is found, it is registered via cgv2_add_controller()
 * together with our current cgroup, init's cgroup and the information whether
 * systemd already placed us into a user slice belonging to @uid.
 *
 * Returns true if a hierarchy was registered or if there is nothing to
 * register (no current v2 cgroup, or no v2 mount). Returns false if init's
 * cgroup cannot be determined although our own could, or if
 * /proc/self/mountinfo cannot be opened. @gid is currently unused.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	bool ret = false;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}
	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip scanning mountinfo. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;
	} else {
		f = fopen("/proc/self/mountinfo", "r");
		if (!f)
			goto cleanup;

		/* Let the first usable cgroupfs v2 mountpoint win. */
		while (!mountpoint && getline(&line, &len, f) != -1) {
			if (!is_cgv2(line))
				continue;

			mountpoint = get_mountpoint(line);
		}
	}

	if (mountpoint) {
		char *user_slice;
		bool has_user_slice;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		has_user_slice = cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid);
		free(user_slice);

		lxcfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			    "current cgroup \"%s\" and init cgroup \"%s\".\n",
			    mountpoint, current_cgroup, init_cgroup);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* The strings were handed to the hierarchy list above (they
		 * are released by cgv2_free_hierarchies()), so they must not
		 * be freed below.
		 */
		current_cgroup = NULL;
		init_cgroup = NULL;
	}

	/* Finding no v2 mount at all is not an error. */
	ret = true;

cleanup:
	if (f)
		fclose(f);
	free(line);
	/* Only non-NULL when nothing was registered. */
	free(current_cgroup);
	free(init_cgroup);

	return ret;
}
1372
1373 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1374 * cgroupfs v2 hierarchy.
1375 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1376 * where some controllers are mounted into their standard cgroupfs v1 locations
1377 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1378 * hierarchy (/sys/fs/cgroup).
1379 */
1380 static bool cg_init(uid_t uid, gid_t gid)
1381 {
1382 if (!cgv1_init(uid, gid))
1383 return false;
1384
1385 if (!cgv2_init(uid, gid))
1386 return false;
1387
1388 if (cgv1_hierarchies && cgv2_hierarchies) {
1389 cg_mount_mode = CGROUP_MIXED;
1390 lxcfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies.");
1391 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1392 cg_mount_mode = CGROUP_PURE_V1;
1393 lxcfs_debug("%s\n", "Detected cgroupfs v1 hierarchies.");
1394 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1395 cg_mount_mode = CGROUP_PURE_V2;
1396 lxcfs_debug("%s\n", "Detected cgroupfs v2 hierarchies.");
1397 } else {
1398 cg_mount_mode = CGROUP_UNKNOWN;
1399 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy.\n", NULL);
1400 }
1401
1402 if (cg_mount_mode == CGROUP_UNKNOWN)
1403 return false;
1404
1405 return true;
1406 }
1407
1408 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1409 static bool cgv1_enter(const char *cgroup)
1410 {
1411 struct cgv1_hierarchy **it;
1412
1413 for (it = cgv1_hierarchies; it && *it; it++) {
1414 char **controller;
1415 bool entered = false;
1416
1417 if (!(*it)->controllers || !(*it)->mountpoint ||
1418 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1419 continue;
1420
1421 for (controller = (*it)->controllers; controller && *controller;
1422 controller++) {
1423 char *path;
1424
1425 /* We've already been placed in a user slice, so we
1426 * don't need to enter the cgroup again.
1427 */
1428 if ((*it)->systemd_user_slice) {
1429 entered = true;
1430 break;
1431 }
1432
1433 path = must_make_path((*it)->mountpoint,
1434 (*it)->init_cgroup,
1435 cgroup,
1436 "/cgroup.procs",
1437 NULL);
1438 if (!file_exists(path)) {
1439 free(path);
1440 path = must_make_path((*it)->mountpoint,
1441 (*it)->init_cgroup,
1442 cgroup,
1443 "/tasks",
1444 NULL);
1445 }
1446 lxcfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1447 entered = write_int(path, (int)getpid());
1448 if (entered) {
1449 free(path);
1450 break;
1451 }
1452 lxcfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1453 free(path);
1454 }
1455 if (!entered)
1456 return false;
1457 }
1458
1459 return true;
1460 }
1461
1462 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1463 static bool cgv2_enter(const char *cgroup)
1464 {
1465 struct cgv2_hierarchy *v2;
1466 char *path;
1467 bool entered = false;
1468
1469 if (!cgv2_hierarchies)
1470 return true;
1471
1472 v2 = *cgv2_hierarchies;
1473
1474 if (!v2->mountpoint || !v2->base_cgroup)
1475 return false;
1476
1477 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1478 return true;
1479
1480 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
1481 "/cgroup.procs", NULL);
1482 lxcfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1483 entered = write_int(path, (int)getpid());
1484 if (!entered) {
1485 lxcfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1486 free(path);
1487 return false;
1488 }
1489
1490 free(path);
1491
1492 return true;
1493 }
1494
/* Enter @cgroup in the cgroupfs v1 hierarchies first and then in the cgroupfs
 * v2 hierarchy. The first failure is logged as a warning and ends the attempt
 * with false; true means both steps succeeded.
 */
static bool cg_enter(const char *cgroup)
{
	if (cgv1_enter(cgroup)) {
		if (cgv2_enter(cgroup))
			return true;

		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups.\n", NULL);
	} else {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups.\n", NULL);
	}

	return false;
}
1510
/* Escape to the root ("/") of init's cgroup in all detected cgroupfs v1
 * hierarchies by calling cgv1_enter("/"). Failure is only logged as a warning.
 */
static void cgv1_escape(void)
{
	bool escaped = cgv1_enter("/");

	if (escaped)
		return;

	mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup.\n", NULL);
}
1517
/* Escape to the root ("/") cgroup in the cgroupfs v2 hierarchy by calling
 * cgv2_enter("/"). Failure is only logged as a warning.
 */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (escaped)
		return;

	mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup.\n", NULL);
}
1524
/* Escape in every detected hierarchy: cgroupfs v1 first, then cgroupfs v2.
 * Neither step reports failure to the caller.
 */
static void cg_escape(void)
{
	cgv1_escape();
	cgv2_escape();
}
1531
/* Look up @user in the password database.
 *
 * On success the user's uid and primary gid are stored in @uid and @gid and
 * true is returned. If getpwnam() finds no entry, false is returned and @uid
 * and @gid are left untouched.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	const struct passwd *entry = getpwnam(user);

	if (entry) {
		*uid = entry->pw_uid;
		*gid = entry->pw_gid;
	}

	return entry != NULL;
}
1546
/* Tell whether the cgroup directory @path is owned by exactly @uid and @gid,
 * in which case the caller may reuse it. Returns false if stat() fails or if
 * either the owner or the group differs.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat st;

	if (stat(path, &st) < 0)
		return false;

	return st.st_uid == uid && st.st_gid == gid;
}
1560
1561 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
1562 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
1563 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
1564 * to the caller in @existed.
1565 */
1566 #define __PAM_CGFS_USER "/user/"
1567 #define __PAM_CGFS_USER_LEN 6
1568 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1569 {
1570 char *clean_base_cgroup, *path;
1571 char **controller;
1572 struct cgv1_hierarchy *it;
1573 bool created = false;
1574
1575 *existed = false;
1576 it = h;
1577 for (controller = it->controllers; controller && *controller;
1578 controller++) {
1579 created = false;
1580 /* If systemd has already created a cgroup for us, keep using
1581 * it.
1582 */
1583 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
1584 it->base_cgroup, uid, gid,
1585 it->systemd_user_slice)) {
1586 return true;
1587 }
1588
1589 /* We need to make sure that we do not create an endless chain
1590 * of sub-cgroups. So we check if we have already logged in
1591 * somehow (sudo -i, su, etc.) and have created a
1592 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
1593 * cgroups this is unnecessary since we use the init_cgroup
1594 * anyway, but for controllers which have an existing systemd
1595 * cgroup that does not match the current uid, this is pretty
1596 * useful.
1597 */
1598 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
1599 free(it->base_cgroup);
1600 it->base_cgroup = must_copy_string("/");
1601 } else {
1602 clean_base_cgroup =
1603 strstr(it->base_cgroup, __PAM_CGFS_USER);
1604 if (clean_base_cgroup)
1605 *clean_base_cgroup = '\0';
1606 }
1607
1608 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
1609 lxcfs_debug("Constructing path: %s.\n", path);
1610 if (file_exists(path)) {
1611 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
1612 lxcfs_debug("%s existed and does %s have our uid and gid.\n", path, our_cg ? "" : "not");
1613 free(path);
1614 if (our_cg)
1615 *existed = false;
1616 else
1617 *existed = true;
1618 return our_cg;
1619 }
1620 created = mkdir_p(it->mountpoint, path);
1621 if (!created) {
1622 free(path);
1623 continue;
1624 }
1625 if (chown(path, uid, gid) < 0)
1626 lxcfs_debug("Failed to chown %s to %d:%d: %m.\n", path,
1627 (int)uid, (int)gid);
1628 free(path);
1629 break;
1630 }
1631
1632 if (!created)
1633 return false;
1634
1635 return true;
1636 }
1637
1638 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
1639 * (For example, try to remove @cgroup for the cpu and cpuacct controller
1640 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
1641 */
1642 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
1643 {
1644
1645 char *path;
1646
1647 /* Better safe than sorry. */
1648 if (!h->controllers)
1649 return true;
1650
1651 /* Cgroups created by systemd for us which we re-use won't be removed
1652 * here, since we're using init_cgroup + cgroup as path instead of
1653 * base_cgroup + cgroup.
1654 */
1655 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
1656 (void)recursive_rmdir(path);
1657 free(path);
1658
1659 return true;
1660 }
1661
1662 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
1663 static bool cgv2_remove(const char *cgroup)
1664 {
1665 struct cgv2_hierarchy *v2;
1666 char *path;
1667
1668 if (!cgv2_hierarchies)
1669 return true;
1670
1671 v2 = *cgv2_hierarchies;
1672
1673 /* If we reused an already existing cgroup, don't bother trying to
1674 * remove (a potentially wrong)/the path.
1675 * Cgroups created by systemd for us which we re-use would be removed
1676 * here, since we're using base_cgroup + cgroup as path.
1677 */
1678 if (v2->systemd_user_slice)
1679 return true;
1680
1681 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
1682 (void)recursive_rmdir(path);
1683 free(path);
1684
1685 return true;
1686 }
1687
1688 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
1689 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
1690 * back, to the caller if the creation failed due to @cgroup already existing
1691 * via @existed.
1692 */
1693 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1694 {
1695 struct cgv1_hierarchy **it, **rev_it;
1696 bool all_created = true;
1697
1698 for (it = cgv1_hierarchies; it && *it; it++) {
1699 if (!(*it)->controllers || !(*it)->mountpoint ||
1700 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1701 continue;
1702
1703 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
1704 all_created = false;
1705 break;
1706 }
1707 }
1708
1709 if (all_created)
1710 return true;
1711
1712 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
1713 rev_it++)
1714 cgv1_remove_one(*rev_it, cgroup);
1715
1716 return false;
1717 }
1718
1719 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
1720 * the creation failed due to @cgroup already existing via @existed.
1721 */
1722 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1723 {
1724 char *clean_base_cgroup;
1725 char *path;
1726 struct cgv2_hierarchy *v2;
1727 bool created = false;
1728
1729 *existed = false;
1730
1731 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
1732 return true;
1733
1734 v2 = *cgv2_hierarchies;
1735
1736 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
1737 * to be placed under our current cgroup.
1738 */
1739 if (cg_systemd_chown_existing_cgroup(v2->mountpoint,
1740 v2->base_cgroup, uid, gid,
1741 v2->systemd_user_slice))
1742 return true;
1743
1744 /* We need to make sure that we do not create an endless chaing of
1745 * sub-cgroups. So we check if we have already logged in somehow (sudo
1746 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
1747 * skip that part.
1748 */
1749 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
1750 free(v2->base_cgroup);
1751 v2->base_cgroup = must_copy_string("/");
1752 } else {
1753 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
1754 if (clean_base_cgroup)
1755 *clean_base_cgroup = '\0';
1756 }
1757
1758 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
1759 lxcfs_debug("Constructing path \"%s\".\n", path);
1760 if (file_exists(path)) {
1761 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
1762 lxcfs_debug("%s existed and does %s have our uid and gid.\n", path, our_cg ? "" : "not");
1763 free(path);
1764 if (our_cg)
1765 *existed = false;
1766 else
1767 *existed = true;
1768 return our_cg;
1769 }
1770
1771 created = mkdir_p(v2->mountpoint, path);
1772 if (!created) {
1773 free(path);
1774 return false;
1775 }
1776
1777 if (chown(path, uid, gid) < 0)
1778 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %m.\n",
1779 path, (int)uid, (int)gid, NULL);
1780 free(path);
1781
1782 return true;
1783 }
1784
1785 /* Create writeable cgroups for @user at login. Details can be found in the
1786 * preamble/license at the top of this file.
1787 */
1788 static int handle_login(const char *user, uid_t uid, gid_t gid)
1789 {
1790 int idx = 0, ret;
1791 bool existed;
1792 char cg[MAXPATHLEN];
1793
1794 cg_escape();
1795
1796 while (idx >= 0) {
1797 ret = snprintf(cg, MAXPATHLEN, "/user/%s/%d", user, idx);
1798 if (ret < 0 || ret >= MAXPATHLEN) {
1799 mysyslog(LOG_ERR, "Username too long.\n", NULL);
1800 return PAM_SESSION_ERR;
1801 }
1802
1803 existed = false;
1804 if (!cgv2_create(cg, uid, gid, &existed)) {
1805 if (existed) {
1806 cgv2_remove(cg);
1807 idx++;
1808 continue;
1809 }
1810 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
1811 return PAM_SESSION_ERR;
1812 }
1813
1814 existed = false;
1815 if (!cgv1_create(cg, uid, gid, &existed)) {
1816 if (existed) {
1817 cgv2_remove(cg);
1818 idx++;
1819 continue;
1820 }
1821 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
1822 return PAM_SESSION_ERR;
1823 }
1824
1825 if (!cg_enter(cg)) {
1826 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s.\n", cg, user, NULL);
1827 return PAM_SESSION_ERR;
1828 }
1829 break;
1830 }
1831
1832 return PAM_SUCCESS;
1833 }
1834
1835 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
1836 * hierarchies.
1837 */
1838 static bool cgv1_prune_empty_cgroups(const char *user)
1839 {
1840 bool controller_removed = true;
1841 bool all_removed = true;
1842 struct cgv1_hierarchy **it;
1843
1844 for (it = cgv1_hierarchies; it && *it; it++) {
1845 int ret;
1846 char *path_base, *path_init;
1847 char **controller;
1848
1849 if (!(*it)->controllers || !(*it)->mountpoint ||
1850 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1851 continue;
1852
1853 for (controller = (*it)->controllers; controller && *controller;
1854 controller++) {
1855 bool path_base_rm, path_init_rm;
1856
1857 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
1858 lxcfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_base);
1859 ret = recursive_rmdir(path_base);
1860 if (ret == -ENOENT || ret >= 0)
1861 path_base_rm = true;
1862 else
1863 path_base_rm = false;
1864 free(path_base);
1865
1866 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
1867 lxcfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_init);
1868 ret = recursive_rmdir(path_init);
1869 if (ret == -ENOENT || ret >= 0)
1870 path_init_rm = true;
1871 else
1872 path_init_rm = false;
1873 free(path_init);
1874
1875 if (!path_base_rm && !path_init_rm) {
1876 controller_removed = false;
1877 continue;
1878 }
1879
1880 controller_removed = true;
1881 break;
1882 }
1883 if (!controller_removed)
1884 all_removed = false;
1885 }
1886
1887 return all_removed;
1888 }
1889
1890 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
1891 * hierarchy.
1892 */
1893 static bool cgv2_prune_empty_cgroups(const char *user)
1894 {
1895 int ret;
1896 struct cgv2_hierarchy *v2;
1897 char *path_base, *path_init;
1898 bool path_base_rm, path_init_rm;
1899
1900 if (!cgv2_hierarchies)
1901 return true;
1902
1903 v2 = *cgv2_hierarchies;
1904
1905 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
1906 lxcfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_base);
1907 ret = recursive_rmdir(path_base);
1908 if (ret == -ENOENT || ret >= 0)
1909 path_base_rm = true;
1910 else
1911 path_base_rm = false;
1912 free(path_base);
1913
1914 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
1915 lxcfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_init);
1916 ret = recursive_rmdir(path_init);
1917 if (ret == -ENOENT || ret >= 0)
1918 path_init_rm = true;
1919 else
1920 path_init_rm = false;
1921 free(path_init);
1922
1923 if (!path_base_rm && !path_init_rm)
1924 return false;
1925
1926 return true;
1927 }
1928
/* Prune @user's empty cgroups from the cgroupfs v1 hierarchies and then from
 * the cgroupfs v2 hierarchy. Both results are deliberately ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);
	(void)cgv2_prune_empty_cgroups(user);
}
1935
1936 /* Free allocated information for detected cgroupfs v1 hierarchies. */
1937 static void cgv1_free_hierarchies(void)
1938 {
1939 struct cgv1_hierarchy **it;
1940
1941 if (!cgv1_hierarchies)
1942 return;
1943
1944 for (it = cgv1_hierarchies; it && *it; it++) {
1945 if ((*it)->controllers) {
1946 char **tmp;
1947 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
1948 free(*tmp);
1949
1950 free((*it)->controllers);
1951 }
1952 free((*it)->mountpoint);
1953 free((*it)->base_cgroup);
1954 free((*it)->fullcgpath);
1955 free((*it)->init_cgroup);
1956 }
1957 free(cgv1_hierarchies);
1958 }
1959
1960 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
1961 static void cgv2_free_hierarchies(void)
1962 {
1963 struct cgv2_hierarchy **it;
1964
1965 if (!cgv2_hierarchies)
1966 return;
1967
1968 for (it = cgv2_hierarchies; it && *it; it++) {
1969 if ((*it)->controllers) {
1970 char **tmp;
1971 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
1972 free(*tmp);
1973
1974 free((*it)->controllers);
1975 }
1976 free((*it)->mountpoint);
1977 free((*it)->base_cgroup);
1978 free((*it)->fullcgpath);
1979 free((*it)->init_cgroup);
1980 }
1981 free(cgv2_hierarchies);
1982 }
1983
/* Release everything the detection step stored: the cgroupfs v1 hierarchy
 * list first, then the cgroupfs v2 hierarchy list.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();
	cgv2_free_hierarchies();
}
1990
1991 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
1992 const char **argv)
1993 {
1994 int ret;
1995 uid_t uid = 0;
1996 gid_t gid = 0;
1997 const char *PAM_user = NULL;
1998
1999 ret = pam_get_user(pamh, &PAM_user, NULL);
2000 if (ret != PAM_SUCCESS) {
2001 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2002 return PAM_SESSION_ERR;
2003 }
2004
2005 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2006 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2007 return PAM_SESSION_ERR;
2008 }
2009
2010 if (!cg_init(uid, gid)) {
2011 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2012 return PAM_SESSION_ERR;
2013 }
2014
2015 /* Try to prune cgroups, that are actually empty but were still marked
2016 * as busy by the kernel so we couldn't remove them on session close.
2017 */
2018 cg_prune_empty_cgroups(PAM_user);
2019
2020 if (cg_mount_mode == CGROUP_UNKNOWN)
2021 return PAM_SESSION_ERR;
2022
2023 if (argc > 1 && strcmp(argv[0], "-c") == 0)
2024 cg_mark_to_make_rw(argv[1]);
2025
2026 return handle_login(PAM_user, uid, gid);
2027 }
2028
2029 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2030 const char **argv)
2031 {
2032 int ret;
2033 uid_t uid = 0;
2034 gid_t gid = 0;
2035 const char *PAM_user = NULL;
2036
2037 ret = pam_get_user(pamh, &PAM_user, NULL);
2038 if (ret != PAM_SUCCESS) {
2039 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2040 return PAM_SESSION_ERR;
2041 }
2042
2043 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2044 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2045 return PAM_SESSION_ERR;
2046 }
2047
2048 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2049 if (!cg_init(uid, gid))
2050 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2051
2052 if (argc > 1 && strcmp(argv[0], "-c") == 0)
2053 cg_mark_to_make_rw(argv[1]);
2054 }
2055
2056 cg_prune_empty_cgroups(PAM_user);
2057 cg_exit();
2058
2059 return PAM_SUCCESS;
2060 }