]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
Merge pull request #3235 from xinhua9569/master
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #include <ctype.h>
7 #include <dirent.h>
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <linux/unistd.h>
11 #include <pwd.h>
12 #include <stdarg.h>
13 #include <stdbool.h>
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/mount.h>
19 #include <sys/param.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <sys/vfs.h>
23 #include <syslog.h>
24 #include <unistd.h>
25
26 #include "config.h"
27 #include "file_utils.h"
28 #include "macro.h"
29 #include "memory_utils.h"
30 #include "string_utils.h"
31
32 #define PAM_SM_SESSION
33 #include <security/_pam_macros.h>
34 #include <security/pam_modules.h>
35
36 #ifndef HAVE_STRLCPY
37 #include "include/strlcpy.h"
38 #endif
39
40 #ifndef HAVE_STRLCAT
41 #include "include/strlcat.h"
42 #endif
43
/*
 * Print a diagnostic to @stream, prefixed with the source file, line number
 * and enclosing function. Because __VA_ARGS__ is expanded after a comma, at
 * least one argument has to follow @format.
 */
#define pam_cgfs_debug_stream(stream, format, ...)                         \
	do {                                                               \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__);                            \
	} while (false)

/* Report an error on stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/*
 * Debug output is only compiled in when DEBUG is defined. Otherwise the macro
 * expands to nothing, so its arguments are neither evaluated nor type-checked.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...)
#endif /* DEBUG */
57
/* Taken over modified from the kernel sources.
 *
 * Bitmaps in this file are arrays of uint32_t words: NBITS is the number of
 * bits per word, and BITS_TO_LONGS(nr) is the number of such 32-bit words
 * needed to hold @nr bits (despite the "LONGS" in its name).
 */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
62
/* Detected cgroup mount layout. The file-scope variable of the same name
 * starts out as CGROUP_UNINITIALIZED; the code that assigns the other values
 * is not part of this section of the file.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0, /* presumably v1 and v2 hierarchies side by side -- TODO confirm */
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
70
71 /* Common helper functions. Most of these have been taken from LXC. */
72 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
73 static int append_null_to_list(void ***list);
74 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
75 static inline void clear_bit(unsigned bit, uint32_t *bitarr)
76 {
77 bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
78 }
79 static char *copy_to_eol(char *s);
80 static void free_string_list(char **list);
81 static char *get_mountpoint(char *line);
82 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
83 static int handle_login(const char *user, uid_t uid, gid_t gid);
84 static inline bool is_set(unsigned bit, uint32_t *bitarr)
85 {
86 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
87 }
88 static bool is_lxcfs(const char *line);
89 static bool is_cgv1(char *line);
90 static bool is_cgv2(char *line);
91 static void must_add_to_list(char ***clist, char *entry);
92 static void must_append_controller(char **klist, char **nlist, char ***clist,
93 char *entry);
94 static void must_append_string(char ***list, char *entry);
95 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
96 static char *read_file(char *fnam);
97 static int recursive_rmdir(char *dirname);
98 static inline void set_bit(unsigned bit, uint32_t *bitarr)
99 {
100 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
101 }
102 static bool string_in_list(char **list, const char *entry);
103 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
104 static void trim(char *s);
105 static bool write_int(char *path, int v);
106
107 /* cgroupfs prototypes. */
108 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
109 static uint32_t *cg_cpumask(char *buf, size_t nbits);
110 static bool cg_copy_parent_file(char *path, char *file);
111 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
112 static bool cg_enter(const char *cgroup);
113 static void cg_escape(void);
114 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
115 static ssize_t cg_get_max_cpus(char *cpulist);
116 static int cg_get_version_of_mntpt(const char *path);
117 static bool cg_init(uid_t uid, gid_t gid);
118 static void cg_mark_to_make_rw(char **list);
119 static void cg_prune_empty_cgroups(const char *user);
120 static bool cg_systemd_created_user_slice(const char *base_cgroup,
121 const char *init_cgroup,
122 const char *in, uid_t uid);
123 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
124 const char *base_cgroup, uid_t uid,
125 gid_t gid,
126 bool systemd_user_slice);
127 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
128 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
129 const char *init_cgroup, uid_t uid);
130 static void cg_systemd_prune_init_scope(char *cg);
131 static bool is_lxcfs(const char *line);
132
133 /* cgroupfs v1 prototypes. */
/* One mounted cgroupfs v1 hierarchy, as recorded by cgv1_add_controller(). */
struct cgv1_hierarchy {
	/* Null-terminated list of controllers mounted into this hierarchy. */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* Cgroup of the current process, taken from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialised to NULL when the entry is added; filled in by code
	 * outside this section of the file.
	 */
	char *fullcgpath;
	/* Cgroup of PID 1, taken from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv1_mark_to_make_rw() when a writeable cgroup is wanted. */
	bool create_rw_cgroup;
	/* Set by cgv1_init() when cg_systemd_created_user_slice() reports
	 * that systemd already created a cgroup for the user.
	 */
	bool systemd_user_slice;
};
143
144 static struct cgv1_hierarchy **cgv1_hierarchies;
145
146 static void cgv1_add_controller(char **clist, char *mountpoint,
147 char *base_cgroup, char *init_cgroup);
148 static bool cgv1_controller_in_clist(char *cgline, char *c);
149 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
150 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
151 char **clist);
152 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
153 bool *existed);
154 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
155 uid_t uid, gid_t gid, bool *existed);
156 static bool cgv1_enter(const char *cgroup);
157 static void cgv1_escape(void);
158 static bool cgv1_get_controllers(char ***klist, char ***nlist);
159 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
160 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
161 char *line);
162 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
163 const char *cgroup);
164 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
165 static bool cgv1_init(uid_t uid, gid_t gid);
166 static void cgv1_mark_to_make_rw(char **clist);
167 static char *cgv1_must_prefix_named(char *entry);
168 static bool cgv1_prune_empty_cgroups(const char *user);
169 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
170 static bool is_cgv1(char *line);
171
172 /* cgroupfs v2 prototypes. */
/* The mounted cgroupfs v2 hierarchy, as recorded by cgv2_add_controller(). */
struct cgv2_hierarchy {
	/* Null-terminated controller list handed to cgv2_add_controller(). */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* Cgroup of the current process, taken from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialised to NULL when the entry is added; filled in by code
	 * outside this section of the file.
	 */
	char *fullcgpath;
	/* Cgroup of PID 1, taken from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv2_mark_to_make_rw() when a writeable cgroup is wanted. */
	bool create_rw_cgroup;
	/* Value passed in by the caller of cgv2_add_controller(). */
	bool systemd_user_slice;
};
182
183 /* Actually this should only be a single hierarchy. But for the sake of
184 * parallelism and because the layout of the cgroupfs v2 is still somewhat
185 * changing, we'll leave it as an array of structs.
186 */
187 static struct cgv2_hierarchy **cgv2_hierarchies;
188
189 static void cgv2_add_controller(char **clist, char *mountpoint,
190 char *base_cgroup, char *init_cgroup,
191 bool systemd_user_slice);
192 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
193 bool *existed);
194 static bool cgv2_enter(const char *cgroup);
195 static void cgv2_escape(void);
196 static char *cgv2_get_current_cgroup(int pid);
197 static bool cgv2_init(uid_t uid, gid_t gid);
198 static void cgv2_mark_to_make_rw(char **clist);
199 static bool cgv2_prune_empty_cgroups(const char *user);
200 static bool cgv2_remove(const char *cgroup);
201 static bool is_cgv2(char *line);
202
/* mkdir(2) wrapper that applies @mode exactly as given: the process umask is
 * cleared for the duration of the call and restored afterwards.
 *
 * Returns the result of mkdir(); errno is the one left behind by mkdir().
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_mask = umask(0);
	const int rc = mkdir(path, mode);
	const int mkdir_errno = errno;

	/* Restoring the umask must not clobber the errno set by mkdir(). */
	umask(old_mask);
	errno = mkdir_errno;

	return rc;
}
216
/* Create @path and any missing intermediate directories below @root.
 *
 * Only the part of @path that follows the first strlen(@root) + 1 bytes is
 * walked; everything before it is assumed to exist. @path is modified in
 * place while walking (each component is temporarily NUL-terminated) and is
 * restored on success. If a mkdir fails, @path is left truncated after the
 * failing component.
 *
 * Returns false if @path is shorter than @root or a directory could not be
 * created; true otherwise (including when both strings have equal length).
 */
static bool mkdir_parent(const char *root, char *path)
{
	const size_t root_len = strlen(root);
	const size_t path_len = strlen(path);
	char *cursor;

	if (path_len < root_len)
		return false;

	if (path_len == root_len)
		return true;

	cursor = path + root_len + 1;
	for (;;) {
		char saved;
		char *end;

		/* Skip any run of separators in front of the next component. */
		cursor += strspn(cursor, "/");
		if (*cursor == '\0')
			return true;

		/* Find the end of the component and cut the path there. */
		end = cursor + 1 + strcspn(cursor + 1, "/");
		saved = *end;
		if (saved != '\0')
			*end = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s\n", path, strerror(errno));
			return false;
		}

		/* That was the last component. */
		if (saved == '\0')
			return true;

		*end = saved;
		cursor = end + 1;
	}
}
261
262 /* Common helper functions. Most of these have been taken from LXC. */
/* Send a printf-style message with priority @err to syslog, using the
 * "PAM-CGFS" identity and the LOG_AUTH facility. The log connection is opened
 * and closed on every call. The prototype carries __attribute__((sentinel)),
 * so callers terminate the argument list with NULL.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
	vsyslog(err, format, ap);
#pragma GCC diagnostic pop
	va_end(ap);

	closelog();
}
276
/* Grow *@mem in steps of BATCH_SIZE bytes so that it can hold @newlen bytes.
 * The buffer is (re)allocated only when it does not exist yet or when @newlen
 * falls into a later batch than @oldlen. Allocation goes through
 * must_realloc().
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int want = (newlen / BATCH_SIZE) + 1;

	if (*mem && want <= have)
		return;

	*mem = must_realloc(*mem, want * BATCH_SIZE);
}
287
/* Append the @newlen bytes of @new, plus its terminating NUL, to the buffer
 * *@dest, which currently holds @oldlen bytes. The buffer is grown as needed
 * through batch_realloc().
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	char *tail;

	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	/* newlen + 1 also copies the NUL that terminates @new. */
	tail = *dest + oldlen;
	memcpy(tail, new, newlen + 1);
}
297
/* Read the whole file @fnam into a freshly allocated, NUL-terminated buffer.
 *
 * Returns NULL when the file cannot be opened or when no line could be read
 * from it (the buffer is only allocated once the first line arrives). The
 * caller frees the result.
 */
static char *read_file(char *fnam)
{
	char *contents = NULL, *line = NULL;
	size_t cap = 0, total = 0;
	FILE *fp;

	fp = fopen(fnam, "r");
	if (!fp)
		return NULL;

	for (;;) {
		int n = getline(&line, &cap, fp);

		if (n == -1)
			break;

		append_line(&contents, total, line, n);
		total += n;
	}

	fclose(fp);
	free(line);

	return contents;
}
320
/* Make room for one more entry in the null-terminated pointer array *@list
 * (which may be NULL for an empty list).
 *
 * The array is reallocated through must_realloc() and the slot after the new
 * entry is set to NULL. The returned index is the slot that is now free for
 * the caller to fill; this function does not initialise it.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	while (*list && (*list)[count])
		count++;

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
339
/* Append a copy of @entry (made with must_copy_string()) to the
 * null-terminated string array *@list, keeping the array null-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
352
/* Strip every trailing '\n' from @s in place. Newlines elsewhere in the
 * string are left alone.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
361
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL if @s contains no newline. The caller frees the result.
 */
static char *copy_to_eol(char *s)
{
	const char *nl = strchr(s, '\n');
	size_t n;
	char *out;

	if (!nl)
		return NULL;

	n = nl - s;
	out = must_realloc(NULL, n + 1);
	memcpy(out, s, n);
	out[n] = '\0';

	return out;
}
379
/* Check if given entry under /proc/<pid>/mountinfo is a fuse.lxcfs mount.
 * Only the first " - " separator found in @line is examined.
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
389
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v1 mount.
 * Only the first " - " separator found in @line is examined.
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
399
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v2 mount.
 * Only the first " - " separator found in @line is examined.
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
409
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list is treated as empty.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
423
/*
 * Split @str at any of the delimiter characters in @sep and return the pieces
 * as a null-terminated array of allocated strings. @str itself is not
 * modified; a private copy is tokenised. The result is NULL when there are no
 * tokens. Caller is responsible for calling free_string_list.
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	char *scratch = must_copy_string(str);
	char *piece = strtok_r(scratch, sep, &state);

	while (piece) {
		must_add_to_list(&result, piece);
		piece = strtok_r(NULL, sep, &state);
	}

	free(scratch);

	return result;
}
444
/* Count the entries of a null-terminated array of strings. A NULL @list
 * counts as empty.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (list) {
		while (list[n])
			n++;
	}

	return n;
}
456
/* Free every string in the null-terminated array @list, then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
466
/* Write the decimal representation of @v, followed by a newline, to the file
 * @path (opened with mode "w").
 *
 * Returns true only if opening, writing and closing all succeeded.
 */
static bool write_int(char *path, int v)
{
	FILE *fp = fopen(path, "w");
	bool printed, closed;

	if (!fp)
		return false;

	printed = fprintf(fp, "%d\n", v) >= 0;
	/* The stream is closed even when the write failed. */
	closed = fclose(fp) == 0;

	return printed && closed;
}
485
/* Recursively remove the directory @dirname and all directories below it.
 *
 * Only sub-directories are descended into and removed with rmdir(); entries
 * that are not directories are skipped, not unlinked (presumably because the
 * tree is a cgroupfs whose files disappear with their directory -- confirm
 * against callers).
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened, and -1 if any
 * stat, rmdir or closedir failed along the way.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			/* Only the first failure is reported. */
			if (!r)
				pam_cgfs_debug("Failed to stat %s\n", pathname);
			r = -1;
		} else if (S_ISDIR(st.st_mode) && recursive_rmdir(pathname) < 0) {
			r = -1;
		}

		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			pam_cgfs_debug("Failed to delete %s: %s\n", dirname, strerror(errno));
		r = -1;
	}

	if (closedir(dir) < 0) {
		/* Report the close failure as such, not as a failed delete. */
		if (!r)
			pam_cgfs_debug("Failed to close directory %s: %s\n", dirname, strerror(errno));
		r = -1;
	}

	return r;
}
538
/* Append a copy of @entry (made with must_copy_string()) to the
 * null-terminated string array *@clist. The array stays null-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
549
/* Get mountpoint from a /proc/<pid>/mountinfo line.
 *
 * The mountpoint is the fifth space-separated field. @line is modified: the
 * space that ends the field, if there is one, is overwritten with NUL.
 * Returns an allocated copy of the field, or NULL if @line has fewer than
 * five fields. The caller frees the result.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *out;
	size_t n;
	int skipped = 0;

	/* Step over the first four fields. */
	while (skipped < 4) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
		skipped++;
	}

	end = strchr(field, ' ');
	if (end)
		*end = '\0';

	n = strlen(field);
	out = must_realloc(NULL, n + 1);
	memcpy(out, field, n);
	out[n] = '\0';

	return out;
}
577
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * The second ':'-separated field of each line is split at ','. Names starting
 * with "name=" are appended to @nlist (named controllers), all others to
 * @klist (kernel controllers). Lines whose controller field is empty, i.e.
 * the 0::/some/path cgroupfs v2 entry, are skipped.
 *
 * Returns false only if /proc/self/cgroup cannot be opened.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	char *line = NULL;
	size_t cap = 0;
	FILE *fp;

	fp = fopen("/proc/self/cgroup", "r");
	if (!fp)
		return false;

	while (getline(&line, &cap, fp) != -1) {
		char *state = NULL;
		char *ctrl, *end, *name;

		ctrl = strchr(line, ':');
		if (!ctrl)
			continue;
		ctrl++;

		end = strchr(ctrl, ':');
		if (!end)
			continue;
		*end = '\0';

		/* Skip the v2 hierarchy. */
		if (end == ctrl)
			continue;

		for (name = strtok_r(ctrl, ",", &state); name;
		     name = strtok_r(NULL, ",", &state)) {
			char ***target = (strncmp(name, "name=", 5) == 0) ? nlist : klist;

			must_append_string(target, name);
		}
	}

	free(line);
	fclose(fp);

	return true;
}
624
625 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
626 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
627 static bool cgv2_get_controllers(char ***klist)
628 {
629 return -ENOSYS;
630 }
631 */
632
/* Get the cgroupfs v2 cgroup of process @pid from /proc/<pid>/cgroup.
 *
 * The first occurrence of "0::/" in the file is located and everything from
 * its '/' up to the end of that line is returned as an allocated string.
 * Returns NULL if the file cannot be read, has no such entry, or the entry is
 * not terminated by a newline. The caller frees the result.
 */
static char *cgv2_get_current_cgroup(int pid)
{
/* Path buffer size: "/proc" + up to 21 bytes for "/<pid>" + "/cgroup" + NUL. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];
	char *contents, *entry;
	char *result = NULL;
	int n;

	n = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (n < 0 || n >= __PIDLEN)
		return NULL;

	contents = read_file(path);
	if (!contents)
		return NULL;

	entry = strstr(contents, "0::/");
	if (entry)
		result = copy_to_eol(entry + 3); /* + 3 skips "0::" */

	free(contents);
	if (result)
		trim(result);

	return result;
}
669
/* Return true if at least one string occurs in both of the null-terminated
 * lists @l1 and @l2. A NULL list is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
686
687 /* For a null-terminated list of controllers @clist, return true if any of those
688 * controllers is already listed the null-terminated list of hierarchies @hlist.
689 * Realistically, if one is present, all must be present.
690 */
691 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
692 {
693 struct cgv1_hierarchy **it;
694
695 for (it = hlist; it && *it; it++)
696 if ((*it)->controllers)
697 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
698 return true;
699
700 return false;
701
702 }
703
704 /* Set boolean to mark controllers under which we are supposed create a
705 * writeable cgroup.
706 */
707 static void cgv1_mark_to_make_rw(char **clist)
708 {
709 struct cgv1_hierarchy **it;
710
711 for (it = cgv1_hierarchies; it && *it; it++)
712 if ((*it)->controllers)
713 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
714 string_in_list(clist, "all"))
715 (*it)->create_rw_cgroup = true;
716 }
717
718 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
719 * the cgroupfs v2 hierarchy.
720 */
721 static void cgv2_mark_to_make_rw(char **clist)
722 {
723 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
724 if (cgv2_hierarchies)
725 (*cgv2_hierarchies)->create_rw_cgroup = true;
726 }
727
/* Wrapper around cgv{1,2}_mark_to_make_rw(): flags the hierarchies selected
 * by the null-terminated controller list @clist for writeable cgroup
 * creation, first among the cgroupfs v1 hierarchies and then in the cgroupfs
 * v2 hierarchy.
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
734
/* Return an allocated string made of "name=" followed by @entry, e.g.
 * "name=systemd". Returns NULL if formatting fails. The caller frees the
 * result.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* "name=" (5 bytes) + entry + terminating NUL */
	const size_t size = strlen(entry) + 6;
	char *prefixed = must_realloc(NULL, size);
	int n = snprintf(prefixed, size, "name=%s", entry);

	if (n >= 0 && (size_t)n < size)
		return prefixed;

	free(prefixed);
	return NULL;
}
753
/* Append controller @entry to the null-terminated list *@clist.
 *
 * An entry that already starts with "name=" or is a kernel controller (listed
 * in @klist) is copied as is; anything else is taken to be a named controller
 * and gets the "name=" prefix. An entry found in both @klist and @nlist is
 * not added at all.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	const bool is_kernel = string_in_list(klist, entry);
	char *value;
	int slot;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		value = must_copy_string(entry);
	else
		value = cgv1_must_prefix_named(entry);

	(*clist)[slot] = value;
}
774
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list
 *
 * The fifth space-separated field of @line must start with "/sys/fs/cgroup/";
 * the remainder of that field is split at ',' and every piece is handed to
 * must_append_controller(). Returns the resulting null-terminated list (NULL
 * if the line does not match or yields no controller). The caller owns the
 * list.
 *
 * @line is left unmodified: the controller list is tokenised on a private
 * copy. Tokenising in place would turn the commas of a co-mounted hierarchy
 * (e.g. "cpu,cpuacct") into NULs and truncate the mountpoint for anyone
 * parsing the same line afterwards.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	int i;
	char *p, *p2, *tok, *copy;
	size_t len;
	char *saveptr = NULL;
	char **aret = NULL;

	p = line;

	/* Step over the first four fields. */
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	if (strncmp(p, "/sys/fs/cgroup/", 15) != 0)
		return NULL;

	p += 15;

	p2 = strchr(p, ' ');
	if (!p2)
		return NULL;

	/* Duplicate just the controller list so strtok_r() can cut it up. */
	len = p2 - p;
	copy = must_realloc(NULL, len + 1);
	memcpy(copy, p, len);
	copy[len] = '\0';

	for (tok = strtok_r(copy, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr))
		must_append_controller(klist, nlist, &aret, tok);

	/* must_append_controller() stored its own copies of the tokens. */
	free(copy);

	return aret;
}
812
813 /* Check if a cgroupfs v2 controller is present in the string @cgline. */
814 static bool cgv1_controller_in_clist(char *cgline, char *c)
815 {
816 __do_free char *tmp = NULL;
817 size_t len;
818 char *tok, *eol;
819 char *saveptr = NULL;
820
821 eol = strchr(cgline, ':');
822 if (!eol)
823 return false;
824
825 len = eol - cgline;
826 tmp = must_realloc(NULL, len + 1);
827 memcpy(tmp, cgline, len);
828 tmp[len] = '\0';
829
830 for (tok = strtok_r(tmp, ",", &saveptr); tok;
831 tok = strtok_r(NULL, ",", &saveptr)) {
832 if (strcmp(tok, c) == 0)
833 return true;
834 }
835
836 return false;
837 }
838
/* Look up the cgroup of the cgroupfs v1 controller @controller in
 * @basecginfo, the contents of a /proc/<pid>/cgroup file.
 *
 * Each line is checked for @controller in its controller field; for the first
 * match the path after the second ':' is returned as an allocated string
 * (without the newline). Returns NULL if no line matches or the matching line
 * is malformed. The caller frees the result.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *line = basecginfo;

	while (line) {
		char *ctrls, *path;

		/* The controller field follows the first ':'. */
		ctrls = strchr(line, ':');
		if (!ctrls)
			return NULL;
		ctrls++;

		if (cgv1_controller_in_clist(ctrls, controller)) {
			path = strchr(ctrls, ':');
			return path ? copy_to_eol(path + 1) : NULL;
		}

		/* Not this line; move on to the next one, if any. */
		line = strchr(ctrls, '\n');
		if (line)
			line++;
	}

	return NULL;
}
871
/* Remove a trailing "/init.scope" from string @cg in place. This will mostly
 * affect systemd-based systems.
 *
 * If @cg is exactly "/init.scope" it becomes "/", otherwise the suffix is cut
 * off. A NULL @cg, or one that does not end in "/init.scope", is left alone.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scope_len = strlen(INIT_SCOPE);
	size_t cg_len;
	char *point;

	if (!cg)
		return;

	/* Compare lengths before doing pointer arithmetic: forming a pointer
	 * in front of @cg would be undefined behaviour.
	 */
	cg_len = strlen(cg);
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0'; /* keep the leading '/' */
	else
		*point = '\0';
}
894
895 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
896 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
897 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
898 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
899 * from /proc/1/cgroup.
900 */
901 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
902 {
903 struct cgv1_hierarchy *new;
904 int newentry;
905
906 new = must_realloc(NULL, sizeof(*new));
907
908 new->controllers = clist;
909 new->mountpoint = mountpoint;
910 new->base_cgroup = base_cgroup;
911 new->fullcgpath = NULL;
912 new->create_rw_cgroup = false;
913 new->init_cgroup = init_cgroup;
914 new->systemd_user_slice = false;
915
916 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
917 cgv1_hierarchies[newentry] = new;
918 }
919
920 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
921 * currently) include the controllers mounted into the hierarchy (e.g. memory,
922 * pids, blkio), the mountpoint of that hierarchy (Should usually be
923 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
924 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
925 * base cgroup of the current process gathered from /proc/self/cgroup, and the
926 * init cgroup of PID1 gathered from /proc/1/cgroup.
927 */
928 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
929 {
930 struct cgv2_hierarchy *new;
931 int newentry;
932
933 new = must_realloc(NULL, sizeof(*new));
934
935 new->controllers = clist;
936 new->mountpoint = mountpoint;
937 new->base_cgroup = base_cgroup;
938 new->fullcgpath = NULL;
939 new->create_rw_cgroup = false;
940 new->init_cgroup = init_cgroup;
941 new->systemd_user_slice = systemd_user_slice;
942
943 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
944 cgv2_hierarchies[newentry] = new;
945 }
946
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * cg_systemd_created_user_slice(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last element of @in ends in ".session" and the element
 * before it starts with the decimal number @uid. @in is not modified; the
 * checks run on a private copy.
 *
 * NOTE(review): sscanf() returns 1 as soon as "%d" has converted, so the
 * literal ".user/" after the number is not actually verified.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	char *last, *prev;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	copy = must_copy_string(in);
	len = strlen(copy);
	if (len < strlen("/user/1.user/1.session"))
		goto cleanup;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	while (len > 0 && copy[len - 1] == '/')
		copy[--len] = '\0';
	if (len == 0)
		goto cleanup;

	/* Get last path element */
	last = strrchr(copy, '/');
	if (!last)
		goto cleanup;

	/* make sure it is something.session */
	len = strlen(last + 1);
	if (len < strlen("1.session") ||
	    strncmp(last + 1 + len - 8, ".session", 8) != 0)
		goto cleanup;

	/* ok last path piece checks out, now check the second to last */
	*(last + 1) = '\0';

	/* Walk back to the previous '/', never stepping in front of the
	 * buffer: if the last '/' is the first character there is no
	 * second-to-last element to look at.
	 */
	prev = last;
	while (prev > copy && *(--prev) != '/')
		;
	if (prev == last || *prev != '/')
		goto cleanup;

	if (sscanf(prev + 1, "%d.user/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
1000
/* Return true if @base_cgroup, taken relative to @init_cgroup, starts with
 * "/user.slice/user-$uid.slice/". In that case the cgroup is assumed to
 * belong to $uid and gets chowned.
 *
 * @base_cgroup must be longer than @init_cgroup and begin with it. An
 * @init_cgroup of length one (i.e. "/") is not stripped, so that the leading
 * '/' of @base_cgroup takes part in the comparison.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char prefix[100];
	const size_t base_len = strlen(base_cgroup);
	size_t skip = strlen(init_cgroup);
	int n;

	if (base_len <= skip || strncmp(base_cgroup, init_cgroup, skip) != 0)
		return false;

	n = snprintf(prefix, 100, "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || n >= 100)
		return false;

	/* skip the '/' */
	if (skip == 1)
		skip = 0;

	return strncmp(base_cgroup + skip, prefix, (size_t)n) == 0;
}
1028
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 *   lose ownership of their cgroups
 *
 * Returns true if cg_systemd_under_user_slice_1() accepts @in, if
 * cg_systemd_under_user_slice_2() accepts @base_cgroup relative to
 * @init_cgroup, or if @in ends in "user-$uid.slice/session-*.scope" for the
 * given @uid. @in is not modified; the checks run on a private copy.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	char *last, *prev;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	copy = must_copy_string(in);

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		goto succeed;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		goto succeed;

	len = strlen(copy);
	if (len < strlen("/user-0.slice/session-0.scope"))
		goto cleanup;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (len > 0 && copy[len - 1] == '/')
		copy[--len] = '\0';

	if (len == 0)
		goto cleanup;

	/* Get last path element */
	last = strrchr(copy, '/');
	if (!last)
		goto cleanup;

	/* Make sure it is session-something.scope. The prefix test comes
	 * first, so the suffix test only runs on elements of at least eight
	 * characters.
	 */
	len = strlen(last + 1);
	if (strncmp(last + 1, "session-", strlen("session-")) != 0 ||
	    strncmp(last + 1 + len - 6, ".scope", 6) != 0)
		goto cleanup;

	/* Ok last path piece checks out, now check the second to last. */
	*(last + 1) = '\0';

	/* Walk back to the previous '/', never stepping in front of the
	 * buffer: if the last '/' is the first character there is no
	 * second-to-last element to look at.
	 */
	prev = last;
	while (prev > copy && *(--prev) != '/')
		;
	if (prev == last || *prev != '/')
		goto cleanup;

	if (sscanf(prev + 1, "user-%d.slice/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

succeed:
	bret = true;

cleanup:
	free(copy);
	return bret;
}
1102
/* Chown existing cgroup that systemd has already created for us.
 *
 * Returns false, without touching anything, if @systemd_user_slice is false.
 * Otherwise the cgroup @base_cgroup below @mountpoint is chowned to
 * @uid:@gid and true is returned; a failing chown() is only logged as a
 * warning and does not change the return value.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		/* The trailing NULL is the sentinel mysyslog() requires. */
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		/* Only claim success when the chown actually went through. */
		pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
	}

	free(path);
	return true;
}
1126
1127 /* Detect and store information about cgroupfs v1 hierarchies. */
1128 static bool cgv1_init(uid_t uid, gid_t gid)
1129 {
1130 FILE *f;
1131 struct cgv1_hierarchy **it;
1132 char *basecginfo;
1133 char *line = NULL;
1134 char **klist = NULL, **nlist = NULL;
1135 size_t len = 0;
1136
1137 basecginfo = read_file("/proc/self/cgroup");
1138 if (!basecginfo)
1139 return false;
1140
1141 f = fopen("/proc/self/mountinfo", "r");
1142 if (!f) {
1143 free(basecginfo);
1144 return false;
1145 }
1146
1147 cgv1_get_controllers(&klist, &nlist);
1148
1149 while (getline(&line, &len, f) != -1) {
1150 char **controller_list = NULL;
1151 char *mountpoint, *base_cgroup;
1152
1153 if (is_lxcfs(line) || !is_cgv1(line))
1154 continue;
1155
1156 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1157 if (!controller_list)
1158 continue;
1159
1160 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
1161 free(controller_list);
1162 continue;
1163 }
1164
1165 mountpoint = get_mountpoint(line);
1166 if (!mountpoint) {
1167 free_string_list(controller_list);
1168 continue;
1169 }
1170
1171 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1172 if (!base_cgroup) {
1173 free_string_list(controller_list);
1174 free(mountpoint);
1175 continue;
1176 }
1177
1178 trim(base_cgroup);
1179 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1180 "mountpoint \"%s\" and cgroup \"%s\"\n",
1181 controller_list[0], mountpoint, base_cgroup);
1182 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
1183 }
1184
1185 free_string_list(klist);
1186 free_string_list(nlist);
1187 free(basecginfo);
1188 fclose(f);
1189 free(line);
1190
1191 /* Retrieve init cgroup path for all controllers. */
1192 basecginfo = read_file("/proc/1/cgroup");
1193 if (!basecginfo)
1194 return false;
1195
1196 for (it = cgv1_hierarchies; it && *it; it++) {
1197 if ((*it)->controllers) {
1198 char *init_cgroup, *user_slice;
1199
1200 /* We've already stored the controller and received its
1201 * current cgroup. If we now fail to retrieve its init
1202 * cgroup, we should probably fail.
1203 */
1204 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1205 if (!init_cgroup) {
1206 free(basecginfo);
1207 return false;
1208 }
1209
1210 cg_systemd_prune_init_scope(init_cgroup);
1211 (*it)->init_cgroup = init_cgroup;
1212 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1213 "cgroup \"%s\"\n",
1214 (*(*it)->controllers), init_cgroup);
1215
1216 /* Check whether systemd has already created a cgroup
1217 * for us.
1218 */
1219 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1220 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1221 (*it)->systemd_user_slice = true;
1222
1223 free(user_slice);
1224 }
1225 }
1226 free(basecginfo);
1227
1228 return true;
1229 }
1230
1231 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1232 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1233 */
1234 static inline int cg_get_version_of_mntpt(const char *path)
1235 {
1236 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1237 return 1;
1238
1239 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1240 return 2;
1241
1242 return 0;
1243 }
1244
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Returns true when no v2 hierarchy is present for this process or when one
 * was found and recorded. Returns false when init's v2 cgroup cannot be
 * determined, /proc/self/mountinfo cannot be opened, or no v2 mount is found.
 * On failure the cgroup strings retrieved here are freed; on success they are
 * left alone (they have been handed to cgv2_add_controller() if a hierarchy
 * was recorded).
 *
 * @gid is not used by this function.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		/* Log only once a mountpoint has really been found; logging
		 * after the loop would read an unset mountpoint whenever no v2
		 * mount exists.
		 */
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\"\n",
			       mountpoint, current_cgroup, init_cgroup);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1345
1346 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1347 * cgroupfs v2 hierarchy.
1348 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1349 * where some controllers are mounted into their standard cgroupfs v1 locations
1350 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1351 * hierarchy (/sys/fs/cgroup).
1352 */
1353 static bool cg_init(uid_t uid, gid_t gid)
1354 {
1355 if (!cgv1_init(uid, gid))
1356 return false;
1357
1358 if (!cgv2_init(uid, gid))
1359 return false;
1360
1361 if (cgv1_hierarchies && cgv2_hierarchies) {
1362 cg_mount_mode = CGROUP_MIXED;
1363 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1364 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1365 cg_mount_mode = CGROUP_PURE_V1;
1366 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1367 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1368 cg_mount_mode = CGROUP_PURE_V2;
1369 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1370 } else {
1371 cg_mount_mode = CGROUP_UNKNOWN;
1372 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy\n", NULL);
1373 }
1374
1375 if (cg_mount_mode == CGROUP_UNKNOWN)
1376 return false;
1377
1378 return true;
1379 }
1380
1381 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1382 static bool cgv1_enter(const char *cgroup)
1383 {
1384 struct cgv1_hierarchy **it;
1385
1386 for (it = cgv1_hierarchies; it && *it; it++) {
1387 char **controller;
1388 bool entered = false;
1389
1390 if (!(*it)->controllers || !(*it)->mountpoint ||
1391 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1392 continue;
1393
1394 for (controller = (*it)->controllers; controller && *controller;
1395 controller++) {
1396 char *path;
1397
1398 /* We've already been placed in a user slice, so we
1399 * don't need to enter the cgroup again.
1400 */
1401 if ((*it)->systemd_user_slice) {
1402 entered = true;
1403 break;
1404 }
1405
1406 path = must_make_path((*it)->mountpoint,
1407 (*it)->init_cgroup,
1408 cgroup,
1409 "/cgroup.procs",
1410 NULL);
1411 if (!file_exists(path)) {
1412 free(path);
1413 path = must_make_path((*it)->mountpoint,
1414 (*it)->init_cgroup,
1415 cgroup,
1416 "/tasks",
1417 NULL);
1418 }
1419
1420 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1421 entered = write_int(path, (int)getpid());
1422 if (entered) {
1423 free(path);
1424 break;
1425 }
1426
1427 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1428 free(path);
1429 }
1430
1431 if (!entered)
1432 return false;
1433 }
1434
1435 return true;
1436 }
1437
1438 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1439 static bool cgv2_enter(const char *cgroup)
1440 {
1441 struct cgv2_hierarchy *v2;
1442 char *path;
1443 bool entered = false;
1444
1445 if (!cgv2_hierarchies)
1446 return true;
1447
1448 v2 = *cgv2_hierarchies;
1449
1450 if (!v2->mountpoint || !v2->base_cgroup)
1451 return false;
1452
1453 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1454 return true;
1455
1456 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1457 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1458
1459 entered = write_int(path, (int)getpid());
1460 if (!entered) {
1461 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1462 free(path);
1463 return false;
1464 }
1465
1466 free(path);
1467
1468 return true;
1469 }
1470
1471 /* Wrapper around cgv{1,2}_enter(). */
1472 static bool cg_enter(const char *cgroup)
1473 {
1474 if (!cgv1_enter(cgroup)) {
1475 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups\n", NULL);
1476 return false;
1477 }
1478
1479 if (!cgv2_enter(cgroup)) {
1480 mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups\n", NULL);
1481 return false;
1482 }
1483
1484 return true;
1485 }
1486
1487 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1488 static void cgv1_escape(void)
1489 {
1490 struct cgv1_hierarchy **it;
1491
1492 /* In case systemd hasn't already placed us in a user slice for the
1493 * cpuset v1 controller we will reside in the root cgroup. This means
1494 * that cgroup.clone_children will not have been initialized for us so
1495 * we need to do it.
1496 */
1497 for (it = cgv1_hierarchies; it && *it; it++)
1498 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1499 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset\n", NULL);
1500
1501 if (!cgv1_enter("/"))
1502 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL);
1503 }
1504
1505 /* Escape to root cgroup in the cgroupfs v2 hierarchy. */
1506 static void cgv2_escape(void)
1507 {
1508 if (!cgv2_enter("/"))
1509 mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL);
1510 }
1511
/* Escape to the root cgroup everywhere: cgroupfs v1 hierarchies first, then
 * the cgroupfs v2 hierarchy.
 */
static void cg_escape(void)
{
	cgv1_escape();
	cgv2_escape();
}
1518
/* Look up the uid and primary gid of @user in the password database.
 *
 * @user : login name to look up
 * @uid  : receives the user id on success
 * @gid  : receives the primary group id on success
 *
 * Returns true on success. Returns false when memory cannot be allocated,
 * when the lookup fails, or when no matching record exists (the latter is
 * logged).
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long hint;
	int ret;

	/* sysconf() returns a long and -1 when there is no limit; do not
	 * funnel that through an unsigned type before checking it.
	 */
	hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = (hint > 0) ? (size_t)hint : 1024;

	for (;;) {
		char *tmp;

		tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE)
			break;

		/* The size reported by sysconf() is only a hint. ERANGE means
		 * the record did not fit, so retry with a bigger buffer.
		 */
		if (bufsize > SIZE_MAX / 2) {
			free(buf);
			return false;
		}
		bufsize *= 2;
	}

	if (!pwentp) {
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1552
/* Tell whether @path is owned by exactly @uid and @gid.
 *
 * Returns false when @path cannot be stat()ed or when either the owner or the
 * group differs; true only when both match.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat st;

	if (stat(path, &st) < 0)
		return false;

	return st.st_uid == uid && st.st_gid == gid;
}
1566
/* Build a cpumask from a cpulist, i.e. turn
 *
 *	0,2-3
 *
 * into the bit array
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place with strtok_r() and therefore modified. @nbits
 * is the number of bits the mask has to hold. The returned array is allocated
 * with calloc() and must be freed by the caller. NULL is returned when the
 * allocation fails, when a range is reversed, or when an entry is >= @nbits.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *state = NULL;
	char *tok;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &state);
	while (tok) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);
		last = first;

		/* "a-b" denotes a range, a lone number a single cpu. */
		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);

		tok = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1609
/* Concatenate the NULL-terminated string array @parts, placing @sep between
 * consecutive elements and, if @use_as_prefix is set, also in front of the
 * first one.
 *
 * Returns a newly allocated string the caller must free, or NULL when @parts
 * is NULL or the allocation fails.
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **cur;
	char *joined, *out;
	size_t sep_len = strlen(sep);
	size_t total = 0;

	if (!parts)
		return NULL;

	/* First pass: work out how many bytes the result needs. */
	if (use_as_prefix)
		total += sep_len;

	for (cur = parts; *cur; cur++) {
		if (cur != parts)
			total += sep_len;
		total += strlen(*cur);
	}

	/* calloc() zero-fills, so the terminating NUL is already in place. */
	joined = calloc(total + 1, sizeof(char));
	if (!joined)
		return NULL;

	/* Second pass: copy the pieces behind a moving write cursor. */
	out = joined;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (cur = parts; *cur; cur++) {
		size_t part_len = strlen(*cur);

		if (cur != parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}

		memcpy(out, *cur, part_len);
		out += part_len;
	}

	return joined;
}
1642
/* The largest value of a 64-bit unsigned integer is 2^64 - 1, which has 20
 * decimal digits. One more byte is needed for the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn a cpumask into a simple, comma-separated cpulist.
 *
 * @bitarr : bit array to convert
 * @nbits  : number of valid bits in @bitarr; bits 0 .. @nbits - 1 are examined
 *
 * Returns a newly allocated string the caller must free, or NULL when a
 * number cannot be formatted or when no bit is set.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Stop at nbits - 1: bit number nbits is not part of the mask and,
	 * when nbits is a multiple of NBITS, lies beyond the allocation.
	 */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}

		must_append_string(&cpulist, numstr);
	}

	joined = string_join(",", (const char **)cpulist, false);

	/* string_join() returns a separate buffer, so the temporary list has
	 * to be released here.
	 */
	free_string_list(cpulist);

	return joined;
}
1669
/* Return the last number that appears in @cpulist, i.e. the number following
 * the right-most ',' or '-' (or the whole string if it contains neither).
 * For a sorted cpulist such as "0,2-3" this is the highest cpu index.
 *
 * Returns -1 when strtoul() reports an error through errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *after_comma, *after_dash, *tail;
	size_t highest;

	after_comma = strrchr(cpulist, ',');
	after_dash = strrchr(cpulist, '-');

	/* Pick whichever separator occurs last in the string. */
	if (!after_comma && !after_dash)
		tail = cpulist;
	else if (!after_comma || (after_dash && after_dash > after_comma))
		tail = after_dash + 1;
	else
		tail = after_comma + 1;

	errno = 0;
	highest = strtoul(tail, NULL, 0);
	if (errno != 0)
		return -1;

	return highest;
}
1702
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Make sure the cpuset.cpus file of the cgroup at @path does not contain
 * isolated cpus.
 *
 * @path           : cgroup directory; temporarily truncated at its last '/' to
 *                   address the parent directory and restored before returning
 * @am_initialized : whether the cgroup's cpuset was already set up by someone
 *
 * The parent's cpuset.cpus is read. When there are no isolated cpus, the
 * parent's value is copied to @path/cpuset.cpus if @am_initialized is false,
 * otherwise nothing is written. When isolated cpus are part of the parent's
 * set, they are removed and the remainder is written to @path/cpuset.cpus.
 *
 * Returns true on success, false on any read, parse or write failure.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("Invalid path: %s\n", path);
		return bret;
	}

	/* Cut off the last component so that @path names the parent. */
	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}

	if (!isdigit(isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0 || maxisol >= INT_MAX - 1)
		goto on_error;

	/* From here on maxposs is a bit count: highest cpu index plus one. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
		goto on_error;
	}

	/* Both masks hold maxposs bits, i.e. bits 0 .. maxposs - 1. Touching
	 * bit number maxposs would read past the masks.
	 */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list\n");
		goto on_error;
	}

copy_parent:
	/* Put the last component back so that @path names the cgroup again. */
	*lastslash = oldv;

	free(fpath);

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0660);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	*lastslash = oldv;

	free(fpath);
	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1850
/* Copy the contents of parent(@path)/@file to @path/@file.
 *
 * @path : cgroup directory; briefly truncated at its last '/' to build the
 *         parent's file name and restored before any I/O takes place
 * @file : name of the file to copy
 *
 * Returns true when the value was read from the parent and written to the
 * child, false otherwise. @path is left unchanged in either case.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* Only the construction of the parent's file name needs the truncated
	 * path. Restoring it immediately keeps the caller's buffer intact on
	 * the error paths as well.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}

	value = must_realloc(NULL, len + 1);
	if (lxc_read_from_file(fpath, value, len) != len) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}
	/* Terminate the buffer so it can safely be printed as a string. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0660);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1898
1899 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1900 * controller we will reside in the root cgroup. This means that
1901 * cgroup.clone_children will not have been initialized for us so we need to do
1902 * it.
1903 */
1904 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1905 {
1906 char *clonechildrenpath, v;
1907
1908 if (!string_in_list(h->controllers, "cpuset"))
1909 return true;
1910
1911 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1912
1913 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1914 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1915 free(clonechildrenpath);
1916 return false;
1917 }
1918
1919 if (v == '1') { /* already set for us by someone else */
1920 free(clonechildrenpath);
1921 return true;
1922 }
1923
1924 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
1925 /* Set clone_children so children inherit our settings */
1926 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1927 free(clonechildrenpath);
1928 return false;
1929 }
1930
1931 free(clonechildrenpath);
1932 return true;
1933 }
1934
1935 /*
1936 * Initialize the cpuset hierarchy in first directory of @gname and
1937 * set cgroup.clone_children so that children inherit settings.
1938 * Since the h->base_path is populated by init or ourselves, we know
1939 * it is already initialized.
1940 */
1941 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
1942 const char *cgroup)
1943 {
1944 char *cgpath, *clonechildrenpath, v, *slash;
1945
1946 if (!string_in_list(h->controllers, "cpuset"))
1947 return true;
1948
1949 if (*cgroup == '/')
1950 cgroup++;
1951 slash = strchr(cgroup, '/');
1952 if (slash)
1953 *slash = '\0';
1954
1955 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
1956 if (slash)
1957 *slash = '/';
1958
1959 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
1960 pam_cgfs_debug("Failed to create '%s'", cgpath);
1961 free(cgpath);
1962 return false;
1963 }
1964
1965 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
1966 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
1967 free(clonechildrenpath);
1968 free(cgpath);
1969 return true;
1970 }
1971
1972 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1973 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1974 free(clonechildrenpath);
1975 free(cgpath);
1976 return false;
1977 }
1978
1979 /* Make sure any isolated cpus are removed from cpuset.cpus. */
1980 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
1981 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
1982 free(clonechildrenpath);
1983 free(cgpath);
1984 return false;
1985 }
1986
1987 if (v == '1') { /* already set for us by someone else */
1988 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
1989 free(clonechildrenpath);
1990 free(cgpath);
1991 return true;
1992 }
1993
1994 /* copy parent's settings */
1995 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
1996 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
1997 free(cgpath);
1998 free(clonechildrenpath);
1999 return false;
2000 }
2001 free(cgpath);
2002
2003 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
2004 /* Set clone_children so children inherit our settings */
2005 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2006 free(clonechildrenpath);
2007 return false;
2008 }
2009 free(clonechildrenpath);
2010 return true;
2011 }
2012
2013 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2014 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2015 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2016 * to the caller in @existed.
2017 */
2018 #define __PAM_CGFS_USER "/user/"
2019 #define __PAM_CGFS_USER_LEN 6
2020 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2021 {
2022 char *clean_base_cgroup, *path;
2023 char **controller;
2024 struct cgv1_hierarchy *it;
2025 bool created = false;
2026
2027 *existed = false;
2028 it = h;
2029
2030 for (controller = it->controllers; controller && *controller;
2031 controller++) {
2032 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2033 return false;
2034
2035 /* If systemd has already created a cgroup for us, keep using
2036 * it.
2037 */
2038 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2039 it->base_cgroup, uid, gid,
2040 it->systemd_user_slice))
2041 return true;
2042
2043 /* We need to make sure that we do not create an endless chain
2044 * of sub-cgroups. So we check if we have already logged in
2045 * somehow (sudo -i, su, etc.) and have created a
2046 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2047 * cgroups this is unnecessary since we use the init_cgroup
2048 * anyway, but for controllers which have an existing systemd
2049 * cgroup that does not match the current uid, this is pretty
2050 * useful.
2051 */
2052 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2053 free(it->base_cgroup);
2054 it->base_cgroup = must_copy_string("/");
2055 } else {
2056 clean_base_cgroup =
2057 strstr(it->base_cgroup, __PAM_CGFS_USER);
2058 if (clean_base_cgroup)
2059 *clean_base_cgroup = '\0';
2060 }
2061
2062 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2063 pam_cgfs_debug("Constructing path: %s\n", path);
2064
2065 if (file_exists(path)) {
2066 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2067 if (our_cg)
2068 *existed = false;
2069 else
2070 *existed = true;
2071
2072 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2073 path, our_cg ? "" : "not ", uid, gid);
2074 free(path);
2075
2076 return our_cg;
2077 }
2078
2079 created = mkdir_parent(it->mountpoint, path);
2080 if (!created) {
2081 free(path);
2082 continue;
2083 }
2084
2085 if (chown(path, uid, gid) < 0)
2086 mysyslog(LOG_WARNING,
2087 "Failed to chown %s to %d:%d: %s\n", path,
2088 (int)uid, (int)gid, strerror(errno), NULL);
2089
2090 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2091 free(path);
2092 break;
2093 }
2094
2095 return created;
2096 }
2097
2098 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2099 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2100 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2101 */
2102 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2103 {
2104
2105 char *path;
2106
2107 /* Better safe than sorry. */
2108 if (!h->controllers)
2109 return true;
2110
2111 /* Cgroups created by systemd for us which we re-use won't be removed
2112 * here, since we're using init_cgroup + cgroup as path instead of
2113 * base_cgroup + cgroup.
2114 */
2115 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2116 (void)recursive_rmdir(path);
2117 free(path);
2118
2119 return true;
2120 }
2121
2122 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2123 static bool cgv2_remove(const char *cgroup)
2124 {
2125 struct cgv2_hierarchy *v2;
2126 char *path;
2127
2128 if (!cgv2_hierarchies)
2129 return true;
2130
2131 v2 = *cgv2_hierarchies;
2132
2133 /* If we reused an already existing cgroup, don't bother trying to
2134 * remove (a potentially wrong)/the path.
2135 * Cgroups created by systemd for us which we re-use would be removed
2136 * here, since we're using base_cgroup + cgroup as path.
2137 */
2138 if (v2->systemd_user_slice)
2139 return true;
2140
2141 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2142 (void)recursive_rmdir(path);
2143 free(path);
2144
2145 return true;
2146 }
2147
2148 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2149 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2150 * back, to the caller if the creation failed due to @cgroup already existing
2151 * via @existed.
2152 */
2153 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2154 {
2155 struct cgv1_hierarchy **it, **rev_it;
2156 bool all_created = true;
2157
2158 for (it = cgv1_hierarchies; it && *it; it++) {
2159 if (!(*it)->controllers || !(*it)->mountpoint ||
2160 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2161 continue;
2162
2163 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2164 all_created = false;
2165 break;
2166 }
2167 }
2168
2169 if (all_created)
2170 return true;
2171
2172 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2173 rev_it++)
2174 cgv1_remove_one(*rev_it, cgroup);
2175
2176 return false;
2177 }
2178
2179 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2180 * the creation failed due to @cgroup already existing via @existed.
2181 */
2182 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2183 {
2184 int ret;
2185 char *clean_base_cgroup;
2186 char *path;
2187 struct cgv2_hierarchy *v2;
2188 bool our_cg = false, created = false;
2189
2190 *existed = false;
2191
2192 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2193 return true;
2194
2195 v2 = *cgv2_hierarchies;
2196
2197 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2198 * to be placed under our current cgroup.
2199 */
2200 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2201 uid, gid, v2->systemd_user_slice))
2202 goto delegate_files;
2203
2204 /* We need to make sure that we do not create an endless chain of
2205 * sub-cgroups. So we check if we have already logged in somehow (sudo
2206 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2207 * skip that part.
2208 */
2209 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2210 free(v2->base_cgroup);
2211 v2->base_cgroup = must_copy_string("/");
2212 } else {
2213 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2214 if (clean_base_cgroup)
2215 *clean_base_cgroup = '\0';
2216 }
2217
2218 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2219 pam_cgfs_debug("Constructing path \"%s\"\n", path);
2220
2221 if (file_exists(path)) {
2222 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2223 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2224 path, our_cg ? "" : "not ", uid, gid);
2225 free(path);
2226 if (our_cg) {
2227 *existed = false;
2228 goto delegate_files;
2229 } else {
2230 *existed = true;
2231 return false;
2232 }
2233 }
2234
2235 created = mkdir_parent(v2->mountpoint, path);
2236 if (!created) {
2237 free(path);
2238 return false;
2239 }
2240
2241 /* chown cgroup to user */
2242 if (chown(path, uid, gid) < 0)
2243 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2244 path, (int)uid, (int)gid, strerror(errno), NULL);
2245 else
2246 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2247 free(path);
2248
2249 delegate_files:
2250 /* chown cgroup.procs to user */
2251 if (v2->systemd_user_slice)
2252 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2253 "/cgroup.procs", NULL);
2254 else
2255 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2256 "/cgroup.procs", NULL);
2257
2258 ret = chown(path, uid, gid);
2259 if (ret < 0)
2260 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2261 path, (int)uid, (int)gid, strerror(errno), NULL);
2262 else
2263 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2264 free(path);
2265
2266 /* chown cgroup.subtree_control to user */
2267 if (v2->systemd_user_slice)
2268 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2269 "/cgroup.subtree_control", NULL);
2270 else
2271 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2272 "/cgroup.subtree_control", NULL);
2273
2274 ret = chown(path, uid, gid);
2275 if (ret < 0)
2276 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2277 path, (int)uid, (int)gid, strerror(errno), NULL);
2278 free(path);
2279
2280 /* chown cgroup.threads to user */
2281 if (v2->systemd_user_slice)
2282 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2283 "/cgroup.threads", NULL);
2284 else
2285 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2286 "/cgroup.threads", NULL);
2287 ret = chown(path, uid, gid);
2288 if (ret < 0 && errno != ENOENT)
2289 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2290 path, (int)uid, (int)gid, strerror(errno), NULL);
2291 free(path);
2292
2293 return true;
2294 }
2295
2296 /* Create writeable cgroups for @user at login. Details can be found in the
2297 * preamble/license at the top of this file.
2298 */
2299 static int handle_login(const char *user, uid_t uid, gid_t gid)
2300 {
2301 int idx = 0, ret;
2302 bool existed;
2303 char cg[PATH_MAX];
2304
2305 cg_escape();
2306
2307 while (idx >= 0) {
2308 ret = snprintf(cg, PATH_MAX, "/user/%s/%d", user, idx);
2309 if (ret < 0 || ret >= PATH_MAX) {
2310 mysyslog(LOG_ERR, "Username too long\n", NULL);
2311 return PAM_SESSION_ERR;
2312 }
2313
2314 existed = false;
2315 if (!cgv2_create(cg, uid, gid, &existed)) {
2316 if (existed) {
2317 cgv2_remove(cg);
2318 idx++;
2319 continue;
2320 }
2321
2322 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2323 return PAM_SESSION_ERR;
2324 }
2325
2326 existed = false;
2327 if (!cgv1_create(cg, uid, gid, &existed)) {
2328 if (existed) {
2329 cgv2_remove(cg);
2330 idx++;
2331 continue;
2332 }
2333
2334 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2335 return PAM_SESSION_ERR;
2336 }
2337
2338 if (!cg_enter(cg)) {
2339 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s\n", cg, user, NULL);
2340 return PAM_SESSION_ERR;
2341 }
2342
2343 break;
2344 }
2345
2346 return PAM_SUCCESS;
2347 }
2348
2349 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2350 * hierarchies.
2351 */
2352 static bool cgv1_prune_empty_cgroups(const char *user)
2353 {
2354 bool controller_removed = true;
2355 bool all_removed = true;
2356 struct cgv1_hierarchy **it;
2357
2358 for (it = cgv1_hierarchies; it && *it; it++) {
2359 int ret;
2360 char *path_base, *path_init;
2361 char **controller;
2362
2363 if (!(*it)->controllers || !(*it)->mountpoint ||
2364 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2365 continue;
2366
2367 for (controller = (*it)->controllers; controller && *controller;
2368 controller++) {
2369 bool path_base_rm, path_init_rm;
2370
2371 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2372 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base);
2373
2374 ret = recursive_rmdir(path_base);
2375 if (ret == -ENOENT || ret >= 0)
2376 path_base_rm = true;
2377 else
2378 path_base_rm = false;
2379 free(path_base);
2380
2381 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2382 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init);
2383
2384 ret = recursive_rmdir(path_init);
2385 if (ret == -ENOENT || ret >= 0)
2386 path_init_rm = true;
2387 else
2388 path_init_rm = false;
2389 free(path_init);
2390
2391 if (!path_base_rm && !path_init_rm) {
2392 controller_removed = false;
2393 continue;
2394 }
2395
2396 controller_removed = true;
2397 break;
2398 }
2399
2400 if (!controller_removed)
2401 all_removed = false;
2402 }
2403
2404 return all_removed;
2405 }
2406
2407 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2408 * hierarchy.
2409 */
2410 static bool cgv2_prune_empty_cgroups(const char *user)
2411 {
2412 int ret;
2413 struct cgv2_hierarchy *v2;
2414 char *path_base, *path_init;
2415 bool path_base_rm, path_init_rm;
2416
2417 if (!cgv2_hierarchies)
2418 return true;
2419
2420 v2 = *cgv2_hierarchies;
2421
2422 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2423 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base);
2424
2425 ret = recursive_rmdir(path_base);
2426 if (ret == -ENOENT || ret >= 0)
2427 path_base_rm = true;
2428 else
2429 path_base_rm = false;
2430 free(path_base);
2431
2432 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2433 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init);
2434
2435 ret = recursive_rmdir(path_init);
2436 if (ret == -ENOENT || ret >= 0)
2437 path_init_rm = true;
2438 else
2439 path_init_rm = false;
2440 free(path_init);
2441
2442 if (!path_base_rm && !path_init_rm)
2443 return false;
2444
2445 return true;
2446 }
2447
/* Prune the cgroups of @user from the cgroupfs v1 hierarchies first and from
 * the cgroupfs v2 hierarchy second. Pruning is best effort: the results of
 * both helpers are intentionally ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	cgv1_prune_empty_cgroups(user);
	cgv2_prune_empty_cgroups(user);
}
2454
2455 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2456 static void cgv1_free_hierarchies(void)
2457 {
2458 struct cgv1_hierarchy **it;
2459
2460 if (!cgv1_hierarchies)
2461 return;
2462
2463 for (it = cgv1_hierarchies; it && *it; it++) {
2464 if ((*it)->controllers) {
2465 char **tmp;
2466 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2467 free(*tmp);
2468
2469 free((*it)->controllers);
2470 }
2471
2472 free((*it)->mountpoint);
2473 free((*it)->base_cgroup);
2474 free((*it)->fullcgpath);
2475 free((*it)->init_cgroup);
2476 }
2477
2478 free(cgv1_hierarchies);
2479 }
2480
2481 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2482 static void cgv2_free_hierarchies(void)
2483 {
2484 struct cgv2_hierarchy **it;
2485
2486 if (!cgv2_hierarchies)
2487 return;
2488
2489 for (it = cgv2_hierarchies; it && *it; it++) {
2490 if ((*it)->controllers) {
2491 char **tmp;
2492
2493 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2494 free(*tmp);
2495
2496 free((*it)->controllers);
2497 }
2498
2499 free((*it)->mountpoint);
2500 free((*it)->base_cgroup);
2501 free((*it)->fullcgpath);
2502 free((*it)->init_cgroup);
2503 }
2504
2505 free(cgv2_hierarchies);
2506 }
2507
/* Release everything recorded about the detected cgroup hierarchies: the
 * cgroupfs v1 list is freed first, the cgroupfs v2 list second.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();
	cgv2_free_hierarchies();
}
2514
2515 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2516 const char **argv)
2517 {
2518 int ret;
2519 uid_t uid = 0;
2520 gid_t gid = 0;
2521 const char *PAM_user = NULL;
2522
2523 ret = pam_get_user(pamh, &PAM_user, NULL);
2524 if (ret != PAM_SUCCESS) {
2525 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2526 return PAM_SESSION_ERR;
2527 }
2528
2529 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2530 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2531 return PAM_SESSION_ERR;
2532 }
2533
2534 if (!cg_init(uid, gid)) {
2535 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2536 return PAM_SESSION_ERR;
2537 }
2538
2539 /* Try to prune cgroups, that are actually empty but were still marked
2540 * as busy by the kernel so we couldn't remove them on session close.
2541 */
2542 cg_prune_empty_cgroups(PAM_user);
2543
2544 if (cg_mount_mode == CGROUP_UNKNOWN)
2545 return PAM_SESSION_ERR;
2546
2547 if (argc > 1 && !strcmp(argv[0], "-c")) {
2548 char **clist = make_string_list(argv[1], ",");
2549
2550 /*
2551 * We don't allow using "all" and other controllers explicitly because
2552 * that simply doesn't make any sense.
2553 */
2554 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2555 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2556 free_string_list(clist);
2557 return PAM_SESSION_ERR;
2558 }
2559
2560 cg_mark_to_make_rw(clist);
2561 free_string_list(clist);
2562 }
2563
2564 return handle_login(PAM_user, uid, gid);
2565 }
2566
2567 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2568 const char **argv)
2569 {
2570 int ret;
2571 uid_t uid = 0;
2572 gid_t gid = 0;
2573 const char *PAM_user = NULL;
2574
2575 ret = pam_get_user(pamh, &PAM_user, NULL);
2576 if (ret != PAM_SUCCESS) {
2577 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2578 return PAM_SESSION_ERR;
2579 }
2580
2581 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2582 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2583 return PAM_SESSION_ERR;
2584 }
2585
2586 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2587 if (!cg_init(uid, gid))
2588 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2589
2590 if (argc > 1 && !strcmp(argv[0], "-c")) {
2591 char **clist = make_string_list(argv[1], ",");
2592
2593 /*
2594 * We don't allow using "all" and other controllers explicitly because
2595 * that simply doesn't make any sense.
2596 */
2597 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2598 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2599 free_string_list(clist);
2600 return PAM_SESSION_ERR;
2601 }
2602
2603 cg_mark_to_make_rw(clist);
2604 free_string_list(clist);
2605 }
2606 }
2607
2608 cg_prune_empty_cgroups(PAM_user);
2609 cg_exit();
2610
2611 return PAM_SUCCESS;
2612 }