]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
coverity: #1438233
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
6 *
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both, cgroupfs v1 and cgroupfs v2, We
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2 where this assumption becomes crucial:
27 * We __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an xserver or similar will not work.
29 *
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
36 #include <ctype.h>
37 #include <dirent.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <pwd.h>
41 #include <stdarg.h>
42 #include <stdbool.h>
43 #include <stdint.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <syslog.h>
48 #include <unistd.h>
49 #include <linux/unistd.h>
50 #include <sys/mount.h>
51 #include <sys/param.h>
52 #include <sys/stat.h>
53 #include <sys/types.h>
54 #include <sys/vfs.h>
55
56 #define PAM_SM_SESSION
57 #include <security/_pam_macros.h>
58 #include <security/pam_modules.h>
59
60 #include "utils.h"
61
62 #ifndef HAVE_STRLCPY
63 #include "include/strlcpy.h"
64 #endif
65
66 #ifndef HAVE_STRLCAT
67 #include "include/strlcat.h"
68 #endif
69
/* Print a diagnostic to @stream, prefixed with the file, line and function of
 * the call site. At least one argument has to follow @format because
 * __VA_ARGS__ is expanded after a comma.
 */
#define pam_cgfs_debug_stream(stream, format, ...) \
	do { \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__); \
	} while (false)

/* Same as pam_cgfs_debug_stream(), always writing to stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* Debug output: forwards to pam_cgfs_error() when DEBUG is defined and expands
 * to nothing otherwise.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...)
#endif /* DEBUG */
83
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
/* Integer division of @n by @d, rounded up. */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
/* Number of NBITS-wide words needed to hold @nr bits (despite the name, the
 * unit is NBITS bits, not sizeof(long)).
 */
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
88
/* Kind of cgroup layout found on the system. The variable starts out as
 * CGROUP_UNINITIALIZED.
 * NOTE(review): judging by the names, MIXED/PURE_V1/PURE_V2 correspond to the
 * mount layouts described in the file header - confirm against the code that
 * assigns cg_mount_mode.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
96
97 /* Common helper functions. Most of these have been taken from LXC. */
98 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
99 static int append_null_to_list(void ***list);
100 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
101 static inline void clear_bit(unsigned bit, uint32_t *bitarr)
102 {
103 bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
104 }
105 static char *copy_to_eol(char *s);
106 static void free_string_list(char **list);
107 static char *get_mountpoint(char *line);
108 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
109 static int handle_login(const char *user, uid_t uid, gid_t gid);
110 static inline bool is_set(unsigned bit, uint32_t *bitarr)
111 {
112 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
113 }
114 static bool is_lxcfs(const char *line);
115 static bool is_cgv1(char *line);
116 static bool is_cgv2(char *line);
117 static void *must_alloc(size_t sz);
118 static void must_add_to_list(char ***clist, char *entry);
119 static void must_append_controller(char **klist, char **nlist, char ***clist,
120 char *entry);
121 static void must_append_string(char ***list, char *entry);
122 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
123 static char *read_file(char *fnam);
124 static int read_from_file(const char *filename, void* buf, size_t count);
125 static int recursive_rmdir(char *dirname);
126 static inline void set_bit(unsigned bit, uint32_t *bitarr)
127 {
128 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
129 }
130 static bool string_in_list(char **list, const char *entry);
131 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
132 static void trim(char *s);
133 static bool write_int(char *path, int v);
134 static ssize_t write_nointr(int fd, const void* buf, size_t count);
135 static int write_to_file(const char *filename, const void *buf, size_t count,
136 bool add_newline);
137
138 /* cgroupfs prototypes. */
139 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
140 static uint32_t *cg_cpumask(char *buf, size_t nbits);
141 static bool cg_copy_parent_file(char *path, char *file);
142 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
143 static bool cg_enter(const char *cgroup);
144 static void cg_escape(void);
145 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
146 static ssize_t cg_get_max_cpus(char *cpulist);
147 static int cg_get_version_of_mntpt(const char *path);
148 static bool cg_init(uid_t uid, gid_t gid);
149 static void cg_mark_to_make_rw(char **list);
150 static void cg_prune_empty_cgroups(const char *user);
151 static bool cg_systemd_created_user_slice(const char *base_cgroup,
152 const char *init_cgroup,
153 const char *in, uid_t uid);
154 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
155 const char *base_cgroup, uid_t uid,
156 gid_t gid,
157 bool systemd_user_slice);
158 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
159 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
160 const char *init_cgroup, uid_t uid);
161 static void cg_systemd_prune_init_scope(char *cg);
162 static bool is_lxcfs(const char *line);
163
164 /* cgroupfs v1 prototypes. */
/* Everything recorded about one mounted cgroupfs v1 hierarchy. Instances are
 * created by cgv1_add_controller() and stored in cgv1_hierarchies.
 */
struct cgv1_hierarchy {
	/* NULL-terminated list of the controllers mounted into this hierarchy. */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process, gathered from /proc/self/cgroup. */
	char *base_cgroup;
	/* Starts out as NULL. NOTE(review): presumably the full path of the
	 * cgroup created for the session - confirm where it is assigned.
	 */
	char *fullcgpath;
	/* cgroup of PID 1, gathered from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv1_mark_to_make_rw() when a writeable cgroup is wanted. */
	bool create_rw_cgroup;
	/* Initialised to false by cgv1_add_controller(). */
	bool systemd_user_slice;
};
174
175 static struct cgv1_hierarchy **cgv1_hierarchies;
176
177 static void cgv1_add_controller(char **clist, char *mountpoint,
178 char *base_cgroup, char *init_cgroup);
179 static bool cgv1_controller_in_clist(char *cgline, char *c);
180 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
181 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
182 char **clist);
183 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
184 bool *existed);
185 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
186 uid_t uid, gid_t gid, bool *existed);
187 static bool cgv1_enter(const char *cgroup);
188 static void cgv1_escape(void);
189 static bool cgv1_get_controllers(char ***klist, char ***nlist);
190 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
191 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
192 char *line);
193 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
194 const char *cgroup);
195 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
196 static bool cgv1_init(uid_t uid, gid_t gid);
197 static void cgv1_mark_to_make_rw(char **clist);
198 static char *cgv1_must_prefix_named(char *entry);
199 static bool cgv1_prune_empty_cgroups(const char *user);
200 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
201 static bool is_cgv1(char *line);
202
203 /* cgroupfs v2 prototypes. */
/* Everything recorded about the mounted cgroupfs v2 hierarchy. Instances are
 * created by cgv2_add_controller() and stored in cgv2_hierarchies.
 */
struct cgv2_hierarchy {
	/* Controller list as handed to cgv2_add_controller(); may be NULL. */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process, gathered from /proc/self/cgroup. */
	char *base_cgroup;
	/* Starts out as NULL. NOTE(review): presumably the full path of the
	 * cgroup created for the session - confirm where it is assigned.
	 */
	char *fullcgpath;
	/* cgroup of PID 1, gathered from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv2_mark_to_make_rw() when a writeable cgroup is wanted. */
	bool create_rw_cgroup;
	/* Value passed by the caller of cgv2_add_controller(). */
	bool systemd_user_slice;
};
213
214 /* Actually this should only be a single hierarchy. But for the sake of
215 * parallelism and because the layout of the cgroupfs v2 is still somewhat
216 * changing, we'll leave it as an array of structs.
217 */
218 static struct cgv2_hierarchy **cgv2_hierarchies;
219
220 static void cgv2_add_controller(char **clist, char *mountpoint,
221 char *base_cgroup, char *init_cgroup,
222 bool systemd_user_slice);
223 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
224 bool *existed);
225 static bool cgv2_enter(const char *cgroup);
226 static void cgv2_escape(void);
227 static char *cgv2_get_current_cgroup(int pid);
228 static bool cgv2_init(uid_t uid, gid_t gid);
229 static void cgv2_mark_to_make_rw(char **clist);
230 static bool cgv2_prune_empty_cgroups(const char *user);
231 static bool cgv2_remove(const char *cgroup);
232 static bool is_cgv2(char *line);
233
/* mkdir() wrapper that temporarily clears the process umask so that @mode is
 * applied as given. The previous umask is restored afterwards and the errno
 * value left by mkdir() survives the restore. Returns what mkdir() returned.
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_mask = umask(0);
	const int ret = mkdir(path, mode);
	const int mkdir_errno = errno;

	umask(old_mask);
	errno = mkdir_errno;

	return ret;
}
247
/* Create every missing directory of @path below @root.
 *
 * @path is walked one component at a time, starting one character past the
 * length of @root. For each component the string is temporarily cut after that
 * component; a directory is created (mode 0755) unless the cut path already
 * exists.
 *
 * Returns true when all components exist or were created, and also when @path
 * is exactly as long as @root. Returns false when @path is shorter than @root
 * or when a mkdir fails; in the latter case @path is left cut at the component
 * that could not be created.
 */
static bool mkdir_parent(const char *root, char *path)
{
	const size_t root_len = strlen(root);
	const size_t path_len = strlen(path);
	char *cur, *end, saved;

	if (path_len < root_len)
		return false;

	if (path_len == root_len)
		return true;

	for (cur = path + root_len + 1;; cur = end + 1) {
		/* Skip any run of separators in front of the component. */
		cur += strspn(cur, "/");
		if (*cur == '\0')
			return true;

		/* Cut the path right after the current component. */
		end = cur + 1 + strcspn(cur + 1, "/");
		saved = *end;
		if (saved != '\0')
			*end = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s.\n", path, strerror(errno));
			return false;
		}

		/* That was the last component. */
		if (saved == '\0')
			return true;

		*end = saved;
	}
}
292
293 /* Common helper functions. Most of these have been taken from LXC. */
/* Send one printf-style message with priority @err to syslog, using the
 * "PAM-CGFS" identity and the LOG_AUTH facility. The log connection is opened
 * and closed around every message.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
304
/* Grow *@mem in steps of BATCH_SIZE bytes so that it can hold @newlen bytes.
 * A reallocation only happens when *@mem is still NULL or when @newlen falls
 * into a later batch than @oldlen. Allocation goes through must_realloc().
 *
 * The batch counts are kept in size_t: storing them in int would narrow the
 * size_t lengths passed in.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	const size_t newbatches = (newlen / BATCH_SIZE) + 1;
	const size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
315
/* Append the @newlen characters of @new, plus its terminating '\0', to the
 * buffer *@dest which currently holds @oldlen characters. The buffer is grown
 * through batch_realloc() first.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	char *tail;

	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	tail = *dest + oldlen;
	memcpy(tail, new, newlen + 1);
}
325
/* Read the whole file @fnam into one allocated, '\0'-terminated buffer.
 *
 * Returns the buffer, which the caller has to free(). Returns NULL when the
 * file cannot be opened and also when it yields no line at all.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	/* getline() returns ssize_t; an int would narrow long lines. */
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
348
/* Grow the NULL-terminated pointer array *@list by one slot. *@list may be
 * NULL, which is treated as an empty array.
 *
 * Returns the index of the slot that is now free for use. The slot after it is
 * set to NULL so the array stays terminated once the caller fills the returned
 * slot; the returned slot itself is not written here. Allocation goes through
 * must_realloc().
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list)
		while ((*list)[count])
			count++;

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
368
/* Append a copy of @entry (made with must_copy_string()) to the
 * NULL-terminated string array *@list, keeping the array NULL-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	const int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
381
/* Strip all trailing '\n' characters from @s in place. Newlines that are not
 * at the end of the string are left alone.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
390
/* Allocate @sz bytes by handing a NULL pointer to must_realloc(). */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
396
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL when @s contains no '\n'. The caller frees the result.
 */
static char *copy_to_eol(char *s)
{
	const char *eol = strchr(s, '\n');
	size_t n;
	char *dup;

	if (eol == NULL)
		return NULL;

	n = (size_t)(eol - s);
	dup = must_alloc(n + 1);
	memcpy(dup, s, n);
	dup[n] = '\0';

	return dup;
}
414
/* Return true if the /proc/<pid>/mountinfo entry @line describes a fuse.lxcfs
 * mount, i.e. if the text starting at the first " - " in the line begins with
 * " - fuse.lxcfs ".
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
424
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs v1
 * mount, i.e. if the text starting at the first " - " in the line begins with
 * " - cgroup ".
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
434
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs v2
 * mount, i.e. if the text starting at the first " - " in the line begins with
 * " - cgroup2 ".
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
444
/* Return true if @entry equals one of the strings in the NULL-terminated array
 * @list. A NULL @list is treated as empty.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
458
/*
 * Split @str at any of the characters in @sep and return the pieces as a
 * NULL-terminated array of allocated strings. @str itself is not modified; a
 * private copy is tokenised. Returns NULL when @str yields no token. The
 * caller releases the result with free_string_list().
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **clist = NULL;
	char *saveptr = NULL;
	char *copy = must_copy_string(str);
	char *tok = strtok_r(copy, sep, &saveptr);

	while (tok) {
		must_add_to_list(&clist, tok);
		tok = strtok_r(NULL, sep, &saveptr);
	}

	free(copy);

	return clist;
}
479
/* Count the entries of the NULL-terminated string array @list. A NULL @list
 * counts as zero entries.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (list)
		while (list[n])
			n++;

	return n;
}
491
/* Free every string of the NULL-terminated array @list and then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list)
		for (i = 0; list[i]; i++)
			free(list[i]);

	free(list);
}
501
/* Write @v as a decimal number followed by a newline to the file @path, which
 * is opened for writing (truncating it). Returns true only if opening, writing
 * and closing all succeeded.
 */
static bool write_int(char *path, int v)
{
	FILE *f = fopen(path, "w");
	bool printed, closed;

	if (!f)
		return false;

	printed = fprintf(f, "%d\n", v) >= 0;
	/* The stream is closed even when the write failed. */
	closed = fclose(f) == 0;

	return printed && closed;
}
520
/* Remove the directory @dirname and all directories below it.
 *
 * Only sub-directories are descended into and removed; entries that are not
 * directories are skipped and left for the final rmdir() to cope with.
 *
 * Returns 0 on success, -ENOENT when @dirname cannot be opened, and -1 when
 * anything else failed along the way. The walk continues after a failure so
 * that as much as possible gets removed; only the first failure is reported
 * through pam_cgfs_debug().
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			if (!r)
				pam_cgfs_debug("Failed to stat %s.\n", pathname);
			r = -1;
			goto next;
		}

		if (!S_ISDIR(st.st_mode))
			goto next;

		if (recursive_rmdir(pathname) < 0)
			r = -1;
next:
		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			pam_cgfs_debug("Failed to delete %s: %s.\n", dirname, strerror(errno));
		r = -1;
	}

	if (closedir(dir) < 0) {
		/* This is a close failure, not a delete failure. */
		if (!r)
			pam_cgfs_debug("Failed to close directory %s: %s.\n", dirname, strerror(errno));
		r = -1;
	}

	return r;
}
572
/* Append a copy of @entry (made with must_copy_string()) to the
 * NULL-terminated string array *@clist, keeping the array NULL-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	const int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
583
/* Return an allocated copy of the fifth space-separated field of the
 * /proc/<pid>/mountinfo entry @line, which is the mountpoint.
 *
 * Returns NULL when the line has fewer than four spaces. Note that @line is
 * modified: the space that ends the field, if there is one, is overwritten
 * with '\0'. The caller frees the result.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *mountpoint;
	size_t n;
	int skipped;

	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	end = strchr(field, ' ');
	if (end)
		*end = '\0';

	n = strlen(field);
	mountpoint = must_alloc(n + 1);
	/* field[n] is the terminator, so it is copied along. */
	memcpy(mountpoint, field, n + 1);

	return mountpoint;
}
611
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * The second colon-separated field of every line is split at ','. Entries
 * starting with "name=" are appended to @nlist (named controllers), all others
 * to @klist (kernel controllers). Lines whose controller field is empty, which
 * is how the cgroupfs v2 hierarchy shows up, are skipped.
 *
 * Returns false if /proc/self/cgroup cannot be opened, true otherwise.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *ctrls, *ctrls_end, *tok;
		char *saveptr = NULL;

		ctrls = strchr(line, ':');
		if (!ctrls)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		/* No second ':' or an empty field (the v2 hierarchy). */
		if (!ctrls_end || ctrls_end == ctrls)
			continue;
		*ctrls_end = '\0';

		tok = strtok_r(ctrls, ",", &saveptr);
		while (tok) {
			char ***target = strncmp(tok, "name=", 5) == 0 ? nlist : klist;

			must_append_string(target, tok);
			tok = strtok_r(NULL, ",", &saveptr);
		}
	}

	fclose(f);
	free(line);

	return true;
}
658
659 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
660 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
661 static bool cgv2_get_controllers(char ***klist)
662 {
663 return -ENOSYS;
664 }
665 */
666
/* Return the cgroupfs v2 cgroup of process @pid.
 *
 * /proc/<pid>/cgroup is read and searched for the first occurrence of "0::/".
 * The text after "0::" up to the end of that line is returned as an allocated
 * string with trailing newlines removed. Returns NULL when the path cannot be
 * built, the file cannot be read, the marker is missing or the line is not
 * newline-terminated. The caller frees the result.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	char *content, *entry;
	char *cgroup = NULL;
	int n;
	/* Room for "/proc", "/" plus the PID text, "/cgroup" and the '\0'. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];

	n = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (n < 0 || n >= __PIDLEN)
		return NULL;

	content = read_file(path);
	if (!content)
		return NULL;

	entry = strstr(content, "0::/");
	if (entry) {
		/* Skip "0::" but keep the leading '/'. */
		cgroup = copy_to_eol(entry + 3);
		if (cgroup)
			trim(cgroup);
	}

	free(content);

	return cgroup;
}
703
/* Return true if at least one string occurs in both NULL-terminated arrays
 * @l1 and @l2. A NULL array is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	char **a, **b;

	if (!l1 || !l2)
		return false;

	for (a = l1; *a; a++)
		for (b = l2; *b; b++)
			if (strcmp(*b, *a) == 0)
				return true;

	return false;
}
720
721 /* For a null-terminated list of controllers @clist, return true if any of those
722 * controllers is already listed the null-terminated list of hierarchies @hlist.
723 * Realistically, if one is present, all must be present.
724 */
725 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
726 {
727 struct cgv1_hierarchy **it;
728
729 for (it = hlist; it && *it; it++)
730 if ((*it)->controllers)
731 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
732 return true;
733 return false;
734
735 }
736
737 /* Set boolean to mark controllers under which we are supposed create a
738 * writeable cgroup.
739 */
740 static void cgv1_mark_to_make_rw(char **clist)
741 {
742 struct cgv1_hierarchy **it;
743
744 for (it = cgv1_hierarchies; it && *it; it++)
745 if ((*it)->controllers)
746 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
747 string_in_list(clist, "all"))
748 (*it)->create_rw_cgroup = true;
749 }
750
751 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
752 * the cgroupfs v2 hierarchy.
753 */
754 static void cgv2_mark_to_make_rw(char **clist)
755 {
756 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
757 if (cgv2_hierarchies)
758 (*cgv2_hierarchies)->create_rw_cgroup = true;
759 }
760
/* Apply the controller selection @clist to both cgroup versions: first to the
 * cgroupfs v1 hierarchies, then to the cgroupfs v2 hierarchy.
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);

	cgv2_mark_to_make_rw(clist);
}
767
/* Return an allocated string consisting of "name=" followed by @entry, e.g.
 * "name=systemd" for "systemd". Returns NULL if formatting fails. The caller
 * frees the result.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* "name=" (5) + entry + '\0' (1). */
	const size_t bufsz = strlen(entry) + 6;
	char *prefixed = must_alloc(bufsz);
	const int n = snprintf(prefixed, bufsz, "name=%s", entry);

	if (n < 0 || (size_t)n >= bufsz) {
		free(prefixed);
		return NULL;
	}

	return prefixed;
}
786
/* Append the controller @entry to the NULL-terminated list *@clist.
 *
 * Nothing is appended when @entry is found in both @klist and @nlist.
 * Otherwise @entry is copied as is when it is a kernel controller (in @klist)
 * or already starts with "name="; any other entry is taken to be a named
 * controller and stored with a "name=" prefix.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	const bool is_kernel = string_in_list(klist, entry);
	char *copy;
	int slot;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		copy = must_copy_string(entry);
	else
		copy = cgv1_must_prefix_named(entry);

	(*clist)[slot] = copy;
}
807
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list
 *
 * The fifth space-separated field of @line has to start with
 * "/sys/fs/cgroup/"; what follows is split at ',' and every piece is passed to
 * must_append_controller() together with @klist and @nlist.
 *
 * Returns the resulting NULL-terminated list, which the caller releases with
 * free_string_list(), or NULL when the line has too few fields, the mountpoint
 * is not below /sys/fs/cgroup/, or no controller was appended.
 *
 * @line is left unchanged on return. The controller names are tokenised in a
 * private copy, because tokenising in place would replace the commas of a
 * co-mounted hierarchy ("cpu,cpuacct") with '\0' and a later
 * get_mountpoint(line) would then only see the path up to the first comma.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char cgroup_root[] = "/sys/fs/cgroup/";
	const size_t root_len = sizeof(cgroup_root) - 1;
	char *field, *field_end, *names, *tok;
	char *saveptr = NULL;
	char **aret = NULL;
	int i;

	field = line;
	for (i = 0; i < 4; i++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	if (strncmp(field, cgroup_root, root_len) != 0)
		return NULL;
	field += root_len;

	field_end = strchr(field, ' ');
	if (!field_end)
		return NULL;

	/* Cut the field only for as long as it takes to copy it. */
	*field_end = '\0';
	names = must_copy_string(field);
	*field_end = ' ';

	for (tok = strtok_r(names, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr))
		must_append_controller(klist, nlist, &aret, tok);

	free(names);

	return aret;
}
847
/* Check if the cgroupfs v1 controller @c is named in @cgline.
 *
 * @cgline points at the comma-separated controller list of a
 * /proc/<pid>/cgroup line; the list ends at the first ':'. Returns true if one
 * of the list entries equals @c exactly. Returns false when @cgline contains
 * no ':' or no entry matches. Empty entries never match.
 *
 * The list is scanned in place and @cgline is not modified. No temporary copy
 * is made, so the stack use does not depend on the length of the line.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *list_end, *tok;
	size_t want;

	list_end = strchr(cgline, ':');
	if (!list_end)
		return false;

	want = strlen(c);

	for (tok = cgline; tok < list_end;) {
		const char *sep = memchr(tok, ',', (size_t)(list_end - tok));
		const size_t toklen = sep ? (size_t)(sep - tok) : (size_t)(list_end - tok);

		if (toklen > 0 && toklen == want && memcmp(tok, c, toklen) == 0)
			return true;

		if (!sep)
			break;
		tok = sep + 1;
	}

	return false;
}
871
/* Look up the cgroup of the cgroupfs v1 controller @controller in
 * @basecginfo, the contents of a /proc/<pid>/cgroup file.
 *
 * Each line is examined after its first ':'. When the controller list found
 * there contains @controller, the text after the following ':' up to the end
 * of the line is returned as an allocated string (see copy_to_eol()). Returns
 * NULL when no line matches or the data is malformed. The caller frees the
 * result.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *line = basecginfo;

	while (line) {
		char *ctrls, *path;

		ctrls = strchr(line, ':');
		if (!ctrls)
			return NULL;
		ctrls++;

		if (cgv1_controller_in_clist(ctrls, controller)) {
			path = strchr(ctrls, ':');
			if (!path)
				return NULL;

			return copy_to_eol(path + 1);
		}

		/* Move on to the line after this one, if there is one. */
		line = strchr(ctrls, '\n');
		if (line)
			line++;
	}

	return NULL;
}
904
/* Remove a trailing "/init.scope" from the string @cg in place. This will
 * mostly affect systemd-based systems.
 *
 * When @cg is exactly "/init.scope" it becomes "/". A NULL @cg and strings
 * that do not end in "/init.scope" are left untouched.
 *
 * The lengths are compared before any pointer is formed, so no pointer in
 * front of the start of @cg is ever computed for short strings.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scope_len = strlen(INIT_SCOPE);
	size_t cg_len;
	char *suffix;

	if (!cg)
		return;

	cg_len = strlen(cg);
	if (cg_len < scope_len)
		return;

	suffix = cg + (cg_len - scope_len);
	if (strcmp(suffix, INIT_SCOPE) != 0)
		return;

	if (suffix == cg)
		suffix[1] = '\0'; /* keep the leading '/' */
	else
		*suffix = '\0';
}
927
928 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
929 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
930 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
931 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
932 * from /proc/1/cgroup.
933 */
934 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
935 {
936 struct cgv1_hierarchy *new;
937 int newentry;
938
939 new = must_alloc(sizeof(*new));
940 new->controllers = clist;
941 new->mountpoint = mountpoint;
942 new->base_cgroup = base_cgroup;
943 new->fullcgpath = NULL;
944 new->create_rw_cgroup = false;
945 new->init_cgroup = init_cgroup;
946 new->systemd_user_slice = false;
947
948 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
949 cgv1_hierarchies[newentry] = new;
950 }
951
952 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
953 * currently) include the controllers mounted into the hierarchy (e.g. memory,
954 * pids, blkio), the mountpoint of that hierarchy (Should usually be
955 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
956 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
957 * base cgroup of the current process gathered from /proc/self/cgroup, and the
958 * init cgroup of PID1 gathered from /proc/1/cgroup.
959 */
960 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
961 {
962 struct cgv2_hierarchy *new;
963 int newentry;
964
965 new = must_alloc(sizeof(*new));
966 new->controllers = clist;
967 new->mountpoint = mountpoint;
968 new->base_cgroup = base_cgroup;
969 new->fullcgpath = NULL;
970 new->create_rw_cgroup = false;
971 new->init_cgroup = init_cgroup;
972 new->systemd_user_slice = systemd_user_slice;
973
974 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
975 cgv2_hierarchies[newentry] = new;
976 }
977
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * cg_systemd_created_user_slice(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the cgroup path @in has that shape for @uid: it is at least
 * as long as "/user/1.user/1.session", its last element ends in ".session"
 * and its second-to-last element starts with the decimal number @uid.
 * Trailing '/' characters are ignored.
 *
 * @in is inspected in place through indices. The backwards scans stop at the
 * start of the string, so nothing in front of it is read even when the only
 * '/' is the first character.
 *
 * NOTE(review): sscanf() reports success as soon as the number is converted,
 * so the ".user/" part of the format is not actually enforced. That matches
 * the previous behaviour; confirm whether a stricter check is wanted.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	static const char suffix[] = ".session";
	const size_t suffix_len = sizeof(suffix) - 1;
	size_t end, elem, start;
	int id;

	end = strlen(in);
	if (end < strlen("/user/1.user/1.session"))
		return false;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	while (end > 0 && in[end - 1] == '/')
		end--;
	if (end == 0)
		return false;

	/* Find the first character of the last path element. */
	elem = end;
	while (elem > 0 && in[elem - 1] != '/')
		elem--;
	if (elem == 0)
		return false;

	/* make sure it is something.session */
	if (end - elem < strlen("1.session") ||
	    strncmp(in + end - suffix_len, suffix, suffix_len) != 0)
		return false;

	/* ok last path piece checks out, now check the second to last */
	start = elem - 1;
	while (start > 0 && in[start - 1] != '/')
		start--;

	if (sscanf(in + start, "%d.user/", &id) != 1)
		return false;

	return id == (int)uid;
}
1029
/* Return true if @base_cgroup, taken relative to @init_cgroup, starts with
 * "/user.slice/user-$uid.slice/". In that case the cgroup is assumed to belong
 * to @uid.
 *
 * @base_cgroup has to be strictly longer than @init_cgroup and start with it.
 * An @init_cgroup of length one (the root cgroup "/") is not skipped, so that
 * the leading '/' of @base_cgroup takes part in the comparison.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char slice[100];
	const size_t base_len = strlen(base_cgroup);
	size_t init_len = strlen(init_cgroup);
	int n;

	if (base_len <= init_len ||
	    strncmp(base_cgroup, init_cgroup, init_len) != 0)
		return false;

	n = snprintf(slice, sizeof(slice), "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || (size_t)n >= sizeof(slice))
		return false;

	if (init_len == 1)
		init_len = 0; /* keep the '/' */

	return strncmp(base_cgroup + init_len, slice, (size_t)n) == 0;
}
1057
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 *  - they fool us, we create new cgroups, and they get auto-logged-out.
 *  - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 *    lose ownership of their cgroups
 *
 * Returns true if one of the following holds:
 *  - cg_systemd_under_user_slice_1(@in, @uid) accepts the path,
 *  - cg_systemd_under_user_slice_2(@base_cgroup, @init_cgroup, @uid) accepts
 *    the path,
 *  - @in is at least as long as "/user-0.slice/session-0.scope", its last
 *    element looks like "session-*.scope" and its second-to-last element
 *    starts with "user-" followed by the decimal number @uid.
 *
 * @in is inspected in place through indices. The backwards scans stop at the
 * start of the string, so nothing in front of it is read even when the only
 * '/' is the first character.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	static const char prefix[] = "session-";
	static const char suffix[] = ".scope";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t suffix_len = sizeof(suffix) - 1;
	size_t end, elem, start;
	int id;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	end = strlen(in);
	if (end < strlen("/user-0.slice/session-0.scope"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (end > 0 && in[end - 1] == '/')
		end--;
	if (end == 0)
		return false;

	/* Find the first character of the last path element. */
	elem = end;
	while (elem > 0 && in[elem - 1] != '/')
		elem--;
	if (elem == 0)
		return false;

	/* Make sure it is session-something.scope. */
	if (end - elem < prefix_len ||
	    strncmp(in + elem, prefix, prefix_len) != 0 ||
	    strncmp(in + end - suffix_len, suffix, suffix_len) != 0)
		return false;

	/* Ok last path piece checks out, now check the second to last. */
	start = elem - 1;
	while (start > 0 && in[start - 1] != '/')
		start--;

	if (sscanf(in + start, "user-%d.slice/", &id) != 1)
		return false;

	return id == (int)uid;
}
1130
/* Chown existing cgroup that systemd has already created for us.
 *
 * Does nothing and returns false unless @systemd_user_slice is set. Otherwise
 * the cgroup @base_cgroup below @mountpoint is chowned to @uid:@gid and true
 * is returned. A failing chown() is reported through syslog as a warning but
 * does not change the return value.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		/* Only claim success when the chown actually went through. */
		pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
	}

	free(path);
	return true;
}
1154
1155 /* Detect and store information about cgroupfs v1 hierarchies. */
1156 static bool cgv1_init(uid_t uid, gid_t gid)
1157 {
1158 FILE *f;
1159 struct cgv1_hierarchy **it;
1160 char *basecginfo;
1161 char *line = NULL;
1162 char **klist = NULL, **nlist = NULL;
1163 size_t len = 0;
1164
1165 basecginfo = read_file("/proc/self/cgroup");
1166 if (!basecginfo)
1167 return false;
1168
1169 f = fopen("/proc/self/mountinfo", "r");
1170 if (!f) {
1171 free(basecginfo);
1172 return false;
1173 }
1174
1175 cgv1_get_controllers(&klist, &nlist);
1176
1177 while (getline(&line, &len, f) != -1) {
1178 char **controller_list = NULL;
1179 char *mountpoint, *base_cgroup;
1180
1181 if (is_lxcfs(line) || !is_cgv1(line))
1182 continue;
1183
1184 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1185 if (!controller_list)
1186 continue;
1187
1188 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
1189 free(controller_list);
1190 continue;
1191 }
1192
1193 mountpoint = get_mountpoint(line);
1194 if (!mountpoint) {
1195 free_string_list(controller_list);
1196 continue;
1197 }
1198
1199 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1200 if (!base_cgroup) {
1201 free_string_list(controller_list);
1202 free(mountpoint);
1203 continue;
1204 }
1205
1206 trim(base_cgroup);
1207 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1208 "mountpoint \"%s\" and cgroup \"%s\"\n",
1209 controller_list[0], mountpoint, base_cgroup);
1210 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
1211 }
1212
1213 free_string_list(klist);
1214 free_string_list(nlist);
1215 free(basecginfo);
1216 fclose(f);
1217 free(line);
1218
1219 /* Retrieve init cgroup path for all controllers. */
1220 basecginfo = read_file("/proc/1/cgroup");
1221 if (!basecginfo)
1222 return false;
1223
1224 for (it = cgv1_hierarchies; it && *it; it++) {
1225 if ((*it)->controllers) {
1226 char *init_cgroup, *user_slice;
1227
1228 /* We've already stored the controller and received its
1229 * current cgroup. If we now fail to retrieve its init
1230 * cgroup, we should probably fail.
1231 */
1232 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1233 if (!init_cgroup) {
1234 free(basecginfo);
1235 return false;
1236 }
1237
1238 cg_systemd_prune_init_scope(init_cgroup);
1239 (*it)->init_cgroup = init_cgroup;
1240 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1241 "cgroup \"%s\".\n",
1242 (*(*it)->controllers), init_cgroup);
1243 /* Check whether systemd has already created a cgroup
1244 * for us.
1245 */
1246 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1247 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1248 (*it)->systemd_user_slice = true;
1249
1250 free(user_slice);
1251 }
1252 }
1253 free(basecginfo);
1254
1255 return true;
1256 }
1257
1258 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1259 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1260 */
1261 static inline int cg_get_version_of_mntpt(const char *path)
1262 {
1263 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1264 return 1;
1265
1266 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1267 return 2;
1268
1269 return 0;
1270 }
1271
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Returns true when no v2 hierarchy is present or when one was found and
 * registered via cgv2_add_controller(). Returns false if the init cgroup
 * cannot be determined or no v2 mountpoint can be located. @gid is not used
 * here.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* Report the hierarchy only once a mountpoint was really
		 * found; otherwise @mountpoint holds nothing printable.
		 */
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\"\n",
			       mountpoint, current_cgroup, init_cgroup);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* On success the cgroup strings were passed to cgv2_add_controller()
	 * (or are NULL), so they are only released on failure.
	 */
	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1372
1373 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1374 * cgroupfs v2 hierarchy.
1375 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1376 * where some controllers are mounted into their standard cgroupfs v1 locations
1377 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1378 * hierarchy (/sys/fs/cgroup).
1379 */
1380 static bool cg_init(uid_t uid, gid_t gid)
1381 {
1382 if (!cgv1_init(uid, gid))
1383 return false;
1384
1385 if (!cgv2_init(uid, gid))
1386 return false;
1387
1388 if (cgv1_hierarchies && cgv2_hierarchies) {
1389 cg_mount_mode = CGROUP_MIXED;
1390 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies.");
1391 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1392 cg_mount_mode = CGROUP_PURE_V1;
1393 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies.");
1394 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1395 cg_mount_mode = CGROUP_PURE_V2;
1396 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies.");
1397 } else {
1398 cg_mount_mode = CGROUP_UNKNOWN;
1399 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy.\n", NULL);
1400 }
1401
1402 if (cg_mount_mode == CGROUP_UNKNOWN)
1403 return false;
1404
1405 return true;
1406 }
1407
1408 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1409 static bool cgv1_enter(const char *cgroup)
1410 {
1411 struct cgv1_hierarchy **it;
1412
1413 for (it = cgv1_hierarchies; it && *it; it++) {
1414 char **controller;
1415 bool entered = false;
1416
1417 if (!(*it)->controllers || !(*it)->mountpoint ||
1418 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1419 continue;
1420
1421 for (controller = (*it)->controllers; controller && *controller;
1422 controller++) {
1423 char *path;
1424
1425 /* We've already been placed in a user slice, so we
1426 * don't need to enter the cgroup again.
1427 */
1428 if ((*it)->systemd_user_slice) {
1429 entered = true;
1430 break;
1431 }
1432
1433 path = must_make_path((*it)->mountpoint,
1434 (*it)->init_cgroup,
1435 cgroup,
1436 "/cgroup.procs",
1437 NULL);
1438 if (!file_exists(path)) {
1439 free(path);
1440 path = must_make_path((*it)->mountpoint,
1441 (*it)->init_cgroup,
1442 cgroup,
1443 "/tasks",
1444 NULL);
1445 }
1446 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1447 entered = write_int(path, (int)getpid());
1448 if (entered) {
1449 free(path);
1450 break;
1451 }
1452 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1453 free(path);
1454 }
1455 if (!entered)
1456 return false;
1457 }
1458
1459 return true;
1460 }
1461
1462 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1463 static bool cgv2_enter(const char *cgroup)
1464 {
1465 struct cgv2_hierarchy *v2;
1466 char *path;
1467 bool entered = false;
1468
1469 if (!cgv2_hierarchies)
1470 return true;
1471
1472 v2 = *cgv2_hierarchies;
1473
1474 if (!v2->mountpoint || !v2->base_cgroup)
1475 return false;
1476
1477 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1478 return true;
1479
1480 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1481 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1482 entered = write_int(path, (int)getpid());
1483 if (!entered) {
1484 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1485 free(path);
1486 return false;
1487 }
1488
1489 free(path);
1490
1491 return true;
1492 }
1493
1494 /* Wrapper around cgv{1,2}_enter(). */
1495 static bool cg_enter(const char *cgroup)
1496 {
1497 if (!cgv1_enter(cgroup)) {
1498 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups.\n", NULL);
1499 return false;
1500 }
1501
1502 if (!cgv2_enter(cgroup)) {
1503 mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups.\n", NULL);
1504 return false;
1505 }
1506
1507 return true;
1508 }
1509
1510 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1511 static void cgv1_escape(void)
1512 {
1513 struct cgv1_hierarchy **it;
1514
1515 /* In case systemd hasn't already placed us in a user slice for the
1516 * cpuset v1 controller we will reside in the root cgroup. This means
1517 * that cgroup.clone_children will not have been initialized for us so
1518 * we need to do it.
1519 */
1520 for (it = cgv1_hierarchies; it && *it; it++)
1521 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1522 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset.\n", NULL);
1523
1524 if (!cgv1_enter("/"))
1525 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup.\n", NULL);
1526 }
1527
1528 /* Escape to root cgroup in the cgroupfs v2 hierarchy. */
1529 static void cgv2_escape(void)
1530 {
1531 if (!cgv2_enter("/"))
1532 mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup.\n", NULL);
1533 }
1534
/* Escape in the cgroupfs v1 hierarchies first, then in the v2 hierarchy.
 * Neither step reports a result to the caller.
 */
static void cg_escape(void)
{
	cgv1_escape();

	cgv2_escape();
}
1541
/* Look up the passwd entry of @user and store its uid and gid in @uid and
 * @gid. Returns false when the scratch buffer cannot be allocated or when no
 * entry was found; @uid and @gid are left untouched in that case.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	struct passwd entry;
	struct passwd *match = NULL;
	char *scratch;
	size_t scratch_len;
	int err;
	bool found;

	/* Fall back to a fixed size if the system gives no hint. */
	scratch_len = sysconf(_SC_GETPW_R_SIZE_MAX);
	if (scratch_len == -1)
		scratch_len = 1024;

	scratch = malloc(scratch_len);
	if (!scratch)
		return false;

	err = getpwnam_r(user, &entry, scratch, scratch_len, &match);
	found = (match != NULL);
	if (found) {
		*uid = entry.pw_uid;
		*gid = entry.pw_gid;
	} else if (err == 0) {
		/* The lookup itself worked, there just is no such user. */
		mysyslog(LOG_ERR,
			 "Could not find matched password record\n", NULL);
	}

	free(scratch);
	return found;
}
1575
/* Report whether @path can be stat()ed and is owned by exactly @uid and @gid.
 * Callers use this to decide whether an existing cgroup may be reused.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return false;

	return sb.st_uid == uid && sb.st_gid == gid;
}
1589
/* Parse a cpulist such as
 *
 *	0,2-3
 *
 * into a freshly allocated bit array
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place. Returns NULL if the allocation fails, if a
 * range runs backwards or if a cpu index is not below @nbits. The caller
 * frees the returned array.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *cursor = buf;
	char *state = NULL;
	char *tok;
	size_t words = BITS_TO_LONGS(nbits);
	uint32_t *mask;

	mask = calloc(words, sizeof(uint32_t));
	if (!mask)
		return NULL;

	while ((tok = strtok_r(cursor, ",", &state)) != NULL) {
		unsigned first, last;
		char *dash;

		/* Subsequent strtok_r() calls continue from @state. */
		cursor = NULL;

		errno = 0;
		first = strtoul(tok, NULL, 0);

		/* A single number is a range of length one. */
		dash = strchr(tok, '-');
		last = dash ? strtoul(dash + 1, NULL, 0) : first;

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);
	}

	return mask;
}
1631
/* Join the NULL-terminated string array @parts with @sep between the
 * elements. If @use_as_prefix is set, @sep is additionally put in front of
 * the result. Returns a newly allocated string the caller must free, or NULL
 * if @parts is NULL or the allocation fails. An empty array yields "" (or
 * just @sep when prefixed).
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **part;
	char *result, *cursor;
	size_t sep_len, result_len;

	if (!parts)
		return NULL;

	sep_len = strlen(sep);

	/* calculate new string length */
	result_len = use_as_prefix ? sep_len : 0;
	for (part = parts; *part; part++)
		result_len += (part > parts ? sep_len : 0) + strlen(*part);

	result = calloc(result_len + 1, sizeof(char));
	if (!result)
		return NULL;

	/* Append through a write cursor so every byte is copied exactly once
	 * instead of rescanning the buffer for its end on each append.
	 */
	cursor = result;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (part = parts; *part; part++) {
		size_t part_len = strlen(*part);

		if (part > parts) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}

		memcpy(cursor, *part, part_len);
		cursor += part_len;
	}
	*cursor = '\0';

	return result;
}
1663
1664 /* The largest integer that can fit into long int is 2^64. This is a
1665 * 20-digit number.
1666 */
1667 #define __IN_TO_STR_LEN 21
1668 /* Turn cpumask into simple, comma-separated cpulist. */
1669 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
1670 {
1671 size_t i;
1672 int ret;
1673 char numstr[__IN_TO_STR_LEN] = {0};
1674 char **cpulist = NULL;
1675
1676 for (i = 0; i <= nbits; i++) {
1677 if (is_set(i, bitarr)) {
1678 ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
1679 if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
1680 free_string_list(cpulist);
1681 return NULL;
1682 }
1683 must_append_string(&cpulist, numstr);
1684 }
1685 }
1686 return string_join(",", (const char **)cpulist, false);
1687 }
1688
/* Return the number that follows the last ',' or '-' in @cpulist, or the
 * number at the start of @cpulist if it contains neither separator. For a
 * sorted cpulist such as "0,2-7" this is the highest cpu index (7).
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *last_comma, *last_dash, *last_number;
	size_t cpus = 0;

	last_comma = strrchr(cpulist, ',');
	last_dash = strrchr(cpulist, '-');

	/* Pick whichever separator comes later. The missing-separator cases
	 * are handled explicitly so that a null pointer never takes part in
	 * a relational comparison.
	 */
	if (!last_comma && !last_dash)
		last_number = cpulist;
	else if (!last_comma)
		last_number = last_dash + 1;
	else if (!last_dash)
		last_number = last_comma + 1;
	else if (last_comma < last_dash)
		last_number = last_dash + 1;
	else
		last_number = last_comma + 1;

	errno = 0;
	cpus = strtoul(last_number, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
1719
/* write(2) that is restarted for as long as it fails with EINTR. Returns
 * whatever the final write() call returned.
 */
static ssize_t write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
1729
1730 static int write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
1731 {
1732 int fd, saved_errno;
1733 ssize_t ret;
1734
1735 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
1736 if (fd < 0)
1737 return -1;
1738 ret = write_nointr(fd, buf, count);
1739 if (ret < 0)
1740 goto out_error;
1741 if ((size_t)ret != count)
1742 goto out_error;
1743 if (add_newline) {
1744 ret = write_nointr(fd, "\n", 1);
1745 if (ret != 1)
1746 goto out_error;
1747 }
1748 close(fd);
1749 return 0;
1750
1751 out_error:
1752 saved_errno = errno;
1753 close(fd);
1754 errno = saved_errno;
1755 return -1;
1756 }
1757
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Make sure @path/cpuset.cpus does not contain isolated cpus.
 *
 * The parent directory's cpuset.cpus is read as the set of possible cpus and
 * __ISOL_CPUS as the set of isolated cpus:
 * - If there are no isolated cpus and @am_initialized is false, the parent's
 *   list is copied into @path/cpuset.cpus.
 * - If there are no isolated cpus and @am_initialized is true, or if none of
 *   the isolated cpus is part of the possible set, nothing is written.
 * - Otherwise the possible set minus the isolated cpus is written.
 *
 * @path is modified temporarily while the parent path is built, but is always
 * restored before this function returns. Returns true on success.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("Invalid path: %s.\n", path);
		return bret;
	}

	/* Cut @path only for as long as it takes to build the parent's file
	 * name, so the caller's string is intact on every exit path.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	*lastslash = oldv;

	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s.\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup.\n");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup.\n");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0)
		goto on_error;

	/* Both masks are sized for the larger of the two highest cpu indices;
	 * the increment turns the highest index into a bit count.
	 */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus.\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus.\n");
		goto on_error;
	}

	/* The masks hold @maxposs bits: valid indices are 0 .. maxposs - 1. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset.\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset.\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list.\n");
		goto on_error;
	}

copy_parent:
	/* Done with the parent's file name; switch to our own cpuset.cpus. */
	free(fpath);
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = write_to_file(fpath, cpulist, strlen(cpulist), false);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s.\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	free(fpath);

	free(isolcpus);
	free(isolmask);

	/* @cpulist may alias @posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1894
1895 int read_from_file(const char *filename, void* buf, size_t count)
1896 {
1897 int fd = -1, saved_errno;
1898 ssize_t ret;
1899
1900 fd = open(filename, O_RDONLY | O_CLOEXEC);
1901 if (fd < 0)
1902 return -1;
1903
1904 if (!buf || !count) {
1905 char buf2[100];
1906 size_t count2 = 0;
1907 while ((ret = read(fd, buf2, 100)) > 0)
1908 count2 += ret;
1909 if (ret >= 0)
1910 ret = count2;
1911 } else {
1912 memset(buf, 0, count);
1913 ret = read(fd, buf, count);
1914 }
1915
1916 if (ret < 0)
1917 pam_cgfs_debug("read %s: %s", filename, strerror(errno));
1918
1919 saved_errno = errno;
1920 close(fd);
1921 errno = saved_errno;
1922 return ret;
1923 }
1924
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is cut at its last '/' only while the parent's file name is built and
 * is restored before any I/O happens, so it is unchanged on return whether the
 * copy succeeds or fails. Returns true if the value was read and written.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* First pass determines the size, second pass fetches the data. */
	len = read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;
	value = must_alloc(len + 1);
	if (read_from_file(fpath, value, len) != len)
		goto bad;
	/* Terminate explicitly: the value is printed with %s below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = write_to_file(fpath, value, len, false);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1962
1963 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1964 * controller we will reside in the root cgroup. This means that
1965 * cgroup.clone_children will not have been initialized for us so we need to do
1966 * it.
1967 */
1968 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1969 {
1970 char *clonechildrenpath, v;
1971
1972 if (!string_in_list(h->controllers, "cpuset"))
1973 return true;
1974
1975 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1976
1977 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
1978 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath);
1979 free(clonechildrenpath);
1980 return false;
1981 }
1982
1983 if (v == '1') { /* already set for us by someone else */
1984 free(clonechildrenpath);
1985 return true;
1986 }
1987
1988 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
1989 /* Set clone_children so children inherit our settings */
1990 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1991 free(clonechildrenpath);
1992 return false;
1993 }
1994 free(clonechildrenpath);
1995 return true;
1996 }
1997
1998 /*
1999 * Initialize the cpuset hierarchy in first directory of @gname and
2000 * set cgroup.clone_children so that children inherit settings.
2001 * Since the h->base_path is populated by init or ourselves, we know
2002 * it is already initialized.
2003 */
2004 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
2005 const char *cgroup)
2006 {
2007 char *cgpath, *clonechildrenpath, v, *slash;
2008
2009 if (!string_in_list(h->controllers, "cpuset"))
2010 return true;
2011
2012 if (*cgroup == '/')
2013 cgroup++;
2014 slash = strchr(cgroup, '/');
2015 if (slash)
2016 *slash = '\0';
2017
2018 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
2019 if (slash)
2020 *slash = '/';
2021 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
2022 pam_cgfs_debug("Failed to create '%s'", cgpath);
2023 free(cgpath);
2024 return false;
2025 }
2026 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
2027 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
2028 free(clonechildrenpath);
2029 free(cgpath);
2030 return true;
2031 }
2032 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
2033 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath);
2034 free(clonechildrenpath);
2035 free(cgpath);
2036 return false;
2037 }
2038
2039 /* Make sure any isolated cpus are removed from cpuset.cpus. */
2040 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
2041 pam_cgfs_debug("%s", "Failed to remove isolated cpus.\n");
2042 free(clonechildrenpath);
2043 free(cgpath);
2044 return false;
2045 }
2046
2047 if (v == '1') { /* already set for us by someone else */
2048 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\".\n");
2049 free(clonechildrenpath);
2050 free(cgpath);
2051 return true;
2052 }
2053
2054 /* copy parent's settings */
2055 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
2056 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings.\n");
2057 free(cgpath);
2058 free(clonechildrenpath);
2059 return false;
2060 }
2061 free(cgpath);
2062
2063 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
2064 /* Set clone_children so children inherit our settings */
2065 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2066 free(clonechildrenpath);
2067 return false;
2068 }
2069 free(clonechildrenpath);
2070 return true;
2071 }
2072
2073 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2074 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2075 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2076 * to the caller in @existed.
2077 */
2078 #define __PAM_CGFS_USER "/user/"
2079 #define __PAM_CGFS_USER_LEN 6
2080 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2081 {
2082 char *clean_base_cgroup, *path;
2083 char **controller;
2084 struct cgv1_hierarchy *it;
2085 bool created = false;
2086
2087 *existed = false;
2088 it = h;
2089 for (controller = it->controllers; controller && *controller;
2090 controller++) {
2091 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2092 return false;
2093
2094 /* If systemd has already created a cgroup for us, keep using
2095 * it.
2096 */
2097 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2098 it->base_cgroup, uid, gid,
2099 it->systemd_user_slice)) {
2100 return true;
2101 }
2102
2103 /* We need to make sure that we do not create an endless chain
2104 * of sub-cgroups. So we check if we have already logged in
2105 * somehow (sudo -i, su, etc.) and have created a
2106 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2107 * cgroups this is unnecessary since we use the init_cgroup
2108 * anyway, but for controllers which have an existing systemd
2109 * cgroup that does not match the current uid, this is pretty
2110 * useful.
2111 */
2112 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2113 free(it->base_cgroup);
2114 it->base_cgroup = must_copy_string("/");
2115 } else {
2116 clean_base_cgroup =
2117 strstr(it->base_cgroup, __PAM_CGFS_USER);
2118 if (clean_base_cgroup)
2119 *clean_base_cgroup = '\0';
2120 }
2121
2122 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2123 pam_cgfs_debug("Constructing path: %s.\n", path);
2124 if (file_exists(path)) {
2125 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2126 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d.\n", path, our_cg ? "" : "not ", uid, gid);
2127 free(path);
2128 if (our_cg)
2129 *existed = false;
2130 else
2131 *existed = true;
2132 return our_cg;
2133 }
2134 created = mkdir_parent(it->mountpoint, path);
2135 if (!created) {
2136 free(path);
2137 continue;
2138 }
2139 if (chown(path, uid, gid) < 0)
2140 mysyslog(LOG_WARNING,
2141 "Failed to chown %s to %d:%d: %s.\n", path,
2142 (int)uid, (int)gid, strerror(errno), NULL);
2143 pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2144 free(path);
2145 break;
2146 }
2147
2148 return created;
2149 }
2150
2151 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2152 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2153 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2154 */
2155 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2156 {
2157
2158 char *path;
2159
2160 /* Better safe than sorry. */
2161 if (!h->controllers)
2162 return true;
2163
2164 /* Cgroups created by systemd for us which we re-use won't be removed
2165 * here, since we're using init_cgroup + cgroup as path instead of
2166 * base_cgroup + cgroup.
2167 */
2168 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2169 (void)recursive_rmdir(path);
2170 free(path);
2171
2172 return true;
2173 }
2174
2175 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2176 static bool cgv2_remove(const char *cgroup)
2177 {
2178 struct cgv2_hierarchy *v2;
2179 char *path;
2180
2181 if (!cgv2_hierarchies)
2182 return true;
2183
2184 v2 = *cgv2_hierarchies;
2185
2186 /* If we reused an already existing cgroup, don't bother trying to
2187 * remove (a potentially wrong)/the path.
2188 * Cgroups created by systemd for us which we re-use would be removed
2189 * here, since we're using base_cgroup + cgroup as path.
2190 */
2191 if (v2->systemd_user_slice)
2192 return true;
2193
2194 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2195 (void)recursive_rmdir(path);
2196 free(path);
2197
2198 return true;
2199 }
2200
2201 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2202 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2203 * back, to the caller if the creation failed due to @cgroup already existing
2204 * via @existed.
2205 */
2206 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2207 {
2208 struct cgv1_hierarchy **it, **rev_it;
2209 bool all_created = true;
2210
2211 for (it = cgv1_hierarchies; it && *it; it++) {
2212 if (!(*it)->controllers || !(*it)->mountpoint ||
2213 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2214 continue;
2215
2216 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2217 all_created = false;
2218 break;
2219 }
2220 }
2221
2222 if (all_created)
2223 return true;
2224
2225 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2226 rev_it++)
2227 cgv1_remove_one(*rev_it, cgroup);
2228
2229 return false;
2230 }
2231
2232 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2233 * the creation failed due to @cgroup already existing via @existed.
2234 */
2235 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2236 {
2237 int ret;
2238 char *clean_base_cgroup;
2239 char *path;
2240 struct cgv2_hierarchy *v2;
2241 bool our_cg = false, created = false;
2242
2243 *existed = false;
2244
2245 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2246 return true;
2247
2248 v2 = *cgv2_hierarchies;
2249
2250 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2251 * to be placed under our current cgroup.
2252 */
2253 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2254 uid, gid, v2->systemd_user_slice))
2255 goto delegate_files;
2256
2257 /* We need to make sure that we do not create an endless chain of
2258 * sub-cgroups. So we check if we have already logged in somehow (sudo
2259 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2260 * skip that part.
2261 */
2262 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2263 free(v2->base_cgroup);
2264 v2->base_cgroup = must_copy_string("/");
2265 } else {
2266 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2267 if (clean_base_cgroup)
2268 *clean_base_cgroup = '\0';
2269 }
2270
2271 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2272 pam_cgfs_debug("Constructing path \"%s\".\n", path);
2273 if (file_exists(path)) {
2274 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2275 pam_cgfs_debug(
2276 "%s existed and does %shave our uid: %d and gid: %d.\n",
2277 path, our_cg ? "" : "not ", uid, gid);
2278 free(path);
2279 if (our_cg) {
2280 *existed = false;
2281 goto delegate_files;
2282 } else {
2283 *existed = true;
2284 return false;
2285 }
2286 }
2287
2288 created = mkdir_parent(v2->mountpoint, path);
2289 if (!created) {
2290 free(path);
2291 return false;
2292 }
2293
2294 /* chown cgroup to user */
2295 if (chown(path, uid, gid) < 0)
2296 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2297 path, (int)uid, (int)gid, strerror(errno), NULL);
2298 else
2299 pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2300 free(path);
2301
2302 delegate_files:
2303 /* chown cgroup.procs to user */
2304 if (v2->systemd_user_slice)
2305 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2306 "/cgroup.procs", NULL);
2307 else
2308 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2309 "/cgroup.procs", NULL);
2310 ret = chown(path, uid, gid);
2311 if (ret < 0)
2312 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2313 path, (int)uid, (int)gid, strerror(errno), NULL);
2314 else
2315 pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2316 free(path);
2317
2318 /* chown cgroup.subtree_control to user */
2319 if (v2->systemd_user_slice)
2320 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2321 "/cgroup.subtree_control", NULL);
2322 else
2323 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2324 "/cgroup.subtree_control", NULL);
2325 ret = chown(path, uid, gid);
2326 if (ret < 0)
2327 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2328 path, (int)uid, (int)gid, strerror(errno), NULL);
2329 free(path);
2330
2331 /* chown cgroup.threads to user */
2332 if (v2->systemd_user_slice)
2333 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2334 "/cgroup.threads", NULL);
2335 else
2336 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2337 "/cgroup.threads", NULL);
2338 ret = chown(path, uid, gid);
2339 if (ret < 0 && errno != ENOENT)
2340 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2341 path, (int)uid, (int)gid, strerror(errno), NULL);
2342 free(path);
2343
2344 return true;
2345 }
2346
2347 /* Create writeable cgroups for @user at login. Details can be found in the
2348 * preamble/license at the top of this file.
2349 */
2350 static int handle_login(const char *user, uid_t uid, gid_t gid)
2351 {
2352 int idx = 0, ret;
2353 bool existed;
2354 char cg[MAXPATHLEN];
2355
2356 cg_escape();
2357
2358 while (idx >= 0) {
2359 ret = snprintf(cg, MAXPATHLEN, "/user/%s/%d", user, idx);
2360 if (ret < 0 || ret >= MAXPATHLEN) {
2361 mysyslog(LOG_ERR, "Username too long.\n", NULL);
2362 return PAM_SESSION_ERR;
2363 }
2364
2365 existed = false;
2366 if (!cgv2_create(cg, uid, gid, &existed)) {
2367 if (existed) {
2368 cgv2_remove(cg);
2369 idx++;
2370 continue;
2371 }
2372 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
2373 return PAM_SESSION_ERR;
2374 }
2375
2376 existed = false;
2377 if (!cgv1_create(cg, uid, gid, &existed)) {
2378 if (existed) {
2379 cgv2_remove(cg);
2380 idx++;
2381 continue;
2382 }
2383 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
2384 return PAM_SESSION_ERR;
2385 }
2386
2387 if (!cg_enter(cg)) {
2388 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s.\n", cg, user, NULL);
2389 return PAM_SESSION_ERR;
2390 }
2391 break;
2392 }
2393
2394 return PAM_SUCCESS;
2395 }
2396
2397 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2398 * hierarchies.
2399 */
2400 static bool cgv1_prune_empty_cgroups(const char *user)
2401 {
2402 bool controller_removed = true;
2403 bool all_removed = true;
2404 struct cgv1_hierarchy **it;
2405
2406 for (it = cgv1_hierarchies; it && *it; it++) {
2407 int ret;
2408 char *path_base, *path_init;
2409 char **controller;
2410
2411 if (!(*it)->controllers || !(*it)->mountpoint ||
2412 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2413 continue;
2414
2415 for (controller = (*it)->controllers; controller && *controller;
2416 controller++) {
2417 bool path_base_rm, path_init_rm;
2418
2419 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2420 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_base);
2421 ret = recursive_rmdir(path_base);
2422 if (ret == -ENOENT || ret >= 0)
2423 path_base_rm = true;
2424 else
2425 path_base_rm = false;
2426 free(path_base);
2427
2428 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2429 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_init);
2430 ret = recursive_rmdir(path_init);
2431 if (ret == -ENOENT || ret >= 0)
2432 path_init_rm = true;
2433 else
2434 path_init_rm = false;
2435 free(path_init);
2436
2437 if (!path_base_rm && !path_init_rm) {
2438 controller_removed = false;
2439 continue;
2440 }
2441
2442 controller_removed = true;
2443 break;
2444 }
2445 if (!controller_removed)
2446 all_removed = false;
2447 }
2448
2449 return all_removed;
2450 }
2451
2452 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2453 * hierarchy.
2454 */
2455 static bool cgv2_prune_empty_cgroups(const char *user)
2456 {
2457 int ret;
2458 struct cgv2_hierarchy *v2;
2459 char *path_base, *path_init;
2460 bool path_base_rm, path_init_rm;
2461
2462 if (!cgv2_hierarchies)
2463 return true;
2464
2465 v2 = *cgv2_hierarchies;
2466
2467 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2468 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_base);
2469 ret = recursive_rmdir(path_base);
2470 if (ret == -ENOENT || ret >= 0)
2471 path_base_rm = true;
2472 else
2473 path_base_rm = false;
2474 free(path_base);
2475
2476 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2477 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_init);
2478 ret = recursive_rmdir(path_init);
2479 if (ret == -ENOENT || ret >= 0)
2480 path_init_rm = true;
2481 else
2482 path_init_rm = false;
2483 free(path_init);
2484
2485 if (!path_base_rm && !path_init_rm)
2486 return false;
2487
2488 return true;
2489 }
2490
/* Prune the cgroups of @user from the cgroupfs v1 hierarchies first and from
 * the cgroupfs v2 hierarchy second. Pruning is best effort: the results of
 * both helpers are deliberately ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);

	(void)cgv2_prune_empty_cgroups(user);
}
2497
2498 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2499 static void cgv1_free_hierarchies(void)
2500 {
2501 struct cgv1_hierarchy **it;
2502
2503 if (!cgv1_hierarchies)
2504 return;
2505
2506 for (it = cgv1_hierarchies; it && *it; it++) {
2507 if ((*it)->controllers) {
2508 char **tmp;
2509 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2510 free(*tmp);
2511
2512 free((*it)->controllers);
2513 }
2514 free((*it)->mountpoint);
2515 free((*it)->base_cgroup);
2516 free((*it)->fullcgpath);
2517 free((*it)->init_cgroup);
2518 }
2519 free(cgv1_hierarchies);
2520 }
2521
2522 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2523 static void cgv2_free_hierarchies(void)
2524 {
2525 struct cgv2_hierarchy **it;
2526
2527 if (!cgv2_hierarchies)
2528 return;
2529
2530 for (it = cgv2_hierarchies; it && *it; it++) {
2531 if ((*it)->controllers) {
2532 char **tmp;
2533 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2534 free(*tmp);
2535
2536 free((*it)->controllers);
2537 }
2538 free((*it)->mountpoint);
2539 free((*it)->base_cgroup);
2540 free((*it)->fullcgpath);
2541 free((*it)->init_cgroup);
2542 }
2543 free(cgv2_hierarchies);
2544 }
2545
/* Release everything recorded about the detected cgroup hierarchies: the
 * cgroupfs v1 hierarchies first, the cgroupfs v2 hierarchy second.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();

	cgv2_free_hierarchies();
}
2552
2553 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2554 const char **argv)
2555 {
2556 int ret;
2557 uid_t uid = 0;
2558 gid_t gid = 0;
2559 const char *PAM_user = NULL;
2560
2561 ret = pam_get_user(pamh, &PAM_user, NULL);
2562 if (ret != PAM_SUCCESS) {
2563 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2564 return PAM_SESSION_ERR;
2565 }
2566
2567 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2568 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2569 return PAM_SESSION_ERR;
2570 }
2571
2572 if (!cg_init(uid, gid)) {
2573 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2574 return PAM_SESSION_ERR;
2575 }
2576
2577 /* Try to prune cgroups, that are actually empty but were still marked
2578 * as busy by the kernel so we couldn't remove them on session close.
2579 */
2580 cg_prune_empty_cgroups(PAM_user);
2581
2582 if (cg_mount_mode == CGROUP_UNKNOWN)
2583 return PAM_SESSION_ERR;
2584
2585 if (argc > 1 && !strcmp(argv[0], "-c")) {
2586 char **clist = make_string_list(argv[1], ",");
2587
2588 /*
2589 * We don't allow using "all" and other controllers explicitly because
2590 * that simply doesn't make any sense.
2591 */
2592 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2593 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'.\n", NULL);
2594 free_string_list(clist);
2595 return PAM_SESSION_ERR;
2596 }
2597
2598 cg_mark_to_make_rw(clist);
2599 free_string_list(clist);
2600 }
2601
2602 return handle_login(PAM_user, uid, gid);
2603 }
2604
2605 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2606 const char **argv)
2607 {
2608 int ret;
2609 uid_t uid = 0;
2610 gid_t gid = 0;
2611 const char *PAM_user = NULL;
2612
2613 ret = pam_get_user(pamh, &PAM_user, NULL);
2614 if (ret != PAM_SUCCESS) {
2615 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2616 return PAM_SESSION_ERR;
2617 }
2618
2619 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2620 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2621 return PAM_SESSION_ERR;
2622 }
2623
2624 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2625 if (!cg_init(uid, gid))
2626 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2627
2628 if (argc > 1 && !strcmp(argv[0], "-c")) {
2629 char **clist = make_string_list(argv[1], ",");
2630
2631 /*
2632 * We don't allow using "all" and other controllers explicitly because
2633 * that simply doesn't make any sense.
2634 */
2635 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2636 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'.\n", NULL);
2637 free_string_list(clist);
2638 return PAM_SESSION_ERR;
2639 }
2640
2641 cg_mark_to_make_rw(clist);
2642 free_string_list(clist);
2643 }
2644 }
2645
2646 cg_prune_empty_cgroups(PAM_user);
2647 cg_exit();
2648
2649 return PAM_SUCCESS;
2650 }