]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
cgfs: remove redundancy utils
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
6 *
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both cgroupfs v1 and cgroupfs v2, we
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2, where this assumption becomes crucial:
27 * we __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an X server or similar will not work.
29 *
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
36 #ifndef _GNU_SOURCE
37 #define _GNU_SOURCE 1
38 #endif
39 #include <ctype.h>
40 #include <dirent.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <linux/unistd.h>
44 #include <pwd.h>
45 #include <stdarg.h>
46 #include <stdbool.h>
47 #include <stdint.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <sys/mount.h>
52 #include <sys/param.h>
53 #include <sys/stat.h>
54 #include <sys/types.h>
55 #include <sys/vfs.h>
56 #include <syslog.h>
57 #include <unistd.h>
58
59 #include "config.h"
60 #include "macro.h"
61 #include "utils.h"
62
63 #define PAM_SM_SESSION
64 #include <security/_pam_macros.h>
65 #include <security/pam_modules.h>
66
67 #ifndef HAVE_STRLCPY
68 #include "include/strlcpy.h"
69 #endif
70
71 #ifndef HAVE_STRLCAT
72 #include "include/strlcat.h"
73 #endif
74
/* Print a printf-style message to @stream, prefixed with the source file, line
 * and function of the call site. At least one argument must follow @format.
 */
#define pam_cgfs_debug_stream(stream, format, ...)                         \
	do {                                                               \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__);                            \
	} while (false)

/* Same as pam_cgfs_debug_stream(), but always writes to stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* Debug output is only compiled in when DEBUG is defined; otherwise the macro
 * expands to nothing and its arguments are never evaluated.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...)
#endif /* DEBUG */
88
89 /* Taken over modified from the kernel sources. */
90 #define NBITS 32 /* bits in uint32_t */
91 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
92 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
93
94 static enum cg_mount_mode {
95 CGROUP_UNKNOWN = -1,
96 CGROUP_MIXED = 0,
97 CGROUP_PURE_V1 = 1,
98 CGROUP_PURE_V2 = 2,
99 CGROUP_UNINITIALIZED = 3,
100 } cg_mount_mode = CGROUP_UNINITIALIZED;
101
102 /* Common helper functions. Most of these have been taken from LXC. */
103 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
104 static int append_null_to_list(void ***list);
105 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
106 static inline void clear_bit(unsigned bit, uint32_t *bitarr)
107 {
108 bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
109 }
110 static char *copy_to_eol(char *s);
111 static void free_string_list(char **list);
112 static char *get_mountpoint(char *line);
113 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
114 static int handle_login(const char *user, uid_t uid, gid_t gid);
115 static inline bool is_set(unsigned bit, uint32_t *bitarr)
116 {
117 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
118 }
119 static bool is_lxcfs(const char *line);
120 static bool is_cgv1(char *line);
121 static bool is_cgv2(char *line);
122 static void must_add_to_list(char ***clist, char *entry);
123 static void must_append_controller(char **klist, char **nlist, char ***clist,
124 char *entry);
125 static void must_append_string(char ***list, char *entry);
126 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
127 static char *read_file(char *fnam);
128 static int recursive_rmdir(char *dirname);
129 static inline void set_bit(unsigned bit, uint32_t *bitarr)
130 {
131 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
132 }
133 static bool string_in_list(char **list, const char *entry);
134 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
135 static void trim(char *s);
136 static bool write_int(char *path, int v);
137
138 /* cgroupfs prototypes. */
139 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
140 static uint32_t *cg_cpumask(char *buf, size_t nbits);
141 static bool cg_copy_parent_file(char *path, char *file);
142 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
143 static bool cg_enter(const char *cgroup);
144 static void cg_escape(void);
145 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
146 static ssize_t cg_get_max_cpus(char *cpulist);
147 static int cg_get_version_of_mntpt(const char *path);
148 static bool cg_init(uid_t uid, gid_t gid);
149 static void cg_mark_to_make_rw(char **list);
150 static void cg_prune_empty_cgroups(const char *user);
151 static bool cg_systemd_created_user_slice(const char *base_cgroup,
152 const char *init_cgroup,
153 const char *in, uid_t uid);
154 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
155 const char *base_cgroup, uid_t uid,
156 gid_t gid,
157 bool systemd_user_slice);
158 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
159 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
160 const char *init_cgroup, uid_t uid);
161 static void cg_systemd_prune_init_scope(char *cg);
162 static bool is_lxcfs(const char *line);
163
164 /* cgroupfs v1 prototypes. */
/* Everything recorded about one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of the controllers of this hierarchy. */
	char **controllers;

	/* Where the hierarchy is mounted (/sys/fs/cgroup/<controller>). */
	char *mountpoint;

	/* Cgroup of the current process, from /proc/self/cgroup. */
	char *base_cgroup;

	/* NULL when the hierarchy is added; presumably set once a cgroup has
	 * been created in it (not visible here - confirm against users).
	 */
	char *fullcgpath;

	/* Cgroup of PID 1, from /proc/1/cgroup. */
	char *init_cgroup;

	/* Set by cgv1_mark_to_make_rw() when a writeable cgroup is wanted. */
	bool create_rw_cgroup;

	/* false when the hierarchy is added; presumably records that systemd
	 * already placed us in a user slice cgroup (confirm against users).
	 */
	bool systemd_user_slice;
};
174
175 static struct cgv1_hierarchy **cgv1_hierarchies;
176
177 static void cgv1_add_controller(char **clist, char *mountpoint,
178 char *base_cgroup, char *init_cgroup);
179 static bool cgv1_controller_in_clist(char *cgline, char *c);
180 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
181 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
182 char **clist);
183 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
184 bool *existed);
185 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
186 uid_t uid, gid_t gid, bool *existed);
187 static bool cgv1_enter(const char *cgroup);
188 static void cgv1_escape(void);
189 static bool cgv1_get_controllers(char ***klist, char ***nlist);
190 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
191 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
192 char *line);
193 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
194 const char *cgroup);
195 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
196 static bool cgv1_init(uid_t uid, gid_t gid);
197 static void cgv1_mark_to_make_rw(char **clist);
198 static char *cgv1_must_prefix_named(char *entry);
199 static bool cgv1_prune_empty_cgroups(const char *user);
200 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
201 static bool is_cgv1(char *line);
202
203 /* cgroupfs v2 prototypes. */
/* Everything recorded about the mounted cgroupfs v2 hierarchy. */
struct cgv2_hierarchy {
	/* Null-terminated list of the controllers of this hierarchy. */
	char **controllers;

	/* Where the hierarchy is mounted. */
	char *mountpoint;

	/* Cgroup of the current process, from /proc/self/cgroup. */
	char *base_cgroup;

	/* NULL when the hierarchy is added; presumably set once a cgroup has
	 * been created in it (not visible here - confirm against users).
	 */
	char *fullcgpath;

	/* Cgroup of PID 1, from /proc/1/cgroup. */
	char *init_cgroup;

	/* Set by cgv2_mark_to_make_rw() when a writeable cgroup is wanted. */
	bool create_rw_cgroup;

	/* Passed in by the caller of cgv2_add_controller(); presumably records
	 * that systemd already placed us in a user slice cgroup.
	 */
	bool systemd_user_slice;
};
213
214 /* Actually this should only be a single hierarchy. But for the sake of
215 * parallelism and because the layout of the cgroupfs v2 is still somewhat
216 * changing, we'll leave it as an array of structs.
217 */
218 static struct cgv2_hierarchy **cgv2_hierarchies;
219
220 static void cgv2_add_controller(char **clist, char *mountpoint,
221 char *base_cgroup, char *init_cgroup,
222 bool systemd_user_slice);
223 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
224 bool *existed);
225 static bool cgv2_enter(const char *cgroup);
226 static void cgv2_escape(void);
227 static char *cgv2_get_current_cgroup(int pid);
228 static bool cgv2_init(uid_t uid, gid_t gid);
229 static void cgv2_mark_to_make_rw(char **clist);
230 static bool cgv2_prune_empty_cgroups(const char *user);
231 static bool cgv2_remove(const char *cgroup);
232 static bool is_cgv2(char *line);
233
/* Create directory @path with exactly the permissions in @mode by clearing the
 * process umask for the duration of the mkdir() call. Returns what mkdir()
 * returned; errno is the value mkdir() left behind, even though the umask is
 * restored afterwards.
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_mask = umask(0);
	const int ret = mkdir(path, mode);
	const int mkdir_errno = errno;

	/* Put the caller's umask back without losing mkdir()'s errno. */
	umask(old_mask);
	errno = mkdir_errno;

	return ret;
}
247
/* Create directory @path and (if necessary) its parents, starting below @root.
 *
 * The first strlen(@root) + 1 bytes of @path are skipped unexamined, i.e.
 * @path is assumed to be @root followed by a separator and the components to
 * create. Components that already exist are left alone; missing ones are
 * created with mode 0755.
 *
 * @path is modified temporarily while walking the components, but is always
 * restored before returning - including when creating a component fails.
 *
 * Returns true if @path equals @root in length or all components exist
 * afterwards, false if @path is shorter than @root or a mkdir() failed.
 */
static bool mkdir_parent(const char *root, char *path)
{
	const size_t rootlen = strlen(root);
	const size_t pathlen = strlen(path);
	char *cur, *end;

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	/* Step over @root and the separator that follows it. */
	cur = path + rootlen + 1;
	for (;;) {
		char saved;

		/* Collapse runs of separators. */
		while (*cur == '/')
			cur++;
		if (*cur == '\0')
			return true;

		end = cur + 1;
		while (*end != '\0' && *end != '/')
			end++;

		/* Terminate @path after the current component for now. */
		saved = *end;
		*end = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s\n", path, strerror(errno));
			/* Hand the caller back the path it passed in. */
			*end = saved;
			return false;
		}

		if (saved == '\0')
			return true;

		*end = saved;
		cur = end + 1;
	}
}
292
293 /* Common helper functions. Most of these have been taken from LXC. */
/* Send one printf-style message with priority @err to syslog under the
 * "PAM-CGFS" identity (facility LOG_AUTH). The log connection is opened and
 * closed again around every message. The prototype carries the sentinel
 * attribute, so callers end the argument list with NULL.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
304
/* Grow the buffer *@mem in steps of BATCH_SIZE bytes; do not fail.
 *
 * @oldlen and @newlen are the old and the new required length. The buffer is
 * only reallocated when it does not exist yet or when @newlen falls into a
 * later batch than @oldlen. Batch counts are kept as size_t so that large
 * lengths are not truncated.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	const size_t newbatches = (newlen / BATCH_SIZE) + 1;
	const size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
315
/* Append the @newlen bytes of @new, plus the NUL that terminates it, to the
 * buffer *@dest which currently holds @oldlen bytes. The buffer is grown as
 * needed; do not fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	/* Copy the terminating NUL along so *@dest stays a valid string. */
	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
325
/* Read the whole file @fnam and return its content as one allocated,
 * NUL-terminated string which the caller must free().
 *
 * Returns NULL if the file cannot be opened. NULL is also returned when not a
 * single line could be read, e.g. for an empty file.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	ssize_t linelen; /* getline() returns ssize_t, not int */
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
348
/* Given a pointer to a null-terminated array of pointers (or to NULL for an
 * empty list), realloc it to make room for one more entry and null-terminate
 * the enlarged array. Do not fail.
 *
 * Returns the index of the second-to-last slot, i.e. the one that is now
 * available for use. When the list did not exist before, that slot is
 * uninitialized until the caller stores the new entry in it.
 */
static int append_null_to_list(void ***list)
{
	int newentry = 0;

	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* Existing entries + the new slot + the terminating NULL. */
	*list = must_realloc(*list, (newentry + 2) * sizeof(**list));
	(*list)[newentry + 1] = NULL;

	return newentry;
}
367
/* Append an allocated copy of @entry to the null-terminated array of strings
 * @list, keeping the array null-terminated. Do not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	const int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
380
/* Strip all trailing newlines from @s in place. Newlines elsewhere in the
 * string are left untouched.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
389
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL when @s contains no newline. The caller frees the copy.
 */
static char *copy_to_eol(char *s)
{
	const char *eol = strchr(s, '\n');
	size_t n;
	char *out;

	if (!eol)
		return NULL;

	n = (size_t)(eol - s);
	out = must_realloc(NULL, n + 1);
	memcpy(out, s, n);
	out[n] = '\0';

	return out;
}
407
/* Tell whether the /proc/<pid>/mountinfo entry @line describes a fuse.lxcfs
 * mount, i.e. whether the filesystem type after the first " - " separator is
 * "fuse.lxcfs".
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
417
/* Tell whether the /proc/<pid>/mountinfo entry @line describes a cgroupfs v1
 * mount, i.e. whether the filesystem type after the first " - " separator is
 * "cgroup".
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
427
/* Tell whether the /proc/<pid>/mountinfo entry @line describes a cgroupfs v2
 * mount, i.e. whether the filesystem type after the first " - " separator is
 * "cgroup2".
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
437
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list is treated as an empty list.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
451
/*
 * Split @str at any of the characters in @sep and return the pieces as a
 * null-terminated array of allocated strings. Returns NULL when @str contains
 * no token at all. The caller is responsible for calling free_string_list().
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	char *scratch, *piece;

	/* strtok_r() writes into its input, so tokenize a private copy. */
	scratch = must_copy_string(str);

	piece = strtok_r(scratch, sep, &state);
	while (piece) {
		must_add_to_list(&result, piece);
		piece = strtok_r(NULL, sep, &state);
	}

	free(scratch);

	return result;
}
472
/* Count the entries of the null-terminated array of strings @list. A NULL
 * @list counts as empty.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (list)
		while (list[n])
			n++;

	return n;
}
484
/* Free every string of the null-terminated array @list and then the array
 * itself. Passing NULL is fine.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list)
		for (i = 0; list[i]; i++)
			free(list[i]);

	free(list);
}
494
/* Write the decimal value @v followed by a newline to the file @path, which is
 * opened for writing (and thereby truncated). Returns true only if the file
 * could be opened, written and closed without error.
 */
static bool write_int(char *path, int v)
{
	FILE *fp = fopen(path, "w");
	bool printed, closed;

	if (!fp)
		return false;

	/* Close the stream in any case, even when formatting already failed. */
	printed = fprintf(fp, "%d\n", v) >= 0;
	closed = fclose(fp) == 0;

	return printed && closed;
}
513
/* Recursively remove the directory @dirname and all directories below it.
 *
 * Only directories are descended into and removed; entries of any other type
 * are skipped (presumably because files in a cgroupfs disappear together with
 * their cgroup directory - confirm). Errors do not stop the walk: as much as
 * possible is removed and the failure is reported at the end.
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened, and -1 if any
 * stat, removal or close operation failed.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			/* Only the first failure is reported. */
			if (!r)
				pam_cgfs_debug("Failed to stat %s\n", pathname);
			r = -1;
		} else if (S_ISDIR(st.st_mode) && recursive_rmdir(pathname) < 0) {
			r = -1;
		}

		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			pam_cgfs_debug("Failed to delete %s: %s\n", dirname, strerror(errno));
		r = -1;
	}

	if (closedir(dir) < 0) {
		if (!r)
			pam_cgfs_debug("Failed to close directory %s: %s\n", dirname, strerror(errno));
		r = -1;
	}

	return r;
}
566
/* Add an allocated copy of @entry to the null-terminated array of strings
 * @clist. The array stays null-terminated. Do not fail.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	char *dup;
	int idx;

	idx = append_null_to_list((void ***)clist);
	dup = must_copy_string(entry);
	(*clist)[idx] = dup;
}
577
/* Return an allocated copy of the mountpoint of a /proc/<pid>/mountinfo line,
 * i.e. of its fifth space-separated field. Returns NULL if @line has fewer
 * than five fields.
 *
 * Note that @line is modified: when the mountpoint is followed by a space,
 * that space is overwritten with a NUL byte.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *copy;
	size_t n;
	int skipped;

	/* Step over the four fields in front of the mountpoint. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	/* Cut @line off right behind the mountpoint. */
	end = strchr(field, ' ');
	if (end)
		*end = '\0';

	n = strlen(field);
	copy = must_realloc(NULL, n + 1);
	memcpy(copy, field, n);
	copy[n] = '\0';

	return copy;
}
605
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup. Entries
 * starting with "name=" are appended to @nlist (named controllers), all others
 * to @klist (kernel controllers). The 0::/some/path entry of the cgroupfs v2
 * hierarchy has an empty controller field and is skipped.
 *
 * Returns false if /proc/self/cgroup cannot be opened, true otherwise.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *fp;
	char *buf = NULL;
	size_t buflen = 0;

	fp = fopen("/proc/self/cgroup", "r");
	if (!fp)
		return false;

	while (getline(&buf, &buflen, fp) != -1) {
		char *controllers, *end, *name;
		char *state = NULL;

		/* Lines have the form "<id>:<controllers>:<path>". */
		controllers = strchr(buf, ':');
		if (!controllers)
			continue;
		controllers++;

		end = strchr(controllers, ':');
		if (!end)
			continue;
		*end = '\0';

		/* Skip the v2 hierarchy. */
		if (end == controllers)
			continue;

		name = strtok_r(controllers, ",", &state);
		while (name) {
			char ***target = strncmp(name, "name=", 5) == 0 ? nlist : klist;

			must_append_string(target, name);
			name = strtok_r(NULL, ",", &state);
		}
	}

	free(buf);
	fclose(fp);

	return true;
}
652
653 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
654 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
655 static bool cgv2_get_controllers(char ***klist)
656 {
657 return -ENOSYS;
658 }
659 */
660
/* Size of a buffer that holds "/proc/<pid>/cgroup". 21 bytes are reserved for
 * the slash and the pid, which is far more than a formatted int needs. The
 * expansion is parenthesized so the macro can be used in any expression.
 */
#define __PIDLEN (/* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1)

/* Get the current cgroup of process @pid in the cgroupfs v2 hierarchy from
 * /proc/<pid>/cgroup, i.e. the path of the "0::/..." entry.
 *
 * Returns an allocated string starting with '/' that the caller must free().
 * Returns NULL if the file cannot be read, contains no "0::/" entry, or that
 * entry is not terminated by a newline.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	int ret;
	char *cgroups_v2;
	char *current_cgroup;
	char *copy = NULL;
	char path[__PIDLEN];

	ret = snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return NULL;

	cgroups_v2 = read_file(path);
	if (!cgroups_v2)
		return NULL;

	current_cgroup = strstr(cgroups_v2, "0::/");
	if (current_cgroup) {
		/* Skip "0::" so that the copy starts at the leading '/'. */
		copy = copy_to_eol(current_cgroup + 3);
	}

	free(cgroups_v2);
	if (copy)
		trim(copy);

	return copy;
}
697
/* Return true if at least one string occurs in both of the null-terminated
 * lists @l1 and @l2. A NULL list never intersects with anything.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
714
715 /* For a null-terminated list of controllers @clist, return true if any of those
716 * controllers is already listed the null-terminated list of hierarchies @hlist.
717 * Realistically, if one is present, all must be present.
718 */
719 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
720 {
721 struct cgv1_hierarchy **it;
722
723 for (it = hlist; it && *it; it++)
724 if ((*it)->controllers)
725 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
726 return true;
727
728 return false;
729
730 }
731
732 /* Set boolean to mark controllers under which we are supposed create a
733 * writeable cgroup.
734 */
735 static void cgv1_mark_to_make_rw(char **clist)
736 {
737 struct cgv1_hierarchy **it;
738
739 for (it = cgv1_hierarchies; it && *it; it++)
740 if ((*it)->controllers)
741 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
742 string_in_list(clist, "all"))
743 (*it)->create_rw_cgroup = true;
744 }
745
746 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
747 * the cgroupfs v2 hierarchy.
748 */
749 static void cgv2_mark_to_make_rw(char **clist)
750 {
751 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
752 if (cgv2_hierarchies)
753 (*cgv2_hierarchies)->create_rw_cgroup = true;
754 }
755
/* Apply the controller selection @clist to the cgroupfs v1 hierarchies first
 * and to the cgroupfs v2 hierarchy second; see cgv{1,2}_mark_to_make_rw().
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
762
/* Return an allocated copy of @entry prefixed with "name=", e.g. "systemd"
 * becomes "name=systemd". Returns NULL if formatting the string fails.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* sizeof("name=") covers the prefix and the terminating NUL. */
	const size_t size = strlen(entry) + sizeof("name=");
	char *prefixed = must_realloc(NULL, size);
	const int n = snprintf(prefixed, size, "name=%s", entry);

	if (n < 0 || (size_t)n >= size) {
		free(prefixed);
		return NULL;
	}

	return prefixed;
}
781
/* Append the controller @entry to the null-terminated list @clist.
 *
 * Entries that already start with "name=" and entries found in the kernel
 * controller list @klist are copied as they are; anything else is taken to be
 * a named controller and gets the "name=" prefix. An @entry that is listed in
 * both @klist and @nlist is ambiguous and is not appended at all.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	const bool is_kernel = string_in_list(klist, entry);
	char *name;
	int slot;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		name = must_copy_string(entry);
	else
		name = cgv1_must_prefix_named(entry);

	(*clist)[slot] = name;
}
802
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list.
 *
 * Returns a null-terminated list of allocated controller names, or NULL if
 * @line has too few fields, its mountpoint is not below /sys/fs/cgroup/, or no
 * controller was found.
 *
 * @line itself is left unmodified: the controller list is tokenized on a
 * private copy, so a caller can still read the complete mountpoint (including
 * any commas, e.g. "cpu,cpuacct") from the same line afterwards.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char prefix[] = "/sys/fs/cgroup/";
	const size_t prefixlen = sizeof(prefix) - 1;
	char *p, *end, *names, *tok;
	char *saveptr = NULL;
	char **aret = NULL;
	size_t len;
	int i;

	/* The mountpoint is the fifth space-separated field. */
	p = line;
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	if (strncmp(p, prefix, prefixlen) != 0)
		return NULL;
	p += prefixlen;

	end = strchr(p, ' ');
	if (!end)
		return NULL;

	/* strtok_r() overwrites the separators, so work on a copy. */
	len = (size_t)(end - p);
	names = must_realloc(NULL, len + 1);
	memcpy(names, p, len);
	names[len] = '\0';

	for (tok = strtok_r(names, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr))
		must_append_controller(klist, nlist, &aret, tok);

	/* must_append_controller() stored its own copies. */
	free(names);

	return aret;
}
840
/* Check if the cgroupfs v1 controller @c is present in the comma-separated
 * controller list at the start of @cgline. The list ends at the first ':' of
 * @cgline; without a ':' the result is false.
 *
 * Only complete entries match ("cpu" does not match "cpuacct"). Empty entries
 * are ignored, so an empty @c never matches. @cgline is only read, and no
 * temporary copy of it is made.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *end, *tok, *next;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);
	if (clen == 0)
		return false;

	for (tok = cgline; tok < end; tok = next + 1) {
		/* Find the end of the current entry within the list. */
		next = memchr(tok, ',', (size_t)(end - tok));
		if (!next)
			next = end;

		if ((size_t)(next - tok) == clen && memcmp(tok, c, clen) == 0)
			return true;
	}

	return false;
}
865
/* Get the current cgroup of the cgroupfs v1 controller @controller from the
 * content of a /proc/<pid>/cgroup file passed in via @basecginfo.
 *
 * Returns an allocated copy of the cgroup path of the first line whose
 * controller list contains @controller, or NULL if there is no such line or
 * the line is malformed. The caller frees the result.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;

	for (;;) {
		char *sep;

		/* Step over the hierarchy id to the controller list. */
		sep = strchr(cur, ':');
		if (!sep)
			return NULL;
		cur = sep + 1;

		if (cgv1_controller_in_clist(cur, controller)) {
			/* The cgroup path follows the controller list. */
			sep = strchr(cur, ':');
			if (!sep)
				return NULL;

			return copy_to_eol(sep + 1);
		}

		/* Not the right hierarchy; continue with the next line. */
		sep = strchr(cur, '\n');
		if (!sep)
			return NULL;
		cur = sep + 1;
	}
}
898
/* Remove a trailing "/init.scope" from the string @cg in place. This will
 * mostly affect systemd-based systems.
 *
 * If @cg is exactly "/init.scope" it becomes "/", so the result is never an
 * empty path. A NULL @cg and strings that do not end in "/init.scope" are left
 * alone.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scopelen = strlen(INIT_SCOPE);
	size_t len;
	char *point;

	if (!cg)
		return;

	/* Compare lengths first so that no pointer in front of @cg is formed. */
	len = strlen(cg);
	if (len < scopelen)
		return;

	point = cg + (len - scopelen);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0'; /* keep the leading '/' */
	else
		*point = '\0';
}
921
922 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
923 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
924 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
925 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
926 * from /proc/1/cgroup.
927 */
928 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
929 {
930 struct cgv1_hierarchy *new;
931 int newentry;
932
933 new = must_realloc(NULL, sizeof(*new));
934
935 new->controllers = clist;
936 new->mountpoint = mountpoint;
937 new->base_cgroup = base_cgroup;
938 new->fullcgpath = NULL;
939 new->create_rw_cgroup = false;
940 new->init_cgroup = init_cgroup;
941 new->systemd_user_slice = false;
942
943 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
944 cgv1_hierarchies[newentry] = new;
945 }
946
947 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
948 * currently) include the controllers mounted into the hierarchy (e.g. memory,
949 * pids, blkio), the mountpoint of that hierarchy (Should usually be
950 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
951 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
952 * base cgroup of the current process gathered from /proc/self/cgroup, and the
953 * init cgroup of PID1 gathered from /proc/1/cgroup.
954 */
955 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
956 {
957 struct cgv2_hierarchy *new;
958 int newentry;
959
960 new = must_realloc(NULL, sizeof(*new));
961
962 new->controllers = clist;
963 new->mountpoint = mountpoint;
964 new->base_cgroup = base_cgroup;
965 new->fullcgpath = NULL;
966 new->create_rw_cgroup = false;
967 new->init_cgroup = init_cgroup;
968 new->systemd_user_slice = systemd_user_slice;
969
970 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
971 cgv2_hierarchies[newentry] = new;
972 }
973
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if, ignoring trailing slashes, the last element of @in ends in
 * ".session" and the element before it is exactly "<uid>.user" for @uid. The
 * uid must be written as plain decimal digits.
 *
 * @in is only read: the path is scanned in place, so nothing is allocated and
 * no byte in front of the string is ever looked at, not even for paths with a
 * single element.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	static const char session_suffix[] = ".session";
	static const char user_suffix[] = ".user";
	const size_t session_suffix_len = sizeof(session_suffix) - 1;
	const size_t user_suffix_len = sizeof(user_suffix) - 1;
	const size_t inlen = strlen(in);
	const char *end, *last, *prev, *digits;
	uint64_t id = 0;

	if (inlen < strlen("/user/1.user/1.session"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	end = in + inlen;
	while (end > in && end[-1] == '/')
		end--;
	if (end == in)
		return false;

	/* Get the last path element; it has to be preceded by a '/'. */
	last = end;
	while (last > in && last[-1] != '/')
		last--;
	if (last == in)
		return false;

	/* Make sure it is something.session. */
	if ((size_t)(end - last) < strlen("1.session") ||
	    memcmp(end - session_suffix_len, session_suffix, session_suffix_len) != 0)
		return false;

	/* Ok, the last path piece checks out, now find the second to last. It
	 * spans from @prev up to the '/' at last - 1.
	 */
	prev = last - 1;
	while (prev > in && prev[-1] != '/')
		prev--;

	/* Parse the uid. Values that no longer fit into 32 bits are rejected
	 * before the next digit is added, so @id cannot overflow.
	 */
	for (digits = prev; *digits >= '0' && *digits <= '9'; digits++) {
		if (id > UINT32_MAX)
			return false;
		id = id * 10 + (uint64_t)(*digits - '0');
	}
	if (digits == prev)
		return false;

	/* The uid must be followed by ".user" and nothing else. */
	if ((size_t)((last - 1) - digits) != user_suffix_len ||
	    memcmp(digits, user_suffix, user_suffix_len) != 0)
		return false;

	return id == (uint64_t)uid;
}
1027
/* So long as our path relative to init starts with /user.slice/user-$uid.slice,
 * assume it belongs to $uid and chown it.
 *
 * Returns true if @base_cgroup is strictly longer than @init_cgroup, starts
 * with @init_cgroup, and the remainder starts with
 * "/user.slice/user-<uid>.slice/".
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char slice[100];
	const size_t init_len = strlen(init_cgroup);
	const char *relative;
	int n;

	if (strlen(base_cgroup) <= init_len ||
	    strncmp(base_cgroup, init_cgroup, init_len) != 0)
		return false;

	n = snprintf(slice, sizeof(slice), "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || (size_t)n >= sizeof(slice))
		return false;

	/* A one-character init cgroup (presumably "/") is not stripped, so
	 * that the relative path keeps its leading '/'.
	 */
	relative = init_len == 1 ? base_cgroup : base_cgroup + init_len;

	return strncmp(relative, slice, (size_t)n) == 0;
}
1055
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 *   lose ownership of their cgroups
 *
 * Returns true if either of the cg_systemd_under_user_slice_{1,2}() checks
 * succeeds, or if, ignoring trailing slashes, @in ends in
 * "user-<uid>.slice/session-<something>.scope" for @uid. The second to last
 * element has to be exactly "user-<uid>.slice"; the path is never scanned
 * beyond its first byte.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	char *copy, *end, *last, *elem;
	int id;
	int consumed = -1;
	bool bret = false;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	if (strlen(in) < strlen("/user-0.slice/session-0.scope"))
		return false;

	/* The path gets cut into pieces below, so work on a copy. */
	copy = must_copy_string(in);

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	end = copy + strlen(copy);
	while (end > copy && end[-1] == '/')
		*--end = '\0';
	if (end == copy)
		goto cleanup;

	/* Get the last path element; it has to be preceded by a '/'. */
	last = end;
	while (last > copy && last[-1] != '/')
		last--;
	if (last == copy)
		goto cleanup;

	/* Make sure it is session-something.scope. The prefix check guarantees
	 * at least eight characters, so end - 6 stays within the element.
	 */
	if (strncmp(last, "session-", strlen("session-")) != 0 ||
	    strncmp(end - 6, ".scope", 6) != 0)
		goto cleanup;

	/* Ok, the last path piece checks out, now check the second to last.
	 * Cut the string behind the '/' that ends it and search backwards for
	 * its first character without leaving the buffer.
	 */
	*last = '\0';
	elem = last - 1;
	while (elem > copy && elem[-1] != '/')
		elem--;

	/* sscanf() returns 1 as soon as the number is parsed; %n is only
	 * reached, and @consumed only set, if ".slice/" follows the number.
	 */
	if (sscanf(elem, "user-%d.slice/%n", &id, &consumed) != 1 ||
	    consumed < 0 || elem[consumed] != '\0')
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
1129
/* Chown the existing cgroup @base_cgroup below @mountpoint, which systemd has
 * already created for us, to @uid:@gid.
 *
 * Returns false without touching anything if @systemd_user_slice is false.
 * Otherwise returns true; a failing chown() is only reported as a warning via
 * syslog and does not change the return value.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		/* Only claim success when the chown() really happened. */
		pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
	}

	free(path);
	return true;
}
1153
1154 /* Detect and store information about cgroupfs v1 hierarchies. */
1155 static bool cgv1_init(uid_t uid, gid_t gid)
1156 {
1157 FILE *f;
1158 struct cgv1_hierarchy **it;
1159 char *basecginfo;
1160 char *line = NULL;
1161 char **klist = NULL, **nlist = NULL;
1162 size_t len = 0;
1163
1164 basecginfo = read_file("/proc/self/cgroup");
1165 if (!basecginfo)
1166 return false;
1167
1168 f = fopen("/proc/self/mountinfo", "r");
1169 if (!f) {
1170 free(basecginfo);
1171 return false;
1172 }
1173
1174 cgv1_get_controllers(&klist, &nlist);
1175
1176 while (getline(&line, &len, f) != -1) {
1177 char **controller_list = NULL;
1178 char *mountpoint, *base_cgroup;
1179
1180 if (is_lxcfs(line) || !is_cgv1(line))
1181 continue;
1182
1183 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1184 if (!controller_list)
1185 continue;
1186
1187 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
1188 free(controller_list);
1189 continue;
1190 }
1191
1192 mountpoint = get_mountpoint(line);
1193 if (!mountpoint) {
1194 free_string_list(controller_list);
1195 continue;
1196 }
1197
1198 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1199 if (!base_cgroup) {
1200 free_string_list(controller_list);
1201 free(mountpoint);
1202 continue;
1203 }
1204
1205 trim(base_cgroup);
1206 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1207 "mountpoint \"%s\" and cgroup \"%s\"\n",
1208 controller_list[0], mountpoint, base_cgroup);
1209 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
1210 }
1211
1212 free_string_list(klist);
1213 free_string_list(nlist);
1214 free(basecginfo);
1215 fclose(f);
1216 free(line);
1217
1218 /* Retrieve init cgroup path for all controllers. */
1219 basecginfo = read_file("/proc/1/cgroup");
1220 if (!basecginfo)
1221 return false;
1222
1223 for (it = cgv1_hierarchies; it && *it; it++) {
1224 if ((*it)->controllers) {
1225 char *init_cgroup, *user_slice;
1226
1227 /* We've already stored the controller and received its
1228 * current cgroup. If we now fail to retrieve its init
1229 * cgroup, we should probably fail.
1230 */
1231 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1232 if (!init_cgroup) {
1233 free(basecginfo);
1234 return false;
1235 }
1236
1237 cg_systemd_prune_init_scope(init_cgroup);
1238 (*it)->init_cgroup = init_cgroup;
1239 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1240 "cgroup \"%s\"\n",
1241 (*(*it)->controllers), init_cgroup);
1242
1243 /* Check whether systemd has already created a cgroup
1244 * for us.
1245 */
1246 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1247 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1248 (*it)->systemd_user_slice = true;
1249
1250 free(user_slice);
1251 }
1252 }
1253 free(basecginfo);
1254
1255 return true;
1256 }
1257
1258 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1259 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1260 */
1261 static inline int cg_get_version_of_mntpt(const char *path)
1262 {
1263 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1264 return 1;
1265
1266 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1267 return 2;
1268
1269 return 0;
1270 }
1271
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Returns true if no v2 hierarchy is present or if one was found and recorded
 * via cgv2_add_controller(). Returns false if init's v2 cgroup cannot be
 * determined, /proc/self/mountinfo cannot be opened or no v2 mountpoint is
 * found. On failure the cgroup strings retrieved here are freed again.
 *
 * @gid is not used by this function.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

	/* Only report a detection that actually happened: if the loop found
	 * no v2 mount, mountpoint does not refer to a valid string.
	 */
	if (ret) {
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\"\n",
			       mountpoint, current_cgroup, init_cgroup);
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* On success the strings stay alive: they were handed to
	 * cgv2_add_controller() above.
	 */
	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1372
1373 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1374 * cgroupfs v2 hierarchy.
1375 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1376 * where some controllers are mounted into their standard cgroupfs v1 locations
1377 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1378 * hierarchy (/sys/fs/cgroup).
1379 */
1380 static bool cg_init(uid_t uid, gid_t gid)
1381 {
1382 if (!cgv1_init(uid, gid))
1383 return false;
1384
1385 if (!cgv2_init(uid, gid))
1386 return false;
1387
1388 if (cgv1_hierarchies && cgv2_hierarchies) {
1389 cg_mount_mode = CGROUP_MIXED;
1390 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1391 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1392 cg_mount_mode = CGROUP_PURE_V1;
1393 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1394 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1395 cg_mount_mode = CGROUP_PURE_V2;
1396 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1397 } else {
1398 cg_mount_mode = CGROUP_UNKNOWN;
1399 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy\n", NULL);
1400 }
1401
1402 if (cg_mount_mode == CGROUP_UNKNOWN)
1403 return false;
1404
1405 return true;
1406 }
1407
1408 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1409 static bool cgv1_enter(const char *cgroup)
1410 {
1411 struct cgv1_hierarchy **it;
1412
1413 for (it = cgv1_hierarchies; it && *it; it++) {
1414 char **controller;
1415 bool entered = false;
1416
1417 if (!(*it)->controllers || !(*it)->mountpoint ||
1418 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1419 continue;
1420
1421 for (controller = (*it)->controllers; controller && *controller;
1422 controller++) {
1423 char *path;
1424
1425 /* We've already been placed in a user slice, so we
1426 * don't need to enter the cgroup again.
1427 */
1428 if ((*it)->systemd_user_slice) {
1429 entered = true;
1430 break;
1431 }
1432
1433 path = must_make_path((*it)->mountpoint,
1434 (*it)->init_cgroup,
1435 cgroup,
1436 "/cgroup.procs",
1437 NULL);
1438 if (!file_exists(path)) {
1439 free(path);
1440 path = must_make_path((*it)->mountpoint,
1441 (*it)->init_cgroup,
1442 cgroup,
1443 "/tasks",
1444 NULL);
1445 }
1446
1447 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1448 entered = write_int(path, (int)getpid());
1449 if (entered) {
1450 free(path);
1451 break;
1452 }
1453
1454 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1455 free(path);
1456 }
1457
1458 if (!entered)
1459 return false;
1460 }
1461
1462 return true;
1463 }
1464
1465 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1466 static bool cgv2_enter(const char *cgroup)
1467 {
1468 struct cgv2_hierarchy *v2;
1469 char *path;
1470 bool entered = false;
1471
1472 if (!cgv2_hierarchies)
1473 return true;
1474
1475 v2 = *cgv2_hierarchies;
1476
1477 if (!v2->mountpoint || !v2->base_cgroup)
1478 return false;
1479
1480 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1481 return true;
1482
1483 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1484 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1485
1486 entered = write_int(path, (int)getpid());
1487 if (!entered) {
1488 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1489 free(path);
1490 return false;
1491 }
1492
1493 free(path);
1494
1495 return true;
1496 }
1497
/* Enter @cgroup in the cgroupfs v1 hierarchies first and in the cgroupfs v2
 * hierarchy second. The first failing step is logged as a warning and makes
 * the function return false; the v2 step is not attempted if v1 failed.
 */
static bool cg_enter(const char *cgroup)
{
	if (!cgv1_enter(cgroup))
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups\n", NULL);
	else if (!cgv2_enter(cgroup))
		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups\n", NULL);
	else
		return true;

	return false;
}
1513
1514 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1515 static void cgv1_escape(void)
1516 {
1517 struct cgv1_hierarchy **it;
1518
1519 /* In case systemd hasn't already placed us in a user slice for the
1520 * cpuset v1 controller we will reside in the root cgroup. This means
1521 * that cgroup.clone_children will not have been initialized for us so
1522 * we need to do it.
1523 */
1524 for (it = cgv1_hierarchies; it && *it; it++)
1525 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1526 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset\n", NULL);
1527
1528 if (!cgv1_enter("/"))
1529 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL);
1530 }
1531
/* Move us to "/" in the cgroupfs v2 hierarchy; a failure is only logged. */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (!escaped)
		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL);
}
1538
/* Escape in the cgroupfs v1 hierarchies first, then in the cgroupfs v2
 * hierarchy.
 */
static void cg_escape(void)
{
	cgv1_escape();
	cgv2_escape();
}
1545
/* Look up the password database entry of @user and store its uid and gid in
 * @uid and @gid.
 *
 * Returns true on success. Returns false, leaving @uid and @gid untouched, if
 * memory cannot be allocated, the lookup fails or no entry for @user exists
 * (only the last case is logged).
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long sizehint;
	int ret;

	/* sysconf() returns a long and -1 when there is no suggested size, so
	 * look at the signed value before turning it into a size_t.
	 */
	sizehint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = sizehint > 0 ? (size_t)sizehint : 1024;

	for (;;) {
		char *tmp;

		tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE)
			break;

		/* The entry did not fit: retry with a bigger buffer. */
		if (bufsize > SIZE_MAX / 2) {
			free(buf);
			return false;
		}
		bufsize *= 2;
	}

	if (!pwentp) {
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1579
/* Tell whether @path can be stat()ed and is owned by exactly @uid and @gid.
 * Callers use this to decide whether an existing cgroup may be reused.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat st;

	if (stat(path, &st) < 0)
		return false;

	return st.st_uid == uid && st.st_gid == gid;
}
1593
/* Turn the cpulist in @buf into a newly allocated bit array holding at least
 * @nbits bits, e.g.
 *
 *	0,2-3
 *
 * becomes
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place. Returns NULL if the allocation fails, if a range
 * runs backwards or if a cpu number does not fit into @nbits bits.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *saveptr = NULL;
	char *tok;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &saveptr);
	while (tok) {
		unsigned first, last, cpu;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);
		last = first;

		/* "a-b" denotes a range, a lone number a single cpu. */
		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (cpu = first; cpu <= last; cpu++)
			set_bit(cpu, mask);

		tok = strtok_r(NULL, ",", &saveptr);
	}

	return mask;
}
1636
/* Concatenate the NULL-terminated string array @parts into one newly allocated
 * string with @sep between neighbouring elements. If @use_as_prefix is true
 * the result additionally starts with @sep.
 *
 * Returns NULL if @parts is NULL or the allocation fails. The caller owns the
 * returned string.
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **p;
	char *result, *out;
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;

	if (!parts)
		return NULL;

	/* calculate new string length */
	for (p = parts; *p; p++) {
		if (p > parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	result = calloc(result_len + 1, sizeof(char));
	if (!result)
		return NULL;

	/* Append at a running write position instead of rescanning the result
	 * for its end on every element.
	 */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (p = parts; *p; p++) {
		size_t part_len = strlen(*p);

		if (p > parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}

		memcpy(out, *p, part_len);
		out += part_len;
	}
	*out = '\0';

	return result;
}
1669
/* The largest value of a 64-bit unsigned integer is 2^64 - 1, which has 20
 * decimal digits. One more byte is needed for the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn the first @nbits bits of the cpumask @bitarr into a simple,
 * comma-separated cpulist containing one number per set bit.
 *
 * Returns a newly allocated string owned by the caller, or NULL if no bit is
 * set or if formatting or allocating the result fails.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Valid bit indices are 0 .. nbits - 1; index nbits may already lie
	 * behind the end of the array.
	 */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}

		must_append_string(&cpulist, numstr);
	}

	joined = string_join(",", (const char **)cpulist, false);

	/* The joined string is a separate allocation, so the intermediate
	 * list is not needed anymore.
	 */
	free_string_list(cpulist);

	return joined;
}
1696
/* Return the number that follows the last ',' or '-' in @cpulist, or the
 * number at the start of @cpulist if it contains neither separator. For a
 * sorted cpulist such as "0,2-3" this is the highest cpu number.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *comma, *dash;
	char *last = cpulist;
	size_t cpus = 0;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/* Pick whichever separator occurs later. Only pointers into @cpulist
	 * are ever compared with each other, never a NULL pointer.
	 */
	if (comma)
		last = comma + 1;

	if (dash && dash + 1 > last)
		last = dash + 1;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
1729
/* Derive the contents of @path/cpuset.cpus from the cpuset.cpus file in the
 * parent directory of @path, with all isolated cpus listed in __ISOL_CPUS
 * removed.
 *
 * - If there are no isolated cpus: the parent's value is copied verbatim when
 *   @am_initialized is false, and nothing is written when it is true.
 * - If none of the isolated cpus is part of the parent's cpuset, nothing is
 *   written.
 * - Otherwise the filtered cpu list is written.
 *
 * @path is temporarily truncated at its last '/' to address the parent and is
 * restored before returning. Returns true on success, false if a file cannot
 * be read or written or a cpu list cannot be parsed or built.
 */
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("Invalid path: %s\n", path);
		return bret;
	}

	/* Cut off the last component so that path names the parent. */
	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}

	/* isdigit() must not be handed a negative value, so go through
	 * unsigned char.
	 */
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0 || maxisol >= INT_MAX - 1)
		goto on_error;

	/* From here on maxposs is the number of bits needed to represent the
	 * highest cpu of either list.
	 */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
		goto on_error;
	}

	/* Both masks hold maxposs bits, i.e. indices 0 .. maxposs - 1. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list\n");
		goto on_error;
	}

copy_parent:
	/* Address @path itself again instead of its parent. */
	*lastslash = oldv;

	free(fpath);

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0660);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	*lastslash = oldv;

	free(fpath);
	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1877
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is truncated at its last '/' only while the name of the parent's file
 * is built and is intact again on every return path. Returns true if the value
 * was read and written successfully, false otherwise.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	/* fpath is a separate string, so @path can be restored right away and
	 * is never left truncated, whichever way we leave the function.
	 */
	*lastslash = oldv;

	/* First call only determines how many bytes there are to read. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}

	value = must_realloc(NULL, len + 1);
	if (lxc_read_from_file(fpath, value, len) != len) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}
	/* Terminate the value: it is printed with %s if the write fails. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0660);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1925
1926 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1927 * controller we will reside in the root cgroup. This means that
1928 * cgroup.clone_children will not have been initialized for us so we need to do
1929 * it.
1930 */
1931 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1932 {
1933 char *clonechildrenpath, v;
1934
1935 if (!string_in_list(h->controllers, "cpuset"))
1936 return true;
1937
1938 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1939
1940 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1941 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1942 free(clonechildrenpath);
1943 return false;
1944 }
1945
1946 if (v == '1') { /* already set for us by someone else */
1947 free(clonechildrenpath);
1948 return true;
1949 }
1950
1951 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
1952 /* Set clone_children so children inherit our settings */
1953 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1954 free(clonechildrenpath);
1955 return false;
1956 }
1957
1958 free(clonechildrenpath);
1959 return true;
1960 }
1961
1962 /*
1963 * Initialize the cpuset hierarchy in first directory of @gname and
1964 * set cgroup.clone_children so that children inherit settings.
1965 * Since the h->base_path is populated by init or ourselves, we know
1966 * it is already initialized.
1967 */
1968 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
1969 const char *cgroup)
1970 {
1971 char *cgpath, *clonechildrenpath, v, *slash;
1972
1973 if (!string_in_list(h->controllers, "cpuset"))
1974 return true;
1975
1976 if (*cgroup == '/')
1977 cgroup++;
1978 slash = strchr(cgroup, '/');
1979 if (slash)
1980 *slash = '\0';
1981
1982 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
1983 if (slash)
1984 *slash = '/';
1985
1986 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
1987 pam_cgfs_debug("Failed to create '%s'", cgpath);
1988 free(cgpath);
1989 return false;
1990 }
1991
1992 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
1993 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
1994 free(clonechildrenpath);
1995 free(cgpath);
1996 return true;
1997 }
1998
1999 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
2000 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
2001 free(clonechildrenpath);
2002 free(cgpath);
2003 return false;
2004 }
2005
2006 /* Make sure any isolated cpus are removed from cpuset.cpus. */
2007 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
2008 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
2009 free(clonechildrenpath);
2010 free(cgpath);
2011 return false;
2012 }
2013
2014 if (v == '1') { /* already set for us by someone else */
2015 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
2016 free(clonechildrenpath);
2017 free(cgpath);
2018 return true;
2019 }
2020
2021 /* copy parent's settings */
2022 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
2023 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
2024 free(cgpath);
2025 free(clonechildrenpath);
2026 return false;
2027 }
2028 free(cgpath);
2029
2030 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
2031 /* Set clone_children so children inherit our settings */
2032 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2033 free(clonechildrenpath);
2034 return false;
2035 }
2036 free(clonechildrenpath);
2037 return true;
2038 }
2039
2040 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2041 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2042 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2043 * to the caller in @existed.
2044 */
2045 #define __PAM_CGFS_USER "/user/"
2046 #define __PAM_CGFS_USER_LEN 6
2047 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2048 {
2049 char *clean_base_cgroup, *path;
2050 char **controller;
2051 struct cgv1_hierarchy *it;
2052 bool created = false;
2053
2054 *existed = false;
2055 it = h;
2056
2057 for (controller = it->controllers; controller && *controller;
2058 controller++) {
2059 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2060 return false;
2061
2062 /* If systemd has already created a cgroup for us, keep using
2063 * it.
2064 */
2065 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2066 it->base_cgroup, uid, gid,
2067 it->systemd_user_slice))
2068 return true;
2069
2070 /* We need to make sure that we do not create an endless chain
2071 * of sub-cgroups. So we check if we have already logged in
2072 * somehow (sudo -i, su, etc.) and have created a
2073 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2074 * cgroups this is unnecessary since we use the init_cgroup
2075 * anyway, but for controllers which have an existing systemd
2076 * cgroup that does not match the current uid, this is pretty
2077 * useful.
2078 */
2079 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2080 free(it->base_cgroup);
2081 it->base_cgroup = must_copy_string("/");
2082 } else {
2083 clean_base_cgroup =
2084 strstr(it->base_cgroup, __PAM_CGFS_USER);
2085 if (clean_base_cgroup)
2086 *clean_base_cgroup = '\0';
2087 }
2088
2089 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2090 pam_cgfs_debug("Constructing path: %s\n", path);
2091
2092 if (file_exists(path)) {
2093 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2094 if (our_cg)
2095 *existed = false;
2096 else
2097 *existed = true;
2098
2099 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2100 path, our_cg ? "" : "not ", uid, gid);
2101 free(path);
2102
2103 return our_cg;
2104 }
2105
2106 created = mkdir_parent(it->mountpoint, path);
2107 if (!created) {
2108 free(path);
2109 continue;
2110 }
2111
2112 if (chown(path, uid, gid) < 0)
2113 mysyslog(LOG_WARNING,
2114 "Failed to chown %s to %d:%d: %s\n", path,
2115 (int)uid, (int)gid, strerror(errno), NULL);
2116
2117 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2118 free(path);
2119 break;
2120 }
2121
2122 return created;
2123 }
2124
2125 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2126 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2127 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2128 */
2129 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2130 {
2131
2132 char *path;
2133
2134 /* Better safe than sorry. */
2135 if (!h->controllers)
2136 return true;
2137
2138 /* Cgroups created by systemd for us which we re-use won't be removed
2139 * here, since we're using init_cgroup + cgroup as path instead of
2140 * base_cgroup + cgroup.
2141 */
2142 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2143 (void)recursive_rmdir(path);
2144 free(path);
2145
2146 return true;
2147 }
2148
2149 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2150 static bool cgv2_remove(const char *cgroup)
2151 {
2152 struct cgv2_hierarchy *v2;
2153 char *path;
2154
2155 if (!cgv2_hierarchies)
2156 return true;
2157
2158 v2 = *cgv2_hierarchies;
2159
2160 /* If we reused an already existing cgroup, don't bother trying to
2161 * remove (a potentially wrong)/the path.
2162 * Cgroups created by systemd for us which we re-use would be removed
2163 * here, since we're using base_cgroup + cgroup as path.
2164 */
2165 if (v2->systemd_user_slice)
2166 return true;
2167
2168 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2169 (void)recursive_rmdir(path);
2170 free(path);
2171
2172 return true;
2173 }
2174
2175 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2176 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2177 * back, to the caller if the creation failed due to @cgroup already existing
2178 * via @existed.
2179 */
2180 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2181 {
2182 struct cgv1_hierarchy **it, **rev_it;
2183 bool all_created = true;
2184
2185 for (it = cgv1_hierarchies; it && *it; it++) {
2186 if (!(*it)->controllers || !(*it)->mountpoint ||
2187 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2188 continue;
2189
2190 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2191 all_created = false;
2192 break;
2193 }
2194 }
2195
2196 if (all_created)
2197 return true;
2198
2199 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2200 rev_it++)
2201 cgv1_remove_one(*rev_it, cgroup);
2202
2203 return false;
2204 }
2205
2206 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2207 * the creation failed due to @cgroup already existing via @existed.
2208 */
2209 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2210 {
2211 int ret;
2212 char *clean_base_cgroup;
2213 char *path;
2214 struct cgv2_hierarchy *v2;
2215 bool our_cg = false, created = false;
2216
2217 *existed = false;
2218
2219 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2220 return true;
2221
2222 v2 = *cgv2_hierarchies;
2223
2224 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2225 * to be placed under our current cgroup.
2226 */
2227 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2228 uid, gid, v2->systemd_user_slice))
2229 goto delegate_files;
2230
2231 /* We need to make sure that we do not create an endless chain of
2232 * sub-cgroups. So we check if we have already logged in somehow (sudo
2233 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2234 * skip that part.
2235 */
2236 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2237 free(v2->base_cgroup);
2238 v2->base_cgroup = must_copy_string("/");
2239 } else {
2240 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2241 if (clean_base_cgroup)
2242 *clean_base_cgroup = '\0';
2243 }
2244
2245 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2246 pam_cgfs_debug("Constructing path \"%s\"\n", path);
2247
2248 if (file_exists(path)) {
2249 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2250 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2251 path, our_cg ? "" : "not ", uid, gid);
2252 free(path);
2253 if (our_cg) {
2254 *existed = false;
2255 goto delegate_files;
2256 } else {
2257 *existed = true;
2258 return false;
2259 }
2260 }
2261
2262 created = mkdir_parent(v2->mountpoint, path);
2263 if (!created) {
2264 free(path);
2265 return false;
2266 }
2267
2268 /* chown cgroup to user */
2269 if (chown(path, uid, gid) < 0)
2270 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2271 path, (int)uid, (int)gid, strerror(errno), NULL);
2272 else
2273 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2274 free(path);
2275
2276 delegate_files:
2277 /* chown cgroup.procs to user */
2278 if (v2->systemd_user_slice)
2279 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2280 "/cgroup.procs", NULL);
2281 else
2282 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2283 "/cgroup.procs", NULL);
2284
2285 ret = chown(path, uid, gid);
2286 if (ret < 0)
2287 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2288 path, (int)uid, (int)gid, strerror(errno), NULL);
2289 else
2290 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2291 free(path);
2292
2293 /* chown cgroup.subtree_control to user */
2294 if (v2->systemd_user_slice)
2295 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2296 "/cgroup.subtree_control", NULL);
2297 else
2298 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2299 "/cgroup.subtree_control", NULL);
2300
2301 ret = chown(path, uid, gid);
2302 if (ret < 0)
2303 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2304 path, (int)uid, (int)gid, strerror(errno), NULL);
2305 free(path);
2306
2307 /* chown cgroup.threads to user */
2308 if (v2->systemd_user_slice)
2309 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2310 "/cgroup.threads", NULL);
2311 else
2312 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2313 "/cgroup.threads", NULL);
2314 ret = chown(path, uid, gid);
2315 if (ret < 0 && errno != ENOENT)
2316 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2317 path, (int)uid, (int)gid, strerror(errno), NULL);
2318 free(path);
2319
2320 return true;
2321 }
2322
2323 /* Create writeable cgroups for @user at login. Details can be found in the
2324 * preamble/license at the top of this file.
2325 */
2326 static int handle_login(const char *user, uid_t uid, gid_t gid)
2327 {
2328 int idx = 0, ret;
2329 bool existed;
2330 char cg[PATH_MAX];
2331
2332 cg_escape();
2333
2334 while (idx >= 0) {
2335 ret = snprintf(cg, PATH_MAX, "/user/%s/%d", user, idx);
2336 if (ret < 0 || ret >= PATH_MAX) {
2337 mysyslog(LOG_ERR, "Username too long\n", NULL);
2338 return PAM_SESSION_ERR;
2339 }
2340
2341 existed = false;
2342 if (!cgv2_create(cg, uid, gid, &existed)) {
2343 if (existed) {
2344 cgv2_remove(cg);
2345 idx++;
2346 continue;
2347 }
2348
2349 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2350 return PAM_SESSION_ERR;
2351 }
2352
2353 existed = false;
2354 if (!cgv1_create(cg, uid, gid, &existed)) {
2355 if (existed) {
2356 cgv2_remove(cg);
2357 idx++;
2358 continue;
2359 }
2360
2361 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2362 return PAM_SESSION_ERR;
2363 }
2364
2365 if (!cg_enter(cg)) {
2366 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s\n", cg, user, NULL);
2367 return PAM_SESSION_ERR;
2368 }
2369
2370 break;
2371 }
2372
2373 return PAM_SUCCESS;
2374 }
2375
2376 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2377 * hierarchies.
2378 */
2379 static bool cgv1_prune_empty_cgroups(const char *user)
2380 {
2381 bool controller_removed = true;
2382 bool all_removed = true;
2383 struct cgv1_hierarchy **it;
2384
2385 for (it = cgv1_hierarchies; it && *it; it++) {
2386 int ret;
2387 char *path_base, *path_init;
2388 char **controller;
2389
2390 if (!(*it)->controllers || !(*it)->mountpoint ||
2391 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2392 continue;
2393
2394 for (controller = (*it)->controllers; controller && *controller;
2395 controller++) {
2396 bool path_base_rm, path_init_rm;
2397
2398 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2399 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base);
2400
2401 ret = recursive_rmdir(path_base);
2402 if (ret == -ENOENT || ret >= 0)
2403 path_base_rm = true;
2404 else
2405 path_base_rm = false;
2406 free(path_base);
2407
2408 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2409 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init);
2410
2411 ret = recursive_rmdir(path_init);
2412 if (ret == -ENOENT || ret >= 0)
2413 path_init_rm = true;
2414 else
2415 path_init_rm = false;
2416 free(path_init);
2417
2418 if (!path_base_rm && !path_init_rm) {
2419 controller_removed = false;
2420 continue;
2421 }
2422
2423 controller_removed = true;
2424 break;
2425 }
2426
2427 if (!controller_removed)
2428 all_removed = false;
2429 }
2430
2431 return all_removed;
2432 }
2433
2434 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2435 * hierarchy.
2436 */
2437 static bool cgv2_prune_empty_cgroups(const char *user)
2438 {
2439 int ret;
2440 struct cgv2_hierarchy *v2;
2441 char *path_base, *path_init;
2442 bool path_base_rm, path_init_rm;
2443
2444 if (!cgv2_hierarchies)
2445 return true;
2446
2447 v2 = *cgv2_hierarchies;
2448
2449 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2450 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base);
2451
2452 ret = recursive_rmdir(path_base);
2453 if (ret == -ENOENT || ret >= 0)
2454 path_base_rm = true;
2455 else
2456 path_base_rm = false;
2457 free(path_base);
2458
2459 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2460 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init);
2461
2462 ret = recursive_rmdir(path_init);
2463 if (ret == -ENOENT || ret >= 0)
2464 path_init_rm = true;
2465 else
2466 path_init_rm = false;
2467 free(path_init);
2468
2469 if (!path_base_rm && !path_init_rm)
2470 return false;
2471
2472 return true;
2473 }
2474
/* Prune the empty cgroups of @user from the cgroupfs v1 hierarchies first and
 * from the cgroupfs v2 hierarchy afterwards. This is best effort: the results
 * of both cgv{1,2}_prune_empty_cgroups() calls are deliberately discarded.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);
	(void)cgv2_prune_empty_cgroups(user);
}
2481
2482 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2483 static void cgv1_free_hierarchies(void)
2484 {
2485 struct cgv1_hierarchy **it;
2486
2487 if (!cgv1_hierarchies)
2488 return;
2489
2490 for (it = cgv1_hierarchies; it && *it; it++) {
2491 if ((*it)->controllers) {
2492 char **tmp;
2493 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2494 free(*tmp);
2495
2496 free((*it)->controllers);
2497 }
2498
2499 free((*it)->mountpoint);
2500 free((*it)->base_cgroup);
2501 free((*it)->fullcgpath);
2502 free((*it)->init_cgroup);
2503 }
2504
2505 free(cgv1_hierarchies);
2506 }
2507
2508 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2509 static void cgv2_free_hierarchies(void)
2510 {
2511 struct cgv2_hierarchy **it;
2512
2513 if (!cgv2_hierarchies)
2514 return;
2515
2516 for (it = cgv2_hierarchies; it && *it; it++) {
2517 if ((*it)->controllers) {
2518 char **tmp;
2519
2520 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2521 free(*tmp);
2522
2523 free((*it)->controllers);
2524 }
2525
2526 free((*it)->mountpoint);
2527 free((*it)->base_cgroup);
2528 free((*it)->fullcgpath);
2529 free((*it)->init_cgroup);
2530 }
2531
2532 free(cgv2_hierarchies);
2533 }
2534
/* Tear down the cgroup bookkeeping of this module: release the information
 * held for the cgroupfs v1 hierarchies, then for the cgroupfs v2 hierarchy.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();
	cgv2_free_hierarchies();
}
2541
2542 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2543 const char **argv)
2544 {
2545 int ret;
2546 uid_t uid = 0;
2547 gid_t gid = 0;
2548 const char *PAM_user = NULL;
2549
2550 ret = pam_get_user(pamh, &PAM_user, NULL);
2551 if (ret != PAM_SUCCESS) {
2552 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2553 return PAM_SESSION_ERR;
2554 }
2555
2556 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2557 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2558 return PAM_SESSION_ERR;
2559 }
2560
2561 if (!cg_init(uid, gid)) {
2562 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2563 return PAM_SESSION_ERR;
2564 }
2565
2566 /* Try to prune cgroups, that are actually empty but were still marked
2567 * as busy by the kernel so we couldn't remove them on session close.
2568 */
2569 cg_prune_empty_cgroups(PAM_user);
2570
2571 if (cg_mount_mode == CGROUP_UNKNOWN)
2572 return PAM_SESSION_ERR;
2573
2574 if (argc > 1 && !strcmp(argv[0], "-c")) {
2575 char **clist = make_string_list(argv[1], ",");
2576
2577 /*
2578 * We don't allow using "all" and other controllers explicitly because
2579 * that simply doesn't make any sense.
2580 */
2581 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2582 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2583 free_string_list(clist);
2584 return PAM_SESSION_ERR;
2585 }
2586
2587 cg_mark_to_make_rw(clist);
2588 free_string_list(clist);
2589 }
2590
2591 return handle_login(PAM_user, uid, gid);
2592 }
2593
2594 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2595 const char **argv)
2596 {
2597 int ret;
2598 uid_t uid = 0;
2599 gid_t gid = 0;
2600 const char *PAM_user = NULL;
2601
2602 ret = pam_get_user(pamh, &PAM_user, NULL);
2603 if (ret != PAM_SUCCESS) {
2604 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2605 return PAM_SESSION_ERR;
2606 }
2607
2608 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2609 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2610 return PAM_SESSION_ERR;
2611 }
2612
2613 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2614 if (!cg_init(uid, gid))
2615 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2616
2617 if (argc > 1 && !strcmp(argv[0], "-c")) {
2618 char **clist = make_string_list(argv[1], ",");
2619
2620 /*
2621 * We don't allow using "all" and other controllers explicitly because
2622 * that simply doesn't make any sense.
2623 */
2624 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2625 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2626 free_string_list(clist);
2627 return PAM_SESSION_ERR;
2628 }
2629
2630 cg_mark_to_make_rw(clist);
2631 free_string_list(clist);
2632 }
2633 }
2634
2635 cg_prune_empty_cgroups(PAM_user);
2636 cg_exit();
2637
2638 return PAM_SUCCESS;
2639 }