]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
pam: fix typo
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
6 *
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both cgroupfs v1 and cgroupfs v2, we
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2 where this assumption becomes crucial:
27 * We __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an xserver or similar will not work.
29 *
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
36 #include <ctype.h>
37 #include <dirent.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <pwd.h>
41 #include <stdarg.h>
42 #include <stdbool.h>
43 #include <stdint.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <syslog.h>
48 #include <unistd.h>
49 #include <linux/unistd.h>
50 #include <sys/mount.h>
51 #include <sys/param.h>
52 #include <sys/stat.h>
53 #include <sys/types.h>
54 #include <sys/vfs.h>
55
56 #define PAM_SM_SESSION
57 #include <security/_pam_macros.h>
58 #include <security/pam_modules.h>
59
60 #include "utils.h"
61
/* Print a diagnostic to @stream, prefixed with the source file, line and
 * function it was issued from. @format has to be a string literal and at least
 * one variadic argument is required, because __VA_ARGS__ is expanded behind a
 * comma.
 */
#define pam_cgfs_debug_stream(stream, format, ...)                         \
	do {                                                               \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__);                            \
	} while (false)

/* Report an error on stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* Debug messages are only compiled in when DEBUG is defined. The disabled
 * variant still expands to a complete (no-op) statement, so that it can be
 * the sole body of an if/else branch without leaving an empty statement
 * behind.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...) \
	do {                        \
	} while (false)
#endif /* DEBUG */
75
/* Taken over modified from the kernel sources. */
/* The bit helpers in this file operate on arrays of uint32_t words. */
#define NBITS 32 /* bits in uint32_t */
/* Integer division of @n by @d, rounded up. */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
/* Number of NBITS-wide words needed to hold @nr bits. Despite the name taken
 * over from the kernel, the unit is NBITS bits, not the size of a long.
 */
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
80
/* Layout of the cgroup mounts. The variable starts out as
 * CGROUP_UNINITIALIZED.
 * NOTE(review): presumably MIXED, PURE_V1 and PURE_V2 correspond to the
 * "mixed", "pure cgroupfs v1" and "pure cgroupfs v2" setups described in the
 * file header - confirm against the code that assigns cg_mount_mode.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
88
89 /* Common helper functions. Most of these have been taken from LXC. */
90 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
91 static int append_null_to_list(void ***list);
92 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
93 static inline void clear_bit(unsigned bit, uint32_t *bitarr)
94 {
95 bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
96 }
97 static char *copy_to_eol(char *s);
98 static void free_string_list(char **list);
99 static char *get_mountpoint(char *line);
100 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
101 static int handle_login(const char *user, uid_t uid, gid_t gid);
102 static inline bool is_set(unsigned bit, uint32_t *bitarr)
103 {
104 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
105 }
106 static bool is_lxcfs(const char *line);
107 static bool is_cgv1(char *line);
108 static bool is_cgv2(char *line);
109 static void *must_alloc(size_t sz);
110 static void must_add_to_list(char ***clist, char *entry);
111 static void must_append_controller(char **klist, char **nlist, char ***clist,
112 char *entry);
113 static void must_append_string(char ***list, char *entry);
114 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
115 static char *read_file(char *fnam);
116 static int read_from_file(const char *filename, void* buf, size_t count);
117 static int recursive_rmdir(char *dirname);
118 static inline void set_bit(unsigned bit, uint32_t *bitarr)
119 {
120 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
121 }
122 static bool string_in_list(char **list, const char *entry);
123 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
124 static void trim(char *s);
125 static bool write_int(char *path, int v);
126 static ssize_t write_nointr(int fd, const void* buf, size_t count);
127 static int write_to_file(const char *filename, const void *buf, size_t count,
128 bool add_newline);
129
130 /* cgroupfs prototypes. */
131 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
132 static uint32_t *cg_cpumask(char *buf, size_t nbits);
133 static bool cg_copy_parent_file(char *path, char *file);
134 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
135 static bool cg_enter(const char *cgroup);
136 static void cg_escape(void);
137 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
138 static ssize_t cg_get_max_cpus(char *cpulist);
139 static int cg_get_version_of_mntpt(const char *path);
140 static bool cg_init(uid_t uid, gid_t gid);
141 static void cg_mark_to_make_rw(char **list);
142 static void cg_prune_empty_cgroups(const char *user);
143 static bool cg_systemd_created_user_slice(const char *base_cgroup,
144 const char *init_cgroup,
145 const char *in, uid_t uid);
146 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
147 const char *base_cgroup, uid_t uid,
148 gid_t gid,
149 bool systemd_user_slice);
150 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
151 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
152 const char *init_cgroup, uid_t uid);
153 static void cg_systemd_prune_init_scope(char *cg);
154 static bool is_lxcfs(const char *line);
155
156 /* cgroupfs v1 prototypes. */
/* Information about one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of the controllers mounted into this hierarchy
	 * (e.g. cpu,cpuacct).
	 */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process, gathered from /proc/self/cgroup. */
	char *base_cgroup;
	/* Starts out as NULL.
	 * NOTE(review): presumably the full path of the cgroup created for
	 * the user - confirm against the code that sets it.
	 */
	char *fullcgpath;
	/* cgroup of PID 1, gathered from /proc/1/cgroup. */
	char *init_cgroup;
	/* Whether a writeable cgroup is supposed to be created here. */
	bool create_rw_cgroup;
	/* Starts out as false for cgroupfs v1 hierarchies.
	 * NOTE(review): presumably set once systemd is found to have placed
	 * us in a user slice - confirm against the code that sets it.
	 */
	bool systemd_user_slice;
};
166
167 static struct cgv1_hierarchy **cgv1_hierarchies;
168
169 static void cgv1_add_controller(char **clist, char *mountpoint,
170 char *base_cgroup, char *init_cgroup);
171 static bool cgv1_controller_in_clist(char *cgline, char *c);
172 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
173 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
174 char **clist);
175 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
176 bool *existed);
177 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
178 uid_t uid, gid_t gid, bool *existed);
179 static bool cgv1_enter(const char *cgroup);
180 static void cgv1_escape(void);
181 static bool cgv1_get_controllers(char ***klist, char ***nlist);
182 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
183 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
184 char *line);
185 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
186 const char *cgroup);
187 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
188 static bool cgv1_init(uid_t uid, gid_t gid);
189 static void cgv1_mark_to_make_rw(char **clist);
190 static char *cgv1_must_prefix_named(char *entry);
191 static bool cgv1_prune_empty_cgroups(const char *user);
192 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
193 static bool is_cgv1(char *line);
194
195 /* cgroupfs v2 prototypes. */
/* Information about the mounted cgroupfs v2 hierarchy. */
struct cgv2_hierarchy {
	/* Null-terminated list of controllers of the hierarchy; can be, but
	 * currently is not, filled in.
	 */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process, gathered from /proc/self/cgroup. */
	char *base_cgroup;
	/* Starts out as NULL.
	 * NOTE(review): presumably the full path of the cgroup created for
	 * the user - confirm against the code that sets it.
	 */
	char *fullcgpath;
	/* cgroup of PID 1, gathered from /proc/1/cgroup. */
	char *init_cgroup;
	/* Whether a writeable cgroup is supposed to be created here. */
	bool create_rw_cgroup;
	/* Handed in by the caller when the hierarchy is registered.
	 * NOTE(review): presumably true when systemd already placed us in a
	 * user slice - confirm against the caller.
	 */
	bool systemd_user_slice;
};
205
206 /* Actually this should only be a single hierarchy. But for the sake of
207 * parallelism and because the layout of the cgroupfs v2 is still somewhat
208 * changing, we'll leave it as an array of structs.
209 */
210 static struct cgv2_hierarchy **cgv2_hierarchies;
211
212 static void cgv2_add_controller(char **clist, char *mountpoint,
213 char *base_cgroup, char *init_cgroup,
214 bool systemd_user_slice);
215 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
216 bool *existed);
217 static bool cgv2_enter(const char *cgroup);
218 static void cgv2_escape(void);
219 static char *cgv2_get_current_cgroup(int pid);
220 static bool cgv2_init(uid_t uid, gid_t gid);
221 static void cgv2_mark_to_make_rw(char **clist);
222 static bool cgv2_prune_empty_cgroups(const char *user);
223 static bool cgv2_remove(const char *cgroup);
224 static bool is_cgv2(char *line);
225
/* Create the directory @path with exactly the permission bits given in @mode:
 * the process umask is cleared for the duration of the call and restored
 * afterwards. Returns whatever mkdir() returned, with errno as mkdir() left
 * it.
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_mask = umask(0);
	const int ret = mkdir(path, mode);
	const int mkdir_errno = errno;

	/* Preserve mkdir()'s errno across the umask() call. */
	umask(old_mask);
	errno = mkdir_errno;

	return ret;
}
239
/* Create directory and (if necessary) its parents.
 *
 * @root is a prefix of @path that is taken to exist already; only the path
 * components of @path behind it are created, each one through
 * do_mkdir(path, 0755) unless file_exists() already reports it.
 * While walking, @path is temporarily cut off behind the current component.
 * It is always handed back unmodified, also when creating a component fails.
 *
 * Returns true if every component exists or could be created. Returns false
 * if @path is shorter than @root or if a component could not be created.
 */
static bool mkdir_parent(const char *root, char *path)
{
	const size_t rootlen = strlen(root);
	const size_t pathlen = strlen(path);
	char *b, *e, orig;

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	/* Start behind @root and the separator that follows it. */
	b = path + rootlen + 1;
	for (;;) {
		/* Skip (repeated) separators. */
		while (*b == '/')
			b++;
		if (!*b)
			return true;

		/* Find the end of the current component. */
		e = b + 1;
		while (*e && *e != '/')
			e++;

		/* Cut @path off behind the current component. */
		orig = *e;
		if (orig)
			*e = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s.\n", path, strerror(errno));
			/* Do not leave the caller with a truncated path. */
			if (orig)
				*e = orig;
			return false;
		}

		if (!orig)
			return true;

		*e = orig;
		b = e + 1;
	}
}
284
285 /* Common helper functions. Most of these have been taken from LXC. */
/* Send a printf-style message to syslog with priority @err. Messages are
 * tagged "PAM-CGFS" and go to the LOG_AUTH facility; the connection to the
 * logger is opened and closed again for every message.
 * The prototype carries the sentinel attribute, so the variadic arguments
 * have to end with a NULL.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
296
/* realloc() pointer in batch sizes; do not fail.
 *
 * Makes sure *@mem can hold @newlen bytes, given that it currently holds
 * @oldlen bytes. Memory is requested in multiples of BATCH_SIZE and is only
 * reallocated when *@mem is still NULL or when @newlen falls into a later
 * batch than @oldlen.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	/* Count batches as size_t; an int would truncate large lengths. */
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
307
/* Append the string @new of length @newlen to the buffer *@dest, which
 * currently holds @oldlen bytes. The buffer is grown as needed and the byte
 * at @new[@newlen], i.e. the terminator, is copied along with the string.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	const size_t total = oldlen + newlen;
	char *tail;

	/* Make room for the combined string and its terminator. */
	batch_realloc(dest, oldlen, total + 1);

	tail = *dest + oldlen;
	memcpy(tail, new, newlen + 1);
}
317
/* Read in whole file and return allocated pointer.
 *
 * The file @fnam is read line by line and the lines are concatenated into a
 * single NUL-terminated string which the caller has to free(). Returns NULL
 * if the file cannot be opened or if not a single line could be read.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	/* getline() returns ssize_t; an int could truncate the length. */
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
340
/* Grow the null-terminated pointer array *@list by one entry. *@list may be
 * NULL, in which case a new array is started. The array is terminated with
 * NULL again behind the new entry. Does not fail.
 *
 * Returns the index of the new entry. That slot is not initialised here; the
 * caller is expected to fill it right away.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;
	void **grown;

	/* Count the entries in front of the terminating NULL. */
	if (*list)
		while ((*list)[count])
			count++;

	/* Room for one more entry plus the terminator. */
	grown = must_realloc(*list, (count + 2) * sizeof(void **));
	grown[count + 1] = NULL;
	*list = grown;

	return count;
}
360
/* Append a copy of the string @entry to the null-terminated array of strings
 * *@list. The array is grown by one entry and stays null-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	const int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
373
/* Strip all trailing newline characters from @s in place. Newlines that are
 * followed by other characters are left alone.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
382
/* Allocate @sz bytes by handing a NULL pointer to must_realloc(); do not
 * fail.
 */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
388
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL if @s does not contain a newline at all. The caller has to
 * free the copy.
 */
static char *copy_to_eol(char *s)
{
	size_t len;
	char *dup;
	const char *eol = strchr(s, '\n');

	if (!eol)
		return NULL;

	len = (size_t)(eol - s);
	dup = must_alloc(len + 1);
	memcpy(dup, s, len);
	dup[len] = '\0';

	return dup;
}
406
/* Return true if the /proc/<pid>/mountinfo entry @line describes a
 * fuse.lxcfs mount, i.e. if the filesystem type behind the first " - "
 * separator is "fuse.lxcfs".
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
416
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs
 * v1 mount, i.e. if the filesystem type behind the first " - " separator is
 * "cgroup".
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
426
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs
 * v2 mount, i.e. if the filesystem type behind the first " - " separator is
 * "cgroup2".
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
436
/* Return true if the string @entry is equal to one of the strings in the
 * null-terminated array @list. A NULL @list is treated as an empty list.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
450
/*
 * Split @str at any of the delimiter characters in @sep and return the pieces
 * as a null-terminated array of allocated strings. @str itself is not
 * modified; a private copy is tokenised. Returns NULL if @str contains no
 * token at all. Caller is responsible for calling free_string_list.
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	char *scratch = must_copy_string(str);
	char *tok = strtok_r(scratch, sep, &state);

	while (tok) {
		must_add_to_list(&result, tok);
		tok = strtok_r(NULL, sep, &state);
	}

	free(scratch);

	return result;
}
471
/* Return the number of strings in the null-terminated array @list, not
 * counting the terminating NULL. A NULL @list has length 0.
 */
static size_t string_list_length(char **list)
{
	size_t count = 0;

	if (!list)
		return 0;

	while (list[count])
		count++;

	return count;
}
483
/* Free every string in the null-terminated array @list and then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
493
/* Write the integer @v, followed by a newline, to the file @path, which is
 * opened for writing (and thereby truncated). Returns true only if opening,
 * writing and closing the file all succeeded.
 */
static bool write_int(char *path, int v)
{
	bool ok;
	FILE *f = fopen(path, "w");

	if (!f)
		return false;

	ok = fprintf(f, "%d\n", v) >= 0;

	/* The data may only hit the file on close, so check that as well. */
	if (fclose(f) != 0)
		ok = false;

	return ok;
}
512
/* Recursively remove the directory @dirname together with all directories
 * below it.
 *
 * Only directories are removed explicitly; entries of any other type are
 * skipped, so the final rmdir() depends on the filesystem not requiring them
 * to be unlinked first.
 * NOTE(review): presumably only used on cgroupfs - confirm against the
 * callers.
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened and -1 if
 * anything else went wrong. Removal carries on after a failure; only the
 * first failure is reported in the debug output.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			if (!r) {
				pam_cgfs_debug("Failed to stat %s.\n", pathname);
			}
			r = -1;
		} else if (S_ISDIR(st.st_mode) && recursive_rmdir(pathname) < 0) {
			r = -1;
		}

		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r) {
			pam_cgfs_debug("Failed to delete %s: %s.\n", dirname, strerror(errno));
		}
		r = -1;
	}

	if (closedir(dir) < 0) {
		/* This is a failed close, not a failed delete. */
		if (!r) {
			pam_cgfs_debug("Failed to close directory %s: %s.\n", dirname, strerror(errno));
		}
		r = -1;
	}

	return r;
}
564
/* Add a copy of the string @entry to the null-terminated array of strings
 * *@clist. The array is grown by one entry and stays null-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	const int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
575
/* Return an allocated copy of the mountpoint of a /proc/<pid>/mountinfo
 * entry, i.e. of the fifth space-separated field of @line. Returns NULL if
 * @line has fewer than five fields. The caller has to free the copy.
 *
 * Note that @line is modified: the space behind the mountpoint, if there is
 * one, is overwritten with a NUL byte.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *mnt;
	size_t len;
	int skipped;

	/* Skip the four fields in front of the mountpoint. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	/* Terminate the mountpoint in place. */
	end = strchr(field, ' ');
	if (end)
		*end = '\0';

	len = strlen(field);
	mnt = must_alloc(len + 1);
	memcpy(mnt, field, len);
	mnt[len] = '\0';

	return mnt;
}
603
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup. Kernel
 * controllers are appended to @klist, named controllers (those starting with
 * "name=") to @nlist. The 0::/some/path entry of the cgroupfs v2 hierarchy
 * has an empty controller list and is skipped, as are lines that do not
 * contain two ':' separators.
 *
 * Returns false if /proc/self/cgroup cannot be opened, true otherwise.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	char *line = NULL;
	size_t cap = 0;
	FILE *f = fopen("/proc/self/cgroup", "r");

	if (!f)
		return false;

	while (getline(&line, &cap, f) != -1) {
		char *ctrls, *ctrls_end, *name;
		char *state = NULL;

		/* The controller list sits between the first two ':'. */
		ctrls = strchr(line, ':');
		if (!ctrls)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		if (!ctrls_end)
			continue;
		*ctrls_end = '\0';

		/* Skip the v2 hierarchy. */
		if (ctrls_end == ctrls)
			continue;

		name = strtok_r(ctrls, ",", &state);
		while (name) {
			char ***target = strncmp(name, "name=", 5) == 0 ? nlist : klist;

			must_append_string(target, name);
			name = strtok_r(NULL, ",", &state);
		}
	}

	fclose(f);
	free(line);

	return true;
}
650
651 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
652 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
653 static bool cgv2_get_controllers(char ***klist)
654 {
655 return -ENOSYS;
656 }
657 */
658
/* Get the current cgroup of process @pid in the cgroupfs v2 hierarchy from
 * /proc/<pid>/cgroup.
 *
 * Returns an allocated copy of the path behind "0::" which the caller has to
 * free. Returns NULL if the file cannot be read or if it has no complete
 * "0::/..." line.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	int ret;
	char *cgroups_v2;
	char *entry;
	char *copy = NULL;
	/* The buffer has to hold "/proc/", the pid as a string, "/cgroup" and
	 * the terminator. 21 bytes are more than any int needs. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];

	ret = snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return NULL;

	cgroups_v2 = read_file(path);
	if (!cgroups_v2)
		return NULL;

	/* The v2 entry is the line that starts with "0::/". Only accept a
	 * match at the beginning of a line: cgroup names are chosen by users,
	 * so the same character sequence may show up in the middle of the
	 * path of another hierarchy.
	 */
	for (entry = strstr(cgroups_v2, "0::/"); entry;
	     entry = strstr(entry + 1, "0::/"))
		if (entry == cgroups_v2 || entry[-1] == '\n')
			break;

	/* Skip "0::" and copy the path up to the end of the line. */
	if (entry)
		copy = copy_to_eol(entry + 3);

	free(cgroups_v2);
	if (copy)
		trim(copy);

	return copy;
}
695
/* Return true if at least one string occurs in both of the null-terminated
 * lists @l1 and @l2. A NULL list is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
712
713 /* For a null-terminated list of controllers @clist, return true if any of those
714 * controllers is already listed the null-terminated list of hierarchies @hlist.
715 * Realistically, if one is present, all must be present.
716 */
717 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
718 {
719 struct cgv1_hierarchy **it;
720
721 for (it = hlist; it && *it; it++)
722 if ((*it)->controllers)
723 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
724 return true;
725 return false;
726
727 }
728
729 /* Set boolean to mark controllers under which we are supposed create a
730 * writeable cgroup.
731 */
732 static void cgv1_mark_to_make_rw(char **clist)
733 {
734 struct cgv1_hierarchy **it;
735
736 for (it = cgv1_hierarchies; it && *it; it++)
737 if ((*it)->controllers)
738 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
739 string_in_list(clist, "all"))
740 (*it)->create_rw_cgroup = true;
741 }
742
743 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
744 * the cgroupfs v2 hierarchy.
745 */
746 static void cgv2_mark_to_make_rw(char **clist)
747 {
748 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
749 if (cgv2_hierarchies)
750 (*cgv2_hierarchies)->create_rw_cgroup = true;
751 }
752
/* Flag the hierarchies requested in the null-terminated list @clist for
 * creation of a writeable cgroup, first for cgroupfs v1 and then for
 * cgroupfs v2.
 */
static void cg_mark_to_make_rw(char **clist)
{
	/* v1 first, then v2. */
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
759
/* Return an allocated copy of @entry prefixed with "name=", as used for
 * named controllers (e.g. "name=systemd"). The caller has to free the result.
 *
 * The string is assembled with memcpy() into a buffer of the exact size, so
 * there is no failure path that could leak the buffer or hand back NULL.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	static const char prefix[] = "name=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t len = strlen(entry);
	char *s;

	s = must_alloc(prefix_len + len + 1);
	memcpy(s, prefix, prefix_len);
	/* Copies the terminator of @entry as well. */
	memcpy(s + prefix_len, entry, len + 1);

	return s;
}
776
/* Append the controller @entry to the null-terminated list *@clist.
 *
 * Entries that are listed in both @klist and @nlist are dropped. Entries
 * that already start with "name=" or that are kernel controllers listed in
 * @klist are copied as they are; anything else is appended with a "name="
 * prefix.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	const bool is_kernel = string_in_list(klist, entry);
	int slot;
	char *name;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		name = must_copy_string(entry);
	else
		name = cgv1_must_prefix_named(entry);

	(*clist)[slot] = name;
}
797
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list
 *
 * Returns a null-terminated list of controller names which the caller has to
 * free, or NULL if @line has fewer than five fields, if the mountpoint is
 * not below /sys/fs/cgroup/, if no field follows the mountpoint or if no
 * controller was found.
 *
 * @line is handed back unmodified: the controller list is tokenised on a
 * private copy. Splitting it in place would cut a mountpoint such as
 * "/sys/fs/cgroup/cpu,cpuacct" down to "/sys/fs/cgroup/cpu" for anyone who
 * reads the mountpoint from the same line afterwards.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	int i;
	char *p, *p2, *tok, *names;
	char *saveptr = NULL;
	char **aret = NULL;

	/* The mountpoint is the fifth space-separated field. */
	p = line;
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	if (strncmp(p, "/sys/fs/cgroup/", 15) != 0)
		return NULL;

	p += 15;

	p2 = strchr(p, ' ');
	if (!p2)
		return NULL;

	/* Cut the line off behind the controller list just long enough to
	 * copy the list.
	 */
	*p2 = '\0';
	names = must_copy_string(p);
	*p2 = ' ';

	for (tok = strtok_r(names, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr))
		must_append_controller(klist, nlist, &aret, tok);

	free(names);

	return aret;
}
837
/* Check if the cgroupfs v1 controller @c is present in the comma-separated
 * controller list at the start of @cgline. The list ends at the first ':';
 * if @cgline contains no ':' the answer is false. Empty list entries never
 * match.
 *
 * The list is scanned in place, so that no copy of data read from a file
 * has to be placed on the stack.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *end, *tok;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);
	if (clen == 0)
		return false;

	for (tok = cgline; tok < end;) {
		const char *comma = memchr(tok, ',', (size_t)(end - tok));
		const char *tokend = comma ? comma : end;

		if ((size_t)(tokend - tok) == clen && memcmp(tok, c, clen) == 0)
			return true;

		if (!comma)
			break;
		tok = comma + 1;
	}

	return false;
}
861
/* Look up the cgroup of the cgroupfs v1 controller @controller in
 * @basecginfo, which holds the contents of a /proc/<pid>/cgroup file.
 *
 * Returns an allocated copy of the cgroup path (the text behind the second
 * ':' of the matching line, up to the end of that line) which the caller has
 * to free, or NULL if no line lists @controller or the matching line is
 * incomplete.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *line = basecginfo;

	for (;;) {
		char *ctrls, *path, *eol;

		/* The controller list follows the first ':'. */
		ctrls = strchr(line, ':');
		if (!ctrls)
			return NULL;
		ctrls++;

		if (cgv1_controller_in_clist(ctrls, controller)) {
			/* The cgroup path follows the next ':'. */
			path = strchr(ctrls, ':');
			if (!path)
				return NULL;

			return copy_to_eol(path + 1);
		}

		/* No match; carry on with the next line. */
		eol = strchr(ctrls, '\n');
		if (!eol)
			return NULL;
		line = eol + 1;
	}
}
894
/* Remove a trailing /init.scope from string @cg in place. This will mostly
 * affect systemd-based systems. If @cg consists of nothing but /init.scope
 * it is reduced to "/". A NULL @cg and strings that do not end in
 * /init.scope are left alone.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	size_t len, scope_len;
	char *point;

	if (!cg)
		return;

	/* Compare the lengths first: forming a pointer in front of @cg for
	 * strings shorter than the suffix would be undefined behaviour.
	 */
	len = strlen(cg);
	scope_len = strlen(INIT_SCOPE);
	if (len < scope_len)
		return;

	point = cg + (len - scope_len);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	/* Keep the leading '/' if nothing else would be left. */
	if (point == cg)
		*(point + 1) = '\0';
	else
		*point = '\0';
}
917
918 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
919 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
920 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
921 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
922 * from /proc/1/cgroup.
923 */
924 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
925 {
926 struct cgv1_hierarchy *new;
927 int newentry;
928
929 new = must_alloc(sizeof(*new));
930 new->controllers = clist;
931 new->mountpoint = mountpoint;
932 new->base_cgroup = base_cgroup;
933 new->fullcgpath = NULL;
934 new->create_rw_cgroup = false;
935 new->init_cgroup = init_cgroup;
936 new->systemd_user_slice = false;
937
938 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
939 cgv1_hierarchies[newentry] = new;
940 }
941
942 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
943 * currently) include the controllers mounted into the hierarchy (e.g. memory,
944 * pids, blkio), the mountpoint of that hierarchy (Should usually be
945 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
946 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
947 * base cgroup of the current process gathered from /proc/self/cgroup, and the
948 * init cgroup of PID1 gathered from /proc/1/cgroup.
949 */
950 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
951 {
952 struct cgv2_hierarchy *new;
953 int newentry;
954
955 new = must_alloc(sizeof(*new));
956 new->controllers = clist;
957 new->mountpoint = mountpoint;
958 new->base_cgroup = base_cgroup;
959 new->fullcgpath = NULL;
960 new->create_rw_cgroup = false;
961 new->init_cgroup = init_cgroup;
962 new->systemd_user_slice = systemd_user_slice;
963
964 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
965 cgv2_hierarchies[newentry] = new;
966 }
967
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last path element of @in is "<something>.session" and
 * the element in front of it is exactly "<uid>.user" for @uid. Trailing
 * slashes are ignored.
 *
 * @in is only read, never copied or modified, and the scan never steps in
 * front of the start of the string. The second-to-last element is compared
 * as a whole, so that a path like ".../<uid>.evil/x.session" is rejected.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	static const char session_suffix[] = ".session";
	const size_t suffix_len = sizeof(session_suffix) - 1;
	char expected[32];
	const char *end, *last, *user, *user_end;
	size_t inlen;
	int ret;

	inlen = strlen(in);
	if (inlen < strlen("/user/1.user/1.session"))
		return false;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	end = in + inlen;
	while (end > in && end[-1] == '/')
		end--;
	if (end == in)
		return false;

	/* Get last path element; it has to be preceded by a '/'. */
	last = end;
	while (last > in && last[-1] != '/')
		last--;
	if (last == in)
		return false;

	/* make sure it is something.session */
	if ((size_t)(end - last) < strlen("1.session") ||
	    memcmp(end - suffix_len, session_suffix, suffix_len) != 0)
		return false;

	/* ok last path piece checks out, now check the second to last */
	user_end = last - 1;
	user = user_end;
	while (user > in && user[-1] != '/')
		user--;

	ret = snprintf(expected, sizeof(expected), "%u.user", (unsigned int)uid);
	if (ret < 0 || (size_t)ret >= sizeof(expected))
		return false;

	return (size_t)(user_end - user) == (size_t)ret &&
	       memcmp(user, expected, (size_t)ret) == 0;
}
1019
/* So long as our path relative to init starts with /user.slice/user-$uid.slice,
 * assume it belongs to $uid and chown it
 *
 * Returns true if @base_cgroup is longer than @init_cgroup, starts with
 * @init_cgroup and carries on with "/user.slice/user-<uid>.slice/" behind
 * it. An @init_cgroup of length one (the root cgroup "/") contributes
 * nothing to the prefix.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	int ret;
	char buf[100];
	size_t curlen, initlen;

	curlen = strlen(base_cgroup);
	initlen = strlen(init_cgroup);
	if (curlen <= initlen)
		return false;

	if (strncmp(base_cgroup, init_cgroup, initlen) != 0)
		return false;

	/* Print the uid as an unsigned number: with %d a uid above INT_MAX
	 * comes out negative and can never match a slice name.
	 */
	ret = snprintf(buf, sizeof(buf), "/user.slice/user-%u.slice/", (unsigned int)uid);
	if (ret < 0 || (size_t)ret >= sizeof(buf))
		return false;

	if (initlen == 1)
		initlen = 0; /* skip the '/' */

	return strncmp(base_cgroup + initlen, buf, (size_t)ret) == 0;
}
1047
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 * lose ownership of their cgroups
 *
 * Returns true if one of the following holds:
 * - cg_systemd_under_user_slice_1() accepts @in,
 * - cg_systemd_under_user_slice_2() accepts @base_cgroup and @init_cgroup,
 * - the last path element of @in is "session-<something>.scope" and the
 *   element in front of it is exactly "user-<uid>.slice" for @uid.
 *
 * @in is only read, never copied or modified, and the scan never steps in
 * front of the start of the string. The second-to-last element is compared
 * as a whole, so the ".slice" suffix is checked as well.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	static const char session_prefix[] = "session-";
	static const char scope_suffix[] = ".scope";
	const size_t prefix_len = sizeof(session_prefix) - 1;
	const size_t suffix_len = sizeof(scope_suffix) - 1;
	char expected[32];
	const char *end, *last, *slice, *slice_end;
	size_t inlen;
	int ret;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	inlen = strlen(in);
	if (inlen < strlen("/user-0.slice/session-0.scope"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	end = in + inlen;
	while (end > in && end[-1] == '/')
		end--;
	if (end == in)
		return false;

	/* Get last path element; it has to be preceded by a '/'. */
	last = end;
	while (last > in && last[-1] != '/')
		last--;
	if (last == in)
		return false;

	/* Make sure it is session-something.scope. Prefix and suffix cannot
	 * overlap, so the element is at least as long as both together.
	 */
	if ((size_t)(end - last) < prefix_len + suffix_len ||
	    memcmp(last, session_prefix, prefix_len) != 0 ||
	    memcmp(end - suffix_len, scope_suffix, suffix_len) != 0)
		return false;

	/* Ok last path piece checks out, now check the second to last. */
	slice_end = last - 1;
	slice = slice_end;
	while (slice > in && slice[-1] != '/')
		slice--;

	ret = snprintf(expected, sizeof(expected), "user-%u.slice", (unsigned int)uid);
	if (ret < 0 || (size_t)ret >= sizeof(expected))
		return false;

	return (size_t)(slice_end - slice) == (size_t)ret &&
	       memcmp(slice, expected, (size_t)ret) == 0;
}
1120
/* Chown existing cgroup that systemd has already created for us.
 *
 * If @systemd_user_slice is false nothing is done and false is returned.
 * Otherwise the cgroup @base_cgroup below @mountpoint is chowned to
 * @uid:@gid and true is returned. A failing chown() is logged as a warning
 * and does not change the return value.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		/* Only claim success when the chown actually went through. */
		pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
	}

	free(path);

	return true;
}
1144
1145 /* Detect and store information about cgroupfs v1 hierarchies. */
1146 static bool cgv1_init(uid_t uid, gid_t gid)
1147 {
1148 FILE *f;
1149 struct cgv1_hierarchy **it;
1150 char *basecginfo;
1151 char *line = NULL;
1152 char **klist = NULL, **nlist = NULL;
1153 size_t len = 0;
1154
1155 basecginfo = read_file("/proc/self/cgroup");
1156 if (!basecginfo)
1157 return false;
1158
1159 f = fopen("/proc/self/mountinfo", "r");
1160 if (!f) {
1161 free(basecginfo);
1162 return false;
1163 }
1164
1165 cgv1_get_controllers(&klist, &nlist);
1166
1167 while (getline(&line, &len, f) != -1) {
1168 char **controller_list = NULL;
1169 char *mountpoint, *base_cgroup;
1170
1171 if (is_lxcfs(line) || !is_cgv1(line))
1172 continue;
1173
1174 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1175 if (!controller_list)
1176 continue;
1177
1178 if (cgv1_controller_list_is_dup(cgv1_hierarchies,
1179 controller_list)) {
1180 free(controller_list);
1181 continue;
1182 }
1183
1184 mountpoint = get_mountpoint(line);
1185 if (!mountpoint) {
1186 free_string_list(controller_list);
1187 continue;
1188 }
1189
1190 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1191 if (!base_cgroup) {
1192 free_string_list(controller_list);
1193 free(mountpoint);
1194 continue;
1195 }
1196 trim(base_cgroup);
1197 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1198 "mountpoint \"%s\" and cgroup \"%s\".\n",
1199 controller_list[0], mountpoint, base_cgroup);
1200 cgv1_add_controller(controller_list, mountpoint, base_cgroup,
1201 NULL);
1202 }
1203 free_string_list(klist);
1204 free_string_list(nlist);
1205 free(basecginfo);
1206 fclose(f);
1207 free(line);
1208
1209 /* Retrieve init cgroup path for all controllers. */
1210 basecginfo = read_file("/proc/1/cgroup");
1211 if (!basecginfo)
1212 return false;
1213
1214 for (it = cgv1_hierarchies; it && *it; it++) {
1215 if ((*it)->controllers) {
1216 char *init_cgroup, *user_slice;
1217 /* We've already stored the controller and received its
1218 * current cgroup. If we now fail to retrieve its init
1219 * cgroup, we should probably fail.
1220 */
1221 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1222 if (!init_cgroup) {
1223 free(basecginfo);
1224 return false;
1225 }
1226 cg_systemd_prune_init_scope(init_cgroup);
1227 (*it)->init_cgroup = init_cgroup;
1228 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1229 "cgroup \"%s\".\n",
1230 (*(*it)->controllers), init_cgroup);
1231 /* Check whether systemd has already created a cgroup
1232 * for us.
1233 */
1234 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1235 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1236 (*it)->systemd_user_slice = true;
1237 }
1238 }
1239 free(basecginfo);
1240
1241 return true;
1242 }
1243
1244 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1245 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1246 */
1247 static inline int cg_get_version_of_mntpt(const char *path)
1248 {
1249 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1250 return 1;
1251
1252 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1253 return 2;
1254
1255 return 0;
1256 }
1257
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Returns true when no v2 hierarchy is present or when one was registered via
 * cgv2_add_controller(). Returns false when init's v2 cgroup cannot be
 * determined, /proc/self/mountinfo cannot be opened, or no v2 mount is found
 * there. @gid is currently unused.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	/* Set once the cgroup strings were passed to cgv2_add_controller(). */
	bool handed_over = false;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}
	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);
		handed_over = true;

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);
		handed_over = true;

		/* Log here, where @mountpoint is known to be valid, instead of
		 * after the loop where it may never have been set.
		 */
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\".\n",
			       mountpoint, current_cgroup, init_cgroup);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* The strings are only released when they were never given to
	 * cgv2_add_controller(); it is assumed to keep the pointers it receives
	 * (cgv2_create() later frees v2->base_cgroup).
	 */
	if (!handed_over) {
		free(current_cgroup);
		free(init_cgroup);
	}

	return ret;
}
1350
1351 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1352 * cgroupfs v2 hierarchy.
1353 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1354 * where some controllers are mounted into their standard cgroupfs v1 locations
1355 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1356 * hierarchy (/sys/fs/cgroup).
1357 */
1358 static bool cg_init(uid_t uid, gid_t gid)
1359 {
1360 if (!cgv1_init(uid, gid))
1361 return false;
1362
1363 if (!cgv2_init(uid, gid))
1364 return false;
1365
1366 if (cgv1_hierarchies && cgv2_hierarchies) {
1367 cg_mount_mode = CGROUP_MIXED;
1368 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies.");
1369 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1370 cg_mount_mode = CGROUP_PURE_V1;
1371 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies.");
1372 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1373 cg_mount_mode = CGROUP_PURE_V2;
1374 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies.");
1375 } else {
1376 cg_mount_mode = CGROUP_UNKNOWN;
1377 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy.\n", NULL);
1378 }
1379
1380 if (cg_mount_mode == CGROUP_UNKNOWN)
1381 return false;
1382
1383 return true;
1384 }
1385
1386 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1387 static bool cgv1_enter(const char *cgroup)
1388 {
1389 struct cgv1_hierarchy **it;
1390
1391 for (it = cgv1_hierarchies; it && *it; it++) {
1392 char **controller;
1393 bool entered = false;
1394
1395 if (!(*it)->controllers || !(*it)->mountpoint ||
1396 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1397 continue;
1398
1399 for (controller = (*it)->controllers; controller && *controller;
1400 controller++) {
1401 char *path;
1402
1403 /* We've already been placed in a user slice, so we
1404 * don't need to enter the cgroup again.
1405 */
1406 if ((*it)->systemd_user_slice) {
1407 entered = true;
1408 break;
1409 }
1410
1411 path = must_make_path((*it)->mountpoint,
1412 (*it)->init_cgroup,
1413 cgroup,
1414 "/cgroup.procs",
1415 NULL);
1416 if (!file_exists(path)) {
1417 free(path);
1418 path = must_make_path((*it)->mountpoint,
1419 (*it)->init_cgroup,
1420 cgroup,
1421 "/tasks",
1422 NULL);
1423 }
1424 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1425 entered = write_int(path, (int)getpid());
1426 if (entered) {
1427 free(path);
1428 break;
1429 }
1430 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1431 free(path);
1432 }
1433 if (!entered)
1434 return false;
1435 }
1436
1437 return true;
1438 }
1439
1440 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1441 static bool cgv2_enter(const char *cgroup)
1442 {
1443 struct cgv2_hierarchy *v2;
1444 char *path;
1445 bool entered = false;
1446
1447 if (!cgv2_hierarchies)
1448 return true;
1449
1450 v2 = *cgv2_hierarchies;
1451
1452 if (!v2->mountpoint || !v2->base_cgroup)
1453 return false;
1454
1455 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1456 return true;
1457
1458 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1459 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1460 entered = write_int(path, (int)getpid());
1461 if (!entered) {
1462 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1463 free(path);
1464 return false;
1465 }
1466
1467 free(path);
1468
1469 return true;
1470 }
1471
/* Enter @cgroup in the v1 hierarchies first and, only if that worked, in the
 * v2 hierarchy. A warning naming the failing side is logged and false is
 * returned on the first failure.
 */
static bool cg_enter(const char *cgroup)
{
	bool ok;

	ok = cgv1_enter(cgroup);
	if (!ok) {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups.\n", NULL);
	} else {
		ok = cgv2_enter(cgroup);
		if (!ok)
			mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups.\n", NULL);
	}

	return ok;
}
1487
1488 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1489 static void cgv1_escape(void)
1490 {
1491 struct cgv1_hierarchy **it;
1492
1493 /* In case systemd hasn't already placed us in a user slice for the
1494 * cpuset v1 controller we will reside in the root cgroup. This means
1495 * that cgroup.clone_children will not have been initialized for us so
1496 * we need to do it.
1497 */
1498 for (it = cgv1_hierarchies; it && *it; it++)
1499 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1500 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset.\n", NULL);
1501
1502 if (!cgv1_enter("/"))
1503 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup.\n", NULL);
1504 }
1505
/* Escape to "/" in the cgroupfs v2 hierarchy via cgv2_enter(). A failure is
 * logged as a warning only.
 */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (!escaped)
		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup.\n", NULL);
}
1512
/* Escape in both hierarchy flavours: cgroupfs v1 first, then cgroupfs v2.
 * Neither step reports failure to the caller.
 */
static void cg_escape(void)
{
	cgv1_escape();

	cgv2_escape();
}
1519
/* Get uid and gid for @user.
 *
 * Looks @user up with getpwnam_r() and stores the primary ids in @uid and
 * @gid. Returns false if no matching record exists, the lookup fails, or
 * memory cannot be allocated; @uid and @gid are left untouched in that case.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long hint;
	int ret;

	/* sysconf() returns a long and -1 when there is no limit hint; keep it
	 * signed until it has been validated.
	 */
	hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = hint > 0 ? (size_t)hint : 1024;

	for (;;) {
		char *tmp;

		tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE)
			break;

		/* The record did not fit: retry with a larger buffer. */
		if (bufsize > SIZE_MAX / 2) {
			free(buf);
			return false;
		}
		bufsize *= 2;
	}

	if (!pwentp) {
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1553
/* Report whether @path is owned by exactly @uid and @gid, in which case the
 * cgroup can be reused. Returns false as well when @path cannot be stat()ed.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat sb;
	int rc;

	rc = stat(path, &sb);

	return rc >= 0 && sb.st_uid == uid && sb.st_gid == gid;
}
1567
/* Build a bit array from a cpulist string, e.g.
 *
 *	0,2-3
 *
 * becomes
 *
 *	1 0 1 1
 *
 * @buf is split in place at ',' and is therefore modified. Each piece is
 * either a single number or a "start-end" range. @nbits is the number of bits
 * the array must hold. Returns NULL if allocation fails, if a range has
 * start > end, or if a cpu number is >= @nbits. The caller frees the result.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *tok;
	char *state = NULL;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &state);
	while (tok) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);
		last = first;

		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);

		tok = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1609
/* Concatenate the NULL-terminated string array @parts, inserting @sep between
 * consecutive entries and, if @use_as_prefix is true, also in front of the
 * first one.
 *
 * Returns a newly allocated string the caller must free, or NULL if @parts is
 * NULL or allocation fails. An empty array yields "" (or just @sep when
 * @use_as_prefix is set).
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **p;
	char *result, *cursor;
	size_t sep_len, result_len;

	if (!parts)
		return NULL;

	sep_len = strlen(sep);

	/* calculate new string length */
	result_len = use_as_prefix ? sep_len : 0;
	for (p = parts; *p; p++) {
		if (p != parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Append through a moving cursor so that the already written part is
	 * not rescanned for every piece.
	 */
	cursor = result;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (p = parts; *p; p++) {
		size_t part_len = strlen(*p);

		if (p != parts) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}
		memcpy(cursor, *p, part_len);
		cursor += part_len;
	}
	*cursor = '\0';

	return result;
}
1638
/* The largest value a 64-bit unsigned integer can hold is 2^64 - 1, a 20-digit
 * decimal number. One more byte is needed for the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn cpumask into simple, comma-separated cpulist.
 *
 * @bitarr holds @nbits bits; every set bit i contributes the decimal number i.
 * Returns a newly allocated string the caller must free, or NULL if a number
 * cannot be formatted or string_join() returns NULL (which it does when no
 * bit is set, because the list passed to it is then NULL).
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Valid bit indices are 0 .. nbits - 1. Looking at bit @nbits would
	 * read past the end of the array whenever @nbits fills the last word.
	 */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}
		must_append_string(&cpulist, numstr);
	}

	joined = string_join(",", (const char **)cpulist, false);
	/* string_join() builds a fresh string, so the list is no longer
	 * needed.
	 */
	free_string_list(cpulist);

	return joined;
}
1663
/* Return the number that follows the last ',' or '-' in @cpulist, or the
 * number at the start of @cpulist if it contains neither. For a cpulist such
 * as "0,2-3" this is the last cpu number listed.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *comma, *dash, *last;
	size_t cpus;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/* Pick whichever separator occurs later. The pointers are only
	 * compared with each other when both exist.
	 */
	if (comma && dash)
		last = (comma > dash ? comma : dash) + 1;
	else if (comma)
		last = comma + 1;
	else if (dash)
		last = dash + 1;
	else
		last = cpulist;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
1694
/* write(2) wrapper that repeats the call for as long as it fails with EINTR.
 * Returns whatever the final write() returned.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
1704
1705 static int write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
1706 {
1707 int fd, saved_errno;
1708 ssize_t ret;
1709
1710 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
1711 if (fd < 0)
1712 return -1;
1713 ret = write_nointr(fd, buf, count);
1714 if (ret < 0)
1715 goto out_error;
1716 if ((size_t)ret != count)
1717 goto out_error;
1718 if (add_newline) {
1719 ret = write_nointr(fd, "\n", 1);
1720 if (ret != 1)
1721 goto out_error;
1722 }
1723 close(fd);
1724 return 0;
1725
1726 out_error:
1727 saved_errno = errno;
1728 close(fd);
1729 errno = saved_errno;
1730 return -1;
1731 }
1732
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Set up @path/cpuset.cpus from the cpuset.cpus file of @path's parent
 * directory, leaving out every cpu listed in __ISOL_CPUS.
 *
 * @path:           cgroup directory; it is only modified temporarily while the
 *                  parent's file name is built and is intact on return.
 * @am_initialized: whether the cgroup was already set up by someone else. When
 *                  it was and there are no isolated cpus, nothing is written.
 *
 * Returns true on success (including the cases in which nothing had to be
 * written) and false if a file could not be read, parsed or written.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;
	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		pam_cgfs_debug("Invalid path: %s.\n", path);
		return bret;
	}

	/* Cut off the last component just long enough to name the parent's
	 * file, then restore @path right away so that no exit path can hand a
	 * truncated string back to the caller.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	*lastslash = oldv;

	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s.\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup.\n");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}
	/* <ctype.h> functions need a value representable as unsigned char. */
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup.\n");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0)
		goto on_error;

	/* Both masks get one bit per cpu number 0 .. max, hence the + 1. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus.\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus.\n");
		goto on_error;
	}

	/* The masks hold bits 0 .. maxposs - 1; do not touch bit maxposs. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		/* NOTE(review): nothing is written here even when
		 * !am_initialized, unlike the two "no isolated cpus" branches
		 * above - confirm that this is intended.
		 */
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset.\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset.\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list.\n");
		goto on_error;
	}

copy_parent:
	/* Switch from the parent's file to our own; drop the old name first. */
	free(fpath);
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = write_to_file(fpath, cpulist, strlen(cpulist), false);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s.\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	free(fpath);

	free(isolcpus);
	free(isolmask);

	/* @cpulist may alias @posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1869
1870 int read_from_file(const char *filename, void* buf, size_t count)
1871 {
1872 int fd = -1, saved_errno;
1873 ssize_t ret;
1874
1875 fd = open(filename, O_RDONLY | O_CLOEXEC);
1876 if (fd < 0)
1877 return -1;
1878
1879 if (!buf || !count) {
1880 char buf2[100];
1881 size_t count2 = 0;
1882 while ((ret = read(fd, buf2, 100)) > 0)
1883 count2 += ret;
1884 if (ret >= 0)
1885 ret = count2;
1886 } else {
1887 memset(buf, 0, count);
1888 ret = read(fd, buf, count);
1889 }
1890
1891 if (ret < 0)
1892 pam_cgfs_debug("read %s: %s", filename, strerror(errno));
1893
1894 saved_errno = errno;
1895 close(fd);
1896 errno = saved_errno;
1897 return ret;
1898 }
1899
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is only modified temporarily while the parent's file name is built and
 * is intact on return, whether the copy worked or not. Returns false if @path
 * contains no '/', if the parent's file is empty or cannot be read, or if the
 * write fails.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* Cut off the last component just long enough to name the parent's
	 * file, then restore @path so that the error path cannot leave it
	 * truncated.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* The first call only measures the file, the second one reads it. */
	len = read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;

	value = must_alloc(len + 1);
	if (read_from_file(fpath, value, len) != len)
		goto bad;
	/* read_from_file() only clears @len bytes; terminate the buffer since
	 * it is printed with %s below.
	 */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = write_to_file(fpath, value, len, false);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1937
1938 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1939 * controller we will reside in the root cgroup. This means that
1940 * cgroup.clone_children will not have been initialized for us so we need to do
1941 * it.
1942 */
1943 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1944 {
1945 char *clonechildrenpath, v;
1946
1947 if (!string_in_list(h->controllers, "cpuset"))
1948 return true;
1949
1950 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1951
1952 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
1953 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath);
1954 free(clonechildrenpath);
1955 return false;
1956 }
1957
1958 if (v == '1') { /* already set for us by someone else */
1959 free(clonechildrenpath);
1960 return true;
1961 }
1962
1963 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
1964 /* Set clone_children so children inherit our settings */
1965 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1966 free(clonechildrenpath);
1967 return false;
1968 }
1969 free(clonechildrenpath);
1970 return true;
1971 }
1972
1973 /*
1974 * Initialize the cpuset hierarchy in first directory of @gname and
1975 * set cgroup.clone_children so that children inherit settings.
1976 * Since the h->base_path is populated by init or ourselves, we know
1977 * it is already initialized.
1978 */
1979 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
1980 const char *cgroup)
1981 {
1982 char *cgpath, *clonechildrenpath, v, *slash;
1983
1984 if (!string_in_list(h->controllers, "cpuset"))
1985 return true;
1986
1987 if (*cgroup == '/')
1988 cgroup++;
1989 slash = strchr(cgroup, '/');
1990 if (slash)
1991 *slash = '\0';
1992
1993 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
1994 if (slash)
1995 *slash = '/';
1996 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
1997 pam_cgfs_debug("Failed to create '%s'", cgpath);
1998 free(cgpath);
1999 return false;
2000 }
2001 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
2002 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
2003 free(clonechildrenpath);
2004 free(cgpath);
2005 return true;
2006 }
2007 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
2008 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath);
2009 free(clonechildrenpath);
2010 free(cgpath);
2011 return false;
2012 }
2013
2014 /* Make sure any isolated cpus are removed from cpuset.cpus. */
2015 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
2016 pam_cgfs_debug("%s", "Failed to remove isolated cpus.\n");
2017 free(clonechildrenpath);
2018 free(cgpath);
2019 return false;
2020 }
2021
2022 if (v == '1') { /* already set for us by someone else */
2023 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\".\n");
2024 free(clonechildrenpath);
2025 free(cgpath);
2026 return true;
2027 }
2028
2029 /* copy parent's settings */
2030 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
2031 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings.\n");
2032 free(cgpath);
2033 free(clonechildrenpath);
2034 return false;
2035 }
2036 free(cgpath);
2037
2038 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
2039 /* Set clone_children so children inherit our settings */
2040 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2041 free(clonechildrenpath);
2042 return false;
2043 }
2044 free(clonechildrenpath);
2045 return true;
2046 }
2047
2048 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2049 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2050 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2051 * to the caller in @existed.
2052 */
2053 #define __PAM_CGFS_USER "/user/"
2054 #define __PAM_CGFS_USER_LEN 6
2055 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2056 {
2057 char *clean_base_cgroup, *path;
2058 char **controller;
2059 struct cgv1_hierarchy *it;
2060 bool created = false;
2061
2062 *existed = false;
2063 it = h;
2064 for (controller = it->controllers; controller && *controller;
2065 controller++) {
2066 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2067 return false;
2068
2069 /* If systemd has already created a cgroup for us, keep using
2070 * it.
2071 */
2072 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2073 it->base_cgroup, uid, gid,
2074 it->systemd_user_slice)) {
2075 return true;
2076 }
2077
2078 /* We need to make sure that we do not create an endless chain
2079 * of sub-cgroups. So we check if we have already logged in
2080 * somehow (sudo -i, su, etc.) and have created a
2081 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2082 * cgroups this is unnecessary since we use the init_cgroup
2083 * anyway, but for controllers which have an existing systemd
2084 * cgroup that does not match the current uid, this is pretty
2085 * useful.
2086 */
2087 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2088 free(it->base_cgroup);
2089 it->base_cgroup = must_copy_string("/");
2090 } else {
2091 clean_base_cgroup =
2092 strstr(it->base_cgroup, __PAM_CGFS_USER);
2093 if (clean_base_cgroup)
2094 *clean_base_cgroup = '\0';
2095 }
2096
2097 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2098 pam_cgfs_debug("Constructing path: %s.\n", path);
2099 if (file_exists(path)) {
2100 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2101 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d.\n", path, our_cg ? "" : "not ", uid, gid);
2102 free(path);
2103 if (our_cg)
2104 *existed = false;
2105 else
2106 *existed = true;
2107 return our_cg;
2108 }
2109 created = mkdir_parent(it->mountpoint, path);
2110 if (!created) {
2111 free(path);
2112 continue;
2113 }
2114 if (chown(path, uid, gid) < 0)
2115 mysyslog(LOG_WARNING,
2116 "Failed to chown %s to %d:%d: %s.\n", path,
2117 (int)uid, (int)gid, strerror(errno), NULL);
2118 pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2119 free(path);
2120 break;
2121 }
2122
2123 return created;
2124 }
2125
2126 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2127 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2128 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2129 */
2130 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2131 {
2132
2133 char *path;
2134
2135 /* Better safe than sorry. */
2136 if (!h->controllers)
2137 return true;
2138
2139 /* Cgroups created by systemd for us which we re-use won't be removed
2140 * here, since we're using init_cgroup + cgroup as path instead of
2141 * base_cgroup + cgroup.
2142 */
2143 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2144 (void)recursive_rmdir(path);
2145 free(path);
2146
2147 return true;
2148 }
2149
2150 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2151 static bool cgv2_remove(const char *cgroup)
2152 {
2153 struct cgv2_hierarchy *v2;
2154 char *path;
2155
2156 if (!cgv2_hierarchies)
2157 return true;
2158
2159 v2 = *cgv2_hierarchies;
2160
2161 /* If we reused an already existing cgroup, don't bother trying to
2162 * remove (a potentially wrong)/the path.
2163 * Cgroups created by systemd for us which we re-use would be removed
2164 * here, since we're using base_cgroup + cgroup as path.
2165 */
2166 if (v2->systemd_user_slice)
2167 return true;
2168
2169 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2170 (void)recursive_rmdir(path);
2171 free(path);
2172
2173 return true;
2174 }
2175
2176 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2177 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2178 * back, to the caller if the creation failed due to @cgroup already existing
2179 * via @existed.
2180 */
2181 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2182 {
2183 struct cgv1_hierarchy **it, **rev_it;
2184 bool all_created = true;
2185
2186 for (it = cgv1_hierarchies; it && *it; it++) {
2187 if (!(*it)->controllers || !(*it)->mountpoint ||
2188 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2189 continue;
2190
2191 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2192 all_created = false;
2193 break;
2194 }
2195 }
2196
2197 if (all_created)
2198 return true;
2199
2200 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2201 rev_it++)
2202 cgv1_remove_one(*rev_it, cgroup);
2203
2204 return false;
2205 }
2206
2207 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2208 * the creation failed due to @cgroup already existing via @existed.
2209 */
2210 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2211 {
2212 int ret;
2213 char *clean_base_cgroup;
2214 char *path;
2215 struct cgv2_hierarchy *v2;
2216 bool our_cg = false, created = false;
2217
2218 *existed = false;
2219
2220 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2221 return true;
2222
2223 v2 = *cgv2_hierarchies;
2224
2225 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2226 * to be placed under our current cgroup.
2227 */
2228 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2229 uid, gid, v2->systemd_user_slice))
2230 goto delegate_files;
2231
2232 /* We need to make sure that we do not create an endless chain of
2233 * sub-cgroups. So we check if we have already logged in somehow (sudo
2234 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2235 * skip that part.
2236 */
2237 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2238 free(v2->base_cgroup);
2239 v2->base_cgroup = must_copy_string("/");
2240 } else {
2241 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2242 if (clean_base_cgroup)
2243 *clean_base_cgroup = '\0';
2244 }
2245
2246 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2247 pam_cgfs_debug("Constructing path \"%s\".\n", path);
2248 if (file_exists(path)) {
2249 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2250 pam_cgfs_debug(
2251 "%s existed and does %shave our uid: %d and gid: %d.\n",
2252 path, our_cg ? "" : "not ", uid, gid);
2253 free(path);
2254 if (our_cg) {
2255 *existed = false;
2256 goto delegate_files;
2257 } else {
2258 *existed = true;
2259 return false;
2260 }
2261 }
2262
2263 created = mkdir_parent(v2->mountpoint, path);
2264 if (!created) {
2265 free(path);
2266 return false;
2267 }
2268
2269 /* chown cgroup to user */
2270 if (chown(path, uid, gid) < 0)
2271 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2272 path, (int)uid, (int)gid, strerror(errno), NULL);
2273 else
2274 pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2275 free(path);
2276
2277 delegate_files:
2278 /* chown cgroup.procs to user */
2279 if (v2->systemd_user_slice)
2280 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2281 "/cgroup.procs", NULL);
2282 else
2283 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2284 "/cgroup.procs", NULL);
2285 ret = chown(path, uid, gid);
2286 if (ret < 0)
2287 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2288 path, (int)uid, (int)gid, strerror(errno), NULL);
2289 else
2290 pam_cgfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2291 free(path);
2292
2293 /* chown cgroup.subtree_control to user */
2294 if (v2->systemd_user_slice)
2295 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2296 "/cgroup.subtree_control", NULL);
2297 else
2298 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2299 "/cgroup.subtree_control", NULL);
2300 ret = chown(path, uid, gid);
2301 if (ret < 0)
2302 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2303 path, (int)uid, (int)gid, strerror(errno), NULL);
2304 free(path);
2305
2306 /* chown cgroup.threads to user */
2307 if (v2->systemd_user_slice)
2308 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2309 "/cgroup.threads", NULL);
2310 else
2311 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2312 "/cgroup.threads", NULL);
2313 ret = chown(path, uid, gid);
2314 if (ret < 0 && errno != ENOENT)
2315 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2316 path, (int)uid, (int)gid, strerror(errno), NULL);
2317 free(path);
2318
2319 return true;
2320 }
2321
2322 /* Create writeable cgroups for @user at login. Details can be found in the
2323 * preamble/license at the top of this file.
2324 */
2325 static int handle_login(const char *user, uid_t uid, gid_t gid)
2326 {
2327 int idx = 0, ret;
2328 bool existed;
2329 char cg[MAXPATHLEN];
2330
2331 cg_escape();
2332
2333 while (idx >= 0) {
2334 ret = snprintf(cg, MAXPATHLEN, "/user/%s/%d", user, idx);
2335 if (ret < 0 || ret >= MAXPATHLEN) {
2336 mysyslog(LOG_ERR, "Username too long.\n", NULL);
2337 return PAM_SESSION_ERR;
2338 }
2339
2340 existed = false;
2341 if (!cgv2_create(cg, uid, gid, &existed)) {
2342 if (existed) {
2343 cgv2_remove(cg);
2344 idx++;
2345 continue;
2346 }
2347 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
2348 return PAM_SESSION_ERR;
2349 }
2350
2351 existed = false;
2352 if (!cgv1_create(cg, uid, gid, &existed)) {
2353 if (existed) {
2354 cgv2_remove(cg);
2355 idx++;
2356 continue;
2357 }
2358 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
2359 return PAM_SESSION_ERR;
2360 }
2361
2362 if (!cg_enter(cg)) {
2363 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s.\n", cg, user, NULL);
2364 return PAM_SESSION_ERR;
2365 }
2366 break;
2367 }
2368
2369 return PAM_SUCCESS;
2370 }
2371
2372 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2373 * hierarchies.
2374 */
2375 static bool cgv1_prune_empty_cgroups(const char *user)
2376 {
2377 bool controller_removed = true;
2378 bool all_removed = true;
2379 struct cgv1_hierarchy **it;
2380
2381 for (it = cgv1_hierarchies; it && *it; it++) {
2382 int ret;
2383 char *path_base, *path_init;
2384 char **controller;
2385
2386 if (!(*it)->controllers || !(*it)->mountpoint ||
2387 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2388 continue;
2389
2390 for (controller = (*it)->controllers; controller && *controller;
2391 controller++) {
2392 bool path_base_rm, path_init_rm;
2393
2394 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2395 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_base);
2396 ret = recursive_rmdir(path_base);
2397 if (ret == -ENOENT || ret >= 0)
2398 path_base_rm = true;
2399 else
2400 path_base_rm = false;
2401 free(path_base);
2402
2403 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2404 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_init);
2405 ret = recursive_rmdir(path_init);
2406 if (ret == -ENOENT || ret >= 0)
2407 path_init_rm = true;
2408 else
2409 path_init_rm = false;
2410 free(path_init);
2411
2412 if (!path_base_rm && !path_init_rm) {
2413 controller_removed = false;
2414 continue;
2415 }
2416
2417 controller_removed = true;
2418 break;
2419 }
2420 if (!controller_removed)
2421 all_removed = false;
2422 }
2423
2424 return all_removed;
2425 }
2426
2427 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2428 * hierarchy.
2429 */
2430 static bool cgv2_prune_empty_cgroups(const char *user)
2431 {
2432 int ret;
2433 struct cgv2_hierarchy *v2;
2434 char *path_base, *path_init;
2435 bool path_base_rm, path_init_rm;
2436
2437 if (!cgv2_hierarchies)
2438 return true;
2439
2440 v2 = *cgv2_hierarchies;
2441
2442 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2443 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_base);
2444 ret = recursive_rmdir(path_base);
2445 if (ret == -ENOENT || ret >= 0)
2446 path_base_rm = true;
2447 else
2448 path_base_rm = false;
2449 free(path_base);
2450
2451 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2452 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_init);
2453 ret = recursive_rmdir(path_init);
2454 if (ret == -ENOENT || ret >= 0)
2455 path_init_rm = true;
2456 else
2457 path_init_rm = false;
2458 free(path_init);
2459
2460 if (!path_base_rm && !path_init_rm)
2461 return false;
2462
2463 return true;
2464 }
2465
/* Prune the empty cgroups of @user from the cgroupfs v1 hierarchies first and
 * from the cgroupfs v2 hierarchy second. Pruning is best effort: the results
 * of both helpers are deliberately ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);
	(void)cgv2_prune_empty_cgroups(user);
}
2472
2473 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2474 static void cgv1_free_hierarchies(void)
2475 {
2476 struct cgv1_hierarchy **it;
2477
2478 if (!cgv1_hierarchies)
2479 return;
2480
2481 for (it = cgv1_hierarchies; it && *it; it++) {
2482 if ((*it)->controllers) {
2483 char **tmp;
2484 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2485 free(*tmp);
2486
2487 free((*it)->controllers);
2488 }
2489 free((*it)->mountpoint);
2490 free((*it)->base_cgroup);
2491 free((*it)->fullcgpath);
2492 free((*it)->init_cgroup);
2493 }
2494 free(cgv1_hierarchies);
2495 }
2496
2497 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2498 static void cgv2_free_hierarchies(void)
2499 {
2500 struct cgv2_hierarchy **it;
2501
2502 if (!cgv2_hierarchies)
2503 return;
2504
2505 for (it = cgv2_hierarchies; it && *it; it++) {
2506 if ((*it)->controllers) {
2507 char **tmp;
2508 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2509 free(*tmp);
2510
2511 free((*it)->controllers);
2512 }
2513 free((*it)->mountpoint);
2514 free((*it)->base_cgroup);
2515 free((*it)->fullcgpath);
2516 free((*it)->init_cgroup);
2517 }
2518 free(cgv2_hierarchies);
2519 }
2520
/* Release the bookkeeping for the detected cgroup hierarchies: cgroupfs v1
 * first, cgroupfs v2 second.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();
	cgv2_free_hierarchies();
}
2527
2528 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2529 const char **argv)
2530 {
2531 int ret;
2532 uid_t uid = 0;
2533 gid_t gid = 0;
2534 const char *PAM_user = NULL;
2535
2536 ret = pam_get_user(pamh, &PAM_user, NULL);
2537 if (ret != PAM_SUCCESS) {
2538 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2539 return PAM_SESSION_ERR;
2540 }
2541
2542 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2543 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2544 return PAM_SESSION_ERR;
2545 }
2546
2547 if (!cg_init(uid, gid)) {
2548 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2549 return PAM_SESSION_ERR;
2550 }
2551
2552 /* Try to prune cgroups, that are actually empty but were still marked
2553 * as busy by the kernel so we couldn't remove them on session close.
2554 */
2555 cg_prune_empty_cgroups(PAM_user);
2556
2557 if (cg_mount_mode == CGROUP_UNKNOWN)
2558 return PAM_SESSION_ERR;
2559
2560 if (argc > 1 && !strcmp(argv[0], "-c")) {
2561 char **clist = make_string_list(argv[1], ",");
2562
2563 /*
2564 * We don't allow using "all" and other controllers explicitly because
2565 * that simply doesn't make any sense.
2566 */
2567 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2568 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'.\n", NULL);
2569 free_string_list(clist);
2570 return PAM_SESSION_ERR;
2571 }
2572
2573 cg_mark_to_make_rw(clist);
2574 free_string_list(clist);
2575 }
2576
2577 return handle_login(PAM_user, uid, gid);
2578 }
2579
2580 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2581 const char **argv)
2582 {
2583 int ret;
2584 uid_t uid = 0;
2585 gid_t gid = 0;
2586 const char *PAM_user = NULL;
2587
2588 ret = pam_get_user(pamh, &PAM_user, NULL);
2589 if (ret != PAM_SUCCESS) {
2590 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2591 return PAM_SESSION_ERR;
2592 }
2593
2594 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2595 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2596 return PAM_SESSION_ERR;
2597 }
2598
2599 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2600 if (!cg_init(uid, gid))
2601 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2602
2603 if (argc > 1 && !strcmp(argv[0], "-c")) {
2604 char **clist = make_string_list(argv[1], ",");
2605
2606 /*
2607 * We don't allow using "all" and other controllers explicitly because
2608 * that simply doesn't make any sense.
2609 */
2610 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2611 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'.\n", NULL);
2612 free_string_list(clist);
2613 return PAM_SESSION_ERR;
2614 }
2615
2616 cg_mark_to_make_rw(clist);
2617 free_string_list(clist);
2618 }
2619 }
2620
2621 cg_prune_empty_cgroups(PAM_user);
2622 cg_exit();
2623
2624 return PAM_SUCCESS;
2625 }