]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
tree-wide: make files cloexec whenever possible
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #include <ctype.h>
7 #include <dirent.h>
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <linux/unistd.h>
11 #include <pwd.h>
12 #include <stdarg.h>
13 #include <stdbool.h>
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/mount.h>
19 #include <sys/param.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <sys/vfs.h>
23 #include <syslog.h>
24 #include <unistd.h>
25
26 #include "config.h"
27 #include "file_utils.h"
28 #include "macro.h"
29 #include "memory_utils.h"
30 #include "string_utils.h"
31
32 #define PAM_SM_SESSION
33 #include <security/_pam_macros.h>
34 #include <security/pam_modules.h>
35
36 #ifndef HAVE_STRLCPY
37 #include "include/strlcpy.h"
38 #endif
39
40 #ifndef HAVE_STRLCAT
41 #include "include/strlcat.h"
42 #endif
43
/* Print a printf-style message to @stream, prefixed with the source file, line
 * number and function name of the call site. At least one argument must follow
 * @format because __VA_ARGS__ is expanded after a comma.
 */
#define pam_cgfs_debug_stream(stream, format, ...) \
	do { \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__); \
	} while (false)

/* Same as pam_cgfs_debug_stream(), but always writes to stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* pam_cgfs_debug() only produces output when DEBUG is defined; otherwise it
 * expands to nothing, so its arguments are not evaluated.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...)
#endif /* DEBUG */
57
/* Taken over modified from the kernel sources.
 *
 * NBITS is the width of one bitmap word (uint32_t). DIV_ROUND_UP() divides @n
 * by @d rounding up. BITS_TO_LONGS() yields the number of NBITS-wide words
 * needed to hold @nr bits (despite the name, the words are uint32_t here).
 */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
62
/* File-wide cgroup mount mode. The variable starts out as
 * CGROUP_UNINITIALIZED.
 * NOTE(review): the meaning of the remaining values (mixed v1/v2, v1 only,
 * v2 only, detection failed) is inferred from their names - confirm against
 * the code that assigns them.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
70
71 /* Common helper functions. Most of these have been taken from LXC. */
72 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
73 static int append_null_to_list(void ***list);
74 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
75 static inline void clear_bit(unsigned bit, uint32_t *bitarr)
76 {
77 bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
78 }
79 static char *copy_to_eol(char *s);
80 static void free_string_list(char **list);
81 static char *get_mountpoint(char *line);
82 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
83 static int handle_login(const char *user, uid_t uid, gid_t gid);
84 static inline bool is_set(unsigned bit, uint32_t *bitarr)
85 {
86 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
87 }
88 static bool is_lxcfs(const char *line);
89 static bool is_cgv1(char *line);
90 static bool is_cgv2(char *line);
91 static void must_add_to_list(char ***clist, char *entry);
92 static void must_append_controller(char **klist, char **nlist, char ***clist,
93 char *entry);
94 static void must_append_string(char ***list, char *entry);
95 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
96 static char *read_file(char *fnam);
97 static int recursive_rmdir(char *dirname);
98 static inline void set_bit(unsigned bit, uint32_t *bitarr)
99 {
100 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
101 }
102 static bool string_in_list(char **list, const char *entry);
103 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
104 static void trim(char *s);
105 static bool write_int(char *path, int v);
106
107 /* cgroupfs prototypes. */
108 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
109 static uint32_t *cg_cpumask(char *buf, size_t nbits);
110 static bool cg_copy_parent_file(char *path, char *file);
111 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
112 static bool cg_enter(const char *cgroup);
113 static void cg_escape(void);
114 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
115 static ssize_t cg_get_max_cpus(char *cpulist);
116 static int cg_get_version_of_mntpt(const char *path);
117 static bool cg_init(uid_t uid, gid_t gid);
118 static void cg_mark_to_make_rw(char **list);
119 static void cg_prune_empty_cgroups(const char *user);
120 static bool cg_systemd_created_user_slice(const char *base_cgroup,
121 const char *init_cgroup,
122 const char *in, uid_t uid);
123 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
124 const char *base_cgroup, uid_t uid,
125 gid_t gid,
126 bool systemd_user_slice);
127 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
128 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
129 const char *init_cgroup, uid_t uid);
130 static void cg_systemd_prune_init_scope(char *cg);
131 static bool is_lxcfs(const char *line);
132
133 /* cgroupfs v1 prototypes. */
/* Information recorded about one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of controllers of this hierarchy. */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* Cgroup of the current process, taken from /proc/self/cgroup. */
	char *base_cgroup;
	/* Set to NULL when the hierarchy is recorded.
	 * NOTE(review): presumably filled in once a cgroup has been created -
	 * confirm against the create path.
	 */
	char *fullcgpath;
	/* Cgroup of PID 1, taken from /proc/1/cgroup. */
	char *init_cgroup;
	/* Marks hierarchies in which a writeable cgroup is to be created. */
	bool create_rw_cgroup;
	/* Set when systemd was detected to have created a cgroup for the user. */
	bool systemd_user_slice;
};
143
144 static struct cgv1_hierarchy **cgv1_hierarchies;
145
146 static void cgv1_add_controller(char **clist, char *mountpoint,
147 char *base_cgroup, char *init_cgroup);
148 static bool cgv1_controller_in_clist(char *cgline, char *c);
149 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
150 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
151 char **clist);
152 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
153 bool *existed);
154 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
155 uid_t uid, gid_t gid, bool *existed);
156 static bool cgv1_enter(const char *cgroup);
157 static void cgv1_escape(void);
158 static bool cgv1_get_controllers(char ***klist, char ***nlist);
159 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
160 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
161 char *line);
162 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
163 const char *cgroup);
164 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
165 static bool cgv1_init(uid_t uid, gid_t gid);
166 static void cgv1_mark_to_make_rw(char **clist);
167 static char *cgv1_must_prefix_named(char *entry);
168 static bool cgv1_prune_empty_cgroups(const char *user);
169 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
170 static bool is_cgv1(char *line);
171
172 /* cgroupfs v2 prototypes. */
/* Information recorded about the mounted cgroupfs v2 hierarchy. */
struct cgv2_hierarchy {
	/* Null-terminated list of controllers, as handed in by the caller. */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* Cgroup of the current process. */
	char *base_cgroup;
	/* Set to NULL when the hierarchy is recorded.
	 * NOTE(review): presumably filled in once a cgroup has been created -
	 * confirm against the create path.
	 */
	char *fullcgpath;
	/* Cgroup of PID 1. */
	char *init_cgroup;
	/* Marks whether a writeable cgroup is to be created. */
	bool create_rw_cgroup;
	/* Whether systemd already created a cgroup for the user. */
	bool systemd_user_slice;
};
182
183 /* Actually this should only be a single hierarchy. But for the sake of
184 * parallelism and because the layout of the cgroupfs v2 is still somewhat
185 * changing, we'll leave it as an array of structs.
186 */
187 static struct cgv2_hierarchy **cgv2_hierarchies;
188
189 static void cgv2_add_controller(char **clist, char *mountpoint,
190 char *base_cgroup, char *init_cgroup,
191 bool systemd_user_slice);
192 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
193 bool *existed);
194 static bool cgv2_enter(const char *cgroup);
195 static void cgv2_escape(void);
196 static char *cgv2_get_current_cgroup(int pid);
197 static bool cgv2_init(uid_t uid, gid_t gid);
198 static void cgv2_mark_to_make_rw(char **clist);
199 static bool cgv2_prune_empty_cgroups(const char *user);
200 static bool cgv2_remove(const char *cgroup);
201 static bool is_cgv2(char *line);
202
/* Create directory @path with mode @mode while the process umask is
 * temporarily cleared, so that @mode is applied unmasked. The previous umask
 * is restored afterwards. Returns what mkdir() returned and leaves errno as
 * mkdir() set it.
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_mask = umask(0);
	const int ret = mkdir(path, mode);
	const int mkdir_errno = errno;

	/* Restoring the umask must not hide the errno reported by mkdir(). */
	umask(old_mask);
	errno = mkdir_errno;

	return ret;
}
216
/* Create directory @path and (if necessary) its parents.
 *
 * The first strlen(@root) + 1 bytes of @path are skipped; only the components
 * behind them are created (mode 0755). @path is cut off temporarily while it
 * is walked, but is always restored before returning, also on failure.
 *
 * Returns true if @path equals @root in length or every component exists
 * afterwards. Returns false if @path is shorter than @root or a component
 * could not be created.
 */
static bool mkdir_parent(const char *root, char *path)
{
	char *b, orig, *e;
	size_t rootlen = strlen(root);
	size_t pathlen = strlen(path);

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	b = path + rootlen + 1;
	for (;;) {
		/* Skip (repeated) separators in front of the next component. */
		while (*b == '/')
			b++;
		if (!*b)
			return true;

		e = b + 1;
		while (*e && *e != '/')
			e++;

		/* Temporarily terminate @path behind the current component. */
		orig = *e;
		if (orig)
			*e = '\0';

		/* EEXIST means somebody created the directory between the
		 * file_exists() check and mkdir(); that is not a failure.
		 */
		if (!file_exists(path) && do_mkdir(path, 0755) < 0 && errno != EEXIST) {
			pam_cgfs_debug("Failed to create %s: %s\n", path, strerror(errno));
			/* Hand the caller back an unmodified path. */
			if (orig)
				*e = orig;
			return false;
		}

		if (!orig)
			return true;

		*e = orig;
		b = e + 1;
	}
}
261
262 /* Common helper functions. Most of these have been taken from LXC. */
/* Send a printf-style message with priority @err to syslog under the
 * "PAM-CGFS" identity (LOG_AUTH facility). The log connection is opened and
 * closed again on every call.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
	vsyslog(err, format, ap);
#pragma GCC diagnostic pop
	va_end(ap);

	closelog();
}
276
/* realloc() pointer in batch sizes; do not fail.
 *
 * *@mem is grown to the next multiple of BATCH_SIZE that can hold @newlen
 * bytes, but only if it is still NULL or if @newlen needs more batches than
 * @oldlen did. The batch counts are kept in size_t so that large lengths are
 * not truncated.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
287
/* Append the @newlen bytes of @new plus its terminating '\0' to *@dest, which
 * currently holds @oldlen bytes. *@dest is grown as needed; does not fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Room for old content, new content and the trailing '\0'. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
297
/* Read in whole file @fnam and return it as one allocated, '\0'-terminated
 * buffer which the caller must free(). Returns NULL if the file cannot be
 * opened or contains no lines.
 *
 * The file is opened close-on-exec so the descriptor cannot leak into
 * programs executed while it is open.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "re");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
320
/* Grow the null-terminated pointer array *@list by one slot; do not fail.
 *
 * The new last slot is set to NULL. The returned index is that of the slot in
 * front of it (the former terminator, or slot 0 for a fresh list), which the
 * caller is expected to fill.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	while (*list && (*list)[count])
		count++;

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
339
/* Append a copy of @entry to the null-terminated string array *@list; the
 * array stays null-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	const int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
352
/* Strip all trailing '\n' characters from @s in place. Newlines elsewhere in
 * the string are left alone.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end != s && end[-1] == '\n')
		*--end = '\0';
}
361
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL if @s contains no '\n'. The caller frees the copy.
 */
static char *copy_to_eol(char *s)
{
	const char *eol = strchr(s, '\n');
	size_t n;
	char *out;

	if (!eol)
		return NULL;

	n = eol - s;
	out = must_realloc(NULL, n + 1);
	memcpy(out, s, n);
	out[n] = '\0';

	return out;
}
379
/* Check if the given /proc/<pid>/mountinfo line is a fuse.lxcfs mount, i.e.
 * whether its first " - " separator is followed by the "fuse.lxcfs" type.
 */
static bool is_lxcfs(const char *line)
{
	static const char fstype[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, fstype, sizeof(fstype) - 1) == 0;
}
389
/* Check if the given /proc/<pid>/mountinfo line is a cgroupfs v1 mount, i.e.
 * whether its first " - " separator is followed by the "cgroup" type.
 */
static bool is_cgv1(char *line)
{
	static const char fstype[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, fstype, sizeof(fstype) - 1) == 0;
}
399
/* Check if the given /proc/<pid>/mountinfo line is a cgroupfs v2 mount, i.e.
 * whether its first " - " separator is followed by the "cgroup2" type.
 */
static bool is_cgv2(char *line)
{
	static const char fstype[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, fstype, sizeof(fstype) - 1) == 0;
}
409
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
423
/*
 * Split @str on any of the separator characters in @sep and return the pieces
 * as a null-terminated array of allocated strings. Returns NULL if @str holds
 * no token. Caller is responsible for calling free_string_list.
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	char *scratch = must_copy_string(str);
	char *piece = strtok_r(scratch, sep, &state);

	while (piece) {
		must_add_to_list(&result, piece);
		piece = strtok_r(NULL, sep, &state);
	}

	free(scratch);

	return result;
}
444
/* Count the strings in the null-terminated array @list. A NULL @list has
 * length 0.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (!list)
		return 0;

	while (list[n])
		n++;

	return n;
}
456
/* Free every string of the null-terminated array @list and then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
466
/* Write the single integer @v, followed by a newline, to the file @path.
 *
 * The file is opened close-on-exec so the descriptor cannot leak into
 * programs executed while it is open. Returns false if the file cannot be
 * opened or if writing or closing (i.e. flushing) it fails.
 */
static bool write_int(char *path, int v)
{
	FILE *f;
	bool ret = true;

	f = fopen(path, "we");
	if (!f)
		return false;

	if (fprintf(f, "%d\n", v) < 0)
		ret = false;

	/* fclose() flushes; a failure here means the value was not written. */
	if (fclose(f) != 0)
		ret = false;

	return ret;
}
485
486 /* Recursively remove directory and its parents. */
487 static int recursive_rmdir(char *dirname)
488 {
489 __do_closedir DIR *dir = NULL;
490 struct dirent *direntp;
491 int r = 0;
492
493 dir = opendir(dirname);
494 if (!dir)
495 return -ENOENT;
496
497 while ((direntp = readdir(dir))) {
498 struct stat st;
499 char *pathname;
500
501 if (!strcmp(direntp->d_name, ".") ||
502 !strcmp(direntp->d_name, ".."))
503 continue;
504
505 pathname = must_make_path(dirname, direntp->d_name, NULL);
506
507 if (lstat(pathname, &st)) {
508 if (!r)
509 pam_cgfs_debug("Failed to stat %s\n", pathname);
510 r = -1;
511 goto next;
512 }
513
514 if (!S_ISDIR(st.st_mode))
515 goto next;
516
517 if (recursive_rmdir(pathname) < 0)
518 r = -1;
519
520 next:
521 free(pathname);
522 }
523
524 if (rmdir(dirname) < 0) {
525 if (!r)
526 pam_cgfs_debug("Failed to delete %s: %s\n", dirname, strerror(errno));
527 r = -1;
528 }
529
530 return r;
531 }
532
/* Add a copy of @entry to the null-terminated array of strings *@clist. The
 * array is grown by one slot and stays null-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	const int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
543
/* Get the mountpoint from a /proc/<pid>/mountinfo line: the fifth
 * space-separated field. Returns an allocated copy, or NULL if the line has
 * fewer than five fields.
 *
 * Note that @line is modified: the space behind the mountpoint (if any) is
 * overwritten with '\0'.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *field_end, *mnt;
	size_t mntlen;
	int skipped;

	/* Step over the four fields in front of the mountpoint. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	field_end = strchr(field, ' ');
	if (field_end)
		*field_end = '\0';

	mntlen = strlen(field);
	mnt = must_realloc(NULL, mntlen + 1);
	/* Copies the terminating '\0' as well. */
	memcpy(mnt, field, mntlen + 1);

	return mnt;
}
571
/* Create list of cgroupfs v1 controllers found under /proc/self/cgroup. Skips
 * the 0::/some/path cgroupfs v2 hierarchy listed. Splits controllers into
 * kernel controllers (@klist) and named controllers (@nlist, entries starting
 * with "name=").
 *
 * The file is opened close-on-exec so the descriptor cannot leak into
 * programs executed while it is open. Returns false only if
 * /proc/self/cgroup cannot be opened; the lists are untouched in that case.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;

	f = fopen("/proc/self/cgroup", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *p, *p2, *tok;
		char *saveptr = NULL;

		/* Lines look like <id>:<controller-list>:<path>. */
		p = strchr(line, ':');
		if (!p)
			continue;
		p++;

		p2 = strchr(p, ':');
		if (!p2)
			continue;
		*p2 = '\0';

		/* Skip the v2 hierarchy: its controller list is empty. */
		if ((p2 - p) == 0)
			continue;

		for (tok = strtok_r(p, ",", &saveptr); tok;
		     tok = strtok_r(NULL, ",", &saveptr)) {
			if (strncmp(tok, "name=", 5) == 0)
				must_append_string(nlist, tok);
			else
				must_append_string(klist, tok);
		}
	}

	free(line);
	fclose(f);

	return true;
}
618
619 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
620 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
621 static bool cgv2_get_controllers(char ***klist)
622 {
623 return -ENOSYS;
624 }
625 */
626
/* Get the cgroup of process @pid in the cgroupfs v2 hierarchy, i.e. the path
 * of the "0::/..." entry in /proc/<pid>/cgroup. Returns an allocated string,
 * or NULL if the file cannot be read or has no complete such entry.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	int ret;
	char *contents;
	char *entry;
	char *cgroup = NULL;
	/* The largest integer that can fit into long int is 2^64. This is a
	 * 20-digit number. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];

	ret = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (ret < 0 || ret >= __PIDLEN)
		return NULL;

	contents = read_file(path);
	if (!contents)
		return NULL;

	entry = strstr(contents, "0::/");
	if (entry) {
		/* Skip "0::" so that the copy starts at the leading '/'. */
		cgroup = copy_to_eol(entry + 3);
		if (cgroup)
			trim(cgroup);
	}

	free(contents);

	return cgroup;
}
663
/* Given two null-terminated lists of strings, return true if at least one
 * string occurs in both. A NULL list intersects with nothing.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
680
681 /* For a null-terminated list of controllers @clist, return true if any of those
682 * controllers is already listed the null-terminated list of hierarchies @hlist.
683 * Realistically, if one is present, all must be present.
684 */
685 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
686 {
687 struct cgv1_hierarchy **it;
688
689 for (it = hlist; it && *it; it++)
690 if ((*it)->controllers)
691 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
692 return true;
693
694 return false;
695
696 }
697
698 /* Set boolean to mark controllers under which we are supposed create a
699 * writeable cgroup.
700 */
701 static void cgv1_mark_to_make_rw(char **clist)
702 {
703 struct cgv1_hierarchy **it;
704
705 for (it = cgv1_hierarchies; it && *it; it++)
706 if ((*it)->controllers)
707 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
708 string_in_list(clist, "all"))
709 (*it)->create_rw_cgroup = true;
710 }
711
712 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
713 * the cgroupfs v2 hierarchy.
714 */
715 static void cgv2_mark_to_make_rw(char **clist)
716 {
717 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
718 if (cgv2_hierarchies)
719 (*cgv2_hierarchies)->create_rw_cgroup = true;
720 }
721
/* Wrapper around cgv{1,2}_mark_to_make_rw(): applies the controller list
 * @clist to the recorded cgroupfs v1 hierarchies first and to the cgroupfs v2
 * hierarchy second.
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
728
/* Return an allocated copy of @entry prefixed with "name=", e.g. "systemd"
 * becomes "name=systemd". Returns NULL only if formatting fails.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* sizeof("name=") accounts for the prefix and the trailing '\0'. */
	const size_t size = strlen(entry) + sizeof("name=");
	char *prefixed = must_realloc(NULL, size);
	const int n = snprintf(prefixed, size, "name=%s", entry);

	if (n < 0 || (size_t)n >= size) {
		free(prefixed);
		return NULL;
	}

	return prefixed;
}
747
/* Append controller @entry to the null-terminated list *@clist.
 *
 * Entries already carrying the "name=" prefix and entries found in the kernel
 * controller list @klist are copied verbatim; anything else is treated as a
 * named controller and gets the "name=" prefix. An entry listed in both
 * @klist and @nlist is not appended at all.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	int slot;
	char *stored;

	if (string_in_list(klist, entry) && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (strncmp(entry, "name=", 5) == 0 || string_in_list(klist, entry))
		stored = must_copy_string(entry);
	else
		stored = cgv1_must_prefix_named(entry);

	(*clist)[slot] = stored;
}
768
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list.
 *
 * Returns a null-terminated list of controllers, or NULL if the line has too
 * few fields or its mountpoint is not below /sys/fs/cgroup/. @line is
 * modified: the space behind the mountpoint is overwritten with '\0'.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char prefix[] = "/sys/fs/cgroup/";
	char *field = line;
	char *field_end, *name;
	char *state = NULL;
	char **controllers = NULL;
	int skipped;

	/* The mountpoint is the fifth space-separated field. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	if (strncmp(field, prefix, sizeof(prefix) - 1) != 0)
		return NULL;
	field += sizeof(prefix) - 1;

	field_end = strchr(field, ' ');
	if (!field_end)
		return NULL;
	*field_end = '\0';

	name = strtok_r(field, ",", &state);
	while (name) {
		must_append_controller(klist, nlist, &controllers, name);
		name = strtok_r(NULL, ",", &state);
	}

	return controllers;
}
806
807 /* Check if a cgroupfs v2 controller is present in the string @cgline. */
808 static bool cgv1_controller_in_clist(char *cgline, char *c)
809 {
810 __do_free char *tmp = NULL;
811 size_t len;
812 char *tok, *eol;
813 char *saveptr = NULL;
814
815 eol = strchr(cgline, ':');
816 if (!eol)
817 return false;
818
819 len = eol - cgline;
820 tmp = must_realloc(NULL, len + 1);
821 memcpy(tmp, cgline, len);
822 tmp[len] = '\0';
823
824 for (tok = strtok_r(tmp, ",", &saveptr); tok;
825 tok = strtok_r(NULL, ",", &saveptr)) {
826 if (strcmp(tok, c) == 0)
827 return true;
828 }
829
830 return false;
831 }
832
/* Look up the cgroup of the cgv1 controller @controller in @basecginfo, the
 * contents of a /proc/<pid>/cgroup file. Returns an allocated copy of the
 * path of the first line listing @controller, or NULL if there is no such
 * (complete) line.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;

	for (;;) {
		char *clist, *path;

		/* Step over the hierarchy id in front of the first ':'. */
		clist = strchr(cur, ':');
		if (!clist)
			return NULL;
		clist++;

		if (cgv1_controller_in_clist(clist, controller)) {
			path = strchr(clist, ':');
			if (!path)
				return NULL;

			return copy_to_eol(path + 1);
		}

		/* Not this line; continue behind the next newline. */
		cur = strchr(clist, '\n');
		if (!cur)
			return NULL;
		cur++;
	}
}
865
/* Remove a trailing /init.scope from string @cg in place. This will mostly
 * affect systemd-based systems.
 *
 * If @cg consists of nothing but "/init.scope" it is reduced to "/". A NULL
 * @cg and strings not ending in /init.scope are left alone. The lengths are
 * compared before any pointer is formed so that no pointer in front of @cg is
 * ever computed for short strings.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scopelen = strlen(INIT_SCOPE);
	size_t cglen;
	char *point;

	if (!cg)
		return;

	cglen = strlen(cg);
	if (cglen < scopelen)
		return;

	point = cg + (cglen - scopelen);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	/* Keep the leading '/' when nothing else would remain. */
	if (point == cg)
		*(point + 1) = '\0';
	else
		*point = '\0';
}
888
889 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
890 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
891 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
892 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
893 * from /proc/1/cgroup.
894 */
895 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
896 {
897 struct cgv1_hierarchy *new;
898 int newentry;
899
900 new = must_realloc(NULL, sizeof(*new));
901
902 new->controllers = clist;
903 new->mountpoint = mountpoint;
904 new->base_cgroup = base_cgroup;
905 new->fullcgpath = NULL;
906 new->create_rw_cgroup = false;
907 new->init_cgroup = init_cgroup;
908 new->systemd_user_slice = false;
909
910 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
911 cgv1_hierarchies[newentry] = new;
912 }
913
914 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
915 * currently) include the controllers mounted into the hierarchy (e.g. memory,
916 * pids, blkio), the mountpoint of that hierarchy (Should usually be
917 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
918 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
919 * base cgroup of the current process gathered from /proc/self/cgroup, and the
920 * init cgroup of PID1 gathered from /proc/1/cgroup.
921 */
922 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
923 {
924 struct cgv2_hierarchy *new;
925 int newentry;
926
927 new = must_realloc(NULL, sizeof(*new));
928
929 new->controllers = clist;
930 new->mountpoint = mountpoint;
931 new->base_cgroup = base_cgroup;
932 new->fullcgpath = NULL;
933 new->create_rw_cgroup = false;
934 new->init_cgroup = init_cgroup;
935 new->systemd_user_slice = systemd_user_slice;
936
937 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
938 cgv2_hierarchies[newentry] = new;
939 }
940
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last element of path @in ends in ".session" and the
 * element in front of it starts with the number @uid. Trailing '/' are
 * ignored. @in is only read; the scan never steps in front of its first
 * byte, also when the path has fewer than two '/'.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	const char *end, *last, *prev;
	size_t len;
	int id;

	len = strlen(in);
	if (len < strlen("/user/1.user/1.session"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	end = in + len;
	while (end > in && *(end - 1) == '/')
		end--;
	if (end == in)
		return false;

	/* Find the '/' in front of the last path element. */
	last = end - 1;
	while (last > in && *last != '/')
		last--;
	if (*last != '/')
		return false;

	/* Make sure the last element is something.session. */
	len = (size_t)(end - (last + 1));
	if (len < strlen("1.session") ||
	    strncmp(end - 8, ".session", 8) != 0)
		return false;

	/* Last path piece checks out, now find the start of the second to
	 * last one: right behind the previous '/' or at the very beginning.
	 */
	prev = last;
	while (prev > in && *(prev - 1) != '/')
		prev--;

	if (sscanf(prev, "%d.user/", &id) != 1)
		return false;

	return id == (int)uid;
}
994
/* So long as our path relative to init starts with /user.slice/user-$uid.slice,
 * assume it belongs to $uid and chown it.
 *
 * Returns true if @base_cgroup starts with @init_cgroup and continues with
 * "/user.slice/user-$uid.slice/". An @init_cgroup of "/" counts as an empty
 * prefix. @uid is formatted as an unsigned number so that uids above INT_MAX
 * are matched as well.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	int ret;
	char buf[100];
	size_t curlen, initlen;

	curlen = strlen(base_cgroup);
	initlen = strlen(init_cgroup);
	if (curlen <= initlen)
		return false;

	if (strncmp(base_cgroup, init_cgroup, initlen) != 0)
		return false;

	ret = snprintf(buf, sizeof(buf), "/user.slice/user-%lu.slice/",
		       (unsigned long)uid);
	if (ret < 0 || (size_t)ret >= sizeof(buf))
		return false;

	if (initlen == 1)
		initlen = 0; /* skip the '/' */

	return strncmp(base_cgroup + initlen, buf, (size_t)ret) == 0;
}
1022
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 *  - they fool us, we create new cgroups, and they get auto-logged-out.
 *  - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 *    lose ownership of their cgroups
 *
 * Returns true if either of the cg_systemd_under_user_slice_{1,2}() checks
 * succeeds, or if path @in ends in "user-$uid.slice/session-*.scope" for the
 * given @uid. Trailing '/' are ignored. @in is only read; the scan never
 * steps in front of its first byte, also when the path has fewer than two
 * '/'.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	const char *end, *last, *prev;
	size_t len;
	int id;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	len = strlen(in);
	if (len < strlen("/user-0.slice/session-0.scope"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	end = in + len;
	while (end > in && *(end - 1) == '/')
		end--;
	if (end == in)
		return false;

	/* Find the '/' in front of the last path element. */
	last = end - 1;
	while (last > in && *last != '/')
		last--;
	if (*last != '/')
		return false;

	/* Make sure it is session-something.scope. Once the prefix matched
	 * the element is at least 8 bytes long, so end - 6 stays inside it.
	 */
	if (strncmp(last + 1, "session-", strlen("session-")) != 0 ||
	    strncmp(end - 6, ".scope", 6) != 0)
		return false;

	/* Last path piece checks out, now find the start of the second to
	 * last one: right behind the previous '/' or at the very beginning.
	 */
	prev = last;
	while (prev > in && *(prev - 1) != '/')
		prev--;

	if (sscanf(prev, "user-%d.slice/", &id) != 1)
		return false;

	return id == (int)uid;
}
1096
/* Chown existing cgroup that systemd has already created for us.
 *
 * Returns false without doing anything unless @systemd_user_slice is set.
 * Otherwise chowns @mountpoint/@base_cgroup to @uid:@gid and returns true; a
 * failing chown() is only reported to syslog as a warning. The debug message
 * announcing success is emitted only when chown() actually succeeded.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
	}

	free(path);
	return true;
}
1120
1121 /* Detect and store information about cgroupfs v1 hierarchies. */
1122 static bool cgv1_init(uid_t uid, gid_t gid)
1123 {
1124 FILE *f;
1125 struct cgv1_hierarchy **it;
1126 char *basecginfo;
1127 char *line = NULL;
1128 char **klist = NULL, **nlist = NULL;
1129 size_t len = 0;
1130
1131 basecginfo = read_file("/proc/self/cgroup");
1132 if (!basecginfo)
1133 return false;
1134
1135 f = fopen("/proc/self/mountinfo", "r");
1136 if (!f) {
1137 free(basecginfo);
1138 return false;
1139 }
1140
1141 cgv1_get_controllers(&klist, &nlist);
1142
1143 while (getline(&line, &len, f) != -1) {
1144 char **controller_list = NULL;
1145 char *mountpoint, *base_cgroup;
1146
1147 if (is_lxcfs(line) || !is_cgv1(line))
1148 continue;
1149
1150 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1151 if (!controller_list)
1152 continue;
1153
1154 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
1155 free(controller_list);
1156 continue;
1157 }
1158
1159 mountpoint = get_mountpoint(line);
1160 if (!mountpoint) {
1161 free_string_list(controller_list);
1162 continue;
1163 }
1164
1165 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1166 if (!base_cgroup) {
1167 free_string_list(controller_list);
1168 free(mountpoint);
1169 continue;
1170 }
1171
1172 trim(base_cgroup);
1173 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1174 "mountpoint \"%s\" and cgroup \"%s\"\n",
1175 controller_list[0], mountpoint, base_cgroup);
1176 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
1177 }
1178
1179 free_string_list(klist);
1180 free_string_list(nlist);
1181 free(basecginfo);
1182 fclose(f);
1183 free(line);
1184
1185 /* Retrieve init cgroup path for all controllers. */
1186 basecginfo = read_file("/proc/1/cgroup");
1187 if (!basecginfo)
1188 return false;
1189
1190 for (it = cgv1_hierarchies; it && *it; it++) {
1191 if ((*it)->controllers) {
1192 char *init_cgroup, *user_slice;
1193
1194 /* We've already stored the controller and received its
1195 * current cgroup. If we now fail to retrieve its init
1196 * cgroup, we should probably fail.
1197 */
1198 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1199 if (!init_cgroup) {
1200 free(basecginfo);
1201 return false;
1202 }
1203
1204 cg_systemd_prune_init_scope(init_cgroup);
1205 (*it)->init_cgroup = init_cgroup;
1206 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1207 "cgroup \"%s\"\n",
1208 (*(*it)->controllers), init_cgroup);
1209
1210 /* Check whether systemd has already created a cgroup
1211 * for us.
1212 */
1213 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1214 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1215 (*it)->systemd_user_slice = true;
1216
1217 free(user_slice);
1218 }
1219 }
1220 free(basecginfo);
1221
1222 return true;
1223 }
1224
1225 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1226 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1227 */
1228 static inline int cg_get_version_of_mntpt(const char *path)
1229 {
1230 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1231 return 1;
1232
1233 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1234 return 2;
1235
1236 return 0;
1237 }
1238
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * @uid is used to check whether systemd already created a user slice for us;
 * @gid is currently unused.
 *
 * Returns true if no v2 hierarchy is present or if one was found and recorded
 * via cgv2_add_controller(). Returns false if init's v2 cgroup cannot be
 * determined, /proc/self/mountinfo cannot be opened, or no v2 mountpoint is
 * found although we reside in a v2 cgroup.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* Report the detection here, where @mountpoint is known to be
		 * valid; after the loop it may be unset if nothing matched.
		 */
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\"\n",
			       mountpoint, current_cgroup, init_cgroup);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* On success the cgroup strings are not freed here.
	 * NOTE(review): presumably cgv2_add_controller() keeps them - confirm.
	 */
	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1339
1340 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1341 * cgroupfs v2 hierarchy.
1342 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1343 * where some controllers are mounted into their standard cgroupfs v1 locations
1344 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1345 * hierarchy (/sys/fs/cgroup).
1346 */
1347 static bool cg_init(uid_t uid, gid_t gid)
1348 {
1349 if (!cgv1_init(uid, gid))
1350 return false;
1351
1352 if (!cgv2_init(uid, gid))
1353 return false;
1354
1355 if (cgv1_hierarchies && cgv2_hierarchies) {
1356 cg_mount_mode = CGROUP_MIXED;
1357 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1358 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1359 cg_mount_mode = CGROUP_PURE_V1;
1360 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1361 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1362 cg_mount_mode = CGROUP_PURE_V2;
1363 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1364 } else {
1365 cg_mount_mode = CGROUP_UNKNOWN;
1366 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy\n", NULL);
1367 }
1368
1369 if (cg_mount_mode == CGROUP_UNKNOWN)
1370 return false;
1371
1372 return true;
1373 }
1374
1375 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1376 static bool cgv1_enter(const char *cgroup)
1377 {
1378 struct cgv1_hierarchy **it;
1379
1380 for (it = cgv1_hierarchies; it && *it; it++) {
1381 char **controller;
1382 bool entered = false;
1383
1384 if (!(*it)->controllers || !(*it)->mountpoint ||
1385 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1386 continue;
1387
1388 for (controller = (*it)->controllers; controller && *controller;
1389 controller++) {
1390 char *path;
1391
1392 /* We've already been placed in a user slice, so we
1393 * don't need to enter the cgroup again.
1394 */
1395 if ((*it)->systemd_user_slice) {
1396 entered = true;
1397 break;
1398 }
1399
1400 path = must_make_path((*it)->mountpoint,
1401 (*it)->init_cgroup,
1402 cgroup,
1403 "/cgroup.procs",
1404 NULL);
1405 if (!file_exists(path)) {
1406 free(path);
1407 path = must_make_path((*it)->mountpoint,
1408 (*it)->init_cgroup,
1409 cgroup,
1410 "/tasks",
1411 NULL);
1412 }
1413
1414 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1415 entered = write_int(path, (int)getpid());
1416 if (entered) {
1417 free(path);
1418 break;
1419 }
1420
1421 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1422 free(path);
1423 }
1424
1425 if (!entered)
1426 return false;
1427 }
1428
1429 return true;
1430 }
1431
1432 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1433 static bool cgv2_enter(const char *cgroup)
1434 {
1435 struct cgv2_hierarchy *v2;
1436 char *path;
1437 bool entered = false;
1438
1439 if (!cgv2_hierarchies)
1440 return true;
1441
1442 v2 = *cgv2_hierarchies;
1443
1444 if (!v2->mountpoint || !v2->base_cgroup)
1445 return false;
1446
1447 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1448 return true;
1449
1450 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1451 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1452
1453 entered = write_int(path, (int)getpid());
1454 if (!entered) {
1455 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1456 free(path);
1457 return false;
1458 }
1459
1460 free(path);
1461
1462 return true;
1463 }
1464
/* Enter @cgroup in the cgroupfs v1 hierarchies first and in the v2 hierarchy
 * second. The first failure is logged as a warning and aborts with false; v2
 * is not attempted when v1 failed.
 */
static bool cg_enter(const char *cgroup)
{
	if (cgv1_enter(cgroup)) {
		if (cgv2_enter(cgroup))
			return true;

		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups\n", NULL);
	} else {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups\n", NULL);
	}

	return false;
}
1480
1481 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1482 static void cgv1_escape(void)
1483 {
1484 struct cgv1_hierarchy **it;
1485
1486 /* In case systemd hasn't already placed us in a user slice for the
1487 * cpuset v1 controller we will reside in the root cgroup. This means
1488 * that cgroup.clone_children will not have been initialized for us so
1489 * we need to do it.
1490 */
1491 for (it = cgv1_hierarchies; it && *it; it++)
1492 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1493 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset\n", NULL);
1494
1495 if (!cgv1_enter("/"))
1496 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL);
1497 }
1498
/* Escape to the "/" cgroup in the cgroupfs v2 hierarchy by entering it via
 * cgv2_enter(). A failure is only logged as a warning.
 */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (escaped)
		return;

	mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL);
}
1505
/* Escape in all hierarchies: cgroupfs v1 first (cgv1_escape()), then cgroupfs
 * v2 (cgv2_escape()). Neither step reports failure to the caller.
 */
static void cg_escape(void)
{
	cgv1_escape();

	cgv2_escape();
}
1512
/* Get uid and gid for @user.
 *
 * Looks @user up with getpwnam_r(). The lookup buffer starts at the size
 * suggested by sysconf(_SC_GETPW_R_SIZE_MAX) (1024 bytes if there is no
 * usable suggestion) and is doubled for as long as getpwnam_r() reports
 * ERANGE, up to an upper bound of 1 MiB.
 *
 * On success stores the ids in @uid and @gid and returns true. Returns false
 * if memory cannot be allocated, the lookup fails, or no matching record
 * exists; @uid and @gid are left untouched in that case.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	const size_t max_bufsize = 1024 * 1024;
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long size_hint;
	int ret;

	/* sysconf() returns a long and -1 for "no limit"; keep it signed so
	 * the check does not depend on unsigned wrap-around.
	 */
	size_hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = size_hint > 0 ? (size_t)size_hint : 1024;

	for (;;) {
		char *tmp;

		tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE)
			break;

		/* The record did not fit: retry with a larger buffer, but
		 * do not grow without bound.
		 */
		if (bufsize >= max_bufsize) {
			free(buf);
			return false;
		}
		bufsize *= 2;
	}

	if (!pwentp) {
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1546
/* Check whether @path is owned by @uid and @gid.
 *
 * Returns true only if stat() on @path succeeds and both the owning user and
 * the owning group match; false otherwise.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat sb;

	if (stat(path, &sb) < 0)
		return false;

	return sb.st_uid == uid && sb.st_gid == gid;
}
1560
/* Build a cpumask from a cpulist, i.e. turn
 *
 *	0,2-3
 *
 * into the bit array
 *
 *	1 0 1 1
 *
 * @buf is a comma-separated list of cpu numbers and "first-last" ranges; it
 * is tokenized in place with strtok_r() and therefore modified. @nbits is the
 * number of bits the mask has to hold.
 *
 * Returns a calloc()ed array of BITS_TO_LONGS(@nbits) uint32_t words that the
 * caller has to free, or NULL if the allocation fails, a range is reversed,
 * or a cpu number is not smaller than @nbits.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *tok;
	char *state = NULL;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &state);
	while (tok) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);
		last = first;

		/* A '-' turns the token into a "first-last" range. */
		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);

		tok = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1603
/* Concatenate the NULL-terminated string array @parts, placing @sep between
 * consecutive elements and, if @use_as_prefix is true, also in front of the
 * first one.
 *
 * Returns a newly allocated string the caller has to free, or NULL if @parts
 * is NULL or the allocation fails. An empty @parts array yields an empty
 * string (or just @sep when @use_as_prefix is set).
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t total = use_as_prefix ? sep_len : 0;
	size_t idx;
	char *joined, *cursor;

	if (!parts)
		return NULL;

	/* First pass: work out how many bytes the result needs. */
	for (idx = 0; parts[idx]; idx++) {
		if (idx > 0)
			total += sep_len;
		total += strlen(parts[idx]);
	}

	joined = calloc(total + 1, sizeof(char));
	if (!joined)
		return NULL;

	/* Second pass: copy the pieces; calloc() supplies the terminator. */
	cursor = joined;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (idx = 0; parts[idx]; idx++) {
		size_t part_len = strlen(parts[idx]);

		if (idx > 0) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}

		memcpy(cursor, parts[idx], part_len);
		cursor += part_len;
	}

	return joined;
}
1636
/* Buffer size for the decimal representation of a cpu index: the largest
 * 64-bit unsigned value, 2^64 - 1, has 20 digits; one more byte is needed for
 * the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn cpumask into simple, comma-separated cpulist.
 *
 * @bitarr is a mask holding @nbits bits (bit indices 0 .. @nbits - 1). Every
 * set bit is emitted as its decimal index.
 *
 * Returns a newly allocated string the caller has to free, or NULL if no bit
 * is set or an error occurs.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Only bits below @nbits belong to the mask; looking at bit @nbits
	 * itself would read past the array whenever @nbits is a multiple of
	 * NBITS.
	 */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}

		must_append_string(&cpulist, numstr);
	}

	joined = string_join(",", (const char **)cpulist, false);

	/* string_join() returns its own copy, so the list is ours to free. */
	free_string_list(cpulist);

	return joined;
}
1663
/* Return the number found in the last entry of @cpulist.
 *
 * @cpulist is a cpu list such as "0,2-3". The number following the last ','
 * or '-' (whichever comes later) is parsed; if neither is present the whole
 * string is parsed. @cpulist must not be NULL.
 *
 * Returns the parsed value, or -1 if strtoul() reports an error via errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *last = cpulist;
	char *comma, *dash;
	size_t cpus;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/* Select the separator that occurs later in the string. The two
	 * pointers are only compared when both are valid, so no relational
	 * comparison involving NULL takes place.
	 */
	if (comma && dash)
		last = (comma > dash ? comma : dash) + 1;
	else if (comma)
		last = comma + 1;
	else if (dash)
		last = dash + 1;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return (ssize_t)cpus;
}
1696
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Make sure "cpuset.cpus" of the cgroup @path is populated and free of
 * isolated cpus.
 *
 * The parent cgroup's "cpuset.cpus" is read and every cpu listed in
 * __ISOL_CPUS is removed from it. The result is written to @path/cpuset.cpus
 * if at least one cpu was removed. If there are no isolated cpus at all, the
 * parent's list is copied verbatim, but only when @am_initialized is false.
 *
 * @path is temporarily truncated at its last '/' to address the parent; it is
 * restored before returning.
 *
 * Returns true on success, false on any read, parse or write failure.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		pam_cgfs_debug("Invalid path: %s\n", path);
		return bret;
	}

	/* Cut off the last component so that @path names the parent. */
	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}

	/* <ctype.h> functions require a value representable as unsigned
	 * char.
	 */
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0 || maxisol >= INT_MAX - 1)
		goto on_error;

	/* From here on maxposs is a bit count: highest cpu index plus one. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
		goto on_error;
	}

	/* Both masks hold maxposs bits, so valid indices end at maxposs - 1.
	 * Testing bit maxposs would read past the arrays whenever maxposs is
	 * a multiple of NBITS.
	 */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	/* NOTE(review): when nothing was flipped and @am_initialized is false,
	 * cpuset.cpus of @path is not written at all - confirm that this is
	 * intended.
	 */
	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list\n");
		goto on_error;
	}

copy_parent:
	/* Restore @path so that it names the child cgroup again. */
	*lastslash = oldv;

	free(fpath);

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0660);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	*lastslash = oldv;

	free(fpath);
	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1844
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is briefly truncated at its last '/' to build the parent's file name
 * and is restored right away, so it is unchanged on return no matter whether
 * the copy succeeds.
 *
 * Returns true if the value was read from the parent and written to
 * @path/@file; false if @path contains no '/', the parent's file is empty or
 * unreadable, or the write fails.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	/* fpath is a separate allocation, so @path can be restored at once;
	 * this way no error path can leave it truncated.
	 */
	*lastslash = oldv;

	/* First call only determines the length of the file's content. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}

	value = must_realloc(NULL, len + 1);
	if (lxc_read_from_file(fpath, value, len) != len) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}
	/* Terminate the buffer: it is printed with "%s" below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0660);
	if (ret < 0) {
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);
	}

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1892
1893 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1894 * controller we will reside in the root cgroup. This means that
1895 * cgroup.clone_children will not have been initialized for us so we need to do
1896 * it.
1897 */
1898 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1899 {
1900 char *clonechildrenpath, v;
1901
1902 if (!string_in_list(h->controllers, "cpuset"))
1903 return true;
1904
1905 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1906
1907 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1908 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1909 free(clonechildrenpath);
1910 return false;
1911 }
1912
1913 if (v == '1') { /* already set for us by someone else */
1914 free(clonechildrenpath);
1915 return true;
1916 }
1917
1918 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
1919 /* Set clone_children so children inherit our settings */
1920 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1921 free(clonechildrenpath);
1922 return false;
1923 }
1924
1925 free(clonechildrenpath);
1926 return true;
1927 }
1928
1929 /*
1930 * Initialize the cpuset hierarchy in first directory of @gname and
1931 * set cgroup.clone_children so that children inherit settings.
1932 * Since the h->base_path is populated by init or ourselves, we know
1933 * it is already initialized.
1934 */
1935 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
1936 const char *cgroup)
1937 {
1938 char *cgpath, *clonechildrenpath, v, *slash;
1939
1940 if (!string_in_list(h->controllers, "cpuset"))
1941 return true;
1942
1943 if (*cgroup == '/')
1944 cgroup++;
1945 slash = strchr(cgroup, '/');
1946 if (slash)
1947 *slash = '\0';
1948
1949 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
1950 if (slash)
1951 *slash = '/';
1952
1953 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
1954 pam_cgfs_debug("Failed to create '%s'", cgpath);
1955 free(cgpath);
1956 return false;
1957 }
1958
1959 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
1960 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
1961 free(clonechildrenpath);
1962 free(cgpath);
1963 return true;
1964 }
1965
1966 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1967 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1968 free(clonechildrenpath);
1969 free(cgpath);
1970 return false;
1971 }
1972
1973 /* Make sure any isolated cpus are removed from cpuset.cpus. */
1974 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
1975 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
1976 free(clonechildrenpath);
1977 free(cgpath);
1978 return false;
1979 }
1980
1981 if (v == '1') { /* already set for us by someone else */
1982 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
1983 free(clonechildrenpath);
1984 free(cgpath);
1985 return true;
1986 }
1987
1988 /* copy parent's settings */
1989 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
1990 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
1991 free(cgpath);
1992 free(clonechildrenpath);
1993 return false;
1994 }
1995 free(cgpath);
1996
1997 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
1998 /* Set clone_children so children inherit our settings */
1999 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2000 free(clonechildrenpath);
2001 return false;
2002 }
2003 free(clonechildrenpath);
2004 return true;
2005 }
2006
2007 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2008 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2009 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2010 * to the caller in @existed.
2011 */
2012 #define __PAM_CGFS_USER "/user/"
2013 #define __PAM_CGFS_USER_LEN 6
2014 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2015 {
2016 char *clean_base_cgroup, *path;
2017 char **controller;
2018 struct cgv1_hierarchy *it;
2019 bool created = false;
2020
2021 *existed = false;
2022 it = h;
2023
2024 for (controller = it->controllers; controller && *controller;
2025 controller++) {
2026 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2027 return false;
2028
2029 /* If systemd has already created a cgroup for us, keep using
2030 * it.
2031 */
2032 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2033 it->base_cgroup, uid, gid,
2034 it->systemd_user_slice))
2035 return true;
2036
2037 /* We need to make sure that we do not create an endless chain
2038 * of sub-cgroups. So we check if we have already logged in
2039 * somehow (sudo -i, su, etc.) and have created a
2040 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2041 * cgroups this is unnecessary since we use the init_cgroup
2042 * anyway, but for controllers which have an existing systemd
2043 * cgroup that does not match the current uid, this is pretty
2044 * useful.
2045 */
2046 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2047 free(it->base_cgroup);
2048 it->base_cgroup = must_copy_string("/");
2049 } else {
2050 clean_base_cgroup =
2051 strstr(it->base_cgroup, __PAM_CGFS_USER);
2052 if (clean_base_cgroup)
2053 *clean_base_cgroup = '\0';
2054 }
2055
2056 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2057 pam_cgfs_debug("Constructing path: %s\n", path);
2058
2059 if (file_exists(path)) {
2060 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2061 if (our_cg)
2062 *existed = false;
2063 else
2064 *existed = true;
2065
2066 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2067 path, our_cg ? "" : "not ", uid, gid);
2068 free(path);
2069
2070 return our_cg;
2071 }
2072
2073 created = mkdir_parent(it->mountpoint, path);
2074 if (!created) {
2075 free(path);
2076 continue;
2077 }
2078
2079 if (chown(path, uid, gid) < 0)
2080 mysyslog(LOG_WARNING,
2081 "Failed to chown %s to %d:%d: %s\n", path,
2082 (int)uid, (int)gid, strerror(errno), NULL);
2083
2084 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2085 free(path);
2086 break;
2087 }
2088
2089 return created;
2090 }
2091
2092 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2093 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2094 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2095 */
2096 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2097 {
2098
2099 char *path;
2100
2101 /* Better safe than sorry. */
2102 if (!h->controllers)
2103 return true;
2104
2105 /* Cgroups created by systemd for us which we re-use won't be removed
2106 * here, since we're using init_cgroup + cgroup as path instead of
2107 * base_cgroup + cgroup.
2108 */
2109 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2110 (void)recursive_rmdir(path);
2111 free(path);
2112
2113 return true;
2114 }
2115
2116 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2117 static bool cgv2_remove(const char *cgroup)
2118 {
2119 struct cgv2_hierarchy *v2;
2120 char *path;
2121
2122 if (!cgv2_hierarchies)
2123 return true;
2124
2125 v2 = *cgv2_hierarchies;
2126
2127 /* If we reused an already existing cgroup, don't bother trying to
2128 * remove (a potentially wrong)/the path.
2129 * Cgroups created by systemd for us which we re-use would be removed
2130 * here, since we're using base_cgroup + cgroup as path.
2131 */
2132 if (v2->systemd_user_slice)
2133 return true;
2134
2135 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2136 (void)recursive_rmdir(path);
2137 free(path);
2138
2139 return true;
2140 }
2141
2142 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2143 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2144 * back, to the caller if the creation failed due to @cgroup already existing
2145 * via @existed.
2146 */
2147 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2148 {
2149 struct cgv1_hierarchy **it, **rev_it;
2150 bool all_created = true;
2151
2152 for (it = cgv1_hierarchies; it && *it; it++) {
2153 if (!(*it)->controllers || !(*it)->mountpoint ||
2154 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2155 continue;
2156
2157 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2158 all_created = false;
2159 break;
2160 }
2161 }
2162
2163 if (all_created)
2164 return true;
2165
2166 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2167 rev_it++)
2168 cgv1_remove_one(*rev_it, cgroup);
2169
2170 return false;
2171 }
2172
2173 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2174 * the creation failed due to @cgroup already existing via @existed.
2175 */
2176 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2177 {
2178 int ret;
2179 char *clean_base_cgroup;
2180 char *path;
2181 struct cgv2_hierarchy *v2;
2182 bool our_cg = false, created = false;
2183
2184 *existed = false;
2185
2186 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2187 return true;
2188
2189 v2 = *cgv2_hierarchies;
2190
2191 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2192 * to be placed under our current cgroup.
2193 */
2194 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2195 uid, gid, v2->systemd_user_slice))
2196 goto delegate_files;
2197
2198 /* We need to make sure that we do not create an endless chain of
2199 * sub-cgroups. So we check if we have already logged in somehow (sudo
2200 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2201 * skip that part.
2202 */
2203 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2204 free(v2->base_cgroup);
2205 v2->base_cgroup = must_copy_string("/");
2206 } else {
2207 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2208 if (clean_base_cgroup)
2209 *clean_base_cgroup = '\0';
2210 }
2211
2212 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2213 pam_cgfs_debug("Constructing path \"%s\"\n", path);
2214
2215 if (file_exists(path)) {
2216 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2217 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2218 path, our_cg ? "" : "not ", uid, gid);
2219 free(path);
2220 if (our_cg) {
2221 *existed = false;
2222 goto delegate_files;
2223 } else {
2224 *existed = true;
2225 return false;
2226 }
2227 }
2228
2229 created = mkdir_parent(v2->mountpoint, path);
2230 if (!created) {
2231 free(path);
2232 return false;
2233 }
2234
2235 /* chown cgroup to user */
2236 if (chown(path, uid, gid) < 0)
2237 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2238 path, (int)uid, (int)gid, strerror(errno), NULL);
2239 else
2240 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2241 free(path);
2242
2243 delegate_files:
2244 /* chown cgroup.procs to user */
2245 if (v2->systemd_user_slice)
2246 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2247 "/cgroup.procs", NULL);
2248 else
2249 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2250 "/cgroup.procs", NULL);
2251
2252 ret = chown(path, uid, gid);
2253 if (ret < 0)
2254 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2255 path, (int)uid, (int)gid, strerror(errno), NULL);
2256 else
2257 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2258 free(path);
2259
2260 /* chown cgroup.subtree_control to user */
2261 if (v2->systemd_user_slice)
2262 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2263 "/cgroup.subtree_control", NULL);
2264 else
2265 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2266 "/cgroup.subtree_control", NULL);
2267
2268 ret = chown(path, uid, gid);
2269 if (ret < 0)
2270 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2271 path, (int)uid, (int)gid, strerror(errno), NULL);
2272 free(path);
2273
2274 /* chown cgroup.threads to user */
2275 if (v2->systemd_user_slice)
2276 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2277 "/cgroup.threads", NULL);
2278 else
2279 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2280 "/cgroup.threads", NULL);
2281 ret = chown(path, uid, gid);
2282 if (ret < 0 && errno != ENOENT)
2283 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2284 path, (int)uid, (int)gid, strerror(errno), NULL);
2285 free(path);
2286
2287 return true;
2288 }
2289
2290 /* Create writeable cgroups for @user at login. Details can be found in the
2291 * preamble/license at the top of this file.
2292 */
2293 static int handle_login(const char *user, uid_t uid, gid_t gid)
2294 {
2295 int idx = 0, ret;
2296 bool existed;
2297 char cg[PATH_MAX];
2298
2299 cg_escape();
2300
2301 while (idx >= 0) {
2302 ret = snprintf(cg, PATH_MAX, "/user/%s/%d", user, idx);
2303 if (ret < 0 || ret >= PATH_MAX) {
2304 mysyslog(LOG_ERR, "Username too long\n", NULL);
2305 return PAM_SESSION_ERR;
2306 }
2307
2308 existed = false;
2309 if (!cgv2_create(cg, uid, gid, &existed)) {
2310 if (existed) {
2311 cgv2_remove(cg);
2312 idx++;
2313 continue;
2314 }
2315
2316 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2317 return PAM_SESSION_ERR;
2318 }
2319
2320 existed = false;
2321 if (!cgv1_create(cg, uid, gid, &existed)) {
2322 if (existed) {
2323 cgv2_remove(cg);
2324 idx++;
2325 continue;
2326 }
2327
2328 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2329 return PAM_SESSION_ERR;
2330 }
2331
2332 if (!cg_enter(cg)) {
2333 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s\n", cg, user, NULL);
2334 return PAM_SESSION_ERR;
2335 }
2336
2337 break;
2338 }
2339
2340 return PAM_SUCCESS;
2341 }
2342
2343 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2344 * hierarchies.
2345 */
2346 static bool cgv1_prune_empty_cgroups(const char *user)
2347 {
2348 bool controller_removed = true;
2349 bool all_removed = true;
2350 struct cgv1_hierarchy **it;
2351
2352 for (it = cgv1_hierarchies; it && *it; it++) {
2353 int ret;
2354 char *path_base, *path_init;
2355 char **controller;
2356
2357 if (!(*it)->controllers || !(*it)->mountpoint ||
2358 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2359 continue;
2360
2361 for (controller = (*it)->controllers; controller && *controller;
2362 controller++) {
2363 bool path_base_rm, path_init_rm;
2364
2365 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2366 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base);
2367
2368 ret = recursive_rmdir(path_base);
2369 if (ret == -ENOENT || ret >= 0)
2370 path_base_rm = true;
2371 else
2372 path_base_rm = false;
2373 free(path_base);
2374
2375 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2376 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init);
2377
2378 ret = recursive_rmdir(path_init);
2379 if (ret == -ENOENT || ret >= 0)
2380 path_init_rm = true;
2381 else
2382 path_init_rm = false;
2383 free(path_init);
2384
2385 if (!path_base_rm && !path_init_rm) {
2386 controller_removed = false;
2387 continue;
2388 }
2389
2390 controller_removed = true;
2391 break;
2392 }
2393
2394 if (!controller_removed)
2395 all_removed = false;
2396 }
2397
2398 return all_removed;
2399 }
2400
2401 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2402 * hierarchy.
2403 */
2404 static bool cgv2_prune_empty_cgroups(const char *user)
2405 {
2406 int ret;
2407 struct cgv2_hierarchy *v2;
2408 char *path_base, *path_init;
2409 bool path_base_rm, path_init_rm;
2410
2411 if (!cgv2_hierarchies)
2412 return true;
2413
2414 v2 = *cgv2_hierarchies;
2415
2416 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2417 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base);
2418
2419 ret = recursive_rmdir(path_base);
2420 if (ret == -ENOENT || ret >= 0)
2421 path_base_rm = true;
2422 else
2423 path_base_rm = false;
2424 free(path_base);
2425
2426 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2427 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init);
2428
2429 ret = recursive_rmdir(path_init);
2430 if (ret == -ENOENT || ret >= 0)
2431 path_init_rm = true;
2432 else
2433 path_init_rm = false;
2434 free(path_init);
2435
2436 if (!path_base_rm && !path_init_rm)
2437 return false;
2438
2439 return true;
2440 }
2441
/* Prune the cgroups of @user from the cgroupfs v1 hierarchies first and from
 * the cgroupfs v2 hierarchy second. Pruning is best effort: the results of
 * both helpers are deliberately discarded.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);

	(void)cgv2_prune_empty_cgroups(user);
}
2448
2449 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2450 static void cgv1_free_hierarchies(void)
2451 {
2452 struct cgv1_hierarchy **it;
2453
2454 if (!cgv1_hierarchies)
2455 return;
2456
2457 for (it = cgv1_hierarchies; it && *it; it++) {
2458 if ((*it)->controllers) {
2459 char **tmp;
2460 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2461 free(*tmp);
2462
2463 free((*it)->controllers);
2464 }
2465
2466 free((*it)->mountpoint);
2467 free((*it)->base_cgroup);
2468 free((*it)->fullcgpath);
2469 free((*it)->init_cgroup);
2470 }
2471
2472 free(cgv1_hierarchies);
2473 }
2474
2475 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2476 static void cgv2_free_hierarchies(void)
2477 {
2478 struct cgv2_hierarchy **it;
2479
2480 if (!cgv2_hierarchies)
2481 return;
2482
2483 for (it = cgv2_hierarchies; it && *it; it++) {
2484 if ((*it)->controllers) {
2485 char **tmp;
2486
2487 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2488 free(*tmp);
2489
2490 free((*it)->controllers);
2491 }
2492
2493 free((*it)->mountpoint);
2494 free((*it)->base_cgroup);
2495 free((*it)->fullcgpath);
2496 free((*it)->init_cgroup);
2497 }
2498
2499 free(cgv2_hierarchies);
2500 }
2501
/* Release the hierarchy information for cgroupfs v1 first and cgroupfs v2
 * second by calling the respective cgv{1,2}_free_hierarchies() helper.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();

	cgv2_free_hierarchies();
}
2508
2509 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2510 const char **argv)
2511 {
2512 int ret;
2513 uid_t uid = 0;
2514 gid_t gid = 0;
2515 const char *PAM_user = NULL;
2516
2517 ret = pam_get_user(pamh, &PAM_user, NULL);
2518 if (ret != PAM_SUCCESS) {
2519 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2520 return PAM_SESSION_ERR;
2521 }
2522
2523 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2524 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2525 return PAM_SESSION_ERR;
2526 }
2527
2528 if (!cg_init(uid, gid)) {
2529 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2530 return PAM_SESSION_ERR;
2531 }
2532
2533 /* Try to prune cgroups, that are actually empty but were still marked
2534 * as busy by the kernel so we couldn't remove them on session close.
2535 */
2536 cg_prune_empty_cgroups(PAM_user);
2537
2538 if (cg_mount_mode == CGROUP_UNKNOWN)
2539 return PAM_SESSION_ERR;
2540
2541 if (argc > 1 && !strcmp(argv[0], "-c")) {
2542 char **clist = make_string_list(argv[1], ",");
2543
2544 /*
2545 * We don't allow using "all" and other controllers explicitly because
2546 * that simply doesn't make any sense.
2547 */
2548 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2549 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2550 free_string_list(clist);
2551 return PAM_SESSION_ERR;
2552 }
2553
2554 cg_mark_to_make_rw(clist);
2555 free_string_list(clist);
2556 }
2557
2558 return handle_login(PAM_user, uid, gid);
2559 }
2560
2561 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2562 const char **argv)
2563 {
2564 int ret;
2565 uid_t uid = 0;
2566 gid_t gid = 0;
2567 const char *PAM_user = NULL;
2568
2569 ret = pam_get_user(pamh, &PAM_user, NULL);
2570 if (ret != PAM_SUCCESS) {
2571 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2572 return PAM_SESSION_ERR;
2573 }
2574
2575 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2576 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2577 return PAM_SESSION_ERR;
2578 }
2579
2580 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2581 if (!cg_init(uid, gid))
2582 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2583
2584 if (argc > 1 && !strcmp(argv[0], "-c")) {
2585 char **clist = make_string_list(argv[1], ",");
2586
2587 /*
2588 * We don't allow using "all" and other controllers explicitly because
2589 * that simply doesn't make any sense.
2590 */
2591 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2592 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2593 free_string_list(clist);
2594 return PAM_SESSION_ERR;
2595 }
2596
2597 cg_mark_to_make_rw(clist);
2598 free_string_list(clist);
2599 }
2600 }
2601
2602 cg_prune_empty_cgroups(PAM_user);
2603 cg_exit();
2604
2605 return PAM_SUCCESS;
2606 }