]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
Merge pull request #2629 from ssup2/master
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
6 *
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both, cgroupfs v1 and cgroupfs v2, We
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
 * The same holds for cgroupfs v2 where this assumption becomes crucial:
 * We __have to__ be placed under the cgroup systemd created for us on
 * login, otherwise things like starting an xserver or similar will not work.
29 *
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
36 #ifndef _GNU_SOURCE
37 #define _GNU_SOURCE 1
38 #endif
39 #include <ctype.h>
40 #include <dirent.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <linux/unistd.h>
44 #include <pwd.h>
45 #include <stdarg.h>
46 #include <stdbool.h>
47 #include <stdint.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <sys/mount.h>
52 #include <sys/param.h>
53 #include <sys/stat.h>
54 #include <sys/types.h>
55 #include <sys/vfs.h>
56 #include <syslog.h>
57 #include <unistd.h>
58
59 #include "config.h"
60 #include "macro.h"
61 #include "utils.h"
62
63 #define PAM_SM_SESSION
64 #include <security/_pam_macros.h>
65 #include <security/pam_modules.h>
66
67 #ifndef HAVE_STRLCPY
68 #include "include/strlcpy.h"
69 #endif
70
71 #ifndef HAVE_STRLCAT
72 #include "include/strlcat.h"
73 #endif
74
/* Print a message to @stream, prefixed with the file name, line number and
 * function name of the call site. @format must be a string literal (it is
 * pasted onto the prefix) and at least one variadic argument has to be
 * supplied because __VA_ARGS__ follows a comma.
 */
#define pam_cgfs_debug_stream(stream, format, ...)                             \
	do {                                                                   \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__,     \
			__func__, __VA_ARGS__);                                \
	} while (false)

/* Same as pam_cgfs_debug_stream(), always writing to stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* Debug output: forwards to pam_cgfs_error() when DEBUG is defined and expands
 * to nothing otherwise.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...)
#endif /* DEBUG */
88
89 /* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
/* Integer division of @n by @d, rounded up. */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
/* Number of NBITS-wide (i.e. 32-bit) words needed to hold @nr bits. Despite
 * the name this does not count in units of long.
 */
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
93
/* Detected cgroup mount layout. The file-scope variable of the same name
 * starts out as CGROUP_UNINITIALIZED.
 * NOTE(review): presumably MIXED means both v1 and v2 hierarchies are mounted
 * (see the file header) - confirm against the code that sets it.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
101
102 /* Common helper functions. Most of these have been taken from LXC. */
103 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
104 static int append_null_to_list(void ***list);
105 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
106 static inline void clear_bit(unsigned bit, uint32_t *bitarr)
107 {
108 bitarr[bit / NBITS] &= ~(1 << (bit % NBITS));
109 }
110 static char *copy_to_eol(char *s);
111 static void free_string_list(char **list);
112 static char *get_mountpoint(char *line);
113 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
114 static int handle_login(const char *user, uid_t uid, gid_t gid);
115 static inline bool is_set(unsigned bit, uint32_t *bitarr)
116 {
117 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
118 }
119 static bool is_lxcfs(const char *line);
120 static bool is_cgv1(char *line);
121 static bool is_cgv2(char *line);
122 static void *must_alloc(size_t sz);
123 static void must_add_to_list(char ***clist, char *entry);
124 static void must_append_controller(char **klist, char **nlist, char ***clist,
125 char *entry);
126 static void must_append_string(char ***list, char *entry);
127 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
128 static char *read_file(char *fnam);
129 static int read_from_file(const char *filename, void* buf, size_t count);
130 static int recursive_rmdir(char *dirname);
131 static inline void set_bit(unsigned bit, uint32_t *bitarr)
132 {
133 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
134 }
135 static bool string_in_list(char **list, const char *entry);
136 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
137 static void trim(char *s);
138 static bool write_int(char *path, int v);
139 static ssize_t write_nointr(int fd, const void* buf, size_t count);
140 static int write_to_file(const char *filename, const void *buf, size_t count,
141 bool add_newline);
142
143 /* cgroupfs prototypes. */
144 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
145 static uint32_t *cg_cpumask(char *buf, size_t nbits);
146 static bool cg_copy_parent_file(char *path, char *file);
147 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
148 static bool cg_enter(const char *cgroup);
149 static void cg_escape(void);
150 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
151 static ssize_t cg_get_max_cpus(char *cpulist);
152 static int cg_get_version_of_mntpt(const char *path);
153 static bool cg_init(uid_t uid, gid_t gid);
154 static void cg_mark_to_make_rw(char **list);
155 static void cg_prune_empty_cgroups(const char *user);
156 static bool cg_systemd_created_user_slice(const char *base_cgroup,
157 const char *init_cgroup,
158 const char *in, uid_t uid);
159 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
160 const char *base_cgroup, uid_t uid,
161 gid_t gid,
162 bool systemd_user_slice);
163 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
164 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
165 const char *init_cgroup, uid_t uid);
166 static void cg_systemd_prune_init_scope(char *cg);
167 static bool is_lxcfs(const char *line);
168
169 /* cgroupfs v1 prototypes. */
/* Description of one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of controllers mounted into this hierarchy. */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process, gathered from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialised to NULL by cgv1_add_controller(). */
	char *fullcgpath;
	/* cgroup of PID 1, gathered from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv1_mark_to_make_rw() when a writeable cgroup is requested. */
	bool create_rw_cgroup;
	/* Initialised to false by cgv1_add_controller(). */
	bool systemd_user_slice;
};
179
180 static struct cgv1_hierarchy **cgv1_hierarchies;
181
182 static void cgv1_add_controller(char **clist, char *mountpoint,
183 char *base_cgroup, char *init_cgroup);
184 static bool cgv1_controller_in_clist(char *cgline, char *c);
185 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
186 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
187 char **clist);
188 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
189 bool *existed);
190 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
191 uid_t uid, gid_t gid, bool *existed);
192 static bool cgv1_enter(const char *cgroup);
193 static void cgv1_escape(void);
194 static bool cgv1_get_controllers(char ***klist, char ***nlist);
195 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
196 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
197 char *line);
198 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
199 const char *cgroup);
200 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
201 static bool cgv1_init(uid_t uid, gid_t gid);
202 static void cgv1_mark_to_make_rw(char **clist);
203 static char *cgv1_must_prefix_named(char *entry);
204 static bool cgv1_prune_empty_cgroups(const char *user);
205 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
206 static bool is_cgv1(char *line);
207
208 /* cgroupfs v2 prototypes. */
/* Description of the mounted cgroupfs v2 hierarchy; same layout as
 * struct cgv1_hierarchy.
 */
struct cgv2_hierarchy {
	/* Null-terminated list of controllers, as passed to cgv2_add_controller(). */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process, gathered from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialised to NULL by cgv2_add_controller(). */
	char *fullcgpath;
	/* cgroup of PID 1, gathered from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv2_mark_to_make_rw() when a writeable cgroup is requested. */
	bool create_rw_cgroup;
	/* Value handed to cgv2_add_controller() by its caller. */
	bool systemd_user_slice;
};
218
219 /* Actually this should only be a single hierarchy. But for the sake of
220 * parallelism and because the layout of the cgroupfs v2 is still somewhat
221 * changing, we'll leave it as an array of structs.
222 */
223 static struct cgv2_hierarchy **cgv2_hierarchies;
224
225 static void cgv2_add_controller(char **clist, char *mountpoint,
226 char *base_cgroup, char *init_cgroup,
227 bool systemd_user_slice);
228 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
229 bool *existed);
230 static bool cgv2_enter(const char *cgroup);
231 static void cgv2_escape(void);
232 static char *cgv2_get_current_cgroup(int pid);
233 static bool cgv2_init(uid_t uid, gid_t gid);
234 static void cgv2_mark_to_make_rw(char **clist);
235 static bool cgv2_prune_empty_cgroups(const char *user);
236 static bool cgv2_remove(const char *cgroup);
237 static bool is_cgv2(char *line);
238
/* mkdir(2) wrapper that creates @path with exactly @mode by clearing the
 * process umask for the duration of the call. Returns what mkdir() returned
 * and leaves errno as it was right after mkdir(), regardless of the umask
 * restore that follows.
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_umask = umask(0);
	const int rc = mkdir(path, mode);
	const int mkdir_errno = errno;

	umask(old_umask);
	errno = mkdir_errno;

	return rc;
}
252
/* Create every missing directory component of @path that lies below @root.
 *
 * Only the lengths of @root and @path are compared; @path is not checked to
 * actually start with @root. @path is modified in place while walking (each
 * '/' is temporarily replaced by a NUL byte) and restored before returning,
 * except on the mkdir failure path where it stays truncated.
 *
 * Returns true if all components exist afterwards, false if @path is shorter
 * than @root or a directory could not be created.
 */
static bool mkdir_parent(const char *root, char *path)
{
	const size_t rootlen = strlen(root);
	const size_t pathlen = strlen(path);
	char *cur;

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	cur = path + rootlen + 1;
	for (;;) {
		char *end;
		char saved;

		/* Skip over any run of separators. */
		while (*cur && (*cur == '/'))
			cur++;
		if (*cur == '\0')
			return true;

		/* Find the end of the current component. */
		for (end = cur + 1; *end && *end != '/'; end++)
			;

		/* Cut the path after this component. */
		saved = *end;
		if (saved)
			*end = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s\n", path, strerror(errno));
			return false;
		}

		if (!saved)
			return true;

		*end = saved;
		cur = end + 1;
	}
}
297
298 /* Common helper functions. Most of these have been taken from LXC. */
/* Send one printf-style message with priority @err to syslog under the
 * "PAM-CGFS" identity (LOG_AUTH facility). The log connection is opened and
 * closed around every message. The prototype carries the sentinel attribute,
 * so callers end the argument list with NULL.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
309
/* Grow *@mem in steps of BATCH_SIZE bytes; do not fail.
 * A reallocation only happens when *@mem is still NULL or when @newlen falls
 * into a later batch than @oldlen.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int want = (newlen / BATCH_SIZE) + 1;

	if (*mem && want <= have)
		return;

	*mem = must_realloc(*mem, want * BATCH_SIZE);
}
320
/* Append the @newlen bytes of @new, plus its terminating NUL byte, to the
 * buffer *@dest which currently holds @oldlen bytes. The buffer is grown as
 * needed; do not fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Room for the old content, the new content and the NUL byte. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
330
/* Read the whole file @fnam and return its content as an allocated,
 * NUL-terminated string that the caller must free().
 *
 * Returns NULL if the file cannot be opened. NULL is also returned when no
 * line could be read at all, because the buffer is only allocated on the
 * first appended line.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	/* getline() returns ssize_t; keep the full width instead of an int. */
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
353
/* Grow the null-terminated pointer array *@list by one slot. Do not fail.
 *
 * Returns the index of the freshly added slot. The entry after it is set to
 * NULL so the array stays terminated once the caller has filled the slot; the
 * slot itself is not written here.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list)
		while ((*list)[count])
			count++;

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
372
/* Append a copy of @entry to the null-terminated string array *@list; the
 * array remains null-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
385
/* Strip all trailing '\n' characters from @s in place. Newlines elsewhere in
 * the string are left alone.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
394
/* Allocate @sz bytes via must_realloc(); do not fail. */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
400
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL if @s contains no newline. The caller frees the result.
 */
static char *copy_to_eol(char *s)
{
	const char *eol = strchr(s, '\n');
	size_t n;
	char *dup;

	if (eol == NULL)
		return NULL;

	n = (size_t)(eol - s);
	dup = must_alloc(n + 1);
	memcpy(dup, s, n);
	dup[n] = '\0';

	return dup;
}
418
/* Return true if the /proc/<pid>/mountinfo entry @line describes a fuse.lxcfs
 * mount, i.e. the filesystem type after the first " - " is "fuse.lxcfs".
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
428
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs v1
 * mount, i.e. the filesystem type after the first " - " is exactly "cgroup".
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
438
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs v2
 * mount, i.e. the filesystem type after the first " - " is "cgroup2".
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
448
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list is treated as empty.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
462
/*
 * Split @str at any of the delimiter characters in @sep and return the pieces
 * as a null-terminated array of allocated strings. Returns NULL when @str
 * yields no token. The caller releases the result with free_string_list().
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	/* strtok_r() modifies its input, so work on a private copy. */
	char *scratch = must_copy_string(str);
	char *piece = strtok_r(scratch, sep, &state);

	while (piece) {
		must_add_to_list(&result, piece);
		piece = strtok_r(NULL, sep, &state);
	}

	free(scratch);

	return result;
}
483
/* Count the entries of the null-terminated string array @list. A NULL @list
 * has length 0.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (!list)
		return 0;

	while (list[n])
		n++;

	return n;
}
495
/* Free every string in the null-terminated array @list and then the array
 * itself. Passing NULL is allowed.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
505
/* Write @v as a decimal number followed by a newline to the file @path, which
 * is opened for writing (truncating it). Returns true only if opening,
 * printing and closing all succeeded.
 */
static bool write_int(char *path, int v)
{
	bool printed, closed;
	FILE *out = fopen(path, "w");

	if (!out)
		return false;

	printed = fprintf(out, "%d\n", v) >= 0;
	/* Buffered data is flushed on close, so a close error is a write error. */
	closed = fclose(out) == 0;

	return printed && closed;
}
524
/* Remove the directory @dirname together with all directories below it.
 *
 * Only directories are removed; other entries are skipped, so the final
 * rmdir() fails if such an entry cannot simply be left behind.
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened (whatever the
 * actual reason), and -1 if anything else went wrong. Processing continues
 * after an error so that as much as possible is removed.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			if (!r)
				pam_cgfs_debug("Failed to stat %s\n", pathname);
			r = -1;
			goto next;
		}

		/* Only descend into (and remove) directories. */
		if (!S_ISDIR(st.st_mode))
			goto next;

		if (recursive_rmdir(pathname) < 0)
			r = -1;

next:
		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			pam_cgfs_debug("Failed to delete %s: %s\n", dirname, strerror(errno));
		r = -1;
	}

	if (closedir(dir) < 0) {
		/* Report the close failure as such, not as a failed delete. */
		if (!r)
			pam_cgfs_debug("Failed to close directory %s: %s\n", dirname, strerror(errno));
		r = -1;
	}

	return r;
}
577
/* Add a copy of @entry to the null-terminated string array *@clist, keeping
 * the array null-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	int slot;
	char *dup;

	slot = append_null_to_list((void ***)clist);
	dup = must_copy_string(entry);
	(*clist)[slot] = dup;
}
588
/* Return an allocated copy of the mountpoint, i.e. the fifth space-separated
 * field, of the /proc/<pid>/mountinfo entry @line. Returns NULL if @line has
 * fewer than five fields.
 *
 * @line is modified: the space that ends the mountpoint is overwritten with a
 * NUL byte.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *field_end, *result;
	size_t n;
	int skipped;

	/* Step over the first four fields. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	field_end = strchr(field, ' ');
	if (field_end)
		*field_end = '\0';

	n = strlen(field);
	result = must_alloc(n + 1);
	memcpy(result, field, n);
	result[n] = '\0';

	return result;
}
616
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * Entries starting with "name=" are appended to @nlist (named controllers),
 * everything else to @klist (kernel controllers). Lines with an empty
 * controller field - the "0::/some/path" cgroupfs v2 entry - are skipped.
 *
 * Returns false only if /proc/self/cgroup cannot be opened.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	char *buf = NULL;
	size_t bufsz = 0;
	FILE *fp = fopen("/proc/self/cgroup", "r");

	if (!fp)
		return false;

	while (getline(&buf, &bufsz, fp) != -1) {
		char *state = NULL;
		char *ctrls, *ctrls_end, *name;

		/* Format is <id>:<controller-list>:<path>. */
		ctrls = strchr(buf, ':');
		if (!ctrls)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		if (!ctrls_end)
			continue;
		*ctrls_end = '\0';

		/* Skip the v2 hierarchy. */
		if (ctrls_end == ctrls)
			continue;

		name = strtok_r(ctrls, ",", &state);
		while (name) {
			char ***target = (strncmp(name, "name=", 5) == 0) ? nlist : klist;

			must_append_string(target, name);
			name = strtok_r(NULL, ",", &state);
		}
	}

	fclose(fp);
	free(buf);

	return true;
}
663
664 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
665 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
666 static bool cgv2_get_controllers(char ***klist)
667 {
668 return -ENOSYS;
669 }
670 */
671
/* Return the cgroupfs v2 cgroup of process @pid, read from the "0::/..." entry
 * of /proc/<pid>/cgroup, as an allocated string. Returns NULL if the file
 * cannot be read, has no such entry, or the entry is not newline-terminated.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	/* Path buffer size: "/proc" (5), "/" plus up to 20 decimal digits (21),
	 * "/cgroup" (7) and the terminating NUL byte (1).
	 */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];
	char *content, *entry;
	char *result = NULL;
	int n;

	n = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (n < 0 || n >= __PIDLEN)
		return NULL;

	content = read_file(path);
	if (!content)
		return NULL;

	entry = strstr(content, "0::/");
	if (entry) {
		/* Skip "0::" so the copy starts at the leading '/'. */
		result = copy_to_eol(entry + 3);
	}

	free(content);
	if (result)
		trim(result);

	return result;
}
708
/* Return true if at least one string occurs in both null-terminated lists
 * @l1 and @l2. A NULL list is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
725
726 /* For a null-terminated list of controllers @clist, return true if any of those
727 * controllers is already listed the null-terminated list of hierarchies @hlist.
728 * Realistically, if one is present, all must be present.
729 */
730 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
731 {
732 struct cgv1_hierarchy **it;
733
734 for (it = hlist; it && *it; it++)
735 if ((*it)->controllers)
736 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
737 return true;
738
739 return false;
740
741 }
742
743 /* Set boolean to mark controllers under which we are supposed create a
744 * writeable cgroup.
745 */
746 static void cgv1_mark_to_make_rw(char **clist)
747 {
748 struct cgv1_hierarchy **it;
749
750 for (it = cgv1_hierarchies; it && *it; it++)
751 if ((*it)->controllers)
752 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
753 string_in_list(clist, "all"))
754 (*it)->create_rw_cgroup = true;
755 }
756
757 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
758 * the cgroupfs v2 hierarchy.
759 */
760 static void cgv2_mark_to_make_rw(char **clist)
761 {
762 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
763 if (cgv2_hierarchies)
764 (*cgv2_hierarchies)->create_rw_cgroup = true;
765 }
766
/* Wrapper around cgv{1,2}_mark_to_make_rw(): applies the controller list
 * @clist to the cgroupfs v1 hierarchies first and then to the cgroupfs v2
 * hierarchy.
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
773
/* Return an allocated string consisting of "name=" followed by @entry, e.g.
 * "name=systemd". Returns NULL if formatting fails. The caller frees the
 * result.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* strlen("name=") + strlen(entry) + NUL byte */
	const size_t bufsz = strlen(entry) + 6;
	char *named = must_alloc(bufsz);
	int n = snprintf(named, bufsz, "name=%s", entry);

	if (n < 0 || (size_t)n >= bufsz) {
		free(named);
		return NULL;
	}

	return named;
}
792
/* Append the controller @entry to the null-terminated list *@clist.
 *
 * An entry that already starts with "name=" or that is a kernel controller
 * (listed in @klist) is copied verbatim; anything else is stored with a
 * "name=" prefix. An entry found in both @klist and @nlist is not appended at
 * all.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	const bool is_kernel = string_in_list(klist, entry);
	char *stored;
	int slot;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (strncmp(entry, "name=", 5) == 0 || is_kernel)
		stored = must_copy_string(entry);
	else
		stored = cgv1_must_prefix_named(entry);

	(*clist)[slot] = stored;
}
813
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list.
 *
 * Returns a null-terminated list of controllers, or NULL if @line has too few
 * fields or its mountpoint is not below /sys/fs/cgroup/. @line is modified:
 * the mountpoint field is cut off and split at commas.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char prefix[] = "/sys/fs/cgroup/";
	const size_t prefixlen = sizeof(prefix) - 1;
	char **found = NULL;
	char *state = NULL;
	char *field = line;
	char *field_end, *name;
	int skipped;

	/* The mountpoint is the fifth space-separated field. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	if (strncmp(field, prefix, prefixlen) != 0)
		return NULL;
	field += prefixlen;

	field_end = strchr(field, ' ');
	if (!field_end)
		return NULL;
	*field_end = '\0';

	name = strtok_r(field, ",", &state);
	while (name) {
		must_append_controller(klist, nlist, &found, name);
		name = strtok_r(NULL, ",", &state);
	}

	return found;
}
851
/* Check if the cgroupfs v1 controller @c is present in the comma-separated
 * controller list at the start of @cgline. The list ends at the first ':'.
 *
 * Returns false if @cgline contains no ':' or if @c is not one of the
 * non-empty list entries. The list is scanned in place, so no temporary copy
 * sized by the input is needed and @cgline is not modified.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *end, *tok;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	/* Empty list entries are never considered a match. */
	clen = strlen(c);
	if (clen == 0)
		return false;

	tok = cgline;
	while (tok < end) {
		const char *comma = memchr(tok, ',', (size_t)(end - tok));
		const char *tok_end = comma ? comma : end;

		if ((size_t)(tok_end - tok) == clen && memcmp(tok, c, clen) == 0)
			return true;

		if (!comma)
			break;
		tok = comma + 1;
	}

	return false;
}
876
/* Look up the cgroup of the cgv1 controller @controller in @basecginfo, which
 * holds the content of a /proc/<pid>/cgroup file.
 *
 * Returns an allocated copy of the cgroup path of the first line whose
 * controller list contains @controller, or NULL if there is no such line or
 * the line is malformed.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;

	for (;;) {
		char *ctrls, *cgpath;

		/* Skip the hierarchy id to reach the controller list. */
		ctrls = strchr(cur, ':');
		if (!ctrls)
			return NULL;
		ctrls++;

		if (!cgv1_controller_in_clist(ctrls, controller)) {
			/* Not this line; continue with the next one. */
			cur = strchr(ctrls, '\n');
			if (!cur)
				return NULL;
			cur++;
			continue;
		}

		cgpath = strchr(ctrls, ':');
		if (!cgpath)
			return NULL;

		return copy_to_eol(cgpath + 1);
	}
}
909
/* Remove a trailing "/init.scope" from the string @cg in place. This will
 * mostly affect systemd-based systems. If @cg is exactly "/init.scope" it is
 * reduced to "/". A NULL @cg or a string without that suffix is left alone.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	size_t cglen, scopelen;
	char *point;

	if (!cg)
		return;

	/* Compare the lengths before doing pointer arithmetic so that no pointer
	 * in front of @cg is ever formed for short strings.
	 */
	cglen = strlen(cg);
	scopelen = strlen(INIT_SCOPE);
	if (cglen < scopelen)
		return;

	point = cg + (cglen - scopelen);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';
	else
		*point = '\0';
}
932
933 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
934 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
935 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
936 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
937 * from /proc/1/cgroup.
938 */
939 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
940 {
941 struct cgv1_hierarchy *new;
942 int newentry;
943
944 new = must_alloc(sizeof(*new));
945
946 new->controllers = clist;
947 new->mountpoint = mountpoint;
948 new->base_cgroup = base_cgroup;
949 new->fullcgpath = NULL;
950 new->create_rw_cgroup = false;
951 new->init_cgroup = init_cgroup;
952 new->systemd_user_slice = false;
953
954 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
955 cgv1_hierarchies[newentry] = new;
956 }
957
958 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
959 * currently) include the controllers mounted into the hierarchy (e.g. memory,
960 * pids, blkio), the mountpoint of that hierarchy (Should usually be
961 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
962 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
963 * base cgroup of the current process gathered from /proc/self/cgroup, and the
964 * init cgroup of PID1 gathered from /proc/1/cgroup.
965 */
966 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
967 {
968 struct cgv2_hierarchy *new;
969 int newentry;
970
971 new = must_alloc(sizeof(*new));
972
973 new->controllers = clist;
974 new->mountpoint = mountpoint;
975 new->base_cgroup = base_cgroup;
976 new->fullcgpath = NULL;
977 new->create_rw_cgroup = false;
978 new->init_cgroup = init_cgroup;
979 new->systemd_user_slice = systemd_user_slice;
980
981 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
982 cgv2_hierarchies[newentry] = new;
983 }
984
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last element of @in ends in ".session" and the element
 * before it starts with the decimal number @uid.
 *
 * All scanning is bounded by the start of the private copy: no pointer in
 * front of the buffer is formed or dereferenced, even if the only '/' is the
 * very first character.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	char *p;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	copy = must_copy_string(in);
	len = strlen(copy);
	if (len < strlen("/user/1.user/1.session"))
		goto cleanup;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	while (len > 0 && copy[len - 1] == '/')
		copy[--len] = '\0';
	if (len == 0)
		goto cleanup;

	/* Get last path element */
	p = strrchr(copy, '/');
	if (!p)
		goto cleanup;

	/* make sure it is something.session */
	len = strlen(p + 1);
	if (len < strlen("1.session") ||
	    strncmp(p + 1 + len - 8, ".session", 8) != 0)
		goto cleanup;

	/* ok last path piece checks out, now check the second to last */
	*(p + 1) = '\0';

	/* Walk back to the first character of the second to last element,
	 * stopping at the start of the buffer.
	 */
	while (p > copy && *(p - 1) != '/')
		p--;

	if (sscanf(p, "%d.user/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
1038
/* So long as our path relative to init starts with /user.slice/user-$uid.slice,
 * assume it belongs to $uid and chown it.
 *
 * Returns true if @base_cgroup is longer than @init_cgroup, starts with
 * @init_cgroup, and continues with "/user.slice/user-<uid>.slice/".
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char expected[100];
	const size_t baselen = strlen(base_cgroup);
	size_t skip = strlen(init_cgroup);
	int n;

	if (baselen <= skip || strncmp(base_cgroup, init_cgroup, skip) != 0)
		return false;

	n = snprintf(expected, 100, "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || n >= 100)
		return false;

	/* An init cgroup of length one is "/": keep that slash, because the
	 * expected prefix itself starts with '/'.
	 */
	if (skip == 1)
		skip = 0;

	return strncmp(base_cgroup + skip, expected, strlen(expected)) == 0;
}
1066
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 * lose ownership of their cgroups
 *
 * Returns true if either of the cg_systemd_under_user_slice_{1,2}() checks
 * succeeds, or if @in ends in "user-<uid>.slice/session-<something>.scope".
 *
 * All scanning is bounded by the start of the private copy: no pointer in
 * front of the buffer is formed or dereferenced, even if the only '/' is the
 * very first character.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	char *p;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	copy = must_copy_string(in);

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		goto succeed;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		goto succeed;

	len = strlen(copy);
	if (len < strlen("/user-0.slice/session-0.scope"))
		goto cleanup;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (len > 0 && copy[len - 1] == '/')
		copy[--len] = '\0';

	if (len == 0)
		goto cleanup;

	/* Get last path element */
	p = strrchr(copy, '/');
	if (!p)
		goto cleanup;

	/* Make sure it is session-something.scope. */
	len = strlen(p + 1);
	if (strncmp(p + 1, "session-", strlen("session-")) != 0 ||
	    strncmp(p + 1 + len - 6, ".scope", 6) != 0)
		goto cleanup;

	/* Ok last path piece checks out, now check the second to last. */
	*(p + 1) = '\0';

	/* Walk back to the first character of the second to last element,
	 * stopping at the start of the buffer.
	 */
	while (p > copy && *(p - 1) != '/')
		p--;

	if (sscanf(p, "user-%d.slice/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

succeed:
	bret = true;

cleanup:
	free(copy);
	return bret;
}
1140
/* Hand the cgroup that systemd already created for us over to @uid:@gid.
 *
 * Returns false without touching anything when @systemd_user_slice is not set.
 * Otherwise the cgroup at @mountpoint/@base_cgroup is chowned and true is
 * returned; a failing chown() is only reported through syslog.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *cgpath;
	int rc;

	if (!systemd_user_slice)
		return false;

	/* The cgroup itself exists already, so changing its owner is all that
	 * is left to do.
	 */
	cgpath = must_make_path(mountpoint, base_cgroup, NULL);

	rc = chown(cgpath, uid, gid);
	if (rc < 0)
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
			 cgpath, (int)uid, (int)gid, strerror(errno), NULL);
	pam_cgfs_debug("Chowned %s to %d:%d\n", cgpath, (int)uid, (int)gid);

	free(cgpath);
	return true;
}
1164
1165 /* Detect and store information about cgroupfs v1 hierarchies. */
1166 static bool cgv1_init(uid_t uid, gid_t gid)
1167 {
1168 FILE *f;
1169 struct cgv1_hierarchy **it;
1170 char *basecginfo;
1171 char *line = NULL;
1172 char **klist = NULL, **nlist = NULL;
1173 size_t len = 0;
1174
1175 basecginfo = read_file("/proc/self/cgroup");
1176 if (!basecginfo)
1177 return false;
1178
1179 f = fopen("/proc/self/mountinfo", "r");
1180 if (!f) {
1181 free(basecginfo);
1182 return false;
1183 }
1184
1185 cgv1_get_controllers(&klist, &nlist);
1186
1187 while (getline(&line, &len, f) != -1) {
1188 char **controller_list = NULL;
1189 char *mountpoint, *base_cgroup;
1190
1191 if (is_lxcfs(line) || !is_cgv1(line))
1192 continue;
1193
1194 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1195 if (!controller_list)
1196 continue;
1197
1198 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
1199 free(controller_list);
1200 continue;
1201 }
1202
1203 mountpoint = get_mountpoint(line);
1204 if (!mountpoint) {
1205 free_string_list(controller_list);
1206 continue;
1207 }
1208
1209 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1210 if (!base_cgroup) {
1211 free_string_list(controller_list);
1212 free(mountpoint);
1213 continue;
1214 }
1215
1216 trim(base_cgroup);
1217 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1218 "mountpoint \"%s\" and cgroup \"%s\"\n",
1219 controller_list[0], mountpoint, base_cgroup);
1220 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
1221 }
1222
1223 free_string_list(klist);
1224 free_string_list(nlist);
1225 free(basecginfo);
1226 fclose(f);
1227 free(line);
1228
1229 /* Retrieve init cgroup path for all controllers. */
1230 basecginfo = read_file("/proc/1/cgroup");
1231 if (!basecginfo)
1232 return false;
1233
1234 for (it = cgv1_hierarchies; it && *it; it++) {
1235 if ((*it)->controllers) {
1236 char *init_cgroup, *user_slice;
1237
1238 /* We've already stored the controller and received its
1239 * current cgroup. If we now fail to retrieve its init
1240 * cgroup, we should probably fail.
1241 */
1242 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1243 if (!init_cgroup) {
1244 free(basecginfo);
1245 return false;
1246 }
1247
1248 cg_systemd_prune_init_scope(init_cgroup);
1249 (*it)->init_cgroup = init_cgroup;
1250 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1251 "cgroup \"%s\"\n",
1252 (*(*it)->controllers), init_cgroup);
1253
1254 /* Check whether systemd has already created a cgroup
1255 * for us.
1256 */
1257 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1258 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1259 (*it)->systemd_user_slice = true;
1260
1261 free(user_slice);
1262 }
1263 }
1264 free(basecginfo);
1265
1266 return true;
1267 }
1268
1269 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1270 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1271 */
1272 static inline int cg_get_version_of_mntpt(const char *path)
1273 {
1274 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1275 return 1;
1276
1277 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1278 return 2;
1279
1280 return 0;
1281 }
1282
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Returns true when no v2 hierarchy is present or when one was found and
 * registered. Returns false when init's v2 cgroup cannot be determined,
 * /proc/self/mountinfo cannot be opened or no v2 mountpoint is found.
 *
 * @gid is not used by this function.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		/* Only log once a mountpoint has actually been found, so the
		 * message never sees an unset pointer.
		 */
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\"\n",
			       mountpoint, current_cgroup, init_cgroup);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* On success the cgroup strings are not released here; presumably
	 * they now belong to the registered hierarchy.
	 */
	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1383
1384 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1385 * cgroupfs v2 hierarchy.
1386 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1387 * where some controllers are mounted into their standard cgroupfs v1 locations
1388 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1389 * hierarchy (/sys/fs/cgroup).
1390 */
1391 static bool cg_init(uid_t uid, gid_t gid)
1392 {
1393 if (!cgv1_init(uid, gid))
1394 return false;
1395
1396 if (!cgv2_init(uid, gid))
1397 return false;
1398
1399 if (cgv1_hierarchies && cgv2_hierarchies) {
1400 cg_mount_mode = CGROUP_MIXED;
1401 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1402 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1403 cg_mount_mode = CGROUP_PURE_V1;
1404 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1405 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1406 cg_mount_mode = CGROUP_PURE_V2;
1407 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1408 } else {
1409 cg_mount_mode = CGROUP_UNKNOWN;
1410 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy\n", NULL);
1411 }
1412
1413 if (cg_mount_mode == CGROUP_UNKNOWN)
1414 return false;
1415
1416 return true;
1417 }
1418
1419 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1420 static bool cgv1_enter(const char *cgroup)
1421 {
1422 struct cgv1_hierarchy **it;
1423
1424 for (it = cgv1_hierarchies; it && *it; it++) {
1425 char **controller;
1426 bool entered = false;
1427
1428 if (!(*it)->controllers || !(*it)->mountpoint ||
1429 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1430 continue;
1431
1432 for (controller = (*it)->controllers; controller && *controller;
1433 controller++) {
1434 char *path;
1435
1436 /* We've already been placed in a user slice, so we
1437 * don't need to enter the cgroup again.
1438 */
1439 if ((*it)->systemd_user_slice) {
1440 entered = true;
1441 break;
1442 }
1443
1444 path = must_make_path((*it)->mountpoint,
1445 (*it)->init_cgroup,
1446 cgroup,
1447 "/cgroup.procs",
1448 NULL);
1449 if (!file_exists(path)) {
1450 free(path);
1451 path = must_make_path((*it)->mountpoint,
1452 (*it)->init_cgroup,
1453 cgroup,
1454 "/tasks",
1455 NULL);
1456 }
1457
1458 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1459 entered = write_int(path, (int)getpid());
1460 if (entered) {
1461 free(path);
1462 break;
1463 }
1464
1465 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1466 free(path);
1467 }
1468
1469 if (!entered)
1470 return false;
1471 }
1472
1473 return true;
1474 }
1475
1476 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1477 static bool cgv2_enter(const char *cgroup)
1478 {
1479 struct cgv2_hierarchy *v2;
1480 char *path;
1481 bool entered = false;
1482
1483 if (!cgv2_hierarchies)
1484 return true;
1485
1486 v2 = *cgv2_hierarchies;
1487
1488 if (!v2->mountpoint || !v2->base_cgroup)
1489 return false;
1490
1491 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1492 return true;
1493
1494 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1495 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1496
1497 entered = write_int(path, (int)getpid());
1498 if (!entered) {
1499 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1500 free(path);
1501 return false;
1502 }
1503
1504 free(path);
1505
1506 return true;
1507 }
1508
/* Enter @cgroup in the v1 hierarchies first and in the v2 hierarchy second.
 * The first failure is logged and stops the sequence.
 */
static bool cg_enter(const char *cgroup)
{
	bool v1_ok = cgv1_enter(cgroup);

	if (!v1_ok) {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups\n", NULL);
		return false;
	}

	if (cgv2_enter(cgroup))
		return true;

	mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups\n", NULL);
	return false;
}
1524
1525 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1526 static void cgv1_escape(void)
1527 {
1528 struct cgv1_hierarchy **it;
1529
1530 /* In case systemd hasn't already placed us in a user slice for the
1531 * cpuset v1 controller we will reside in the root cgroup. This means
1532 * that cgroup.clone_children will not have been initialized for us so
1533 * we need to do it.
1534 */
1535 for (it = cgv1_hierarchies; it && *it; it++)
1536 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1537 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset\n", NULL);
1538
1539 if (!cgv1_enter("/"))
1540 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL);
1541 }
1542
/* Escape to the root cgroup in the cgroupfs v2 hierarchy; a failure is only
 * logged.
 */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (!escaped)
		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL);
}
1549
/* Escape to the root cgroup everywhere: v1 hierarchies first, then v2. */
static void cg_escape(void)
{
	cgv1_escape();

	cgv2_escape();
}
1556
/* Look up the uid and gid of @user in the password database.
 *
 * On success @uid and @gid are filled in and true is returned. Returns false
 * if no matching record exists, the lookup fails or memory runs out; @uid and
 * @gid are left untouched in that case.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	/* Upper bound for the lookup buffer when it has to be grown. */
	const size_t max_bufsize = 1024 * 1024;
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long hint;
	int ret;

	/* sysconf() returns a long and uses -1 for "no limit/unknown". Keep
	 * the value signed until it has been validated.
	 */
	hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = hint > 0 ? (size_t)hint : 1024;

	for (;;) {
		char *tmp;

		tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE)
			break;

		/* The record did not fit: retry with a bigger buffer. */
		if (bufsize >= max_bufsize) {
			free(buf);
			return false;
		}
		bufsize *= 2;
	}

	if (!pwentp) {
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1590
/* Tell whether @path can be stat()ed and is owned by exactly @uid and @gid.
 * Callers use this to decide whether an existing cgroup can be reused.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && sb.st_uid == uid && sb.st_gid == gid;
}
1604
/* Build a cpumask from a cpulist, e.g. turn
 *
 *	0,2-3
 *
 * into the bit array
 *
 *	1 0 1 1
 *
 * @buf is split in place at every ','. @nbits is the number of bits the mask
 * must be able to hold. Returns a newly allocated mask the caller has to
 * free, or NULL if allocation fails, a range runs backwards or an entry is not
 * smaller than @nbits.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *tok, *state = NULL;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &state);
	while (tok) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);

		/* A lone number is a range of length one. */
		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);
		else
			last = first;

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);

		tok = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1647
/* Concatenate the NULL-terminated string array @parts, putting @sep between
 * neighbouring elements and, if @use_as_prefix is set, in front of the first
 * one as well.
 *
 * Returns a newly allocated string the caller has to free, or NULL if @parts
 * is NULL or allocation fails. @sep must not be NULL.
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **it;
	char *joined, *cursor;
	size_t sep_len = strlen(sep);
	size_t total = 0;

	if (!parts)
		return NULL;

	/* First pass: work out how many bytes the result needs. */
	if (use_as_prefix)
		total += sep_len;

	for (it = parts; *it; it++) {
		if (it != parts)
			total += sep_len;
		total += strlen(*it);
	}

	/* Zeroed allocation, so the terminating NUL is already in place. */
	joined = calloc(total + 1, sizeof(char));
	if (!joined)
		return NULL;

	/* Second pass: copy the pieces behind each other. */
	cursor = joined;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (it = parts; *it; it++) {
		size_t part_len = strlen(*it);

		if (it != parts) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}

		memcpy(cursor, *it, part_len);
		cursor += part_len;
	}

	return joined;
}
1680
/* Buffer size for printing one cpu index: the largest 64 bit value has 20
 * decimal digits, plus one byte for the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn cpumask into simple, comma-separated cpulist.
 *
 * @bitarr holds @nbits bits, i.e. the valid bit indices are 0..@nbits - 1.
 * Returns a newly allocated string the caller has to free, or NULL if an
 * index cannot be formatted, joining fails or no bit is set at all.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Stop before @nbits: bit @nbits is not part of the mask and may lie
	 * beyond the end of the array.
	 */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}

		must_append_string(&cpulist, numstr);
	}

	joined = string_join(",", (const char **)cpulist, false);

	/* string_join() returns its own buffer, so the temporary list is no
	 * longer needed.
	 */
	free_string_list(cpulist);

	return joined;
}
1707
/* Return the number that follows the last ',' or '-' in @cpulist (e.g. 3 for
 * "0,2-3"), or the leading number if the list contains neither separator.
 * For the ascending lists the callers pass in, this is the highest cpu index.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *comma, *dash, *last;
	size_t cpus = 0;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/* Pick whatever follows the rightmost separator. The two pointers are
	 * only compared with each other when both point into @cpulist.
	 */
	if (comma && dash)
		last = (comma > dash ? comma : dash) + 1;
	else if (comma)
		last = comma + 1;
	else if (dash)
		last = dash + 1;
	else
		last = cpulist;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
1740
/* write(2) wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the last write() returned.
 */
static ssize_t write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
1752
1753 static int write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
1754 {
1755 int fd, saved_errno;
1756 ssize_t ret;
1757
1758 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
1759 if (fd < 0)
1760 return -1;
1761
1762 ret = write_nointr(fd, buf, count);
1763 if (ret < 0)
1764 goto out_error;
1765 if ((size_t)ret != count)
1766 goto out_error;
1767
1768 if (add_newline) {
1769 ret = write_nointr(fd, "\n", 1);
1770 if (ret != 1)
1771 goto out_error;
1772 }
1773
1774 close(fd);
1775 return 0;
1776
1777 out_error:
1778 saved_errno = errno;
1779 close(fd);
1780 errno = saved_errno;
1781 return -1;
1782 }
1783
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Make sure the cpuset cgroup @path does not contain isolated cpus.
 *
 * The parent's "cpuset.cpus" is read and every cpu listed in __ISOL_CPUS is
 * removed from it. The result is written to @path/cpuset.cpus when cpus had to
 * be removed, or when nothing is isolated but @am_initialized is false (the
 * parent's list is copied verbatim in that case).
 *
 * @path is modified temporarily but restored before returning.
 * Returns true on success, false on any read, parse or write failure.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("Invalid path: %s\n", path);
		return bret;
	}

	/* Cut off the last component so that @path names the parent. */
	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}

	/* <ctype.h> functions need an unsigned char value. */
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0 || maxisol >= INT_MAX - 1)
		goto on_error;

	/* From here on maxposs is a bit count: highest cpu index plus one. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
		goto on_error;
	}

	/* Both masks hold maxposs bits, so the valid indices end at
	 * maxposs - 1.
	 */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list\n");
		goto on_error;
	}

copy_parent:
	/* Switch back from the parent to the cgroup itself. */
	*lastslash = oldv;

	free(fpath);

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = write_to_file(fpath, cpulist, strlen(cpulist), false);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	*lastslash = oldv;

	free(fpath);
	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1931
1932 int read_from_file(const char *filename, void* buf, size_t count)
1933 {
1934 int fd = -1, saved_errno;
1935 ssize_t ret;
1936
1937 fd = open(filename, O_RDONLY | O_CLOEXEC);
1938 if (fd < 0)
1939 return -1;
1940
1941 if (!buf || !count) {
1942 char buf2[100];
1943 size_t count2 = 0;
1944
1945 while ((ret = read(fd, buf2, 100)) > 0)
1946 count2 += ret;
1947 if (ret >= 0)
1948 ret = count2;
1949 } else {
1950 memset(buf, 0, count);
1951 ret = read(fd, buf, count);
1952 }
1953
1954 if (ret < 0)
1955 pam_cgfs_debug("read %s: %s", filename, strerror(errno));
1956
1957 saved_errno = errno;
1958 close(fd);
1959 errno = saved_errno;
1960 return ret;
1961 }
1962
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is modified temporarily to derive the parent directory and is restored
 * before any I/O happens, so it is intact on every return path.
 * Returns true if the value was read from the parent and written to @path,
 * false otherwise (including an empty parent file).
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* The truncated path is only needed to build the parent's file name;
	 * undo the cut right away so that no exit path can forget it.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* First call only determines the size of the parent's file. */
	len = read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;

	value = must_alloc(len + 1);
	if (read_from_file(fpath, value, len) != len)
		goto bad;
	/* Terminate the value, it is printed with %s below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = write_to_file(fpath, value, len, false);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
2006
2007 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
2008 * controller we will reside in the root cgroup. This means that
2009 * cgroup.clone_children will not have been initialized for us so we need to do
2010 * it.
2011 */
2012 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
2013 {
2014 char *clonechildrenpath, v;
2015
2016 if (!string_in_list(h->controllers, "cpuset"))
2017 return true;
2018
2019 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
2020
2021 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
2022 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath);
2023 free(clonechildrenpath);
2024 return false;
2025 }
2026
2027 if (v == '1') { /* already set for us by someone else */
2028 free(clonechildrenpath);
2029 return true;
2030 }
2031
2032 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
2033 /* Set clone_children so children inherit our settings */
2034 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2035 free(clonechildrenpath);
2036 return false;
2037 }
2038
2039 free(clonechildrenpath);
2040 return true;
2041 }
2042
2043 /*
2044 * Initialize the cpuset hierarchy in first directory of @gname and
2045 * set cgroup.clone_children so that children inherit settings.
2046 * Since the h->base_path is populated by init or ourselves, we know
2047 * it is already initialized.
2048 */
2049 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
2050 const char *cgroup)
2051 {
2052 char *cgpath, *clonechildrenpath, v, *slash;
2053
2054 if (!string_in_list(h->controllers, "cpuset"))
2055 return true;
2056
2057 if (*cgroup == '/')
2058 cgroup++;
2059 slash = strchr(cgroup, '/');
2060 if (slash)
2061 *slash = '\0';
2062
2063 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
2064 if (slash)
2065 *slash = '/';
2066
2067 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
2068 pam_cgfs_debug("Failed to create '%s'", cgpath);
2069 free(cgpath);
2070 return false;
2071 }
2072
2073 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
2074 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
2075 free(clonechildrenpath);
2076 free(cgpath);
2077 return true;
2078 }
2079
2080 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
2081 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath);
2082 free(clonechildrenpath);
2083 free(cgpath);
2084 return false;
2085 }
2086
2087 /* Make sure any isolated cpus are removed from cpuset.cpus. */
2088 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
2089 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
2090 free(clonechildrenpath);
2091 free(cgpath);
2092 return false;
2093 }
2094
2095 if (v == '1') { /* already set for us by someone else */
2096 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
2097 free(clonechildrenpath);
2098 free(cgpath);
2099 return true;
2100 }
2101
2102 /* copy parent's settings */
2103 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
2104 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
2105 free(cgpath);
2106 free(clonechildrenpath);
2107 return false;
2108 }
2109 free(cgpath);
2110
2111 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
2112 /* Set clone_children so children inherit our settings */
2113 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
2114 free(clonechildrenpath);
2115 return false;
2116 }
2117 free(clonechildrenpath);
2118 return true;
2119 }
2120
2121 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2122 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2123 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2124 * to the caller in @existed.
2125 */
2126 #define __PAM_CGFS_USER "/user/"
2127 #define __PAM_CGFS_USER_LEN 6
2128 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2129 {
2130 char *clean_base_cgroup, *path;
2131 char **controller;
2132 struct cgv1_hierarchy *it;
2133 bool created = false;
2134
2135 *existed = false;
2136 it = h;
2137
2138 for (controller = it->controllers; controller && *controller;
2139 controller++) {
2140 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2141 return false;
2142
2143 /* If systemd has already created a cgroup for us, keep using
2144 * it.
2145 */
2146 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2147 it->base_cgroup, uid, gid,
2148 it->systemd_user_slice))
2149 return true;
2150
2151 /* We need to make sure that we do not create an endless chain
2152 * of sub-cgroups. So we check if we have already logged in
2153 * somehow (sudo -i, su, etc.) and have created a
2154 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2155 * cgroups this is unnecessary since we use the init_cgroup
2156 * anyway, but for controllers which have an existing systemd
2157 * cgroup that does not match the current uid, this is pretty
2158 * useful.
2159 */
2160 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2161 free(it->base_cgroup);
2162 it->base_cgroup = must_copy_string("/");
2163 } else {
2164 clean_base_cgroup =
2165 strstr(it->base_cgroup, __PAM_CGFS_USER);
2166 if (clean_base_cgroup)
2167 *clean_base_cgroup = '\0';
2168 }
2169
2170 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2171 pam_cgfs_debug("Constructing path: %s\n", path);
2172
2173 if (file_exists(path)) {
2174 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2175 if (our_cg)
2176 *existed = false;
2177 else
2178 *existed = true;
2179
2180 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2181 path, our_cg ? "" : "not ", uid, gid);
2182 free(path);
2183
2184 return our_cg;
2185 }
2186
2187 created = mkdir_parent(it->mountpoint, path);
2188 if (!created) {
2189 free(path);
2190 continue;
2191 }
2192
2193 if (chown(path, uid, gid) < 0)
2194 mysyslog(LOG_WARNING,
2195 "Failed to chown %s to %d:%d: %s\n", path,
2196 (int)uid, (int)gid, strerror(errno), NULL);
2197
2198 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2199 free(path);
2200 break;
2201 }
2202
2203 return created;
2204 }
2205
2206 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2207 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2208 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2209 */
2210 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2211 {
2212
2213 char *path;
2214
2215 /* Better safe than sorry. */
2216 if (!h->controllers)
2217 return true;
2218
2219 /* Cgroups created by systemd for us which we re-use won't be removed
2220 * here, since we're using init_cgroup + cgroup as path instead of
2221 * base_cgroup + cgroup.
2222 */
2223 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2224 (void)recursive_rmdir(path);
2225 free(path);
2226
2227 return true;
2228 }
2229
2230 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2231 static bool cgv2_remove(const char *cgroup)
2232 {
2233 struct cgv2_hierarchy *v2;
2234 char *path;
2235
2236 if (!cgv2_hierarchies)
2237 return true;
2238
2239 v2 = *cgv2_hierarchies;
2240
2241 /* If we reused an already existing cgroup, don't bother trying to
2242 * remove (a potentially wrong)/the path.
2243 * Cgroups created by systemd for us which we re-use would be removed
2244 * here, since we're using base_cgroup + cgroup as path.
2245 */
2246 if (v2->systemd_user_slice)
2247 return true;
2248
2249 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2250 (void)recursive_rmdir(path);
2251 free(path);
2252
2253 return true;
2254 }
2255
2256 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2257 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2258 * back, to the caller if the creation failed due to @cgroup already existing
2259 * via @existed.
2260 */
2261 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2262 {
2263 struct cgv1_hierarchy **it, **rev_it;
2264 bool all_created = true;
2265
2266 for (it = cgv1_hierarchies; it && *it; it++) {
2267 if (!(*it)->controllers || !(*it)->mountpoint ||
2268 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2269 continue;
2270
2271 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2272 all_created = false;
2273 break;
2274 }
2275 }
2276
2277 if (all_created)
2278 return true;
2279
2280 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2281 rev_it++)
2282 cgv1_remove_one(*rev_it, cgroup);
2283
2284 return false;
2285 }
2286
2287 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2288 * the creation failed due to @cgroup already existing via @existed.
2289 */
2290 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2291 {
2292 int ret;
2293 char *clean_base_cgroup;
2294 char *path;
2295 struct cgv2_hierarchy *v2;
2296 bool our_cg = false, created = false;
2297
2298 *existed = false;
2299
2300 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2301 return true;
2302
2303 v2 = *cgv2_hierarchies;
2304
2305 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2306 * to be placed under our current cgroup.
2307 */
2308 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2309 uid, gid, v2->systemd_user_slice))
2310 goto delegate_files;
2311
2312 /* We need to make sure that we do not create an endless chain of
2313 * sub-cgroups. So we check if we have already logged in somehow (sudo
2314 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2315 * skip that part.
2316 */
2317 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2318 free(v2->base_cgroup);
2319 v2->base_cgroup = must_copy_string("/");
2320 } else {
2321 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2322 if (clean_base_cgroup)
2323 *clean_base_cgroup = '\0';
2324 }
2325
2326 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2327 pam_cgfs_debug("Constructing path \"%s\"\n", path);
2328
2329 if (file_exists(path)) {
2330 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2331 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2332 path, our_cg ? "" : "not ", uid, gid);
2333 free(path);
2334 if (our_cg) {
2335 *existed = false;
2336 goto delegate_files;
2337 } else {
2338 *existed = true;
2339 return false;
2340 }
2341 }
2342
2343 created = mkdir_parent(v2->mountpoint, path);
2344 if (!created) {
2345 free(path);
2346 return false;
2347 }
2348
2349 /* chown cgroup to user */
2350 if (chown(path, uid, gid) < 0)
2351 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2352 path, (int)uid, (int)gid, strerror(errno), NULL);
2353 else
2354 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2355 free(path);
2356
2357 delegate_files:
2358 /* chown cgroup.procs to user */
2359 if (v2->systemd_user_slice)
2360 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2361 "/cgroup.procs", NULL);
2362 else
2363 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2364 "/cgroup.procs", NULL);
2365
2366 ret = chown(path, uid, gid);
2367 if (ret < 0)
2368 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2369 path, (int)uid, (int)gid, strerror(errno), NULL);
2370 else
2371 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2372 free(path);
2373
2374 /* chown cgroup.subtree_control to user */
2375 if (v2->systemd_user_slice)
2376 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2377 "/cgroup.subtree_control", NULL);
2378 else
2379 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2380 "/cgroup.subtree_control", NULL);
2381
2382 ret = chown(path, uid, gid);
2383 if (ret < 0)
2384 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2385 path, (int)uid, (int)gid, strerror(errno), NULL);
2386 free(path);
2387
2388 /* chown cgroup.threads to user */
2389 if (v2->systemd_user_slice)
2390 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2391 "/cgroup.threads", NULL);
2392 else
2393 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2394 "/cgroup.threads", NULL);
2395 ret = chown(path, uid, gid);
2396 if (ret < 0 && errno != ENOENT)
2397 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2398 path, (int)uid, (int)gid, strerror(errno), NULL);
2399 free(path);
2400
2401 return true;
2402 }
2403
2404 /* Create writeable cgroups for @user at login. Details can be found in the
2405 * preamble/license at the top of this file.
2406 */
2407 static int handle_login(const char *user, uid_t uid, gid_t gid)
2408 {
2409 int idx = 0, ret;
2410 bool existed;
2411 char cg[MAXPATHLEN];
2412
2413 cg_escape();
2414
2415 while (idx >= 0) {
2416 ret = snprintf(cg, MAXPATHLEN, "/user/%s/%d", user, idx);
2417 if (ret < 0 || ret >= MAXPATHLEN) {
2418 mysyslog(LOG_ERR, "Username too long\n", NULL);
2419 return PAM_SESSION_ERR;
2420 }
2421
2422 existed = false;
2423 if (!cgv2_create(cg, uid, gid, &existed)) {
2424 if (existed) {
2425 cgv2_remove(cg);
2426 idx++;
2427 continue;
2428 }
2429
2430 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2431 return PAM_SESSION_ERR;
2432 }
2433
2434 existed = false;
2435 if (!cgv1_create(cg, uid, gid, &existed)) {
2436 if (existed) {
2437 cgv2_remove(cg);
2438 idx++;
2439 continue;
2440 }
2441
2442 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2443 return PAM_SESSION_ERR;
2444 }
2445
2446 if (!cg_enter(cg)) {
2447 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s\n", cg, user, NULL);
2448 return PAM_SESSION_ERR;
2449 }
2450
2451 break;
2452 }
2453
2454 return PAM_SUCCESS;
2455 }
2456
2457 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2458 * hierarchies.
2459 */
2460 static bool cgv1_prune_empty_cgroups(const char *user)
2461 {
2462 bool controller_removed = true;
2463 bool all_removed = true;
2464 struct cgv1_hierarchy **it;
2465
2466 for (it = cgv1_hierarchies; it && *it; it++) {
2467 int ret;
2468 char *path_base, *path_init;
2469 char **controller;
2470
2471 if (!(*it)->controllers || !(*it)->mountpoint ||
2472 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2473 continue;
2474
2475 for (controller = (*it)->controllers; controller && *controller;
2476 controller++) {
2477 bool path_base_rm, path_init_rm;
2478
2479 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2480 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base);
2481
2482 ret = recursive_rmdir(path_base);
2483 if (ret == -ENOENT || ret >= 0)
2484 path_base_rm = true;
2485 else
2486 path_base_rm = false;
2487 free(path_base);
2488
2489 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2490 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init);
2491
2492 ret = recursive_rmdir(path_init);
2493 if (ret == -ENOENT || ret >= 0)
2494 path_init_rm = true;
2495 else
2496 path_init_rm = false;
2497 free(path_init);
2498
2499 if (!path_base_rm && !path_init_rm) {
2500 controller_removed = false;
2501 continue;
2502 }
2503
2504 controller_removed = true;
2505 break;
2506 }
2507
2508 if (!controller_removed)
2509 all_removed = false;
2510 }
2511
2512 return all_removed;
2513 }
2514
2515 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2516 * hierarchy.
2517 */
2518 static bool cgv2_prune_empty_cgroups(const char *user)
2519 {
2520 int ret;
2521 struct cgv2_hierarchy *v2;
2522 char *path_base, *path_init;
2523 bool path_base_rm, path_init_rm;
2524
2525 if (!cgv2_hierarchies)
2526 return true;
2527
2528 v2 = *cgv2_hierarchies;
2529
2530 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2531 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base);
2532
2533 ret = recursive_rmdir(path_base);
2534 if (ret == -ENOENT || ret >= 0)
2535 path_base_rm = true;
2536 else
2537 path_base_rm = false;
2538 free(path_base);
2539
2540 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2541 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init);
2542
2543 ret = recursive_rmdir(path_init);
2544 if (ret == -ENOENT || ret >= 0)
2545 path_init_rm = true;
2546 else
2547 path_init_rm = false;
2548 free(path_init);
2549
2550 if (!path_base_rm && !path_init_rm)
2551 return false;
2552
2553 return true;
2554 }
2555
/* Prune the empty cgroups of @user from the cgroupfs v1 hierarchies first and
 * from the cgroupfs v2 hierarchy afterwards. Pruning is best effort: the
 * results of cgv{1,2}_prune_empty_cgroups() are intentionally discarded.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	cgv1_prune_empty_cgroups(user);
	cgv2_prune_empty_cgroups(user);
}
2562
2563 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2564 static void cgv1_free_hierarchies(void)
2565 {
2566 struct cgv1_hierarchy **it;
2567
2568 if (!cgv1_hierarchies)
2569 return;
2570
2571 for (it = cgv1_hierarchies; it && *it; it++) {
2572 if ((*it)->controllers) {
2573 char **tmp;
2574 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2575 free(*tmp);
2576
2577 free((*it)->controllers);
2578 }
2579
2580 free((*it)->mountpoint);
2581 free((*it)->base_cgroup);
2582 free((*it)->fullcgpath);
2583 free((*it)->init_cgroup);
2584 }
2585
2586 free(cgv1_hierarchies);
2587 }
2588
2589 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2590 static void cgv2_free_hierarchies(void)
2591 {
2592 struct cgv2_hierarchy **it;
2593
2594 if (!cgv2_hierarchies)
2595 return;
2596
2597 for (it = cgv2_hierarchies; it && *it; it++) {
2598 if ((*it)->controllers) {
2599 char **tmp;
2600
2601 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2602 free(*tmp);
2603
2604 free((*it)->controllers);
2605 }
2606
2607 free((*it)->mountpoint);
2608 free((*it)->base_cgroup);
2609 free((*it)->fullcgpath);
2610 free((*it)->init_cgroup);
2611 }
2612
2613 free(cgv2_hierarchies);
2614 }
2615
/* Release everything recorded about the detected cgroup hierarchies: the
 * cgroupfs v1 hierarchies first, then the cgroupfs v2 hierarchy.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();
	cgv2_free_hierarchies();
}
2622
2623 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2624 const char **argv)
2625 {
2626 int ret;
2627 uid_t uid = 0;
2628 gid_t gid = 0;
2629 const char *PAM_user = NULL;
2630
2631 ret = pam_get_user(pamh, &PAM_user, NULL);
2632 if (ret != PAM_SUCCESS) {
2633 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2634 return PAM_SESSION_ERR;
2635 }
2636
2637 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2638 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2639 return PAM_SESSION_ERR;
2640 }
2641
2642 if (!cg_init(uid, gid)) {
2643 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2644 return PAM_SESSION_ERR;
2645 }
2646
2647 /* Try to prune cgroups, that are actually empty but were still marked
2648 * as busy by the kernel so we couldn't remove them on session close.
2649 */
2650 cg_prune_empty_cgroups(PAM_user);
2651
2652 if (cg_mount_mode == CGROUP_UNKNOWN)
2653 return PAM_SESSION_ERR;
2654
2655 if (argc > 1 && !strcmp(argv[0], "-c")) {
2656 char **clist = make_string_list(argv[1], ",");
2657
2658 /*
2659 * We don't allow using "all" and other controllers explicitly because
2660 * that simply doesn't make any sense.
2661 */
2662 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2663 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2664 free_string_list(clist);
2665 return PAM_SESSION_ERR;
2666 }
2667
2668 cg_mark_to_make_rw(clist);
2669 free_string_list(clist);
2670 }
2671
2672 return handle_login(PAM_user, uid, gid);
2673 }
2674
2675 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2676 const char **argv)
2677 {
2678 int ret;
2679 uid_t uid = 0;
2680 gid_t gid = 0;
2681 const char *PAM_user = NULL;
2682
2683 ret = pam_get_user(pamh, &PAM_user, NULL);
2684 if (ret != PAM_SUCCESS) {
2685 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2686 return PAM_SESSION_ERR;
2687 }
2688
2689 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2690 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2691 return PAM_SESSION_ERR;
2692 }
2693
2694 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2695 if (!cg_init(uid, gid))
2696 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2697
2698 if (argc > 1 && !strcmp(argv[0], "-c")) {
2699 char **clist = make_string_list(argv[1], ",");
2700
2701 /*
2702 * We don't allow using "all" and other controllers explicitly because
2703 * that simply doesn't make any sense.
2704 */
2705 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2706 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2707 free_string_list(clist);
2708 return PAM_SESSION_ERR;
2709 }
2710
2711 cg_mark_to_make_rw(clist);
2712 free_string_list(clist);
2713 }
2714 }
2715
2716 cg_prune_empty_cgroups(PAM_user);
2717 cg_exit();
2718
2719 return PAM_SUCCESS;
2720 }