]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
github: Update for main branch
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include "config.h"
4
5 #include <ctype.h>
6 #include <dirent.h>
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <linux/unistd.h>
10 #include <pwd.h>
11 #include <stdarg.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <sys/mount.h>
18 #include <sys/param.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <sys/vfs.h>
22 #include <syslog.h>
23 #include <unistd.h>
24
25 #include "file_utils.h"
26 #include "macro.h"
27 #include "memory_utils.h"
28 #include "string_utils.h"
29
30 #define PAM_SM_SESSION
31 #include <security/_pam_macros.h>
32 #include <security/pam_modules.h>
33
34 #if !HAVE_STRLCPY
35 #include "strlcpy.h"
36 #endif
37
38 #if !HAVE_STRLCAT
39 #include "strlcat.h"
40 #endif
41
/* Print a diagnostic to @stream, prefixed with the source file, line number
 * and function name of the call site. @format must be a string literal (it is
 * pasted onto the prefix), and at least one variadic argument has to be
 * supplied because __VA_ARGS__ is used without a preceding ##.
 */
#define pam_cgfs_debug_stream(stream, format, ...)                          \
	do {                                                                \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__);                             \
	} while (false)

/* Report an error on stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* Debug output is only produced when built with DEBUG defined. Otherwise the
 * macro expands to an empty statement and its arguments are not evaluated.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...) \
	do {                        \
	} while (false)
#endif /* DEBUG */
57
/* Kind of cgroup layout detected on this system. The file-scope variable of
 * the same name starts out as CGROUP_UNINITIALIZED.
 * NOTE(review): presumably MIXED means both v1 and v2 hierarchies are mounted
 * and PURE_V1/PURE_V2 mean only one of them is - confirm against the code that
 * assigns this variable.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
65
66 /* Common helper functions. Most of these have been taken from LXC. */
67 static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
68 static int append_null_to_list(void ***list);
69 static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
70 static char *copy_to_eol(char *s);
71 static char *get_mountpoint(char *line);
72 static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
73 static int handle_login(const char *user, uid_t uid, gid_t gid);
74 static bool is_lxcfs(const char *line);
75 static bool is_cgv1(char *line);
76 static bool is_cgv2(char *line);
77 static void must_add_to_list(char ***clist, char *entry);
78 static void must_append_controller(char **klist, char **nlist, char ***clist,
79 char *entry);
80 static void must_append_string(char ***list, char *entry);
81 static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
82 static char *read_file(char *fnam);
83 static int recursive_rmdir(char *dirname);
84 static bool string_in_list(char **list, const char *entry);
85 static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
86 static void trim(char *s);
87 static bool write_int(char *path, int v);
88
89 /* cgroupfs prototypes. */
90 static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
91 static uint32_t *cg_cpumask(char *buf, size_t nbits);
92 static bool cg_copy_parent_file(char *path, char *file);
93 static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
94 static bool cg_enter(const char *cgroup);
95 static void cg_escape(void);
96 static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
97 static ssize_t cg_get_max_cpus(char *cpulist);
98 static int cg_get_version_of_mntpt(const char *path);
99 static bool cg_init(uid_t uid, gid_t gid);
100 static void cg_mark_to_make_rw(char **list);
101 static void cg_prune_empty_cgroups(const char *user);
102 static bool cg_systemd_created_user_slice(const char *base_cgroup,
103 const char *init_cgroup,
104 const char *in, uid_t uid);
105 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
106 const char *base_cgroup, uid_t uid,
107 gid_t gid,
108 bool systemd_user_slice);
109 static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
110 static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
111 const char *init_cgroup, uid_t uid);
112 static void cg_systemd_prune_init_scope(char *cg);
113 static bool is_lxcfs(const char *line);
114
115 /* cgroupfs v1 prototypes. */
/* Description of one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of controllers mounted into this hierarchy. */
	char **controllers;
	/* Mountpoint of the hierarchy as taken from /proc/self/mountinfo. */
	char *mountpoint;
	/* cgroup of the current process, taken from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialized to NULL when the hierarchy is recorded. */
	char *fullcgpath;
	/* cgroup of PID 1, taken from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set by cgv1_mark_to_make_rw() for hierarchies that were requested. */
	bool create_rw_cgroup;
	/* Set when cg_systemd_created_user_slice() recognizes the base cgroup. */
	bool systemd_user_slice;
};

/* Null-terminated array of all detected cgroupfs v1 hierarchies. */
static struct cgv1_hierarchy **cgv1_hierarchies;
127
128 static void cgv1_add_controller(char **clist, char *mountpoint,
129 char *base_cgroup, char *init_cgroup);
130 static bool cgv1_controller_in_clist(char *cgline, char *c);
131 static bool cgv1_controller_lists_intersect(char **l1, char **l2);
132 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
133 char **clist);
134 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
135 bool *existed);
136 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
137 uid_t uid, gid_t gid, bool *existed);
138 static bool cgv1_enter(const char *cgroup);
139 static void cgv1_escape(void);
140 static bool cgv1_get_controllers(char ***klist, char ***nlist);
141 static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
142 static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
143 char *line);
144 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
145 const char *cgroup);
146 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
147 static bool cgv1_init(uid_t uid, gid_t gid);
148 static void cgv1_mark_to_make_rw(char **clist);
149 static char *cgv1_must_prefix_named(char *entry);
150 static bool cgv1_prune_empty_cgroups(const char *user);
151 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
152 static bool is_cgv1(char *line);
153
154 /* cgroupfs v2 prototypes. */
/* Description of the mounted cgroupfs v2 hierarchy. The layout mirrors
 * struct cgv1_hierarchy.
 */
struct cgv2_hierarchy {
	/* Null-terminated list of controllers, as passed to cgv2_add_controller(). */
	char **controllers;
	/* Mountpoint of the hierarchy. */
	char *mountpoint;
	/* cgroup of the current process. */
	char *base_cgroup;
	/* Initialized to NULL when the hierarchy is recorded. */
	char *fullcgpath;
	/* cgroup of PID 1. */
	char *init_cgroup;
	/* Set by cgv2_mark_to_make_rw() when "unified" or "all" was requested. */
	bool create_rw_cgroup;
	/* Whether systemd already created a user slice for us. */
	bool systemd_user_slice;
};

/* Actually this should only be a single hierarchy. But for the sake of
 * parallelism and because the layout of the cgroupfs v2 is still somewhat
 * changing, we'll leave it as a null-terminated array of structs.
 */
static struct cgv2_hierarchy **cgv2_hierarchies;
170
171 static void cgv2_add_controller(char **clist, char *mountpoint,
172 char *base_cgroup, char *init_cgroup,
173 bool systemd_user_slice);
174 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
175 bool *existed);
176 static bool cgv2_enter(const char *cgroup);
177 static void cgv2_escape(void);
178 static char *cgv2_get_current_cgroup(int pid);
179 static bool cgv2_init(uid_t uid, gid_t gid);
180 static void cgv2_mark_to_make_rw(char **clist);
181 static bool cgv2_prune_empty_cgroups(const char *user);
182 static bool cgv2_remove(const char *cgroup);
183 static bool is_cgv2(char *line);
184
/* Create directory @path with exactly the permission bits given in @mode.
 *
 * The process umask is cleared for the duration of the mkdir() call and
 * restored afterwards. errno is preserved across the umask restore, so it
 * still describes the mkdir() outcome on return.
 *
 * Returns the return value of mkdir().
 */
static int do_mkdir(const char *path, mode_t mode)
{
	const mode_t old_mask = umask(0);
	const int rc = mkdir(path, mode);
	const int mkdir_errno = errno;

	umask(old_mask);
	errno = mkdir_errno;

	return rc;
}
198
/* Create every missing path component of @path that lies below @root.
 *
 * @path is expected to start with @root. Components are created one at a time
 * with mode 0755 by temporarily NUL-terminating @path after each component;
 * the separator is put back before moving on to the next one.
 *
 * Returns true when all components exist or were created, false if @path is
 * shorter than @root or a directory could not be created. On a creation
 * failure @path is left truncated at the failing component.
 */
static bool lxc_mkdir_parent(const char *root, char *path)
{
	const size_t root_len = strlen(root);
	const size_t path_len = strlen(path);
	char *cur;

	if (path_len < root_len)
		return false;

	if (path_len == root_len)
		return true;

	cur = path + root_len + 1;
	while (true) {
		char *end;
		char saved;

		/* Skip over (possibly repeated) separators. */
		while (*cur == '/')
			cur++;
		if (*cur == '\0')
			return true;

		/* Locate the end of the current component. */
		for (end = cur + 1; *end != '\0' && *end != '/'; end++)
			;

		saved = *end;
		if (saved != '\0')
			*end = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s\n", path, strerror(errno));
			return false;
		}

		/* That was the final component. */
		if (saved == '\0')
			return true;

		*end = saved;
		cur = end + 1;
	}
}
243
244 /* Common helper functions. Most of these have been taken from LXC. */
/* Send a printf-style message to syslog under the "PAM-CGFS" identity using
 * the LOG_AUTH facility. @err is handed to vsyslog() as the priority. The log
 * connection is opened and closed again on every call.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	/* @format is supplied by the caller, so silence the non-literal warning. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
	vsyslog(err, format, ap);
#pragma GCC diagnostic pop
	va_end(ap);

	closelog();
}
258
/* Grow *@mem in steps of BATCH_SIZE bytes; do not fail.
 *
 * @oldlen is the length the buffer was previously sized for and @newlen the
 * length that is needed now. The buffer is only reallocated when it has not
 * been allocated yet or when @newlen falls into a later batch than @oldlen.
 * Batch counts are kept in size_t so large lengths are neither narrowed nor
 * overflow a signed multiplication.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
269
/* Append the @newlen bytes of @new, plus its terminating NUL byte, to *@dest,
 * which currently holds @oldlen bytes. The buffer is grown as needed; do not
 * fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Reserve one extra byte for the NUL that is copied along with @new. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
279
/* Read in the whole file @fnam line by line and return it as one allocated,
 * NUL-terminated string. The caller must free() the result.
 *
 * Returns NULL if the file cannot be opened or if it yields no lines.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	/* getline() returns ssize_t; keep the full width instead of int. */
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
302
/* Given a pointer to a null-terminated array of pointers, realloc to add one
 * entry, and terminate the grown array with NULL. Do not fail.
 *
 * Returns the index of the second-to-last entry - that is, the slot which is
 * now available for use. That slot is NOT initialized here; the caller has to
 * store a value in it before the list is walked again.
 */
static int append_null_to_list(void ***list)
{
	int newentry = 0;

	if (*list)
		for (; (*list)[newentry]; newentry++)
			;

	/* The array holds void * elements, so size it by the element type. */
	*list = must_realloc(*list, (newentry + 2) * sizeof(**list));
	(*list)[newentry + 1] = NULL;

	return newentry;
}
321
/* Append an allocated copy of @entry to the null-terminated array of strings
 * *@list, keeping the array null-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	const int slot = append_null_to_list((void ***)list);
	char *dup = must_copy_string(entry);

	(*list)[slot] = dup;
}
334
/* Strip all trailing newline characters from @s in place. Newlines that are
 * not at the end of the string are left untouched.
 */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
343
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL when @s contains no newline. The caller frees the result.
 */
static char *copy_to_eol(char *s)
{
	const char *eol = strchr(s, '\n');
	size_t n;
	char *out;

	if (eol == NULL)
		return NULL;

	n = (size_t)(eol - s);
	out = must_realloc(NULL, n + 1);
	memcpy(out, s, n);
	out[n] = '\0';

	return out;
}
361
/* Check if given entry under /proc/<pid>/mountinfo is a fuse.lxcfs mount.
 * Only the first " - " separator found in @line is inspected.
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
371
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v1 mount.
 * Only the first " - " separator found in @line is inspected.
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
381
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v2 mount.
 * Only the first " - " separator found in @line is inspected.
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep != NULL && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
391
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list is treated as an empty list.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (list == NULL)
		return false;

	for (i = 0; list[i] != NULL; i++) {
		if (strcmp(list[i], entry) == 0)
			return true;
	}

	return false;
}
405
/*
 * Split @str on any of the delimiter characters in @sep and return the pieces
 * as a null-terminated array of allocated strings. Empty pieces are skipped
 * (strtok_r() semantics); if there is no piece at all, NULL is returned.
 * Caller is responsible for calling free_string_list.
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	/* strtok_r() modifies its input, so work on a private copy. */
	char *scratch = must_copy_string(str);
	char *piece = strtok_r(scratch, sep, &state);

	while (piece != NULL) {
		must_add_to_list(&result, piece);
		piece = strtok_r(NULL, sep, &state);
	}

	free(scratch);

	return result;
}
426
/* Count the entries of a null-terminated array of strings. A NULL @list
 * counts as empty.
 */
static size_t string_list_length(char **list)
{
	size_t count = 0;

	if (list != NULL) {
		while (list[count] != NULL)
			count++;
	}

	return count;
}
438
/* Write the decimal representation of @v followed by a newline to @path,
 * truncating the file. Returns true only if opening, writing and closing the
 * file all succeeded.
 */
static bool write_int(char *path, int v)
{
	FILE *out = fopen(path, "w");
	bool printed, closed;

	if (out == NULL)
		return false;

	printed = fprintf(out, "%d\n", v) >= 0;
	/* Buffered data is flushed on close, so its result matters as well. */
	closed = fclose(out) == 0;

	return printed && closed;
}
457
458 /* Recursively remove directory and its parents. */
459 static int recursive_rmdir(char *dirname)
460 {
461 __do_closedir DIR *dir = NULL;
462 struct dirent *direntp;
463 int r = 0;
464
465 dir = opendir(dirname);
466 if (!dir)
467 return -ENOENT;
468
469 while ((direntp = readdir(dir))) {
470 struct stat st;
471 char *pathname;
472
473 if (!strcmp(direntp->d_name, ".") ||
474 !strcmp(direntp->d_name, ".."))
475 continue;
476
477 pathname = must_make_path(dirname, direntp->d_name, NULL);
478
479 if (lstat(pathname, &st)) {
480 if (!r)
481 pam_cgfs_debug("Failed to stat %s\n", pathname);
482 r = -1;
483 goto next;
484 }
485
486 if (!S_ISDIR(st.st_mode))
487 goto next;
488
489 if (recursive_rmdir(pathname) < 0)
490 r = -1;
491
492 next:
493 free(pathname);
494 }
495
496 if (rmdir(dirname) < 0) {
497 if (!r)
498 pam_cgfs_debug("Failed to delete %s: %s\n", dirname, strerror(errno));
499 r = -1;
500 }
501
502 return r;
503 }
504
/* Append an allocated copy of @entry to the null-terminated array of strings
 * *@clist. The array stays null-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	const int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
515
/* Get mountpoint from a /proc/<pid>/mountinfo line.
 *
 * The mountpoint is the fifth space-separated field. Note that @line is
 * modified: the space following the mountpoint, if there is one, is
 * overwritten with a NUL byte. Returns an allocated copy of the field, or NULL
 * if @line has fewer than five fields.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *stop, *out;
	size_t n;
	int skipped;

	/* Step over the first four fields. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (field == NULL)
			return NULL;
		field++;
	}

	stop = strchr(field, ' ');
	if (stop != NULL)
		*stop = '\0';

	n = strlen(field);
	out = must_realloc(NULL, n + 1);
	/* Copies the terminating NUL byte as well. */
	memcpy(out, field, n + 1);

	return out;
}
543
/* Create list of cgroupfs v1 controllers found under /proc/self/cgroup. Skips
 * the 0::/some/path cgroupfs v2 hierarchy listed. Controllers starting with
 * "name=" are appended to @nlist (named controllers), all others to @klist
 * (kernel controllers).
 *
 * Returns false if /proc/self/cgroup cannot be opened, true otherwise.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	char *buf = NULL;
	size_t bufsz = 0;
	FILE *cgfile = fopen("/proc/self/cgroup", "r");

	if (cgfile == NULL)
		return false;

	while (getline(&buf, &bufsz, cgfile) != -1) {
		char *ctrls, *ctrls_end, *name;
		char *state = NULL;

		/* The controller list sits between the first two ':'. */
		ctrls = strchr(buf, ':');
		if (ctrls == NULL)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		if (ctrls_end == NULL)
			continue;
		*ctrls_end = '\0';

		/* An empty controller field marks the v2 hierarchy; skip it. */
		if (ctrls_end == ctrls)
			continue;

		name = strtok_r(ctrls, ",", &state);
		while (name != NULL) {
			char ***dest = strncmp(name, "name=", 5) == 0 ? nlist : klist;

			must_append_string(dest, name);
			name = strtok_r(NULL, ",", &state);
		}
	}

	fclose(cgfile);
	free(buf);

	return true;
}
590
591 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
592 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
593 static bool cgv2_get_controllers(char ***klist)
594 {
595 return -ENOSYS;
596 }
597 */
598
/* Get the cgroup of process @pid in the cgroupfs v2 hierarchy by reading
 * /proc/<pid>/cgroup and looking for the "0::/" entry.
 *
 * Returns an allocated copy of the cgroup path (starting with '/'), or NULL
 * if the file cannot be read, has no such entry, or the entry is not
 * terminated by a newline.
 */
static char *cgv2_get_current_cgroup(int pid)
{
/* Sized generously: 21 bytes are reserved for the decimal pid. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];
	char *contents, *entry;
	char *result = NULL;
	int n;

	n = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (n < 0 || n >= __PIDLEN)
		return NULL;

	contents = read_file(path);
	if (!contents)
		return NULL;

	entry = strstr(contents, "0::/");
	if (entry) {
		/* Skip "0::" so that the copy starts at the leading '/'. */
		result = copy_to_eol(entry + 3);
		if (result)
			trim(result);
	}

	free(contents);

	return result;
}
635
/* Given two null-terminated lists of strings, return true if any string is in
 * both. A NULL list is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	char **a, **b;

	if (!l1 || !l2)
		return false;

	for (a = l1; *a; a++) {
		for (b = l2; *b; b++) {
			if (strcmp(*b, *a) == 0)
				return true;
		}
	}

	return false;
}
652
653 /* For a null-terminated list of controllers @clist, return true if any of those
654 * controllers is already listed the null-terminated list of hierarchies @hlist.
655 * Realistically, if one is present, all must be present.
656 */
657 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
658 {
659 struct cgv1_hierarchy **it;
660
661 for (it = hlist; it && *it; it++)
662 if ((*it)->controllers)
663 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
664 return true;
665
666 return false;
667
668 }
669
670 /* Set boolean to mark controllers under which we are supposed create a
671 * writeable cgroup.
672 */
673 static void cgv1_mark_to_make_rw(char **clist)
674 {
675 struct cgv1_hierarchy **it;
676
677 for (it = cgv1_hierarchies; it && *it; it++)
678 if ((*it)->controllers)
679 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
680 string_in_list(clist, "all"))
681 (*it)->create_rw_cgroup = true;
682 }
683
684 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
685 * the cgroupfs v2 hierarchy.
686 */
687 static void cgv2_mark_to_make_rw(char **clist)
688 {
689 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
690 if (cgv2_hierarchies)
691 (*cgv2_hierarchies)->create_rw_cgroup = true;
692 }
693
/* Wrapper around cgv{1,2}_mark_to_make_rw(): applies the controller list
 * @clist to the cgroupfs v1 hierarchies first and then to the cgroupfs v2
 * hierarchy.
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
700
/* Return an allocated copy of @entry prefixed with "name=", e.g. "systemd"
 * becomes "name=systemd". Returns NULL if formatting fails.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* strlen("name=") + strlen(entry) + terminating NUL byte. */
	const size_t size = strlen(entry) + 6;
	char *prefixed = must_realloc(NULL, size);
	const int n = snprintf(prefixed, size, "name=%s", entry);

	if (n < 0 || (size_t)n >= size) {
		free(prefixed);
		return NULL;
	}

	return prefixed;
}
719
/* Append controller @entry to the null-terminated list *@clist.
 *
 * An entry found in both @klist and @nlist is ignored. An entry that already
 * carries the "name=" prefix, or that is a kernel controller listed in @klist,
 * is copied verbatim; anything else is stored with "name=" prepended.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool keep_verbatim;
	char *item;
	int slot;

	if (string_in_list(klist, entry) && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	keep_verbatim = strncmp(entry, "name=", 5) == 0 || string_in_list(klist, entry);
	if (keep_verbatim)
		item = must_copy_string(entry);
	else
		item = cgv1_must_prefix_named(entry);

	(*clist)[slot] = item;
}
740
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list
 *
 * Returns a null-terminated list of controllers built via
 * must_append_controller(), or NULL if @line has too few fields, its
 * mountpoint is not below /sys/fs/cgroup/, or the controller list is empty.
 *
 * @line is only modified temporarily while a token is handed to
 * must_append_controller() and is restored before returning. Leaving the
 * line intact matters because the caller extracts the mountpoint from the
 * same line afterwards: permanently splitting "cpu,cpuacct" would cut the
 * mountpoint off at the first comma.
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	int i;
	char *p, *end, *tok;
	char **aret = NULL;

	p = line;

	/* The mountpoint is the fifth space-separated field. */
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	if (strncmp(p, "/sys/fs/cgroup/", 15) != 0)
		return NULL;

	p += 15;

	end = strchr(p, ' ');
	if (!end)
		return NULL;

	tok = p;
	while (tok < end) {
		char *sep;
		char saved;

		/* Like strtok_r(): skip delimiters, never yield empty tokens. */
		if (*tok == ',') {
			tok++;
			continue;
		}

		sep = memchr(tok, ',', (size_t)(end - tok));
		if (!sep)
			sep = end;

		/* Terminate the token just long enough to pass it on. */
		saved = *sep;
		*sep = '\0';
		must_append_controller(klist, nlist, &aret, tok);
		*sep = saved;

		tok = sep;
	}

	return aret;
}
778
/* Check if the cgroupfs v1 controller @c is present in the comma-separated
 * controller list at the start of @cgline. The list ends at the first ':'.
 *
 * Returns false if @cgline contains no ':' or if no list element equals @c.
 * Empty list elements never match. @cgline is not modified and no memory is
 * allocated.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *end, *tok;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);

	for (tok = cgline; tok < end;) {
		const char *sep = memchr(tok, ',', (size_t)(end - tok));
		size_t toklen;

		if (!sep)
			sep = end;

		toklen = (size_t)(sep - tok);
		if (toklen > 0 && toklen == clen && memcmp(tok, c, clen) == 0)
			return true;

		/* Continue after the ',' (or step past the ':' and stop). */
		tok = sep + 1;
	}

	return false;
}
804
/* Look up the cgroup of the cgv1 controller @controller in @basecginfo, which
 * holds the contents of a /proc/<pid>/cgroup file.
 *
 * Returns an allocated copy of the cgroup path of the first line whose
 * controller list contains @controller, or NULL if there is no such line or
 * the line is malformed.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cursor = basecginfo;

	while ((cursor = strchr(cursor, ':')) != NULL) {
		/* Step past the hierarchy id to the controller list. */
		cursor++;

		if (cgv1_controller_in_clist(cursor, controller)) {
			cursor = strchr(cursor, ':');
			if (cursor == NULL)
				return NULL;

			return copy_to_eol(cursor + 1);
		}

		/* Not this line; move on to the next one. */
		cursor = strchr(cursor, '\n');
		if (cursor == NULL)
			return NULL;
		cursor++;
	}

	return NULL;
}
837
/* Remove a trailing /init.scope from string @cg in place. This will mostly
 * affect systemd-based systems.
 *
 * If @cg consists of "/init.scope" only, it is reduced to "/". A NULL @cg and
 * strings that do not end in "/init.scope" are left alone.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scope_len = strlen(INIT_SCOPE);
	size_t len;
	char *point;

	if (!cg)
		return;

	/* Compare lengths first so that no pointer is ever formed in front of
	 * the start of @cg.
	 */
	len = strlen(cg);
	if (len < scope_len)
		return;

	point = cg + (len - scope_len);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0'; /* keep the leading '/' */
	else
		*point = '\0';
}
860
861 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
862 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
863 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
864 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
865 * from /proc/1/cgroup.
866 */
867 static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
868 {
869 struct cgv1_hierarchy *new;
870 int newentry;
871
872 new = must_realloc(NULL, sizeof(*new));
873
874 new->controllers = clist;
875 new->mountpoint = mountpoint;
876 new->base_cgroup = base_cgroup;
877 new->fullcgpath = NULL;
878 new->create_rw_cgroup = false;
879 new->init_cgroup = init_cgroup;
880 new->systemd_user_slice = false;
881
882 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
883 cgv1_hierarchies[newentry] = new;
884 }
885
886 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
887 * currently) include the controllers mounted into the hierarchy (e.g. memory,
888 * pids, blkio), the mountpoint of that hierarchy (Should usually be
889 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
890 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
891 * base cgroup of the current process gathered from /proc/self/cgroup, and the
892 * init cgroup of PID1 gathered from /proc/1/cgroup.
893 */
894 static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
895 {
896 struct cgv2_hierarchy *new;
897 int newentry;
898
899 new = must_realloc(NULL, sizeof(*new));
900
901 new->controllers = clist;
902 new->mountpoint = mountpoint;
903 new->base_cgroup = base_cgroup;
904 new->fullcgpath = NULL;
905 new->create_rw_cgroup = false;
906 new->init_cgroup = init_cgroup;
907 new->systemd_user_slice = systemd_user_slice;
908
909 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
910 cgv2_hierarchies[newentry] = new;
911 }
912
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session' This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last two elements of path @in are
 * "<uid>.user/<something>.session" with <uid> equal to @uid. Trailing slashes
 * in @in are ignored.
 *
 * The path is inspected in place: no copy is made, and the backward scans are
 * bounded by the start of @in so nothing in front of the string is read. The
 * ".user/" suffix of the second-to-last element is verified through %n,
 * because the sscanf() return value alone only tells that the number matched.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	size_t end, last, prev;
	int id;
	int consumed = -1;

	end = strlen(in);
	if (end < strlen("/user/1.user/1.session"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (end > 0 && in[end - 1] == '/')
		end--;
	if (end == 0)
		return false;

	/* Get last path element; @last indexes its first character. */
	last = end;
	while (last > 0 && in[last - 1] != '/')
		last--;
	if (last == 0)
		return false;

	/* Make sure it is something.session. */
	if (end - last < strlen("1.session") ||
	    strncmp(in + end - 8, ".session", 8) != 0)
		return false;

	/* Ok last path piece checks out, now check the second to last.
	 * @prev indexes its first character (or the start of @in).
	 */
	prev = last - 1;
	while (prev > 0 && in[prev - 1] != '/')
		prev--;

	/* %n is only stored once the literal ".user/" has matched as well. */
	if (sscanf(in + prev, "%d.user/%n", &id, &consumed) != 1 || consumed < 0)
		return false;

	return id == (int)uid;
}
966
/* So long as our path relative to init starts with
 * /user.slice/user-$uid.slice/, assume it belongs to $uid and chown it.
 *
 * Returns true if @base_cgroup is strictly longer than @init_cgroup, starts
 * with @init_cgroup, and continues with "/user.slice/user-<uid>.slice/".
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char prefix[100];
	const size_t base_len = strlen(base_cgroup);
	size_t skip = strlen(init_cgroup);
	int n;

	if (base_len <= skip)
		return false;

	if (strncmp(base_cgroup, init_cgroup, skip) != 0)
		return false;

	n = snprintf(prefix, sizeof(prefix), "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || (size_t)n >= sizeof(prefix))
		return false;

	/* An init cgroup of "/" is the separator itself; do not skip it. */
	if (skip == 1)
		skip = 0;

	return strncmp(base_cgroup + skip, prefix, (size_t)n) == 0;
}
994
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 * - they fool us, we create new cgroups, and they get auto-logged-out.
 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 * lose ownership of their cgroups
 *
 * Returns true if cg_systemd_under_user_slice_1() accepts @in, if
 * cg_systemd_under_user_slice_2() accepts @base_cgroup relative to
 * @init_cgroup, or if the last two elements of @in are
 * "user-<uid>.slice/session-<something>.scope" with <uid> equal to @uid.
 *
 * The path is inspected in place: no copy is made, and the backward scans are
 * bounded by the start of @in so nothing in front of the string is read. The
 * ".slice/" suffix is verified through %n, because the sscanf() return value
 * alone only tells that the number matched.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	size_t end, last, prev;
	int id;
	int consumed = -1;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	end = strlen(in);
	if (end < strlen("/user-0.slice/session-0.scope"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (end > 0 && in[end - 1] == '/')
		end--;
	if (end == 0)
		return false;

	/* Get last path element; @last indexes its first character. */
	last = end;
	while (last > 0 && in[last - 1] != '/')
		last--;
	if (last == 0)
		return false;

	/* Make sure it is session-something.scope. */
	if (strncmp(in + last, "session-", strlen("session-")) != 0 ||
	    strncmp(in + end - 6, ".scope", 6) != 0)
		return false;

	/* Ok last path piece checks out, now check the second to last.
	 * @prev indexes its first character (or the start of @in).
	 */
	prev = last - 1;
	while (prev > 0 && in[prev - 1] != '/')
		prev--;

	/* %n is only stored once the literal ".slice/" has matched as well. */
	if (sscanf(in + prev, "user-%d.slice/%n", &id, &consumed) != 1 || consumed < 0)
		return false;

	return id == (int)uid;
}
1068
/* Chown existing cgroup that systemd has already created for us.
 *
 * Returns false without doing anything when @systemd_user_slice is false.
 * Otherwise the cgroup @base_cgroup below @mountpoint is chowned to
 * @uid:@gid and true is returned. A failing chown() is only logged as a
 * warning; it does not change the return value.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0)
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	else
		/* Only claim success when chown() actually succeeded. */
		pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);

	free(path);
	return true;
}
1092
1093 /* Detect and store information about cgroupfs v1 hierarchies. */
1094 static bool cgv1_init(uid_t uid, gid_t gid)
1095 {
1096 FILE *f;
1097 struct cgv1_hierarchy **it;
1098 char *basecginfo;
1099 char *line = NULL;
1100 char **klist = NULL, **nlist = NULL;
1101 size_t len = 0;
1102
1103 basecginfo = read_file("/proc/self/cgroup");
1104 if (!basecginfo)
1105 return false;
1106
1107 f = fopen("/proc/self/mountinfo", "r");
1108 if (!f) {
1109 free(basecginfo);
1110 return false;
1111 }
1112
1113 cgv1_get_controllers(&klist, &nlist);
1114
1115 while (getline(&line, &len, f) != -1) {
1116 char **controller_list = NULL;
1117 char *mountpoint, *base_cgroup;
1118
1119 if (is_lxcfs(line) || !is_cgv1(line))
1120 continue;
1121
1122 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1123 if (!controller_list)
1124 continue;
1125
1126 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
1127 free(controller_list);
1128 continue;
1129 }
1130
1131 mountpoint = get_mountpoint(line);
1132 if (!mountpoint) {
1133 free_string_list(controller_list);
1134 continue;
1135 }
1136
1137 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1138 if (!base_cgroup) {
1139 free_string_list(controller_list);
1140 free(mountpoint);
1141 continue;
1142 }
1143
1144 trim(base_cgroup);
1145 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1146 "mountpoint \"%s\" and cgroup \"%s\"\n",
1147 controller_list[0], mountpoint, base_cgroup);
1148 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
1149 }
1150
1151 free_string_list(klist);
1152 free_string_list(nlist);
1153 free(basecginfo);
1154 fclose(f);
1155 free(line);
1156
1157 /* Retrieve init cgroup path for all controllers. */
1158 basecginfo = read_file("/proc/1/cgroup");
1159 if (!basecginfo)
1160 return false;
1161
1162 for (it = cgv1_hierarchies; it && *it; it++) {
1163 if ((*it)->controllers) {
1164 char *init_cgroup, *user_slice;
1165
1166 /* We've already stored the controller and received its
1167 * current cgroup. If we now fail to retrieve its init
1168 * cgroup, we should probably fail.
1169 */
1170 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1171 if (!init_cgroup) {
1172 free(basecginfo);
1173 return false;
1174 }
1175
1176 cg_systemd_prune_init_scope(init_cgroup);
1177 (*it)->init_cgroup = init_cgroup;
1178 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1179 "cgroup \"%s\"\n",
1180 (*(*it)->controllers), init_cgroup);
1181
1182 /* Check whether systemd has already created a cgroup
1183 * for us.
1184 */
1185 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1186 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1187 (*it)->systemd_user_slice = true;
1188
1189 free(user_slice);
1190 }
1191 }
1192 free(basecginfo);
1193
1194 return true;
1195 }
1196
1197 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1198 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1199 */
1200 static inline int cg_get_version_of_mntpt(const char *path)
1201 {
1202 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1203 return 1;
1204
1205 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1206 return 2;
1207
1208 return 0;
1209 }
1210
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Our own cgroup and the cgroup of PID 1 are looked up first. The hierarchy is
 * then recorded via cgv2_add_controller(), either for /sys/fs/cgroup if that
 * is a cgroup2 mount or otherwise for the first line in /proc/self/mountinfo
 * that passes is_cgv2().
 *
 * @gid is not used by this function.
 *
 * Returns true if no v2 cgroup is found for the current process or if a
 * hierarchy was recorded; returns false otherwise.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		char *user_slice;
		bool has_user_slice = false;

		mountpoint = must_copy_string("/sys/fs/cgroup");
		if (!mountpoint)
			goto cleanup;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		ret = true;
		goto cleanup;
	}

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		goto cleanup;

	/* we support simple cgroup mounts and lxcfs mounts */
	while (getline(&line, &len, f) != -1) {
		char *user_slice;
		bool has_user_slice = false;

		if (!is_cgv2(line))
			continue;

		mountpoint = get_mountpoint(line);
		if (!mountpoint)
			continue;

		user_slice = must_make_path(mountpoint, current_cgroup, NULL);
		if (cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid))
			has_user_slice = true;
		free(user_slice);

		cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);

		/* Only report a detection when there actually is one; the
		 * mountpoint is not valid on any other path.
		 */
		pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
			       "current cgroup \"%s\" and init cgroup \"%s\"\n",
			       mountpoint, current_cgroup, init_cgroup);

		/* Although the unified hierarchy can be mounted multiple times,
		 * each of those mountpoints will expose identical information.
		 * So let the first mountpoint we find, win.
		 */
		ret = true;
		break;
	}

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* On success the cgroup strings were either never allocated or have
	 * been handed to cgv2_add_controller(), so only free them on failure.
	 */
	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1311
1312 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1313 * cgroupfs v2 hierarchy.
1314 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1315 * where some controllers are mounted into their standard cgroupfs v1 locations
1316 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1317 * hierarchy (/sys/fs/cgroup).
1318 */
1319 static bool cg_init(uid_t uid, gid_t gid)
1320 {
1321 if (!cgv1_init(uid, gid))
1322 return false;
1323
1324 if (!cgv2_init(uid, gid))
1325 return false;
1326
1327 if (cgv1_hierarchies && cgv2_hierarchies) {
1328 cg_mount_mode = CGROUP_MIXED;
1329 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1330 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1331 cg_mount_mode = CGROUP_PURE_V1;
1332 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1333 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1334 cg_mount_mode = CGROUP_PURE_V2;
1335 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1336 } else {
1337 cg_mount_mode = CGROUP_UNKNOWN;
1338 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy\n", NULL);
1339 }
1340
1341 if (cg_mount_mode == CGROUP_UNKNOWN)
1342 return false;
1343
1344 return true;
1345 }
1346
1347 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1348 static bool cgv1_enter(const char *cgroup)
1349 {
1350 struct cgv1_hierarchy **it;
1351
1352 for (it = cgv1_hierarchies; it && *it; it++) {
1353 char **controller;
1354 bool entered = false;
1355
1356 if (!(*it)->controllers || !(*it)->mountpoint ||
1357 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1358 continue;
1359
1360 for (controller = (*it)->controllers; controller && *controller;
1361 controller++) {
1362 char *path;
1363
1364 /* We've already been placed in a user slice, so we
1365 * don't need to enter the cgroup again.
1366 */
1367 if ((*it)->systemd_user_slice) {
1368 entered = true;
1369 break;
1370 }
1371
1372 path = must_make_path((*it)->mountpoint,
1373 (*it)->init_cgroup,
1374 cgroup,
1375 "/cgroup.procs",
1376 NULL);
1377 if (!file_exists(path)) {
1378 free(path);
1379 path = must_make_path((*it)->mountpoint,
1380 (*it)->init_cgroup,
1381 cgroup,
1382 "/tasks",
1383 NULL);
1384 }
1385
1386 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1387 entered = write_int(path, (int)getpid());
1388 if (entered) {
1389 free(path);
1390 break;
1391 }
1392
1393 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
1394 free(path);
1395 }
1396
1397 if (!entered)
1398 return false;
1399 }
1400
1401 return true;
1402 }
1403
1404 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1405 static bool cgv2_enter(const char *cgroup)
1406 {
1407 struct cgv2_hierarchy *v2;
1408 char *path;
1409 bool entered = false;
1410
1411 if (!cgv2_hierarchies)
1412 return true;
1413
1414 v2 = *cgv2_hierarchies;
1415
1416 if (!v2->mountpoint || !v2->base_cgroup)
1417 return false;
1418
1419 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1420 return true;
1421
1422 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
1423 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1424
1425 entered = write_int(path, (int)getpid());
1426 if (!entered) {
1427 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1428 free(path);
1429 return false;
1430 }
1431
1432 free(path);
1433
1434 return true;
1435 }
1436
1437 /* Wrapper around cgv{1,2}_enter(). */
1438 static bool cg_enter(const char *cgroup)
1439 {
1440 if (!cgv1_enter(cgroup)) {
1441 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups\n", NULL);
1442 return false;
1443 }
1444
1445 if (!cgv2_enter(cgroup)) {
1446 mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups\n", NULL);
1447 return false;
1448 }
1449
1450 return true;
1451 }
1452
1453 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1454 static void cgv1_escape(void)
1455 {
1456 struct cgv1_hierarchy **it;
1457
1458 /* In case systemd hasn't already placed us in a user slice for the
1459 * cpuset v1 controller we will reside in the root cgroup. This means
1460 * that cgroup.clone_children will not have been initialized for us so
1461 * we need to do it.
1462 */
1463 for (it = cgv1_hierarchies; it && *it; it++)
1464 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1465 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset\n", NULL);
1466
1467 if (!cgv1_enter("/"))
1468 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL);
1469 }
1470
1471 /* Escape to root cgroup in the cgroupfs v2 hierarchy. */
1472 static void cgv2_escape(void)
1473 {
1474 if (!cgv2_enter("/"))
1475 mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL);
1476 }
1477
/* Escape in the cgroupfs v1 hierarchies first, then in the v2 hierarchy.
 * Neither step reports failure to the caller.
 */
static void cg_escape(void)
{
	cgv1_escape();
	cgv2_escape();
}
1484
/* Look up the uid and primary gid of @user in the password database.
 *
 * The scratch buffer for getpwnam_r() starts at the size suggested by
 * sysconf(_SC_GETPW_R_SIZE_MAX) (1024 bytes if there is no usable suggestion)
 * and is doubled for as long as getpwnam_r() reports ERANGE, up to an upper
 * bound of 1 MiB.
 *
 * On success @uid and @gid are filled in and true is returned. False is
 * returned if memory cannot be allocated, the lookup fails or no entry for
 * @user exists; @uid and @gid are left untouched in that case.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	enum { PWBUF_DEFAULT = 1024, PWBUF_MAX = 1024 * 1024 };
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long hint;
	int ret;

	/* sysconf() may return -1 (no limit / unsupported); do not trust a
	 * zero either since that would lead to a zero-sized allocation.
	 */
	hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = hint > 0 ? (size_t)hint : PWBUF_DEFAULT;

	for (;;) {
		char *tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE)
			break;

		/* Buffer too small for this entry: grow and retry. */
		if (bufsize > PWBUF_MAX / 2) {
			free(buf);
			return false;
		}
		bufsize *= 2;
	}

	if (!pwentp) {
		/* ret == 0 with no result means "no such user". */
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1518
/* Tell whether @path is owned by exactly @uid and @gid. Returns false if
 * stat() on @path fails or if either the owner or the group differs.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat sb;

	if (stat(path, &sb) < 0)
		return false;

	return sb.st_uid == uid && sb.st_gid == gid;
}
1532
/* Convert a cpulist into a bit array, e.g. turn
 *
 *	0,2-3
 *
 * into
 *
 *	1 0 1 1
 *
 * @buf is split in place at every ',' by strtok_r(). Each entry is either a
 * single number or a "start-end" range. The returned array is allocated with
 * calloc() and must be freed by the caller.
 *
 * Returns NULL if the allocation fails, if a range has start > end, or if an
 * entry reaches bit @nbits or beyond.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *state = NULL;
	char *entry;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	entry = strtok_r(buf, ",", &state);
	while (entry) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(entry, NULL, 0);
		last = first;

		/* A '-' turns the entry into an inclusive range. */
		dash = strchr(entry, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);

		entry = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1575
/* Join the NULL-terminated string array @parts into one newly allocated
 * string, placing @sep between consecutive elements. If @use_as_prefix is
 * true, @sep is additionally placed in front of the first element.
 *
 * Returns NULL if @parts is NULL or if the allocation fails. An empty array
 * yields an empty string (or just @sep when @use_as_prefix is set). The caller
 * owns the returned string and must free() it.
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **cur;
	char *joined, *pos;
	size_t sep_len = strlen(sep);
	size_t total = use_as_prefix ? sep_len : 0;

	if (!parts)
		return NULL;

	/* First pass: work out the exact length of the result. */
	for (cur = parts; *cur; cur++) {
		if (cur != parts)
			total += sep_len;
		total += strlen(*cur);
	}

	/* calloc() zero-fills, which also provides the terminating NUL. */
	joined = calloc(total + 1, sizeof(char));
	if (!joined)
		return NULL;

	/* Second pass: append at a running write position instead of
	 * re-scanning the buffer from the start for every element.
	 */
	pos = joined;
	if (use_as_prefix) {
		memcpy(pos, sep, sep_len);
		pos += sep_len;
	}

	for (cur = parts; *cur; cur++) {
		size_t part_len = strlen(*cur);

		if (cur != parts) {
			memcpy(pos, sep, sep_len);
			pos += sep_len;
		}

		memcpy(pos, *cur, part_len);
		pos += part_len;
	}

	return joined;
}
1608
/* The largest value of a 64-bit unsigned integer is 2^64 - 1, which has 20
 * decimal digits. One more byte is needed for the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn cpumask into simple, comma-separated cpulist.
 *
 * Bits 0 .. @nbits - 1 of @bitarr are inspected; the index of every set bit is
 * emitted as a decimal number. The result is a newly allocated string owned by
 * the caller. NULL is returned if a number cannot be formatted or if
 * string_join() returns NULL, which includes the case where no bit is set.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Valid bit indices are [0, nbits); looking at bit @nbits itself
	 * would read past the end of the mask whenever @nbits is a multiple
	 * of the word size.
	 */
	for (i = 0; i < nbits; i++) {
		if (is_set(i, bitarr)) {
			ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
			if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
				free_string_list(cpulist);
				return NULL;
			}

			must_append_string(&cpulist, numstr);
		}
	}

	/* string_join() builds a fresh string, so the intermediate list is
	 * no longer needed afterwards.
	 */
	joined = string_join(",", (const char **)cpulist, false);
	free_string_list(cpulist);

	return joined;
}
1635
/* Return the number that follows the last ',' or '-' in @cpulist, i.e. the
 * last (and for an ascending list the highest) cpu index mentioned. If
 * @cpulist contains neither separator the whole string is parsed.
 *
 * The number is parsed with strtoul() using base auto-detection. Returns -1 if
 * strtoul() sets errno. Note that input without any digits yields 0, not an
 * error.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *after_comma, *after_dash, *last;
	unsigned long cpus;

	after_comma = strrchr(cpulist, ',');
	after_dash = strrchr(cpulist, '-');

	/* Only compare the two positions when both exist; ordering a NULL
	 * pointer against a valid one is not defined by the C standard.
	 */
	if (after_comma && after_dash)
		last = (after_comma > after_dash ? after_comma : after_dash) + 1;
	else if (after_comma)
		last = after_comma + 1;
	else if (after_dash)
		last = after_dash + 1;
	else
		last = cpulist;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return (ssize_t)cpus;
}
1668
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Initialize @path/cpuset.cpus from parent(@path)/cpuset.cpus with all cpus
 * listed in /sys/devices/system/cpu/isolated removed.
 *
 * @path is temporarily truncated at its last '/' to address the parent
 * directory; it is restored before the function returns.
 *
 * What gets written:
 *   - The isolated file is missing or does not start with a digit: the
 *     parent's list is copied verbatim, but only if @am_initialized is false.
 *   - Isolated cpus exist but none of them is part of the parent's list:
 *     nothing is written.
 *   - Otherwise the filtered list is written.
 *
 * Returns true on success (including the "nothing to do" cases) and false if
 * a file cannot be read or written or a cpu list cannot be converted.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("Invalid path: %s\n", path);
		return bret;
	}

	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}

	/* <ctype.h> functions require a value representable as unsigned char. */
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0 || maxisol >= INT_MAX - 1)
		goto on_error;

	/* From here on maxposs is a bit count (highest cpu index + 1), not an
	 * index anymore.
	 */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
		goto on_error;
	}

	/* Both masks hold exactly maxposs bits, so the last valid index is
	 * maxposs - 1.
	 */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list\n");
		goto on_error;
	}

copy_parent:
	/* Switch from the parent directory back to @path itself. */
	*lastslash = oldv;

	free(fpath);

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0660);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	*lastslash = oldv;

	free(fpath);
	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1816
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is truncated at its last '/' only for as long as it takes to build the
 * parent's file name and is restored on every return path.
 *
 * Returns true if the value was read and written, false if @path contains no
 * '/', the parent's file is empty or unreadable, or the write fails.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* Build the parent's file name, then undo the truncation right away
	 * so that no error path can leave the caller's @path modified.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* First call only determines the length of the content. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}

	value = must_realloc(NULL, len + 1);
	/* Terminate the buffer: it is printed with "%s" below. */
	value[len] = '\0';
	if (lxc_read_from_file(fpath, value, len) != len) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0660);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1864
1865 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1866 * controller we will reside in the root cgroup. This means that
1867 * cgroup.clone_children will not have been initialized for us so we need to do
1868 * it.
1869 */
1870 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1871 {
1872 char *clonechildrenpath, v;
1873
1874 if (!string_in_list(h->controllers, "cpuset"))
1875 return true;
1876
1877 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1878
1879 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1880 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1881 free(clonechildrenpath);
1882 return false;
1883 }
1884
1885 if (v == '1') { /* already set for us by someone else */
1886 free(clonechildrenpath);
1887 return true;
1888 }
1889
1890 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
1891 /* Set clone_children so children inherit our settings */
1892 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1893 free(clonechildrenpath);
1894 return false;
1895 }
1896
1897 free(clonechildrenpath);
1898 return true;
1899 }
1900
1901 /*
1902 * Initialize the cpuset hierarchy in first directory of @gname and
1903 * set cgroup.clone_children so that children inherit settings.
1904 * Since the h->base_path is populated by init or ourselves, we know
1905 * it is already initialized.
1906 */
1907 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
1908 const char *cgroup)
1909 {
1910 char *cgpath, *clonechildrenpath, v, *slash;
1911
1912 if (!string_in_list(h->controllers, "cpuset"))
1913 return true;
1914
1915 if (*cgroup == '/')
1916 cgroup++;
1917 slash = strchr(cgroup, '/');
1918 if (slash)
1919 *slash = '\0';
1920
1921 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
1922 if (slash)
1923 *slash = '/';
1924
1925 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
1926 pam_cgfs_debug("Failed to create '%s'", cgpath);
1927 free(cgpath);
1928 return false;
1929 }
1930
1931 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
1932 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
1933 free(clonechildrenpath);
1934 free(cgpath);
1935 return true;
1936 }
1937
1938 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1939 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
1940 free(clonechildrenpath);
1941 free(cgpath);
1942 return false;
1943 }
1944
1945 /* Make sure any isolated cpus are removed from cpuset.cpus. */
1946 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
1947 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
1948 free(clonechildrenpath);
1949 free(cgpath);
1950 return false;
1951 }
1952
1953 if (v == '1') { /* already set for us by someone else */
1954 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
1955 free(clonechildrenpath);
1956 free(cgpath);
1957 return true;
1958 }
1959
1960 /* copy parent's settings */
1961 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
1962 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
1963 free(cgpath);
1964 free(clonechildrenpath);
1965 return false;
1966 }
1967 free(cgpath);
1968
1969 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
1970 /* Set clone_children so children inherit our settings */
1971 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1972 free(clonechildrenpath);
1973 return false;
1974 }
1975 free(clonechildrenpath);
1976 return true;
1977 }
1978
1979 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
1980 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
1981 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
1982 * to the caller in @existed.
1983 */
1984 #define __PAM_CGFS_USER "/user/"
1985 #define __PAM_CGFS_USER_LEN 6
1986 static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1987 {
1988 char *clean_base_cgroup, *path;
1989 char **controller;
1990 struct cgv1_hierarchy *it;
1991 bool created = false;
1992
1993 *existed = false;
1994 it = h;
1995
1996 for (controller = it->controllers; controller && *controller;
1997 controller++) {
1998 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
1999 return false;
2000
2001 /* If systemd has already created a cgroup for us, keep using
2002 * it.
2003 */
2004 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2005 it->base_cgroup, uid, gid,
2006 it->systemd_user_slice))
2007 return true;
2008
2009 /* We need to make sure that we do not create an endless chain
2010 * of sub-cgroups. So we check if we have already logged in
2011 * somehow (sudo -i, su, etc.) and have created a
2012 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2013 * cgroups this is unnecessary since we use the init_cgroup
2014 * anyway, but for controllers which have an existing systemd
2015 * cgroup that does not match the current uid, this is pretty
2016 * useful.
2017 */
2018 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2019 free(it->base_cgroup);
2020 it->base_cgroup = must_copy_string("/");
2021 } else {
2022 clean_base_cgroup =
2023 strstr(it->base_cgroup, __PAM_CGFS_USER);
2024 if (clean_base_cgroup)
2025 *clean_base_cgroup = '\0';
2026 }
2027
2028 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2029 pam_cgfs_debug("Constructing path: %s\n", path);
2030
2031 if (file_exists(path)) {
2032 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2033 if (our_cg)
2034 *existed = false;
2035 else
2036 *existed = true;
2037
2038 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2039 path, our_cg ? "" : "not ", uid, gid);
2040 free(path);
2041
2042 return our_cg;
2043 }
2044
2045 created = lxc_mkdir_parent(it->mountpoint, path);
2046 if (!created) {
2047 free(path);
2048 continue;
2049 }
2050
2051 if (chown(path, uid, gid) < 0)
2052 mysyslog(LOG_WARNING,
2053 "Failed to chown %s to %d:%d: %s\n", path,
2054 (int)uid, (int)gid, strerror(errno), NULL);
2055
2056 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2057 free(path);
2058 break;
2059 }
2060
2061 return created;
2062 }
2063
2064 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2065 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2066 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2067 */
2068 static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2069 {
2070
2071 char *path;
2072
2073 /* Better safe than sorry. */
2074 if (!h->controllers)
2075 return true;
2076
2077 /* Cgroups created by systemd for us which we re-use won't be removed
2078 * here, since we're using init_cgroup + cgroup as path instead of
2079 * base_cgroup + cgroup.
2080 */
2081 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2082 (void)recursive_rmdir(path);
2083 free(path);
2084
2085 return true;
2086 }
2087
2088 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2089 static bool cgv2_remove(const char *cgroup)
2090 {
2091 struct cgv2_hierarchy *v2;
2092 char *path;
2093
2094 if (!cgv2_hierarchies)
2095 return true;
2096
2097 v2 = *cgv2_hierarchies;
2098
2099 /* If we reused an already existing cgroup, don't bother trying to
2100 * remove (a potentially wrong)/the path.
2101 * Cgroups created by systemd for us which we re-use would be removed
2102 * here, since we're using base_cgroup + cgroup as path.
2103 */
2104 if (v2->systemd_user_slice)
2105 return true;
2106
2107 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2108 (void)recursive_rmdir(path);
2109 free(path);
2110
2111 return true;
2112 }
2113
2114 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2115 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2116 * back, to the caller if the creation failed due to @cgroup already existing
2117 * via @existed.
2118 */
2119 static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2120 {
2121 struct cgv1_hierarchy **it, **rev_it;
2122 bool all_created = true;
2123
2124 for (it = cgv1_hierarchies; it && *it; it++) {
2125 if (!(*it)->controllers || !(*it)->mountpoint ||
2126 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2127 continue;
2128
2129 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2130 all_created = false;
2131 break;
2132 }
2133 }
2134
2135 if (all_created)
2136 return true;
2137
2138 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2139 rev_it++)
2140 cgv1_remove_one(*rev_it, cgroup);
2141
2142 return false;
2143 }
2144
2145 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2146 * the creation failed due to @cgroup already existing via @existed.
2147 */
2148 static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2149 {
2150 int ret;
2151 char *clean_base_cgroup;
2152 char *path;
2153 struct cgv2_hierarchy *v2;
2154 bool our_cg = false, created = false;
2155
2156 *existed = false;
2157
2158 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2159 return true;
2160
2161 v2 = *cgv2_hierarchies;
2162
2163 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2164 * to be placed under our current cgroup.
2165 */
2166 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2167 uid, gid, v2->systemd_user_slice))
2168 goto delegate_files;
2169
2170 /* We need to make sure that we do not create an endless chain of
2171 * sub-cgroups. So we check if we have already logged in somehow (sudo
2172 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2173 * skip that part.
2174 */
2175 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2176 free(v2->base_cgroup);
2177 v2->base_cgroup = must_copy_string("/");
2178 } else {
2179 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2180 if (clean_base_cgroup)
2181 *clean_base_cgroup = '\0';
2182 }
2183
2184 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2185 pam_cgfs_debug("Constructing path \"%s\"\n", path);
2186
2187 if (file_exists(path)) {
2188 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2189 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2190 path, our_cg ? "" : "not ", uid, gid);
2191 free(path);
2192 if (our_cg) {
2193 *existed = false;
2194 goto delegate_files;
2195 } else {
2196 *existed = true;
2197 return false;
2198 }
2199 }
2200
2201 created = lxc_mkdir_parent(v2->mountpoint, path);
2202 if (!created) {
2203 free(path);
2204 return false;
2205 }
2206
2207 /* chown cgroup to user */
2208 if (chown(path, uid, gid) < 0)
2209 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2210 path, (int)uid, (int)gid, strerror(errno), NULL);
2211 else
2212 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2213 free(path);
2214
2215 delegate_files:
2216 /* chown cgroup.procs to user */
2217 if (v2->systemd_user_slice)
2218 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2219 "/cgroup.procs", NULL);
2220 else
2221 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2222 "/cgroup.procs", NULL);
2223
2224 ret = chown(path, uid, gid);
2225 if (ret < 0)
2226 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2227 path, (int)uid, (int)gid, strerror(errno), NULL);
2228 else
2229 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
2230 free(path);
2231
2232 /* chown cgroup.subtree_control to user */
2233 if (v2->systemd_user_slice)
2234 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2235 "/cgroup.subtree_control", NULL);
2236 else
2237 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2238 "/cgroup.subtree_control", NULL);
2239
2240 ret = chown(path, uid, gid);
2241 if (ret < 0)
2242 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2243 path, (int)uid, (int)gid, strerror(errno), NULL);
2244 free(path);
2245
2246 /* chown cgroup.threads to user */
2247 if (v2->systemd_user_slice)
2248 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2249 "/cgroup.threads", NULL);
2250 else
2251 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2252 "/cgroup.threads", NULL);
2253 ret = chown(path, uid, gid);
2254 if (ret < 0 && errno != ENOENT)
2255 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2256 path, (int)uid, (int)gid, strerror(errno), NULL);
2257 free(path);
2258
2259 return true;
2260 }
2261
2262 /* Create writeable cgroups for @user at login. Details can be found in the
2263 * preamble/license at the top of this file.
2264 */
2265 static int handle_login(const char *user, uid_t uid, gid_t gid)
2266 {
2267 int idx = 0, ret;
2268 bool existed;
2269 char cg[PATH_MAX];
2270
2271 cg_escape();
2272
2273 while (idx >= 0) {
2274 ret = snprintf(cg, PATH_MAX, "/user/%s/%d", user, idx);
2275 if (ret < 0 || ret >= PATH_MAX) {
2276 mysyslog(LOG_ERR, "Username too long\n", NULL);
2277 return PAM_SESSION_ERR;
2278 }
2279
2280 existed = false;
2281 if (!cgv2_create(cg, uid, gid, &existed)) {
2282 if (existed) {
2283 cgv2_remove(cg);
2284 idx++;
2285 continue;
2286 }
2287
2288 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2289 return PAM_SESSION_ERR;
2290 }
2291
2292 existed = false;
2293 if (!cgv1_create(cg, uid, gid, &existed)) {
2294 if (existed) {
2295 cgv2_remove(cg);
2296 idx++;
2297 continue;
2298 }
2299
2300 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
2301 return PAM_SESSION_ERR;
2302 }
2303
2304 if (!cg_enter(cg)) {
2305 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s\n", cg, user, NULL);
2306 return PAM_SESSION_ERR;
2307 }
2308
2309 break;
2310 }
2311
2312 return PAM_SUCCESS;
2313 }
2314
2315 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2316 * hierarchies.
2317 */
2318 static bool cgv1_prune_empty_cgroups(const char *user)
2319 {
2320 bool controller_removed = true;
2321 bool all_removed = true;
2322 struct cgv1_hierarchy **it;
2323
2324 for (it = cgv1_hierarchies; it && *it; it++) {
2325 int ret;
2326 char *path_base, *path_init;
2327 char **controller;
2328
2329 if (!(*it)->controllers || !(*it)->mountpoint ||
2330 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2331 continue;
2332
2333 for (controller = (*it)->controllers; controller && *controller;
2334 controller++) {
2335 bool path_base_rm, path_init_rm;
2336
2337 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2338 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base);
2339
2340 ret = recursive_rmdir(path_base);
2341 if (ret == -ENOENT || ret >= 0)
2342 path_base_rm = true;
2343 else
2344 path_base_rm = false;
2345 free(path_base);
2346
2347 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2348 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init);
2349
2350 ret = recursive_rmdir(path_init);
2351 if (ret == -ENOENT || ret >= 0)
2352 path_init_rm = true;
2353 else
2354 path_init_rm = false;
2355 free(path_init);
2356
2357 if (!path_base_rm && !path_init_rm) {
2358 controller_removed = false;
2359 continue;
2360 }
2361
2362 controller_removed = true;
2363 break;
2364 }
2365
2366 if (!controller_removed)
2367 all_removed = false;
2368 }
2369
2370 return all_removed;
2371 }
2372
2373 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2374 * hierarchy.
2375 */
2376 static bool cgv2_prune_empty_cgroups(const char *user)
2377 {
2378 int ret;
2379 struct cgv2_hierarchy *v2;
2380 char *path_base, *path_init;
2381 bool path_base_rm, path_init_rm;
2382
2383 if (!cgv2_hierarchies)
2384 return true;
2385
2386 v2 = *cgv2_hierarchies;
2387
2388 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2389 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base);
2390
2391 ret = recursive_rmdir(path_base);
2392 if (ret == -ENOENT || ret >= 0)
2393 path_base_rm = true;
2394 else
2395 path_base_rm = false;
2396 free(path_base);
2397
2398 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2399 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init);
2400
2401 ret = recursive_rmdir(path_init);
2402 if (ret == -ENOENT || ret >= 0)
2403 path_init_rm = true;
2404 else
2405 path_init_rm = false;
2406 free(path_init);
2407
2408 if (!path_base_rm && !path_init_rm)
2409 return false;
2410
2411 return true;
2412 }
2413
/* Prune @user's cgroups from the cgroupfs v1 hierarchies first and from the
 * cgroupfs v2 hierarchy afterwards. The results of both
 * cgv{1,2}_prune_empty_cgroups() calls are intentionally ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);

	(void)cgv2_prune_empty_cgroups(user);
}
2420
2421 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2422 static void cgv1_free_hierarchies(void)
2423 {
2424 struct cgv1_hierarchy **it;
2425
2426 if (!cgv1_hierarchies)
2427 return;
2428
2429 for (it = cgv1_hierarchies; it && *it; it++) {
2430 if ((*it)->controllers) {
2431 char **tmp;
2432 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2433 free(*tmp);
2434
2435 free((*it)->controllers);
2436 }
2437
2438 free((*it)->mountpoint);
2439 free((*it)->base_cgroup);
2440 free((*it)->fullcgpath);
2441 free((*it)->init_cgroup);
2442 }
2443
2444 free(cgv1_hierarchies);
2445 }
2446
2447 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2448 static void cgv2_free_hierarchies(void)
2449 {
2450 struct cgv2_hierarchy **it;
2451
2452 if (!cgv2_hierarchies)
2453 return;
2454
2455 for (it = cgv2_hierarchies; it && *it; it++) {
2456 if ((*it)->controllers) {
2457 char **tmp;
2458
2459 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2460 free(*tmp);
2461
2462 free((*it)->controllers);
2463 }
2464
2465 free((*it)->mountpoint);
2466 free((*it)->base_cgroup);
2467 free((*it)->fullcgpath);
2468 free((*it)->init_cgroup);
2469 }
2470
2471 free(cgv2_hierarchies);
2472 }
2473
/* Release the information kept about detected cgroup hierarchies: cgroupfs
 * v1 first via cgv1_free_hierarchies(), then cgroupfs v2 via
 * cgv2_free_hierarchies().
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();

	cgv2_free_hierarchies();
}
2480
2481 int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2482 const char **argv)
2483 {
2484 int ret;
2485 uid_t uid = 0;
2486 gid_t gid = 0;
2487 const char *PAM_user = NULL;
2488
2489 ret = pam_get_user(pamh, &PAM_user, NULL);
2490 if (ret != PAM_SUCCESS) {
2491 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2492 return PAM_SESSION_ERR;
2493 }
2494
2495 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2496 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2497 return PAM_SESSION_ERR;
2498 }
2499
2500 if (!cg_init(uid, gid)) {
2501 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2502 return PAM_SESSION_ERR;
2503 }
2504
2505 /* Try to prune cgroups, that are actually empty but were still marked
2506 * as busy by the kernel so we couldn't remove them on session close.
2507 */
2508 cg_prune_empty_cgroups(PAM_user);
2509
2510 if (cg_mount_mode == CGROUP_UNKNOWN)
2511 return PAM_SESSION_ERR;
2512
2513 if (argc > 1 && !strcmp(argv[0], "-c")) {
2514 char **clist = make_string_list(argv[1], ",");
2515
2516 /*
2517 * We don't allow using "all" and other controllers explicitly because
2518 * that simply doesn't make any sense.
2519 */
2520 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2521 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2522 free_string_list(clist);
2523 return PAM_SESSION_ERR;
2524 }
2525
2526 cg_mark_to_make_rw(clist);
2527 free_string_list(clist);
2528 }
2529
2530 return handle_login(PAM_user, uid, gid);
2531 }
2532
2533 int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2534 const char **argv)
2535 {
2536 int ret;
2537 uid_t uid = 0;
2538 gid_t gid = 0;
2539 const char *PAM_user = NULL;
2540
2541 ret = pam_get_user(pamh, &PAM_user, NULL);
2542 if (ret != PAM_SUCCESS) {
2543 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2544 return PAM_SESSION_ERR;
2545 }
2546
2547 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2548 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
2549 return PAM_SESSION_ERR;
2550 }
2551
2552 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2553 if (!cg_init(uid, gid))
2554 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2555
2556 if (argc > 1 && !strcmp(argv[0], "-c")) {
2557 char **clist = make_string_list(argv[1], ",");
2558
2559 /*
2560 * We don't allow using "all" and other controllers explicitly because
2561 * that simply doesn't make any sense.
2562 */
2563 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2564 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
2565 free_string_list(clist);
2566 return PAM_SESSION_ERR;
2567 }
2568
2569 cg_mark_to_make_rw(clist);
2570 free_string_list(clist);
2571 }
2572 }
2573
2574 cg_prune_empty_cgroups(PAM_user);
2575 cg_exit();
2576
2577 return PAM_SUCCESS;
2578 }