]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/pam/pam_cgfs.c
rename functions which clash with libsystemd's
[mirror_lxc.git] / src / lxc / pam / pam_cgfs.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
7ac43d3d 2
1160ce89
CB
3#include "config.h"
4
7ac43d3d
CB
5#include <ctype.h>
6#include <dirent.h>
7#include <errno.h>
8#include <fcntl.h>
d38dd64a 9#include <linux/unistd.h>
7ac43d3d
CB
10#include <pwd.h>
11#include <stdarg.h>
12#include <stdbool.h>
13#include <stdint.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
7ac43d3d
CB
17#include <sys/mount.h>
18#include <sys/param.h>
19#include <sys/stat.h>
20#include <sys/types.h>
21#include <sys/vfs.h>
d38dd64a
CB
22#include <syslog.h>
23#include <unistd.h>
24
c4a090be 25#include "file_utils.h"
d38dd64a 26#include "macro.h"
6421783a 27#include "memory_utils.h"
c4a090be 28#include "string_utils.h"
7ac43d3d
CB
29
30#define PAM_SM_SESSION
31#include <security/_pam_macros.h>
32#include <security/pam_modules.h>
33
34498dea 34#if !HAVE_STRLCPY
58db1a61 35#include "strlcpy.h"
43f984ea
DJ
36#endif
37
34498dea 38#if !HAVE_STRLCAT
58db1a61 39#include "strlcat.h"
9c8b2b13
DJ
40#endif
41
8c8e7282
CB
/* Print a formatted message to @stream, prefixed with the source file, line
 * number and function name of the call site. At least one argument has to
 * follow @format.
 */
#define pam_cgfs_debug_stream(stream, format, ...)                         \
	do {                                                               \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
			__func__, __VA_ARGS__);                            \
	} while (false)

/* Report an error on stderr. */
#define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)

/* Debug messages are only emitted when DEBUG is defined. Otherwise the macro
 * expands to an empty statement and its arguments are never evaluated.
 */
#ifdef DEBUG
#define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
#else
#define pam_cgfs_debug(format, ...) \
	do {                        \
	} while (false)
#endif /* DEBUG */
57
7ac43d3d
CB
/* Layout of the cgroup hierarchies found on this system. The file-scope
 * variable of the same name starts out as CGROUP_UNINITIALIZED.
 */
static enum cg_mount_mode {
	CGROUP_UNKNOWN       = -1,
	CGROUP_MIXED         = 0,
	CGROUP_PURE_V1       = 1,
	CGROUP_PURE_V2       = 2,
	CGROUP_UNINITIALIZED = 3,
} cg_mount_mode = CGROUP_UNINITIALIZED;
65
66/* Common helper functions. Most of these have been taken from LXC. */
67static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
68static int append_null_to_list(void ***list);
69static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
7ac43d3d 70static char *copy_to_eol(char *s);
7ac43d3d
CB
71static char *get_mountpoint(char *line);
72static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
73static int handle_login(const char *user, uid_t uid, gid_t gid);
7ac43d3d
CB
74static bool is_lxcfs(const char *line);
75static bool is_cgv1(char *line);
76static bool is_cgv2(char *line);
7ac43d3d
CB
77static void must_add_to_list(char ***clist, char *entry);
78static void must_append_controller(char **klist, char **nlist, char ***clist,
79 char *entry);
80static void must_append_string(char ***list, char *entry);
81static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
82static char *read_file(char *fnam);
7ac43d3d 83static int recursive_rmdir(char *dirname);
7ac43d3d
CB
84static bool string_in_list(char **list, const char *entry);
85static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
86static void trim(char *s);
87static bool write_int(char *path, int v);
7ac43d3d
CB
88
89/* cgroupfs prototypes. */
90static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
91static uint32_t *cg_cpumask(char *buf, size_t nbits);
92static bool cg_copy_parent_file(char *path, char *file);
93static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
94static bool cg_enter(const char *cgroup);
95static void cg_escape(void);
96static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
97static ssize_t cg_get_max_cpus(char *cpulist);
98static int cg_get_version_of_mntpt(const char *path);
99static bool cg_init(uid_t uid, gid_t gid);
100static void cg_mark_to_make_rw(char **list);
101static void cg_prune_empty_cgroups(const char *user);
102static bool cg_systemd_created_user_slice(const char *base_cgroup,
103 const char *init_cgroup,
104 const char *in, uid_t uid);
105static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
106 const char *base_cgroup, uid_t uid,
107 gid_t gid,
108 bool systemd_user_slice);
109static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
110static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
111 const char *init_cgroup, uid_t uid);
112static void cg_systemd_prune_init_scope(char *cg);
113static bool is_lxcfs(const char *line);
114
115/* cgroupfs v1 prototypes. */
/* Description of one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of controllers mounted into this hierarchy. */
	char **controllers;
	/* Mountpoint of the hierarchy as parsed from mountinfo. */
	char *mountpoint;
	/* Cgroup of the current process, taken from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialized to NULL when the hierarchy is recorded. */
	char *fullcgpath;
	/* Cgroup of PID 1, taken from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set when a writeable cgroup is to be created in this hierarchy. */
	bool create_rw_cgroup;
	/* Set when systemd has already created a cgroup for the user. */
	bool systemd_user_slice;
};

/* Null-terminated array of all detected cgroupfs v1 hierarchies. */
static struct cgv1_hierarchy **cgv1_hierarchies;
127
128static void cgv1_add_controller(char **clist, char *mountpoint,
129 char *base_cgroup, char *init_cgroup);
130static bool cgv1_controller_in_clist(char *cgline, char *c);
131static bool cgv1_controller_lists_intersect(char **l1, char **l2);
132static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
133 char **clist);
134static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
135 bool *existed);
136static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
137 uid_t uid, gid_t gid, bool *existed);
138static bool cgv1_enter(const char *cgroup);
139static void cgv1_escape(void);
140static bool cgv1_get_controllers(char ***klist, char ***nlist);
141static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
142static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
143 char *line);
144static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
145 const char *cgroup);
146static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
147static bool cgv1_init(uid_t uid, gid_t gid);
148static void cgv1_mark_to_make_rw(char **clist);
149static char *cgv1_must_prefix_named(char *entry);
150static bool cgv1_prune_empty_cgroups(const char *user);
151static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
152static bool is_cgv1(char *line);
153
154/* cgroupfs v2 prototypes. */
/* Description of the mounted cgroupfs v2 hierarchy. */
struct cgv2_hierarchy {
	/* Null-terminated list of controllers (may be left unset). */
	char **controllers;
	/* Mountpoint of the hierarchy as parsed from mountinfo. */
	char *mountpoint;
	/* Cgroup of the current process, taken from /proc/self/cgroup. */
	char *base_cgroup;
	/* Initialized to NULL when the hierarchy is recorded. */
	char *fullcgpath;
	/* Cgroup of PID 1, taken from /proc/1/cgroup. */
	char *init_cgroup;
	/* Set when a writeable cgroup is to be created in this hierarchy. */
	bool create_rw_cgroup;
	/* Set when systemd has already created a cgroup for the user. */
	bool systemd_user_slice;
};

/* Actually this should only be a single hierarchy. But for the sake of
 * parallelism and because the layout of the cgroupfs v2 is still somewhat
 * changing, we'll leave it as an array of structs.
 */
static struct cgv2_hierarchy **cgv2_hierarchies;
170
171static void cgv2_add_controller(char **clist, char *mountpoint,
172 char *base_cgroup, char *init_cgroup,
173 bool systemd_user_slice);
174static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
175 bool *existed);
176static bool cgv2_enter(const char *cgroup);
177static void cgv2_escape(void);
178static char *cgv2_get_current_cgroup(int pid);
179static bool cgv2_init(uid_t uid, gid_t gid);
180static void cgv2_mark_to_make_rw(char **clist);
181static bool cgv2_prune_empty_cgroups(const char *user);
182static bool cgv2_remove(const char *cgroup);
183static bool is_cgv2(char *line);
184
c4a4578f
JC
/* mkdir() @path with exactly @mode by temporarily clearing the process umask.
 * The umask is restored afterwards and errno is left as set by mkdir().
 * Returns the result of mkdir().
 */
static int do_mkdir(const char *path, mode_t mode)
{
	mode_t old_mask = umask(0);
	int ret = mkdir(path, mode);
	int mkdir_errno = errno;

	/* Restoring the umask must not disturb the errno seen by the caller. */
	umask(old_mask);
	errno = mkdir_errno;

	return ret;
}
198
/* Create every missing directory component of @path that lies below @root.
 *
 * @path is temporarily cut at each '/' while the component is created and is
 * restored afterwards. If creating a component fails, @path is left cut at
 * that component and false is returned. Returns false as well when @path is
 * shorter than @root; returns true when both have the same length or all
 * components exist or were created.
 */
static bool lxc_mkdir_parent(const char *root, char *path)
{
	size_t rootlen = strlen(root);
	size_t pathlen = strlen(path);
	char *start, *end, saved;

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	start = path + rootlen + 1;
	for (;;) {
		/* Skip separators in front of the next component. */
		while (*start == '/')
			start++;
		if (*start == '\0')
			return true;

		/* Find the end of the current component. */
		for (end = start + 1; *end != '\0' && *end != '/'; end++)
			;

		saved = *end;
		if (saved != '\0')
			*end = '\0';

		if (!file_exists(path) && do_mkdir(path, 0755) < 0) {
			pam_cgfs_debug("Failed to create %s: %s\n", path, strerror(errno));
			return false;
		}

		/* The last component has been handled. */
		if (saved == '\0')
			return true;

		*end = saved;
		start = end + 1;
	}
}
243
244/* Common helper functions. Most of these have been taken from LXC. */
/* Send a printf-style message with priority @err to syslog under the
 * "PAM-CGFS" identifier. The prototype carries the sentinel attribute, so
 * callers end the argument list with NULL.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	/* @format is supplied by the caller and hence not a literal here. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
	vsyslog(err, format, ap);
#pragma GCC diagnostic pop
	va_end(ap);

	closelog();
}
258
/* Grow *@mem in multiples of BATCH_SIZE bytes; do not fail. A reallocation
 * only happens when *@mem is still NULL or when @newlen needs more batches
 * than @oldlen did.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int want = (newlen / BATCH_SIZE) + 1;

	if (*mem && want <= have)
		return;

	*mem = must_realloc(*mem, want * BATCH_SIZE);
}
269
/* Append the @newlen bytes of @new plus its terminating NUL byte to *@dest,
 * which currently holds @oldlen bytes. The buffer is grown as needed; do not
 * fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Room for the old content, the new line and the NUL terminator. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
279
/* Read in whole file and return allocated pointer.
 *
 * Returns NULL when @fnam cannot be opened or when it contains no data. The
 * caller owns the returned buffer and must free() it.
 */
static char *read_file(char *fnam)
{
	FILE *f;
	/* getline() returns ssize_t; an int could truncate very long lines. */
	ssize_t linelen;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, (size_t)linelen);
		fulllen += (size_t)linelen;
	}

	fclose(f);
	free(line);

	return buf;
}
302
/* Grow the null-terminated pointer array *@list by one slot; do not fail.
 *
 * The new last slot is set to NULL so the array stays null-terminated once
 * the caller fills the slot in front of it. Returns the index of that free
 * slot, which the caller is expected to assign. *@list may be NULL, in which
 * case a fresh array is allocated.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	while (*list && (*list)[count])
		count++;

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
321
/* Append a copy of @entry to the null-terminated string array *@list. The
 * array remains null-terminated.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
334
/* Strip all trailing newline characters from @s in place. */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
343
7ac43d3d
CB
/* Return an allocated copy of @s up to, but not including, the first '\n'.
 * Returns NULL when @s contains no newline. The caller frees the result.
 */
static char *copy_to_eol(char *s)
{
	char *eol = strchr(s, '\n');
	char *dup;
	size_t n;

	if (!eol)
		return NULL;

	n = (size_t)(eol - s);
	dup = must_realloc(NULL, n + 1);
	memcpy(dup, s, n);
	dup[n] = '\0';

	return dup;
}
361
/* Check if given entry under /proc/<pid>/mountinfo is a fuse.lxcfs mount.
 * Only the first " - " separator found in @line is inspected.
 */
static bool is_lxcfs(const char *line)
{
	static const char fstype[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, fstype, sizeof(fstype) - 1) == 0;
}
371
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v1 mount.
 * Only the first " - " separator found in @line is inspected.
 */
static bool is_cgv1(char *line)
{
	static const char fstype[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, fstype, sizeof(fstype) - 1) == 0;
}
381
/* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v2 mount.
 * Only the first " - " separator found in @line is inspected.
 */
static bool is_cgv2(char *line)
{
	static const char fstype[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, fstype, sizeof(fstype) - 1) == 0;
}
391
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
405
/*
 * Split @str at every character contained in @sep and return the pieces as a
 * null-terminated array of allocated strings. Returns NULL when @str yields no
 * tokens. Caller is responsible for calling free_string_list.
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	/* strtok_r() modifies its input, so tokenize a private copy. */
	char *dup = must_copy_string(str);
	char *piece = strtok_r(dup, sep, &state);

	while (piece) {
		must_add_to_list(&result, piece);
		piece = strtok_r(NULL, sep, &state);
	}

	free(dup);

	return result;
}
426
/* Count the entries of a null-terminated array of strings. A NULL @list has
 * length 0.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (!list)
		return 0;

	while (list[n])
		n++;

	return n;
}
438
7ac43d3d
CB
/* Write @v as a decimal number followed by a newline to the file at @path.
 * Returns false if the file cannot be opened, or if writing or closing it
 * fails.
 */
static bool write_int(char *path, int v)
{
	bool written, closed;
	FILE *f = fopen(path, "w");

	if (!f)
		return false;

	written = fprintf(f, "%d\n", v) >= 0;
	/* The stream is closed even when the write failed. */
	closed = fclose(f) == 0;

	return written && closed;
}
457
458/* Recursively remove directory and its parents. */
459static int recursive_rmdir(char *dirname)
460{
4110345b 461 __do_closedir DIR *dir = NULL;
7ac43d3d 462 struct dirent *direntp;
7ac43d3d
CB
463 int r = 0;
464
465 dir = opendir(dirname);
466 if (!dir)
467 return -ENOENT;
468
469 while ((direntp = readdir(dir))) {
470 struct stat st;
471 char *pathname;
472
7ac43d3d
CB
473 if (!strcmp(direntp->d_name, ".") ||
474 !strcmp(direntp->d_name, ".."))
475 continue;
476
477 pathname = must_make_path(dirname, direntp->d_name, NULL);
478
479 if (lstat(pathname, &st)) {
480 if (!r)
2082ac62 481 pam_cgfs_debug("Failed to stat %s\n", pathname);
7ac43d3d
CB
482 r = -1;
483 goto next;
484 }
485
486 if (!S_ISDIR(st.st_mode))
487 goto next;
488
489 if (recursive_rmdir(pathname) < 0)
490 r = -1;
2082ac62 491
7ac43d3d
CB
492next:
493 free(pathname);
494 }
495
496 if (rmdir(dirname) < 0) {
497 if (!r)
2082ac62 498 pam_cgfs_debug("Failed to delete %s: %s\n", dirname, strerror(errno));
7ac43d3d
CB
499 r = -1;
500 }
501
7ac43d3d
CB
502 return r;
503}
504
/* Store a copy of @entry at the end of the null-terminated pointer array
 * *@clist, keeping the array null-terminated.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
515
/* Return an allocated copy of the fifth space-separated field, the
 * mountpoint, of a /proc/<pid>/mountinfo line.
 *
 * @line is modified: the space following the mountpoint, if any, is replaced
 * by a NUL byte. Returns NULL when @line has fewer than five fields.
 */
static char *get_mountpoint(char *line)
{
	char *start = line, *end, *mnt;
	size_t mntlen;
	int field;

	/* Skip the four fields in front of the mountpoint. */
	for (field = 0; field < 4; field++) {
		start = strchr(start, ' ');
		if (!start)
			return NULL;
		start++;
	}

	end = strchr(start, ' ');
	if (end)
		*end = '\0';

	mntlen = strlen(start);
	mnt = must_realloc(NULL, mntlen + 1);
	memcpy(mnt, start, mntlen);
	mnt[mntlen] = '\0';

	return mnt;
}
543
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * Controllers starting with "name=" are appended to @nlist, all others to
 * @klist. Lines with an empty controller field (the 0::/some/path cgroupfs v2
 * entry) are skipped. Returns false only if /proc/self/cgroup cannot be
 * opened.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t cap = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (!f)
		return false;

	while (getline(&line, &cap, f) != -1) {
		char *ctrls, *ctrls_end, *ctrl;
		char *state = NULL;

		/* The controller list sits between the first two ':'. */
		ctrls = strchr(line, ':');
		if (!ctrls)
			continue;
		ctrls++;

		ctrls_end = strchr(ctrls, ':');
		if (!ctrls_end)
			continue;
		*ctrls_end = '\0';

		/* Skip the v2 hierarchy. */
		if (ctrls_end == ctrls)
			continue;

		ctrl = strtok_r(ctrls, ",", &state);
		while (ctrl) {
			if (strncmp(ctrl, "name=", 5) == 0)
				must_append_string(nlist, ctrl);
			else
				must_append_string(klist, ctrl);

			ctrl = strtok_r(NULL, ",", &state);
		}
	}

	free(line);
	fclose(f);

	return true;
}
590
591/* Get list of controllers for cgroupfs v2 hierarchy by looking at
592 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
593static bool cgv2_get_controllers(char ***klist)
594{
595 return -ENOSYS;
596}
597*/
598
/* Return the cgroupfs v2 cgroup of @pid as listed on the "0::/..." line of
 * /proc/<pid>/cgroup.
 *
 * The result is an allocated string starting with '/' and without a trailing
 * newline. Returns NULL when the file cannot be read or holds no such line.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	int ret;
	char *content, *entry;
	char *cgroup = NULL;
	/* Sized generously enough for any decimal pid. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];

	ret = snprintf(path, __PIDLEN, "/proc/%d/cgroup", pid);
	if (ret < 0 || ret >= __PIDLEN)
		return NULL;

	content = read_file(path);
	if (!content)
		return NULL;

	entry = strstr(content, "0::/");
	if (entry)
		/* Skip "0::" so that the copy starts at the leading '/'. */
		cgroup = copy_to_eol(entry + 3);

	free(content);

	if (cgroup)
		trim(cgroup);

	return cgroup;
}
635
/* Return true if the null-terminated string lists @l1 and @l2 have at least
 * one string in common. A NULL list never intersects with anything.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
652
653/* For a null-terminated list of controllers @clist, return true if any of those
654 * controllers is already listed the null-terminated list of hierarchies @hlist.
655 * Realistically, if one is present, all must be present.
656 */
657static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
658{
659 struct cgv1_hierarchy **it;
660
661 for (it = hlist; it && *it; it++)
662 if ((*it)->controllers)
663 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
664 return true;
2082ac62 665
7ac43d3d
CB
666 return false;
667
668}
669
670/* Set boolean to mark controllers under which we are supposed create a
671 * writeable cgroup.
672 */
673static void cgv1_mark_to_make_rw(char **clist)
674{
675 struct cgv1_hierarchy **it;
676
677 for (it = cgv1_hierarchies; it && *it; it++)
678 if ((*it)->controllers)
679 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
2082ac62 680 string_in_list(clist, "all"))
7ac43d3d
CB
681 (*it)->create_rw_cgroup = true;
682}
683
684/* Set boolean to mark whether we are supposed to create a writeable cgroup in
685 * the cgroupfs v2 hierarchy.
686 */
687static void cgv2_mark_to_make_rw(char **clist)
688{
689 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
690 if (cgv2_hierarchies)
691 (*cgv2_hierarchies)->create_rw_cgroup = true;
692}
693
/* Apply @clist to both cgroup versions: first the cgroupfs v1 hierarchies,
 * then the cgroupfs v2 hierarchy.
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
700
/* Return an allocated copy of @entry prefixed with "name=", e.g.
 * "name=systemd". Returns NULL if formatting fails.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	/* "name=" (5 bytes) + entry + terminating NUL byte. */
	size_t size = strlen(entry) + 6;
	char *prefixed = must_realloc(NULL, size);
	int ret = snprintf(prefixed, size, "name=%s", entry);

	if (ret < 0 || (size_t)ret >= size) {
		free(prefixed);
		return NULL;
	}

	return prefixed;
}
719
/* Append controller @entry to the null-terminated list *@clist.
 *
 * Entries found in both @klist and @nlist are ambiguous and are dropped.
 * Entries already starting with "name=" and entries found in @klist are
 * copied verbatim; everything else is stored with a "name=" prefix.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;
	char *stored;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (is_kernel || strncmp(entry, "name=", 5) == 0)
		stored = must_copy_string(entry);
	else
		stored = cgv1_must_prefix_named(entry);

	(*clist)[slot] = stored;
}
740
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list
 *
 * Returns a null-terminated list of controllers built via
 * must_append_controller(), or NULL when @line has fewer than five fields, the
 * mountpoint does not start with "/sys/fs/cgroup/", no space follows the
 * mountpoint, or no controller was appended.
 *
 * @line is left untouched: the controller list is tokenized on a private
 * copy. Tokenizing in place would cut the line at the first ',' and make a
 * later get_mountpoint() on the same line return a truncated mountpoint for
 * co-mounted controllers such as "cpu,cpuacct".
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	int i;
	char *p, *p2, *tok, *dup;
	size_t len;
	char *saveptr = NULL;
	char **aret = NULL;

	p = line;

	/* Skip the four fields in front of the mountpoint. */
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	if (strncmp(p, "/sys/fs/cgroup/", 15) != 0)
		return NULL;

	p += 15;

	p2 = strchr(p, ' ');
	if (!p2)
		return NULL;

	/* Work on a copy since strtok_r() writes NUL bytes into its input. */
	len = (size_t)(p2 - p);
	dup = must_realloc(NULL, len + 1);
	memcpy(dup, p, len);
	dup[len] = '\0';

	for (tok = strtok_r(dup, ",", &saveptr); tok;
	     tok = strtok_r(NULL, ",", &saveptr))
		must_append_controller(klist, nlist, &aret, tok);

	free(dup);

	return aret;
}
778
779/* Check if a cgroupfs v2 controller is present in the string @cgline. */
780static bool cgv1_controller_in_clist(char *cgline, char *c)
781{
6421783a 782 __do_free char *tmp = NULL;
7ac43d3d 783 size_t len;
6421783a 784 char *tok, *eol;
7ac43d3d
CB
785 char *saveptr = NULL;
786
787 eol = strchr(cgline, ':');
788 if (!eol)
789 return false;
790
791 len = eol - cgline;
6421783a 792 tmp = must_realloc(NULL, len + 1);
7ac43d3d
CB
793 memcpy(tmp, cgline, len);
794 tmp[len] = '\0';
795
796 for (tok = strtok_r(tmp, ",", &saveptr); tok;
797 tok = strtok_r(NULL, ",", &saveptr)) {
798 if (strcmp(tok, c) == 0)
799 return true;
800 }
2082ac62 801
7ac43d3d
CB
802 return false;
803}
804
/* Look up the cgroup of the cgv1 controller @controller in @basecginfo, the
 * content of a /proc/<pid>/cgroup file.
 *
 * Returns an allocated copy of the cgroup path of the first line whose
 * controller list contains @controller, or NULL if there is no such line or
 * the file content is malformed.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *p = basecginfo;

	/* Every line has the form "<id>:<controllers>:<cgroup>". */
	while ((p = strchr(p, ':'))) {
		p++;

		if (cgv1_controller_in_clist(p, controller)) {
			p = strchr(p, ':');
			return p ? copy_to_eol(p + 1) : NULL;
		}

		/* Not this line; continue with the next one. */
		p = strchr(p, '\n');
		if (!p)
			return NULL;
		p++;
	}

	return NULL;
}
837
/* Remove a trailing /init.scope from string @cg. This will mostly affect
 * systemd-based systems.
 *
 * If @cg is exactly "/init.scope" it is reduced to "/". A NULL @cg and
 * strings not ending in "/init.scope" are left alone.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	size_t len, scope_len = strlen(INIT_SCOPE);
	char *point;

	if (!cg)
		return;

	/* Compare lengths rather than pointers: computing cg + len - scope_len
	 * for a string shorter than the suffix would form a pointer in front
	 * of the buffer, which is undefined behavior.
	 */
	len = strlen(cg);
	if (len < scope_len)
		return;

	point = cg + (len - scope_len);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0'; /* keep the leading '/' */
	else
		*point = '\0';
}
860
861/* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
862 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
863 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
864 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
865 * from /proc/1/cgroup.
866 */
867static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
868{
869 struct cgv1_hierarchy *new;
870 int newentry;
871
f25a2044 872 new = must_realloc(NULL, sizeof(*new));
2082ac62 873
7ac43d3d
CB
874 new->controllers = clist;
875 new->mountpoint = mountpoint;
876 new->base_cgroup = base_cgroup;
877 new->fullcgpath = NULL;
878 new->create_rw_cgroup = false;
879 new->init_cgroup = init_cgroup;
880 new->systemd_user_slice = false;
881
882 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
883 cgv1_hierarchies[newentry] = new;
884}
885
886/* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
887 * currently) include the controllers mounted into the hierarchy (e.g. memory,
888 * pids, blkio), the mountpoint of that hierarchy (Should usually be
889 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
890 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
891 * base cgroup of the current process gathered from /proc/self/cgroup, and the
892 * init cgroup of PID1 gathered from /proc/1/cgroup.
893 */
894static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
895{
896 struct cgv2_hierarchy *new;
897 int newentry;
898
f25a2044 899 new = must_realloc(NULL, sizeof(*new));
2082ac62 900
7ac43d3d
CB
901 new->controllers = clist;
902 new->mountpoint = mountpoint;
903 new->base_cgroup = base_cgroup;
904 new->fullcgpath = NULL;
905 new->create_rw_cgroup = false;
906 new->init_cgroup = init_cgroup;
907 new->systemd_user_slice = systemd_user_slice;
908
909 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
910 cgv2_hierarchies[newentry] = new;
911}
912
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session' This can be merged better with
 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
 * reason about the correctness.
 *
 * Returns true if the last element of @in ends in ".session" and the element
 * in front of it starts with the decimal @uid.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	char *last, *prev, *elem;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	copy = must_copy_string(in);
	len = strlen(copy);
	if (len < strlen("/user/1.user/1.session"))
		goto cleanup;

	/* skip any trailing '/' (shouldn't be any, but be sure) */
	while (len > 0 && copy[len - 1] == '/')
		copy[--len] = '\0';
	if (len == 0)
		goto cleanup;

	/* Get last path element */
	last = strrchr(copy, '/');
	if (!last)
		goto cleanup;

	/* make sure it is something.session */
	elem = last + 1;
	len = strlen(elem);
	if (len < strlen("1.session") ||
	    strncmp(elem + len - 8, ".session", 8) != 0)
		goto cleanup;

	/* ok last path piece checks out, now check the second to last */
	*elem = '\0';

	/* Walk back to the previous '/' without ever stepping in front of the
	 * buffer; if there is none there is no second-to-last element.
	 */
	prev = last;
	do {
		if (prev == copy)
			goto cleanup;
		prev--;
	} while (*prev != '/');

	if (sscanf(prev + 1, "%d.user/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
966
/* Return true if @base_cgroup, taken relative to @init_cgroup, starts with
 * /user.slice/user-$uid.slice/. Such a cgroup is assumed to belong to $uid and
 * is chowned by the caller.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	char prefix[100];
	size_t base_len = strlen(base_cgroup);
	size_t init_len = strlen(init_cgroup);
	int n;

	/* @base_cgroup has to lie strictly below @init_cgroup. */
	if (base_len <= init_len ||
	    strncmp(base_cgroup, init_cgroup, init_len) != 0)
		return false;

	n = snprintf(prefix, sizeof(prefix), "/user.slice/user-%d.slice/", (int)uid);
	if (n < 0 || (size_t)n >= sizeof(prefix))
		return false;

	/* A one-character init cgroup is treated as "/", which is also the
	 * first character of the prefix.
	 */
	if (init_len == 1)
		init_len = 0;

	return strncmp(base_cgroup + init_len, prefix, (size_t)n) == 0;
}
994
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 *  - they fool us, we create new cgroups, and they get auto-logged-out.
 *  - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 *    lose ownership of their cgroups
 *
 * Returns true if @in matches the old '/user/$uid.user/$something.session'
 * layout, if @base_cgroup relative to @init_cgroup lies below
 * /user.slice/user-$uid.slice/, or if the last two elements of @in look like
 * user-$uid.slice/session-$something.scope.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	char *last, *prev, *elem;
	size_t len;
	int id;
	char *copy = NULL;
	bool bret = false;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	copy = must_copy_string(in);
	len = strlen(copy);
	if (len < strlen("/user-0.slice/session-0.scope"))
		goto cleanup;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (len > 0 && copy[len - 1] == '/')
		copy[--len] = '\0';

	if (len == 0)
		goto cleanup;

	/* Get last path element */
	last = strrchr(copy, '/');
	if (!last)
		goto cleanup;

	/* Make sure it is session-something.scope. */
	elem = last + 1;
	len = strlen(elem);
	if (strncmp(elem, "session-", strlen("session-")) != 0 ||
	    strncmp(elem + len - 6, ".scope", 6) != 0)
		goto cleanup;

	/* Ok last path piece checks out, now check the second to last. */
	*elem = '\0';

	/* Walk back to the previous '/' without ever stepping in front of the
	 * buffer; if there is none there is no second-to-last element.
	 */
	prev = last;
	do {
		if (prev == copy)
			goto cleanup;
		prev--;
	} while (*prev != '/');

	if (sscanf(prev + 1, "user-%d.slice/", &id) != 1)
		goto cleanup;

	if (id != (int)uid)
		goto cleanup;

	bret = true;

cleanup:
	free(copy);
	return bret;
}
1068
/* Chown existing cgroup that systemd has already created for us.
 *
 * Returns false without touching anything when @systemd_user_slice is false.
 * Otherwise the cgroup @base_cgroup below @mountpoint is chowned to
 * @uid:@gid and true is returned; a failing chown() is only logged as a
 * warning and does not change the return value.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		/* Only claim success when the chown actually succeeded. */
		pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
	}

	free(path);
	return true;
}
1092
1093/* Detect and store information about cgroupfs v1 hierarchies. */
1094static bool cgv1_init(uid_t uid, gid_t gid)
1095{
1096 FILE *f;
1097 struct cgv1_hierarchy **it;
1098 char *basecginfo;
1099 char *line = NULL;
1100 char **klist = NULL, **nlist = NULL;
1101 size_t len = 0;
1102
1103 basecginfo = read_file("/proc/self/cgroup");
1104 if (!basecginfo)
1105 return false;
1106
1107 f = fopen("/proc/self/mountinfo", "r");
1108 if (!f) {
1109 free(basecginfo);
1110 return false;
1111 }
1112
1113 cgv1_get_controllers(&klist, &nlist);
1114
1115 while (getline(&line, &len, f) != -1) {
1116 char **controller_list = NULL;
1117 char *mountpoint, *base_cgroup;
1118
1119 if (is_lxcfs(line) || !is_cgv1(line))
1120 continue;
1121
1122 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1123 if (!controller_list)
1124 continue;
1125
9159b38c 1126 if (cgv1_controller_list_is_dup(cgv1_hierarchies, controller_list)) {
7ac43d3d
CB
1127 free(controller_list);
1128 continue;
1129 }
1130
1131 mountpoint = get_mountpoint(line);
1132 if (!mountpoint) {
1133 free_string_list(controller_list);
1134 continue;
1135 }
1136
1137 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1138 if (!base_cgroup) {
1139 free_string_list(controller_list);
1140 free(mountpoint);
1141 continue;
1142 }
9159b38c 1143
7ac43d3d
CB
1144 trim(base_cgroup);
1145 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
9159b38c 1146 "mountpoint \"%s\" and cgroup \"%s\"\n",
1147 controller_list[0], mountpoint, base_cgroup);
1148 cgv1_add_controller(controller_list, mountpoint, base_cgroup, NULL);
7ac43d3d 1149 }
9159b38c 1150
7ac43d3d
CB
1151 free_string_list(klist);
1152 free_string_list(nlist);
1153 free(basecginfo);
1154 fclose(f);
1155 free(line);
1156
1157 /* Retrieve init cgroup path for all controllers. */
1158 basecginfo = read_file("/proc/1/cgroup");
1159 if (!basecginfo)
1160 return false;
1161
1162 for (it = cgv1_hierarchies; it && *it; it++) {
1163 if ((*it)->controllers) {
1164 char *init_cgroup, *user_slice;
9159b38c 1165
7ac43d3d
CB
1166 /* We've already stored the controller and received its
1167 * current cgroup. If we now fail to retrieve its init
1168 * cgroup, we should probably fail.
1169 */
1170 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
1171 if (!init_cgroup) {
1172 free(basecginfo);
1173 return false;
1174 }
9159b38c 1175
7ac43d3d
CB
1176 cg_systemd_prune_init_scope(init_cgroup);
1177 (*it)->init_cgroup = init_cgroup;
1178 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
2082ac62 1179 "cgroup \"%s\"\n",
1180 (*(*it)->controllers), init_cgroup);
1181
7ac43d3d
CB
1182 /* Check whether systemd has already created a cgroup
1183 * for us.
1184 */
1185 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1186 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1187 (*it)->systemd_user_slice = true;
9159b38c 1188
1189 free(user_slice);
7ac43d3d
CB
1190 }
1191 }
1192 free(basecginfo);
1193
1194 return true;
1195}
1196
1197/* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1198 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1199 */
1200static inline int cg_get_version_of_mntpt(const char *path)
1201{
1202 if (has_fs_type(path, CGROUP_SUPER_MAGIC))
1203 return 1;
1204
1205 if (has_fs_type(path, CGROUP2_SUPER_MAGIC))
1206 return 2;
1207
1208 return 0;
1209}
1210
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * @uid: uid of the user the session is opened for
 * @gid: currently unused
 *
 * Returns true if no v2 hierarchy is present or if it was recorded via
 * cgv2_add_controller(). Returns false if init's v2 cgroup cannot be
 * determined, /proc/self/mountinfo cannot be opened, or no v2 mountpoint is
 * found.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL, *user_slice;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *line = NULL;
	size_t len = 0;
	bool has_user_slice, ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}

	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip scanning the mount table. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		mountpoint = must_copy_string("/sys/fs/cgroup");
	} else {
		f = fopen("/proc/self/mountinfo", "r");
		if (!f)
			goto cleanup;

		/* We support simple cgroup mounts and lxcfs mounts. Let the
		 * first v2 mountpoint we can parse win.
		 */
		while (!mountpoint && getline(&line, &len, f) != -1) {
			if (is_cgv2(line))
				mountpoint = get_mountpoint(line);
		}
	}

	/* Never touch (or log) an unset mountpoint. */
	if (!mountpoint)
		goto cleanup;

	user_slice = must_make_path(mountpoint, current_cgroup, NULL);
	has_user_slice = cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid);
	free(user_slice);

	pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
		       "current cgroup \"%s\" and init cgroup \"%s\"\n",
		       mountpoint, current_cgroup, init_cgroup);

	cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);
	ret = true;

cleanup:
	if (f)
		fclose(f);
	free(line);

	/* On success the cgroup strings were handed to cgv2_add_controller()
	 * (or are NULL), so they are only released on failure.
	 */
	if (!ret) {
		free(init_cgroup);
		free(current_cgroup);
	}

	return ret;
}
1311
1312/* Detect and store information about mounted cgroupfs v1 hierarchies and the
1313 * cgroupfs v2 hierarchy.
1314 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1315 * where some controllers are mounted into their standard cgroupfs v1 locations
1316 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1317 * hierarchy (/sys/fs/cgroup).
1318 */
1319static bool cg_init(uid_t uid, gid_t gid)
1320{
1321 if (!cgv1_init(uid, gid))
1322 return false;
1323
1324 if (!cgv2_init(uid, gid))
1325 return false;
1326
1327 if (cgv1_hierarchies && cgv2_hierarchies) {
1328 cg_mount_mode = CGROUP_MIXED;
2082ac62 1329 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
7ac43d3d
CB
1330 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1331 cg_mount_mode = CGROUP_PURE_V1;
2082ac62 1332 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
7ac43d3d
CB
1333 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1334 cg_mount_mode = CGROUP_PURE_V2;
2082ac62 1335 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
7ac43d3d
CB
1336 } else {
1337 cg_mount_mode = CGROUP_UNKNOWN;
2082ac62 1338 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy\n", NULL);
7ac43d3d
CB
1339 }
1340
1341 if (cg_mount_mode == CGROUP_UNKNOWN)
1342 return false;
1343
1344 return true;
1345}
1346
1347/* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1348static bool cgv1_enter(const char *cgroup)
1349{
1350 struct cgv1_hierarchy **it;
1351
1352 for (it = cgv1_hierarchies; it && *it; it++) {
1353 char **controller;
1354 bool entered = false;
1355
1356 if (!(*it)->controllers || !(*it)->mountpoint ||
1357 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1358 continue;
1359
1360 for (controller = (*it)->controllers; controller && *controller;
1361 controller++) {
1362 char *path;
1363
1364 /* We've already been placed in a user slice, so we
1365 * don't need to enter the cgroup again.
1366 */
1367 if ((*it)->systemd_user_slice) {
1368 entered = true;
1369 break;
1370 }
1371
1372 path = must_make_path((*it)->mountpoint,
1373 (*it)->init_cgroup,
1374 cgroup,
1375 "/cgroup.procs",
1376 NULL);
1377 if (!file_exists(path)) {
1378 free(path);
1379 path = must_make_path((*it)->mountpoint,
1380 (*it)->init_cgroup,
1381 cgroup,
1382 "/tasks",
1383 NULL);
1384 }
2082ac62 1385
1386 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
7ac43d3d
CB
1387 entered = write_int(path, (int)getpid());
1388 if (entered) {
1389 free(path);
1390 break;
1391 }
2082ac62 1392
1393 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path);
7ac43d3d
CB
1394 free(path);
1395 }
2082ac62 1396
7ac43d3d
CB
1397 if (!entered)
1398 return false;
1399 }
1400
1401 return true;
1402}
1403
1404/* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1405static bool cgv2_enter(const char *cgroup)
1406{
1407 struct cgv2_hierarchy *v2;
1408 char *path;
1409 bool entered = false;
1410
1411 if (!cgv2_hierarchies)
1412 return true;
1413
1414 v2 = *cgv2_hierarchies;
1415
1416 if (!v2->mountpoint || !v2->base_cgroup)
1417 return false;
1418
1419 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1420 return true;
1421
1422 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
2082ac62 1423 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
1424
7ac43d3d
CB
1425 entered = write_int(path, (int)getpid());
1426 if (!entered) {
2082ac62 1427 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path);
7ac43d3d
CB
1428 free(path);
1429 return false;
1430 }
1431
1432 free(path);
1433
1434 return true;
1435}
1436
/* Enter @cgroup in the v1 hierarchies first and then in the v2 hierarchy.
 * The first failing step is logged and stops the sequence.
 */
static bool cg_enter(const char *cgroup)
{
	if (cgv1_enter(cgroup)) {
		if (cgv2_enter(cgroup))
			return true;

		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups\n", NULL);
	} else {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups\n", NULL);
	}

	return false;
}
1452
1453/* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1454static void cgv1_escape(void)
1455{
1456 struct cgv1_hierarchy **it;
1457
1458 /* In case systemd hasn't already placed us in a user slice for the
1459 * cpuset v1 controller we will reside in the root cgroup. This means
1460 * that cgroup.clone_children will not have been initialized for us so
1461 * we need to do it.
1462 */
1463 for (it = cgv1_hierarchies; it && *it; it++)
1464 if (!cgv1_handle_root_cpuset_hierarchy(*it))
2082ac62 1465 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset\n", NULL);
7ac43d3d
CB
1466
1467 if (!cgv1_enter("/"))
2082ac62 1468 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL);
7ac43d3d
CB
1469}
1470
/* Escape to the root cgroup ("/") in the cgroupfs v2 hierarchy; a failure is
 * only logged.
 */
static void cgv2_escape(void)
{
	bool escaped = cgv2_enter("/");

	if (escaped)
		return;

	mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL);
}
1477
/* Escape in the v1 hierarchies first, then in the v2 hierarchy. */
static void cg_escape(void)
{
	cgv1_escape();

	cgv2_escape();
}
1484
/* Look up the uid and gid of @user in the password database.
 *
 * @user: login name to look up
 * @uid:  receives the user's uid on success
 * @gid:  receives the user's primary gid on success
 *
 * Returns true and fills @uid and @gid on success. Returns false if memory
 * cannot be allocated, the lookup fails, or no matching record exists (the
 * latter is logged).
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	/* Upper bound for the lookup buffer so a misbehaving backend cannot make
	 * us grow it forever.
	 */
	const size_t bufsize_max = 1024 * 1024;
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf = NULL;
	size_t bufsize;
	long hint;
	int ret;

	/* The sysconf() value is only a suggested initial size and may be
	 * unavailable, so fall back to a default and grow on demand.
	 */
	hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	bufsize = hint > 0 ? (size_t)hint : 1024;

	for (;;) {
		char *tmp;

		tmp = realloc(buf, bufsize);
		if (!tmp) {
			free(buf);
			return false;
		}
		buf = tmp;

		ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
		if (ret != ERANGE || bufsize >= bufsize_max)
			break;

		/* ERANGE: the record did not fit, retry with a larger buffer. */
		bufsize *= 2;
	}

	if (ret != 0 || !pwentp) {
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}
1518
/* Tell whether @path is owned by exactly @uid and @gid.
 *
 * Returns false if @path cannot be stat()ed or if either the owning uid or the
 * owning gid differs.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && sb.st_uid == uid && sb.st_gid == gid;
}
1532
/* Turn a cpulist into a freshly allocated bit array, e.g.
 *
 *	0,2-3
 *
 * becomes
 *
 *	1 0 1 1
 *
 * @buf:   comma-separated cpulist; it is tokenized in place with strtok_r()
 * @nbits: number of bits the mask has to hold
 *
 * Returns NULL if the allocation fails, if a range is descending, or if an
 * entry is not smaller than @nbits. The caller frees the result.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *tok, *state = NULL;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &state);
	while (tok) {
		unsigned first, last;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);
		last = first;

		/* "a-b" denotes an inclusive range, a lone "a" a single cpu. */
		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (; first <= last; first++)
			set_bit(first, mask);

		tok = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1575
/* Concatenate the NULL-terminated string array @parts, placing @sep between
 * consecutive entries and, if @use_as_prefix is set, also in front of the
 * first one.
 *
 * Returns a newly allocated string the caller must free, or NULL if @parts is
 * NULL or the allocation fails.
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **part;
	char *joined, *cursor;
	size_t sep_len = strlen(sep);
	size_t total = 0;

	if (!parts)
		return NULL;

	/* First pass: work out how many characters the result needs. */
	if (use_as_prefix)
		total += sep_len;

	for (part = parts; *part; part++) {
		if (part != parts)
			total += sep_len;

		total += strlen(*part);
	}

	/* Zeroed allocation, so the result is always NUL-terminated. */
	joined = calloc(total + 1, sizeof(char));
	if (!joined)
		return NULL;

	/* Second pass: copy the pieces one after the other. */
	cursor = joined;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (part = parts; *part; part++) {
		size_t part_len = strlen(*part);

		if (part != parts) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}

		memcpy(cursor, *part, part_len);
		cursor += part_len;
	}

	return joined;
}
1608
/* Buffer size for one decimal cpu index: a 64-bit unsigned value has at most
 * 20 digits, plus the terminating NUL byte.
 */
#define __IN_TO_STR_LEN 21
/* Turn cpumask into simple, comma-separated cpulist.
 *
 * @bitarr: bit array holding @nbits bits
 * @nbits:  number of valid bits in @bitarr (bits 0 .. @nbits - 1)
 *
 * Returns a newly allocated string the caller must free, or NULL if no bit is
 * set or an error occurs.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Only bits below @nbits belong to the mask; looking at bit @nbits
	 * itself would read past the array whenever @nbits fills whole words.
	 */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}

		must_append_string(&cpulist, numstr);
	}

	/* Nothing set: same result string_join() gives for a NULL list. */
	if (!cpulist)
		return NULL;

	joined = string_join(",", (const char **)cpulist, false);

	/* The joined string is a separate allocation, so the intermediate list
	 * is no longer needed.
	 */
	free_string_list(cpulist);

	return joined;
}
1635
/* Parse the number that follows the last ',' or '-' in @cpulist (or the whole
 * string if it contains neither), i.e. the final entry of a cpulist such as
 * "0,2-3".
 *
 * Returns the parsed value, or -1 if strtoul() sets errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *comma, *dash;
	const char *last = cpulist;
	size_t value;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/* Start right behind whichever separator occurs later in the string. */
	if (comma && (!dash || comma > dash))
		last = comma + 1;
	else if (dash)
		last = dash + 1;

	errno = 0;
	value = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return value;
}
1668
7ac43d3d
CB
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Initialize @path/cpuset.cpus from the parent cgroup's cpuset.cpus with all
 * isolated cpus (as listed in __ISOL_CPUS) removed.
 *
 * @path:           cgroup directory; its last '/' is temporarily overwritten to
 *                  address the parent directory and restored before returning
 * @am_initialized: whether the cgroup was already set up by someone else
 *
 * If there are no isolated cpus, the parent's value is copied verbatim when
 * @am_initialized is false and nothing is written otherwise. If isolated cpus
 * exist but none of them is part of the parent's cpuset, nothing is written.
 *
 * Returns true on success, false if @path has no '/', a file cannot be read or
 * written, or a cpulist cannot be parsed.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		pam_cgfs_debug("Invalid path: %s\n", path);
		return bret;
	}

	/* Cut off the last component so that @path names the parent cgroup. */
	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	posscpus = read_file(fpath);
	if (!posscpus) {
		pam_cgfs_debug("Could not read file: %s\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0 || maxposs >= INT_MAX - 1)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		pam_cgfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}

	/* <ctype.h> functions need a value representable as unsigned char. */
	if (!isdigit((unsigned char)isolcpus[0])) {
		pam_cgfs_debug("%s", "No isolated cpus detected\n");
		cpulist = posscpus;

		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
			goto copy_parent;
		}

		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0 || maxisol >= INT_MAX - 1)
		goto on_error;

	/* Both masks are sized for the larger of the two highest cpu indices;
	 * after the increment maxposs is the number of bits, not the last index.
	 */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
		goto on_error;
	}

	/* Valid bits are 0 .. maxposs - 1; bit maxposs lies outside the masks. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
		goto on_success;
	}
	pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		pam_cgfs_debug("%s", "Could not create cpu list\n");
		goto on_error;
	}

copy_parent:
	/* Switch from the parent back to the cgroup itself. */
	*lastslash = oldv;

	free(fpath);

	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0660);
	if (ret < 0) {
		pam_cgfs_debug("Could not write cpu list to: %s\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	*lastslash = oldv;

	free(fpath);
	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is released only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1816
7ac43d3d
CB
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path: cgroup directory; its last '/' is overwritten only while the parent's
 *        file name is built and is restored on every return path
 * @file: name of the file to copy
 *
 * Returns true if the value was read from the parent and written to @path,
 * false if @path contains no '/' or reading or writing fails.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	oldv = *lastslash;
	*lastslash = '\0';

	fpath = must_make_path(path, file, NULL);

	/* The parent path has been captured in fpath, so undo the truncation
	 * right away. That way the caller's string is intact on failure too.
	 */
	*lastslash = oldv;

	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}

	value = must_realloc(NULL, len + 1);
	if (lxc_read_from_file(fpath, value, len) != len) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}
	/* Terminate the buffer; it is printed with %s below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0660);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1864
1865/* In case systemd hasn't already placed us in a user slice for the cpuset v1
1866 * controller we will reside in the root cgroup. This means that
1867 * cgroup.clone_children will not have been initialized for us so we need to do
1868 * it.
1869 */
1870static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
1871{
1872 char *clonechildrenpath, v;
1873
1874 if (!string_in_list(h->controllers, "cpuset"))
1875 return true;
1876
1877 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
1878
a6de11a7 1879 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1880 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
7ac43d3d
CB
1881 free(clonechildrenpath);
1882 return false;
1883 }
1884
1885 if (v == '1') { /* already set for us by someone else */
1886 free(clonechildrenpath);
1887 return true;
1888 }
1889
a6de11a7 1890 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
7ac43d3d
CB
1891 /* Set clone_children so children inherit our settings */
1892 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1893 free(clonechildrenpath);
1894 return false;
1895 }
2082ac62 1896
7ac43d3d
CB
1897 free(clonechildrenpath);
1898 return true;
1899}
1900
1901/*
1902 * Initialize the cpuset hierarchy in first directory of @gname and
1903 * set cgroup.clone_children so that children inherit settings.
1904 * Since the h->base_path is populated by init or ourselves, we know
1905 * it is already initialized.
1906 */
1907static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
1908 const char *cgroup)
1909{
1910 char *cgpath, *clonechildrenpath, v, *slash;
1911
1912 if (!string_in_list(h->controllers, "cpuset"))
1913 return true;
1914
1915 if (*cgroup == '/')
1916 cgroup++;
1917 slash = strchr(cgroup, '/');
1918 if (slash)
1919 *slash = '\0';
1920
1921 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
1922 if (slash)
1923 *slash = '/';
2082ac62 1924
c4a4578f 1925 if (do_mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
7ac43d3d
CB
1926 pam_cgfs_debug("Failed to create '%s'", cgpath);
1927 free(cgpath);
1928 return false;
1929 }
2082ac62 1930
7ac43d3d
CB
1931 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
1932 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
1933 free(clonechildrenpath);
1934 free(cgpath);
1935 return true;
1936 }
2082ac62 1937
a6de11a7 1938 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
1939 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath, strerror(errno));
7ac43d3d
CB
1940 free(clonechildrenpath);
1941 free(cgpath);
1942 return false;
1943 }
1944
1945 /* Make sure any isolated cpus are removed from cpuset.cpus. */
1946 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
2082ac62 1947 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
7ac43d3d
CB
1948 free(clonechildrenpath);
1949 free(cgpath);
1950 return false;
1951 }
1952
1953 if (v == '1') { /* already set for us by someone else */
2082ac62 1954 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
7ac43d3d
CB
1955 free(clonechildrenpath);
1956 free(cgpath);
1957 return true;
1958 }
1959
1960 /* copy parent's settings */
1961 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
2082ac62 1962 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
7ac43d3d
CB
1963 free(cgpath);
1964 free(clonechildrenpath);
1965 return false;
1966 }
1967 free(cgpath);
1968
a6de11a7 1969 if (lxc_write_to_file(clonechildrenpath, "1", 1, false, 0660) < 0) {
7ac43d3d
CB
1970 /* Set clone_children so children inherit our settings */
1971 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath);
1972 free(clonechildrenpath);
1973 return false;
1974 }
1975 free(clonechildrenpath);
1976 return true;
1977}
1978
1979/* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
1980 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
1981 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
1982 * to the caller in @existed.
1983 */
1984#define __PAM_CGFS_USER "/user/"
1985#define __PAM_CGFS_USER_LEN 6
1986static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
1987{
1988 char *clean_base_cgroup, *path;
1989 char **controller;
1990 struct cgv1_hierarchy *it;
1991 bool created = false;
1992
1993 *existed = false;
1994 it = h;
2082ac62 1995
7ac43d3d
CB
1996 for (controller = it->controllers; controller && *controller;
1997 controller++) {
1998 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
1999 return false;
2000
2001 /* If systemd has already created a cgroup for us, keep using
2002 * it.
2003 */
2004 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2005 it->base_cgroup, uid, gid,
2082ac62 2006 it->systemd_user_slice))
7ac43d3d 2007 return true;
7ac43d3d
CB
2008
2009 /* We need to make sure that we do not create an endless chain
2010 * of sub-cgroups. So we check if we have already logged in
2011 * somehow (sudo -i, su, etc.) and have created a
2012 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2013 * cgroups this is unnecessary since we use the init_cgroup
2014 * anyway, but for controllers which have an existing systemd
2015 * cgroup that does not match the current uid, this is pretty
2016 * useful.
2017 */
2018 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2019 free(it->base_cgroup);
2020 it->base_cgroup = must_copy_string("/");
2021 } else {
2022 clean_base_cgroup =
2023 strstr(it->base_cgroup, __PAM_CGFS_USER);
2024 if (clean_base_cgroup)
2025 *clean_base_cgroup = '\0';
2026 }
2027
2028 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2082ac62 2029 pam_cgfs_debug("Constructing path: %s\n", path);
2030
7ac43d3d
CB
2031 if (file_exists(path)) {
2032 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
7ac43d3d
CB
2033 if (our_cg)
2034 *existed = false;
2035 else
2036 *existed = true;
2082ac62 2037
2038 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2039 path, our_cg ? "" : "not ", uid, gid);
2040 free(path);
2041
7ac43d3d
CB
2042 return our_cg;
2043 }
2082ac62 2044
539c3977 2045 created = lxc_mkdir_parent(it->mountpoint, path);
7ac43d3d
CB
2046 if (!created) {
2047 free(path);
2048 continue;
2049 }
2082ac62 2050
7ac43d3d
CB
2051 if (chown(path, uid, gid) < 0)
2052 mysyslog(LOG_WARNING,
2082ac62 2053 "Failed to chown %s to %d:%d: %s\n", path,
2054 (int)uid, (int)gid, strerror(errno), NULL);
2055
2056 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
7ac43d3d
CB
2057 free(path);
2058 break;
2059 }
2060
2061 return created;
2062}
2063
2064/* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2065 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2066 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2067 */
2068static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
2069{
2070
2071 char *path;
2072
2073 /* Better safe than sorry. */
2074 if (!h->controllers)
2075 return true;
2076
2077 /* Cgroups created by systemd for us which we re-use won't be removed
2078 * here, since we're using init_cgroup + cgroup as path instead of
2079 * base_cgroup + cgroup.
2080 */
2081 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2082 (void)recursive_rmdir(path);
2083 free(path);
2084
2085 return true;
2086}
2087
2088/* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2089static bool cgv2_remove(const char *cgroup)
2090{
2091 struct cgv2_hierarchy *v2;
2092 char *path;
2093
2094 if (!cgv2_hierarchies)
2095 return true;
2096
2097 v2 = *cgv2_hierarchies;
2098
2099 /* If we reused an already existing cgroup, don't bother trying to
2100 * remove (a potentially wrong)/the path.
2101 * Cgroups created by systemd for us which we re-use would be removed
2102 * here, since we're using base_cgroup + cgroup as path.
2103 */
2104 if (v2->systemd_user_slice)
2105 return true;
2106
2107 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2108 (void)recursive_rmdir(path);
2109 free(path);
2110
2111 return true;
2112}
2113
2114/* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2115 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2116 * back, to the caller if the creation failed due to @cgroup already existing
2117 * via @existed.
2118 */
2119static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2120{
2121 struct cgv1_hierarchy **it, **rev_it;
2122 bool all_created = true;
2123
2124 for (it = cgv1_hierarchies; it && *it; it++) {
2125 if (!(*it)->controllers || !(*it)->mountpoint ||
2126 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2127 continue;
2128
2129 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2130 all_created = false;
2131 break;
2132 }
2133 }
2134
2135 if (all_created)
2136 return true;
2137
2138 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2139 rev_it++)
2140 cgv1_remove_one(*rev_it, cgroup);
2141
2142 return false;
2143}
2144
2145/* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2146 * the creation failed due to @cgroup already existing via @existed.
2147 */
2148static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
2149{
2150 int ret;
2151 char *clean_base_cgroup;
2152 char *path;
2153 struct cgv2_hierarchy *v2;
2154 bool our_cg = false, created = false;
2155
2156 *existed = false;
2157
2158 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2159 return true;
2160
2161 v2 = *cgv2_hierarchies;
2162
2163 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2164 * to be placed under our current cgroup.
2165 */
2166 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2167 uid, gid, v2->systemd_user_slice))
2168 goto delegate_files;
2169
2170 /* We need to make sure that we do not create an endless chain of
2171 * sub-cgroups. So we check if we have already logged in somehow (sudo
2172 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2173 * skip that part.
2174 */
2175 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2176 free(v2->base_cgroup);
2177 v2->base_cgroup = must_copy_string("/");
2178 } else {
2179 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2180 if (clean_base_cgroup)
2181 *clean_base_cgroup = '\0';
2182 }
2183
2184 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2082ac62 2185 pam_cgfs_debug("Constructing path \"%s\"\n", path);
2186
7ac43d3d
CB
2187 if (file_exists(path)) {
2188 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2082ac62 2189 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2190 path, our_cg ? "" : "not ", uid, gid);
7ac43d3d
CB
2191 free(path);
2192 if (our_cg) {
2193 *existed = false;
2194 goto delegate_files;
2195 } else {
2196 *existed = true;
2197 return false;
2198 }
2199 }
2200
539c3977 2201 created = lxc_mkdir_parent(v2->mountpoint, path);
7ac43d3d
CB
2202 if (!created) {
2203 free(path);
2204 return false;
2205 }
2206
2207 /* chown cgroup to user */
2208 if (chown(path, uid, gid) < 0)
2082ac62 2209 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2210 path, (int)uid, (int)gid, strerror(errno), NULL);
7ac43d3d 2211 else
2082ac62 2212 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
7ac43d3d
CB
2213 free(path);
2214
2215delegate_files:
2216 /* chown cgroup.procs to user */
2217 if (v2->systemd_user_slice)
2218 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2219 "/cgroup.procs", NULL);
2220 else
2221 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2222 "/cgroup.procs", NULL);
2082ac62 2223
7ac43d3d
CB
2224 ret = chown(path, uid, gid);
2225 if (ret < 0)
2082ac62 2226 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
2227 path, (int)uid, (int)gid, strerror(errno), NULL);
7ac43d3d 2228 else
2082ac62 2229 pam_cgfs_debug("Chowned %s to %d:%d\n", path, (int)uid, (int)gid);
7ac43d3d
CB
2230 free(path);
2231
2232 /* chown cgroup.subtree_control to user */
2233 if (v2->systemd_user_slice)
2234 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2235 "/cgroup.subtree_control", NULL);
2236 else
2237 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2238 "/cgroup.subtree_control", NULL);
2082ac62 2239
7ac43d3d
CB
2240 ret = chown(path, uid, gid);
2241 if (ret < 0)
2082ac62 2242 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
7ac43d3d
CB
2243 path, (int)uid, (int)gid, strerror(errno), NULL);
2244 free(path);
2245
2246 /* chown cgroup.threads to user */
2247 if (v2->systemd_user_slice)
2248 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2249 "/cgroup.threads", NULL);
2250 else
2251 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2252 "/cgroup.threads", NULL);
2253 ret = chown(path, uid, gid);
2254 if (ret < 0 && errno != ENOENT)
2082ac62 2255 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s\n",
7ac43d3d
CB
2256 path, (int)uid, (int)gid, strerror(errno), NULL);
2257 free(path);
2258
2259 return true;
2260}
2261
2262/* Create writeable cgroups for @user at login. Details can be found in the
2263 * preamble/license at the top of this file.
2264 */
2265static int handle_login(const char *user, uid_t uid, gid_t gid)
2266{
2267 int idx = 0, ret;
2268 bool existed;
bb3c1e56 2269 char cg[PATH_MAX];
7ac43d3d
CB
2270
2271 cg_escape();
2272
2273 while (idx >= 0) {
bb3c1e56
CB
2274 ret = snprintf(cg, PATH_MAX, "/user/%s/%d", user, idx);
2275 if (ret < 0 || ret >= PATH_MAX) {
2082ac62 2276 mysyslog(LOG_ERR, "Username too long\n", NULL);
7ac43d3d
CB
2277 return PAM_SESSION_ERR;
2278 }
2279
2280 existed = false;
2281 if (!cgv2_create(cg, uid, gid, &existed)) {
2282 if (existed) {
2283 cgv2_remove(cg);
2284 idx++;
2285 continue;
2286 }
2082ac62 2287
2288 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
7ac43d3d
CB
2289 return PAM_SESSION_ERR;
2290 }
2291
2292 existed = false;
2293 if (!cgv1_create(cg, uid, gid, &existed)) {
2294 if (existed) {
2295 cgv2_remove(cg);
2296 idx++;
2297 continue;
2298 }
2082ac62 2299
2300 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s\n", user, NULL);
7ac43d3d
CB
2301 return PAM_SESSION_ERR;
2302 }
2303
2304 if (!cg_enter(cg)) {
2082ac62 2305 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s\n", cg, user, NULL);
7ac43d3d
CB
2306 return PAM_SESSION_ERR;
2307 }
2082ac62 2308
7ac43d3d
CB
2309 break;
2310 }
2311
2312 return PAM_SUCCESS;
2313}
2314
2315/* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2316 * hierarchies.
2317 */
2318static bool cgv1_prune_empty_cgroups(const char *user)
2319{
2320 bool controller_removed = true;
2321 bool all_removed = true;
2322 struct cgv1_hierarchy **it;
2323
2324 for (it = cgv1_hierarchies; it && *it; it++) {
2325 int ret;
2326 char *path_base, *path_init;
2327 char **controller;
2328
2329 if (!(*it)->controllers || !(*it)->mountpoint ||
2330 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2331 continue;
2332
2333 for (controller = (*it)->controllers; controller && *controller;
2334 controller++) {
2335 bool path_base_rm, path_init_rm;
2336
2337 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2082ac62 2338 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base);
2339
7ac43d3d
CB
2340 ret = recursive_rmdir(path_base);
2341 if (ret == -ENOENT || ret >= 0)
2342 path_base_rm = true;
2343 else
2344 path_base_rm = false;
2345 free(path_base);
2346
2347 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2082ac62 2348 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init);
2349
7ac43d3d
CB
2350 ret = recursive_rmdir(path_init);
2351 if (ret == -ENOENT || ret >= 0)
2352 path_init_rm = true;
2353 else
2354 path_init_rm = false;
2355 free(path_init);
2356
2357 if (!path_base_rm && !path_init_rm) {
2358 controller_removed = false;
2359 continue;
2360 }
2361
2362 controller_removed = true;
2363 break;
2364 }
2082ac62 2365
7ac43d3d
CB
2366 if (!controller_removed)
2367 all_removed = false;
2368 }
2369
2370 return all_removed;
2371}
2372
2373/* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2374 * hierarchy.
2375 */
2376static bool cgv2_prune_empty_cgroups(const char *user)
2377{
2378 int ret;
2379 struct cgv2_hierarchy *v2;
2380 char *path_base, *path_init;
2381 bool path_base_rm, path_init_rm;
2382
2383 if (!cgv2_hierarchies)
2384 return true;
2385
2386 v2 = *cgv2_hierarchies;
2387
2388 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2082ac62 2389 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base);
2390
7ac43d3d
CB
2391 ret = recursive_rmdir(path_base);
2392 if (ret == -ENOENT || ret >= 0)
2393 path_base_rm = true;
2394 else
2395 path_base_rm = false;
2396 free(path_base);
2397
2398 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2082ac62 2399 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init);
2400
7ac43d3d
CB
2401 ret = recursive_rmdir(path_init);
2402 if (ret == -ENOENT || ret >= 0)
2403 path_init_rm = true;
2404 else
2405 path_init_rm = false;
2406 free(path_init);
2407
2408 if (!path_base_rm && !path_init_rm)
2409 return false;
2410
2411 return true;
2412}
2413
/* Prune @user's cgroups from the v1 hierarchies first and from the v2
 * hierarchy second. Pruning is best effort: the result of either step is
 * intentionally discarded.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	/* v1 */
	(void)cgv1_prune_empty_cgroups(user);

	/* v2 */
	(void)cgv2_prune_empty_cgroups(user);
}
2420
2421/* Free allocated information for detected cgroupfs v1 hierarchies. */
2422static void cgv1_free_hierarchies(void)
2423{
2424 struct cgv1_hierarchy **it;
2425
2426 if (!cgv1_hierarchies)
2427 return;
2428
2429 for (it = cgv1_hierarchies; it && *it; it++) {
2430 if ((*it)->controllers) {
2431 char **tmp;
2432 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2433 free(*tmp);
2434
2435 free((*it)->controllers);
2436 }
2082ac62 2437
7ac43d3d
CB
2438 free((*it)->mountpoint);
2439 free((*it)->base_cgroup);
2440 free((*it)->fullcgpath);
2441 free((*it)->init_cgroup);
2442 }
2082ac62 2443
7ac43d3d
CB
2444 free(cgv1_hierarchies);
2445}
2446
2447/* Free allocated information for the detected cgroupfs v2 hierarchy. */
2448static void cgv2_free_hierarchies(void)
2449{
2450 struct cgv2_hierarchy **it;
2451
2452 if (!cgv2_hierarchies)
2453 return;
2454
2455 for (it = cgv2_hierarchies; it && *it; it++) {
2456 if ((*it)->controllers) {
2457 char **tmp;
2082ac62 2458
7ac43d3d
CB
2459 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2460 free(*tmp);
2461
2462 free((*it)->controllers);
2463 }
2082ac62 2464
7ac43d3d
CB
2465 free((*it)->mountpoint);
2466 free((*it)->base_cgroup);
2467 free((*it)->fullcgpath);
2468 free((*it)->init_cgroup);
2469 }
2082ac62 2470
7ac43d3d
CB
2471 free(cgv2_hierarchies);
2472}
2473
/* Release everything recorded about the detected cgroup hierarchies: the v1
 * list first, then the v2 list.
 */
static void cg_exit(void)
{
	/* v1 */
	cgv1_free_hierarchies();

	/* v2 */
	cgv2_free_hierarchies();
}
2480
2481int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2482 const char **argv)
2483{
2484 int ret;
2485 uid_t uid = 0;
2486 gid_t gid = 0;
2487 const char *PAM_user = NULL;
2488
2489 ret = pam_get_user(pamh, &PAM_user, NULL);
2490 if (ret != PAM_SUCCESS) {
2491 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2492 return PAM_SESSION_ERR;
2493 }
2494
2495 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2082ac62 2496 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
7ac43d3d
CB
2497 return PAM_SESSION_ERR;
2498 }
2499
2500 if (!cg_init(uid, gid)) {
2501 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2502 return PAM_SESSION_ERR;
2503 }
2504
2505 /* Try to prune cgroups, that are actually empty but were still marked
2506 * as busy by the kernel so we couldn't remove them on session close.
2507 */
2508 cg_prune_empty_cgroups(PAM_user);
2509
2510 if (cg_mount_mode == CGROUP_UNKNOWN)
2511 return PAM_SESSION_ERR;
2512
2513 if (argc > 1 && !strcmp(argv[0], "-c")) {
2514 char **clist = make_string_list(argv[1], ",");
2515
2516 /*
2517 * We don't allow using "all" and other controllers explicitly because
2518 * that simply doesn't make any sense.
2519 */
2520 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2082ac62 2521 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
7ac43d3d
CB
2522 free_string_list(clist);
2523 return PAM_SESSION_ERR;
2524 }
2525
2526 cg_mark_to_make_rw(clist);
2527 free_string_list(clist);
2528 }
2529
2530 return handle_login(PAM_user, uid, gid);
2531}
2532
2533int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2534 const char **argv)
2535{
2536 int ret;
2537 uid_t uid = 0;
2538 gid_t gid = 0;
2539 const char *PAM_user = NULL;
2540
2541 ret = pam_get_user(pamh, &PAM_user, NULL);
2542 if (ret != PAM_SUCCESS) {
2543 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2544 return PAM_SESSION_ERR;
2545 }
2546
2547 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2082ac62 2548 mysyslog(LOG_ERR, "Failed to get uid and gid for %s\n", PAM_user, NULL);
7ac43d3d
CB
2549 return PAM_SESSION_ERR;
2550 }
2551
2552 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2553 if (!cg_init(uid, gid))
2554 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2555
2556 if (argc > 1 && !strcmp(argv[0], "-c")) {
2557 char **clist = make_string_list(argv[1], ",");
2558
2559 /*
2560 * We don't allow using "all" and other controllers explicitly because
2561 * that simply doesn't make any sense.
2562 */
2563 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2082ac62 2564 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL);
7ac43d3d
CB
2565 free_string_list(clist);
2566 return PAM_SESSION_ERR;
2567 }
2568
2569 cg_mark_to_make_rw(clist);
2570 free_string_list(clist);
2571 }
2572 }
2573
2574 cg_prune_empty_cgroups(PAM_user);
2575 cg_exit();
2576
2577 return PAM_SUCCESS;
2578}