]> git.proxmox.com Git - mirror_lxcfs.git/blame - pam/pam_cgfs.c
pam: adapt to changed cgroup v2 layout
[mirror_lxcfs.git] / pam / pam_cgfs.c
CommitLineData
df54106a
SH
1/* pam-cgfs
2 *
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
e65cfafc 5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
df54106a 6 *
e65cfafc
CB
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
df54106a
SH
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
16 *
e65cfafc
CB
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both cgroupfs v1 and cgroupfs v2, we
19 * check whether systemd already placed us in a cgroup it created:
20 *
21 * user.slice/user-uid.slice/session-n.scope
22 *
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2 where this assumption becomes crucial:
27 * We __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an xserver or similar will not work.
edd25678 29 *
df54106a
SH
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
32 *
33 * See COPYING file for details.
34 */
35
04742595 36#include <ctype.h>
e65cfafc
CB
37#include <dirent.h>
38#include <errno.h>
04742595 39#include <fcntl.h>
e65cfafc
CB
40#include <pwd.h>
41#include <stdarg.h>
42#include <stdbool.h>
04742595 43#include <stdint.h>
df54106a
SH
44#include <stdio.h>
45#include <stdlib.h>
e65cfafc 46#include <string.h>
df54106a 47#include <syslog.h>
e65cfafc
CB
48#include <unistd.h>
49#include <linux/unistd.h>
df54106a 50#include <sys/mount.h>
df54106a 51#include <sys/param.h>
e65cfafc
CB
52#include <sys/stat.h>
53#include <sys/types.h>
54#include <sys/vfs.h>
df54106a
SH
55
56#define PAM_SM_SESSION
57#include <security/_pam_macros.h>
58#include <security/pam_modules.h>
59
e65cfafc
CB
60#include "macro.h"
61
62#ifndef CGROUP_SUPER_MAGIC
63#define CGROUP_SUPER_MAGIC 0x27e0eb
64#endif
df54106a 65
e65cfafc
CB
66#ifndef CGROUP2_SUPER_MAGIC
67#define CGROUP2_SUPER_MAGIC 0x63677270
68#endif
df54106a 69
04742595
CB
70/* Taken over modified from the kernel sources. */
71#define NBITS 32 /* bits in uint32_t */
72#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
73#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
74
e65cfafc
CB
/* Layout of the cgroupfs mounts on this system: pure cgroupfs v1, pure
 * cgroupfs v2, or a mix of both. CGROUP_UNINITIALIZED is the value the
 * variable below starts out with.
 */
enum cg_mount_mode {
	CGROUP_UNKNOWN = -1,
	CGROUP_MIXED = 0,
	CGROUP_PURE_V1 = 1,
	CGROUP_PURE_V2 = 2,
	CGROUP_UNINITIALIZED = 3,
};

/* File-wide record of the mount layout. */
static enum cg_mount_mode cg_mount_mode = CGROUP_UNINITIALIZED;
82
7c029b0f 83/* Common helper functions. Most of these have been taken from LXC. */
e65cfafc
CB
84static void append_line(char **dest, size_t oldlen, char *new, size_t newlen);
85static int append_null_to_list(void ***list);
86static void batch_realloc(char **mem, size_t oldlen, size_t newlen);
04742595
CB
/* Clear bit number @bit in the bitmap @bitarr, which is stored as an array of
 * 32-bit words. The caller must make sure that @bitarr holds at least
 * (bit / NBITS) + 1 words.
 */
static inline void clear_bit(unsigned bit, uint32_t *bitarr)
{
	/* The mask is built from an unsigned 32-bit one: shifting a signed 1
	 * into bit 31 is undefined behaviour.
	 */
	bitarr[bit / NBITS] &= ~((uint32_t)1 << (bit % NBITS));
}
e65cfafc
CB
91static char *copy_to_eol(char *s);
92static bool file_exists(const char *f);
93static void free_string_list(char **list);
94static char *get_mountpoint(char *line);
95static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid);
96static int handle_login(const char *user, uid_t uid, gid_t gid);
04742595
CB
/* Return true if bit number @bit is set in the bitmap @bitarr, which is stored
 * as an array of 32-bit words. The caller must make sure that @bitarr holds at
 * least (bit / NBITS) + 1 words.
 */
static inline bool is_set(unsigned bit, uint32_t *bitarr)
{
	/* The mask is built from an unsigned 32-bit one: shifting a signed 1
	 * into bit 31 is undefined behaviour.
	 */
	return (bitarr[bit / NBITS] & ((uint32_t)1 << (bit % NBITS))) != 0;
}
e65cfafc
CB
101/* __typeof__ should be safe to use with all compilers. */
102typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
103static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val);
104static bool is_lxcfs(const char *line);
105static bool is_cgv1(char *line);
106static bool is_cgv2(char *line);
107static bool mkdir_p(const char *root, char *path);
108static void *must_alloc(size_t sz);
109static void must_add_to_list(char ***clist, char *entry);
110static void must_append_controller(char **klist, char **nlist, char ***clist,
111 char *entry);
112static void must_append_string(char ***list, char *entry);
113static char *must_copy_string(const char *entry);
114static char *must_make_path(const char *first, ...) __attribute__((sentinel));
115static void *must_realloc(void *orig, size_t sz);
116static void mysyslog(int err, const char *format, ...) __attribute__((sentinel));
117static char *read_file(char *fnam);
04742595 118static int read_from_file(const char *filename, void* buf, size_t count);
e65cfafc 119static int recursive_rmdir(char *dirname);
04742595
CB
/* Set bit number @bit in the bitmap @bitarr, which is stored as an array of
 * 32-bit words. The caller must make sure that @bitarr holds at least
 * (bit / NBITS) + 1 words.
 */
static inline void set_bit(unsigned bit, uint32_t *bitarr)
{
	/* The mask is built from an unsigned 32-bit one: shifting a signed 1
	 * into bit 31 is undefined behaviour.
	 */
	bitarr[bit / NBITS] |= ((uint32_t)1 << (bit % NBITS));
}
e65cfafc 124static bool string_in_list(char **list, const char *entry);
808fd1ef 125static char *string_join(const char *sep, const char **parts, bool use_as_prefix);
e65cfafc
CB
126static void trim(char *s);
127static bool write_int(char *path, int v);
808fd1ef
CB
128static ssize_t write_nointr(int fd, const void* buf, size_t count);
129static int write_to_file(const char *filename, const void *buf, size_t count,
130 bool add_newline);
e65cfafc
CB
131
132/* cgroupfs prototypes. */
04742595
CB
133static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid);
134static uint32_t *cg_cpumask(char *buf, size_t nbits);
135static bool cg_copy_parent_file(char *path, char *file);
136static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits);
137static bool cg_enter(const char *cgroup);
138static void cg_escape(void);
139static bool cg_filter_and_set_cpus(char *path, bool am_initialized);
140static ssize_t cg_get_max_cpus(char *cpulist);
141static int cg_get_version_of_mntpt(const char *path);
142static bool cg_init(uid_t uid, gid_t gid);
7559c0b6 143static void cg_mark_to_make_rw(char **list);
04742595 144static void cg_prune_empty_cgroups(const char *user);
e65cfafc
CB
145static bool cg_systemd_created_user_slice(const char *base_cgroup,
146 const char *init_cgroup,
147 const char *in, uid_t uid);
148static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
149 const char *base_cgroup, uid_t uid,
150 gid_t gid,
151 bool systemd_user_slice);
04742595
CB
152static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid);
153static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
154 const char *init_cgroup, uid_t uid);
e65cfafc 155static void cg_systemd_prune_init_scope(char *cg);
e65cfafc
CB
156static bool is_lxcfs(const char *line);
157
158/* cgroupfs v1 prototypes. */
/* Bookkeeping for one mounted cgroupfs v1 hierarchy. */
struct cgv1_hierarchy {
	/* Null-terminated list of the controllers mounted into the hierarchy. */
	char **controllers;

	/* Where the hierarchy is mounted. */
	char *mountpoint;

	/* Cgroup of the current process in this hierarchy. */
	char *base_cgroup;

	/* NOTE(review): presumably the full path of the cgroup created for
	 * the session; starts out as NULL - confirm against the users.
	 */
	char *fullcgpath;

	/* Cgroup of PID 1 in this hierarchy. */
	char *init_cgroup;

	/* Whether a writeable cgroup is supposed to be created here. */
	bool create_rw_cgroup;

	/* NOTE(review): presumably whether systemd already placed us in a user
	 * slice; always initialised to false for cgroupfs v1 - confirm.
	 */
	bool systemd_user_slice;
};
168
169static struct cgv1_hierarchy **cgv1_hierarchies;
170
171static void cgv1_add_controller(char **clist, char *mountpoint,
172 char *base_cgroup, char *init_cgroup);
173static bool cgv1_controller_in_clist(char *cgline, char *c);
174static bool cgv1_controller_lists_intersect(char **l1, char **l2);
175static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist,
176 char **clist);
177static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid,
178 bool *existed);
179static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup,
180 uid_t uid, gid_t gid, bool *existed);
181static bool cgv1_enter(const char *cgroup);
182static void cgv1_escape(void);
183static bool cgv1_get_controllers(char ***klist, char ***nlist);
184static char *cgv1_get_current_cgroup(char *basecginfo, char *controller);
185static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist,
186 char *line);
04742595
CB
187static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
188 const char *cgroup);
189static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h);
e65cfafc
CB
190static bool cgv1_init(uid_t uid, gid_t gid);
191static void cgv1_mark_to_make_rw(char **clist);
192static char *cgv1_must_prefix_named(char *entry);
193static bool cgv1_prune_empty_cgroups(const char *user);
194static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup);
195static bool is_cgv1(char *line);
196
197/* cgroupfs v2 prototypes. */
/* Bookkeeping for one mounted cgroupfs v2 hierarchy. */
struct cgv2_hierarchy {
	/* Null-terminated list of the controllers in the hierarchy. */
	char **controllers;

	/* Where the hierarchy is mounted. */
	char *mountpoint;

	/* Cgroup of the current process in this hierarchy. */
	char *base_cgroup;

	/* NOTE(review): presumably the full path of the cgroup created for
	 * the session; starts out as NULL - confirm against the users.
	 */
	char *fullcgpath;

	/* Cgroup of PID 1 in this hierarchy. */
	char *init_cgroup;

	/* Whether a writeable cgroup is supposed to be created here. */
	bool create_rw_cgroup;

	/* NOTE(review): presumably whether systemd already placed us in a user
	 * slice - confirm against the code that fills it in.
	 */
	bool systemd_user_slice;
};
207
208/* Actually this should only be a single hierarchy. But for the sake of
209 * parallelism and because the layout of the cgroupfs v2 is still somewhat
210 * changing, we'll leave it as an array of structs.
211 */
212static struct cgv2_hierarchy **cgv2_hierarchies;
213
214static void cgv2_add_controller(char **clist, char *mountpoint,
215 char *base_cgroup, char *init_cgroup,
216 bool systemd_user_slice);
217static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid,
218 bool *existed);
219static bool cgv2_enter(const char *cgroup);
220static void cgv2_escape(void);
221static char *cgv2_get_current_cgroup(int pid);
222static bool cgv2_init(uid_t uid, gid_t gid);
223static void cgv2_mark_to_make_rw(char **clist);
224static bool cgv2_prune_empty_cgroups(const char *user);
225static bool cgv2_remove(const char *cgroup);
226static bool is_cgv2(char *line);
227
7c029b0f 228/* Common helper functions. Most of these have been taken from LXC. */
df54106a
SH
/* Send a printf-style message with priority @err to syslog under the
 * "PAM-CGFS" identity. The connection to the logger is opened and closed again
 * on every call.
 */
static void mysyslog(int err, const char *format, ...)
{
	va_list ap;

	openlog("PAM-CGFS", LOG_CONS | LOG_PID, LOG_AUTH);

	va_start(ap, format);
	vsyslog(err, format, ap);
	va_end(ap);

	closelog();
}
239
e65cfafc
CB
/* realloc() @orig to @sz bytes and keep retrying until the allocation
 * succeeds; never returns NULL.
 */
static void *must_realloc(void *orig, size_t sz)
{
	for (;;) {
		void *mem = realloc(orig, sz);

		if (mem)
			return mem;
	}
}
251
/* Grow the buffer at *@mem in steps of BATCH_SIZE bytes; do not fail.
 *
 * @mem:    address of the buffer pointer; *@mem may be NULL.
 * @oldlen: number of bytes the buffer had to hold so far.
 * @newlen: number of bytes the buffer has to hold now.
 *
 * The buffer is only reallocated if it does not exist yet or if @newlen falls
 * into a later batch than @oldlen. The batch counts are kept in size_t so that
 * large lengths are not truncated.
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
262
/* Append the @newlen bytes of @new, plus the byte following them, to the
 * buffer at *@dest, which currently holds @oldlen bytes. The buffer is grown
 * as needed; do not fail.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Room for the old contents, the new line and the trailing '\0'. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	/* Copying newlen + 1 bytes carries over the terminator at new[newlen]. */
	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
272
/* Read the whole file @fnam line by line into one allocated buffer and return
 * it. Returns NULL if the file cannot be opened or if no line could be read.
 * The caller owns the returned buffer.
 */
static char *read_file(char *fnam)
{
	char *contents = NULL, *line = NULL;
	size_t cap = 0, total = 0;
	int linelen;
	FILE *f;

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	for (;;) {
		linelen = getline(&line, &cap, f);
		if (linelen == -1)
			break;

		append_line(&contents, total, line, linelen);
		total += linelen;
	}

	free(line);
	fclose(f);

	return contents;
}
295
/* Grow the null-terminated pointer array at *@list by one slot; do not fail.
 * *@list may be NULL, in which case a new array is allocated. The last slot of
 * the grown array is set to NULL and the index of the slot before it is
 * returned; the caller is expected to store the new entry there.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list) {
		while ((*list)[count])
			count++;
	}

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;

	return count;
}
315
/* Return an allocated copy of @entry, retrying until the allocation succeeds.
 * A NULL @entry yields NULL. The caller owns the copy.
 */
static char *must_copy_string(const char *entry)
{
	char *dup;
	size_t size;

	if (!entry)
		return NULL;

	/* Include the terminating '\0' in the copy. */
	size = strlen(entry) + 1;

	for (;;) {
		dup = malloc(size);
		if (dup)
			break;
	}

	return memcpy(dup, entry, size);
}
330
/* Append a copy of @entry to the null-terminated string array at *@list and
 * keep the array null-terminated; do not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
343
/* Strip all trailing newlines from @s in place. */
static void trim(char *s)
{
	char *end = s + strlen(s);

	while (end > s && end[-1] == '\n')
		*--end = '\0';
}
352
/* Allocate @sz bytes through must_realloc(); do not fail. */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
358
/* Return an allocated copy of @s up to, but not including, its first '\n'.
 * Returns NULL if @s contains no newline. The caller owns the copy.
 */
static char *copy_to_eol(char *s)
{
	char *eol, *line;
	size_t len;

	eol = strchr(s, '\n');
	if (eol == NULL)
		return NULL;

	len = (size_t)(eol - s);
	line = must_alloc(len + 1);
	line[len] = '\0';
	memcpy(line, s, len);

	return line;
}
376
/* Return true if the /proc/<pid>/mountinfo entry @line describes a fuse.lxcfs
 * mount, i.e. if the filesystem type following the first " - " separator is
 * "fuse.lxcfs".
 */
static bool is_lxcfs(const char *line)
{
	static const char marker[] = " - fuse.lxcfs ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
386
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs v1
 * mount, i.e. if the filesystem type following the first " - " separator is
 * "cgroup".
 */
static bool is_cgv1(char *line)
{
	static const char marker[] = " - cgroup ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
396
/* Return true if the /proc/<pid>/mountinfo entry @line describes a cgroupfs v2
 * mount, i.e. if the filesystem type following the first " - " separator is
 * "cgroup2".
 */
static bool is_cgv2(char *line)
{
	static const char marker[] = " - cgroup2 ";
	const char *sep = strstr(line, " - ");

	return sep && strncmp(sep, marker, sizeof(marker) - 1) == 0;
}
406
/* Return true if @entry equals one of the strings in the null-terminated
 * array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	size_t i;

	if (!list)
		return false;

	for (i = 0; list[i]; i++) {
		if (!strcmp(list[i], entry))
			return true;
	}

	return false;
}
420
7559c0b6
AS
/*
 * Split @str at every character contained in @sep and return the pieces as a
 * null-terminated array of allocated strings. Returns NULL if there are no
 * pieces. The caller is responsible for calling free_string_list().
 */
static char **make_string_list(const char *str, const char *sep)
{
	char **result = NULL;
	char *state = NULL;
	char *buf, *tok;

	/* strtok_r() writes into its input, so work on a private copy. */
	buf = must_copy_string(str);

	tok = strtok_r(buf, sep, &state);
	while (tok) {
		must_add_to_list(&result, tok);
		tok = strtok_r(NULL, sep, &state);
	}

	free(buf);

	return result;
}
441
/* Count the entries of the null-terminated string array @list. A NULL @list
 * has length 0.
 */
static size_t string_list_length(char **list)
{
	size_t n = 0;

	if (list) {
		while (list[n])
			n++;
	}

	return n;
}
453
e65cfafc
CB
/* Free every string of the null-terminated array @list and then the array
 * itself. A NULL @list is accepted.
 */
static void free_string_list(char **list)
{
	size_t i;

	if (list) {
		for (i = 0; list[i]; i++)
			free(list[i]);
	}

	free(list);
}
463
/* Join @first and all following strings, up to the terminating NULL argument,
 * into one allocated path; do not fail. A '/' is inserted in front of every
 * following piece that does not already start with one. Runs of duplicate
 * '/' are not collapsed. The caller owns the returned path.
 */
static char *must_make_path(const char *first, ...)
{
	va_list ap;
	const char *piece;
	char *path;
	size_t len;

	len = strlen(first);
	path = must_copy_string(first);

	va_start(ap, first);
	for (piece = va_arg(ap, char *); piece; piece = va_arg(ap, char *)) {
		bool need_sep = piece[0] != '/';

		len += strlen(piece) + (need_sep ? 1 : 0);
		path = must_realloc(path, len + 1);

		if (need_sep)
			strcat(path, "/");
		strcat(path, piece);
	}
	va_end(ap);

	return path;
}
496
e65cfafc
CB
/* Write the integer @v followed by a newline to the file @path, truncating
 * the file first. Returns true only if opening, writing and closing all
 * succeeded.
 */
static bool write_int(char *path, int v)
{
	bool printed, closed;
	FILE *f;

	f = fopen(path, "w");
	if (!f)
		return false;

	printed = fprintf(f, "%d\n", v) >= 0;
	/* Always close, even after a failed write. */
	closed = fclose(f) == 0;

	return printed && closed;
}
515
e65cfafc
CB
/* Return true if stat() succeeds on the path @f. */
static bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
523
/* Create the directory @path and, if necessary, its parents below @root.
 *
 * @root is only used for its length: the first strlen(root) + 1 bytes of @path
 * are skipped and every path component after that is created with mode 0755
 * unless it already exists. @path is cut at each component while it is being
 * processed; it is restored afterwards, except when mkdir() fails.
 *
 * Returns false if @path is shorter than @root or if a directory could not be
 * created, true otherwise.
 */
static bool mkdir_p(const char *root, char *path)
{
	size_t rootlen = strlen(root);
	size_t pathlen = strlen(path);
	char *start, *end, saved;

	if (pathlen < rootlen)
		return false;

	if (pathlen == rootlen)
		return true;

	start = path + rootlen + 1;
	for (;;) {
		/* Skip separators in front of the next component. */
		while (*start == '/')
			start++;
		if (*start == '\0')
			return true;

		/* Find the end of the component. */
		end = start + 1;
		while (*end != '\0' && *end != '/')
			end++;

		/* Temporarily terminate @path after this component. */
		saved = *end;
		if (saved != '\0')
			*end = '\0';

		if (!file_exists(path) && mkdir(path, 0755) < 0) {
			lxcfs_debug("Failed to create %s: %s.\n", path, strerror(errno));
			return false;
		}

		/* That was the last component. */
		if (saved == '\0')
			return true;

		*end = saved;
		start = end + 1;
	}
}
568
e65cfafc
CB
/* Recursively remove the directory @dirname and every directory below it.
 *
 * Entries that are not directories are skipped; rmdir() is then attempted on
 * each directory, children first.
 *
 * Returns 0 on success, -ENOENT if @dirname cannot be opened and -1 if any
 * other step failed. Only the first failure is logged.
 */
static int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -ENOENT;

	while ((direntp = readdir(dir))) {
		struct stat st;
		char *pathname;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		if (lstat(pathname, &st)) {
			if (!r)
				lxcfs_debug("Failed to stat %s.\n", pathname);
			r = -1;
		} else if (S_ISDIR(st.st_mode) && recursive_rmdir(pathname) < 0) {
			r = -1;
		}

		free(pathname);
	}

	if (rmdir(dirname) < 0) {
		if (!r)
			lxcfs_debug("Failed to delete %s: %s.\n", dirname, strerror(errno));
		r = -1;
	}

	if (closedir(dir) < 0) {
		/* This is a close failure, not a delete failure. */
		if (!r)
			lxcfs_debug("Failed to close directory %s: %s.\n", dirname, strerror(errno));
		r = -1;
	}

	return r;
}
623
/* Add a copy of @entry to the null-terminated string array at *@clist and keep
 * the array null-terminated; do not fail.
 */
static void must_add_to_list(char ***clist, char *entry)
{
	int slot;
	char *dup;

	slot = append_null_to_list((void ***)clist);
	dup = must_copy_string(entry);
	(*clist)[slot] = dup;
}
634
e65cfafc
CB
/* Return an allocated copy of the mountpoint, i.e. the fifth space-separated
 * field, of the /proc/<pid>/mountinfo entry @line. Returns NULL if @line has
 * fewer than five fields.
 *
 * Note that @line is modified: the space following the mountpoint, if any, is
 * overwritten with '\0'.
 */
static char *get_mountpoint(char *line)
{
	char *field = line;
	char *end, *mnt;
	size_t len;
	int skipped;

	/* Step over the first four fields. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	end = strchr(field, ' ');
	if (end)
		*end = '\0';

	len = strlen(field);
	mnt = must_alloc(len + 1);
	memcpy(mnt, field, len);
	mnt[len] = '\0';

	return mnt;
}
662
/* Collect the cgroupfs v1 controllers listed in /proc/self/cgroup.
 *
 * Every controller of the second, comma-separated field of each line is
 * appended to @nlist if it starts with "name=" (named controller) and to
 * @klist otherwise (kernel controller). Lines with an empty controller field,
 * i.e. the "0::/some/path" cgroupfs v2 entry, are skipped.
 *
 * Returns false if /proc/self/cgroup cannot be opened, true otherwise.
 */
static bool cgv1_get_controllers(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t cap = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (!f)
		return false;

	while (getline(&line, &cap, f) != -1) {
		char *ctrls, *end, *tok;
		char *state = NULL;

		/* The controller list sits between the first two ':'. */
		ctrls = strchr(line, ':');
		if (!ctrls)
			continue;
		ctrls++;

		end = strchr(ctrls, ':');
		if (!end)
			continue;
		*end = '\0';

		/* Skip the v2 hierarchy. */
		if (end == ctrls)
			continue;

		tok = strtok_r(ctrls, ",", &state);
		while (tok) {
			char ***target = strncmp(tok, "name=", 5) == 0 ? nlist : klist;

			must_append_string(target, tok);
			tok = strtok_r(NULL, ",", &state);
		}
	}

	fclose(f);
	free(line);

	return true;
}
709
e65cfafc
CB
710/* Get list of controllers for cgroupfs v2 hierarchy by looking at
711 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
712static bool cgv2_get_controllers(char ***klist)
df54106a 713{
e65cfafc
CB
714 return -ENOSYS;
715}
716*/
78a2a9f3 717
e65cfafc
CB
/* Get the cgroup of process @pid in the cgroupfs v2 hierarchy.
 *
 * Reads /proc/<pid>/cgroup, looks for the "0::/" entry and returns an
 * allocated copy of the path that follows the "0::" up to the end of the line.
 * Returns NULL if the file cannot be read or contains no such entry. The
 * caller owns the returned string.
 */
static char *cgv2_get_current_cgroup(int pid)
{
	int ret;
	char *cgroups_v2;
	char *entry;
	char *copy = NULL;
	/* Room for "/proc", "/<pid>" with up to 20 digits, "/cgroup" and the
	 * terminating '\0'. */
#define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
	char path[__PIDLEN];

	ret = snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return NULL;

	cgroups_v2 = read_file(path);
	if (!cgroups_v2)
		return NULL;

	entry = strstr(cgroups_v2, "0::/");
	if (entry) {
		/* Step over "0::" so that the copy starts at the leading '/'. */
		copy = copy_to_eol(entry + 3);
		if (copy)
			trim(copy);
	}

	free(cgroups_v2);

	return copy;
}
754
e65cfafc
CB
/* Return true if the null-terminated string arrays @l1 and @l2 have at least
 * one string in common. A NULL array is treated as empty.
 */
static bool cgv1_controller_lists_intersect(char **l1, char **l2)
{
	size_t i;

	if (!l1 || !l2)
		return false;

	for (i = 0; l1[i]; i++) {
		if (string_in_list(l2, l1[i]))
			return true;
	}

	return false;
}
771
e65cfafc
CB
772/* For a null-terminated list of controllers @clist, return true if any of those
773 * controllers is already listed the null-terminated list of hierarchies @hlist.
774 * Realistically, if one is present, all must be present.
775 */
776static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy **hlist, char **clist)
df54106a 777{
e65cfafc
CB
778 struct cgv1_hierarchy **it;
779
780 for (it = hlist; it && *it; it++)
781 if ((*it)->controllers)
782 if (cgv1_controller_lists_intersect((*it)->controllers, clist))
783 return true;
df54106a 784 return false;
e65cfafc
CB
785
786}
787
788/* Set boolean to mark controllers under which we are supposed create a
789 * writeable cgroup.
790 */
791static void cgv1_mark_to_make_rw(char **clist)
792{
793 struct cgv1_hierarchy **it;
794
795 for (it = cgv1_hierarchies; it && *it; it++)
796 if ((*it)->controllers)
7559c0b6
AS
797 if (cgv1_controller_lists_intersect((*it)->controllers, clist) ||
798 string_in_list(clist, "all"))
e65cfafc
CB
799 (*it)->create_rw_cgroup = true;
800}
801
802/* Set boolean to mark whether we are supposed to create a writeable cgroup in
803 * the cgroupfs v2 hierarchy.
804 */
805static void cgv2_mark_to_make_rw(char **clist)
806{
7559c0b6 807 if (string_in_list(clist, "unified") || string_in_list(clist, "all"))
e65cfafc
CB
808 if (cgv2_hierarchies)
809 (*cgv2_hierarchies)->create_rw_cgroup = true;
df54106a
SH
810}
811
/* Apply the controller list @clist to both cgroupfs flavours by calling
 * cgv1_mark_to_make_rw() and then cgv2_mark_to_make_rw().
 */
static void cg_mark_to_make_rw(char **clist)
{
	/* cgroupfs v1 hierarchies first ... */
	cgv1_mark_to_make_rw(clist);

	/* ... then the cgroupfs v2 hierarchy. */
	cgv2_mark_to_make_rw(clist);
}
818
/* Return an allocated string consisting of "name=" followed by @entry, e.g.
 * "name=systemd" for "systemd". Returns NULL if formatting fails; the buffer
 * is released in that case instead of being leaked. The caller owns the
 * returned string.
 */
static char *cgv1_must_prefix_named(char *entry)
{
	char *s;
	int ret;
	size_t size;

	/* sizeof("name=") covers the prefix plus the terminating '\0'. */
	size = strlen(entry) + sizeof("name=");
	s = must_alloc(size);

	ret = snprintf(s, size, "name=%s", entry);
	if (ret < 0 || (size_t)ret >= size) {
		free(s);
		return NULL;
	}

	return s;
}
835
/* Append the controller @entry to the null-terminated list at *@clist.
 *
 * Nothing is appended if @entry is listed in both @klist (kernel controllers)
 * and @nlist (named controllers). Otherwise @entry is copied as is when it
 * already starts with "name=" or is listed in @klist, and gets a "name="
 * prefix in every other case.
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;
	char *name;

	if (is_kernel && string_in_list(nlist, entry))
		return;

	slot = append_null_to_list((void ***)clist);

	if (strncmp(entry, "name=", 5) == 0 || is_kernel)
		name = must_copy_string(entry);
	else
		name = cgv1_must_prefix_named(entry);

	(*clist)[slot] = name;
}
856
/* Get the controllers from a mountinfo line. There are other ways we could get
 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
 * could parse the mount options. But we simply assume that the mountpoint must
 * be /sys/fs/cgroup/controller-list.
 *
 * Returns a null-terminated, allocated list of controllers, or NULL if the
 * fifth field of @line does not start with "/sys/fs/cgroup/" or the line is
 * malformed. Note that @line is modified: the space following the mountpoint
 * is overwritten and the controller list is cut up by strtok_r().
 */
static char **cgv1_get_proc_mountinfo_controllers(char **klist, char **nlist, char *line)
{
	static const char cgroot[] = "/sys/fs/cgroup/";
	const size_t cgroot_len = sizeof(cgroot) - 1;
	char **controllers = NULL;
	char *state = NULL;
	char *field = line;
	char *end, *tok;
	int skipped;

	/* Step over the first four fields to reach the mountpoint. */
	for (skipped = 0; skipped < 4; skipped++) {
		field = strchr(field, ' ');
		if (!field)
			return NULL;
		field++;
	}

	if (strncmp(field, cgroot, cgroot_len) != 0)
		return NULL;
	field += cgroot_len;

	end = strchr(field, ' ');
	if (!end)
		return NULL;
	*end = '\0';

	tok = strtok_r(field, ",", &state);
	while (tok) {
		must_append_controller(klist, nlist, &controllers, tok);
		tok = strtok_r(NULL, ",", &state);
	}

	return controllers;
}
896
/* Check if the cgroupfs v1 controller @c is present in the comma-separated
 * controller list at the start of @cgline. The list ends at the first ':' in
 * @cgline; if there is no ':' the result is false.
 *
 * The list is compared in place, so no copy of input-sized data is placed on
 * the stack. Empty list elements never match.
 */
static bool cgv1_controller_in_clist(char *cgline, char *c)
{
	const char *end, *tok;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);

	for (tok = cgline; tok < end;) {
		const char *sep;
		size_t toklen;

		/* The current element runs up to the next ',' or the ':'. */
		sep = memchr(tok, ',', (size_t)(end - tok));
		if (!sep)
			sep = end;

		toklen = (size_t)(sep - tok);
		if (toklen > 0 && toklen == clen && memcmp(tok, c, clen) == 0)
			return true;

		tok = sep + 1;
	}

	return false;
}
920
e65cfafc
CB
/* Look up the cgroup of the cgroupfs v1 controller @controller in
 * @basecginfo, which holds the contents of a /proc/<pid>/cgroup file.
 *
 * Returns an allocated copy of the cgroup path of the first line whose
 * controller list contains @controller, or NULL if there is no such line. The
 * caller owns the returned string.
 */
static char *cgv1_get_current_cgroup(char *basecginfo, char *controller)
{
	char *cur = basecginfo;

	for (;;) {
		/* Step over the hierarchy id to the controller list. */
		cur = strchr(cur, ':');
		if (!cur)
			return NULL;
		cur++;

		if (cgv1_controller_in_clist(cur, controller)) {
			/* The cgroup path follows the next ':'. */
			cur = strchr(cur, ':');
			if (!cur)
				return NULL;

			return copy_to_eol(cur + 1);
		}

		/* No match: continue with the next line. */
		cur = strchr(cur, '\n');
		if (!cur)
			return NULL;
		cur++;
	}
}
953
e65cfafc
CB
/* Remove a trailing "/init.scope" from the string @cg in place. This will
 * mostly affect systemd-based systems. If @cg is exactly "/init.scope" it is
 * reduced to "/". A NULL @cg, or one not ending in "/init.scope", is left
 * untouched.
 */
#define INIT_SCOPE "/init.scope"
static void cg_systemd_prune_init_scope(char *cg)
{
	const size_t scope_len = strlen(INIT_SCOPE);
	size_t len;
	char *point;

	if (!cg)
		return;

	/* Compare lengths rather than pointers: computing an address in front
	 * of @cg for short strings would be undefined behaviour.
	 */
	len = strlen(cg);
	if (len < scope_len)
		return;

	point = cg + (len - scope_len);
	if (strcmp(point, INIT_SCOPE) != 0)
		return;

	if (point == cg)
		point[1] = '\0';	/* keep the leading '/' */
	else
		*point = '\0';
}
976
e65cfafc
CB
977/* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
978 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
979 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
980 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
981 * from /proc/1/cgroup.
982 */
983static void cgv1_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup)
984{
985 struct cgv1_hierarchy *new;
986 int newentry;
987
988 new = must_alloc(sizeof(*new));
989 new->controllers = clist;
990 new->mountpoint = mountpoint;
991 new->base_cgroup = base_cgroup;
992 new->fullcgpath = NULL;
993 new->create_rw_cgroup = false;
994 new->init_cgroup = init_cgroup;
995 new->systemd_user_slice = false;
996
997 newentry = append_null_to_list((void ***)&cgv1_hierarchies);
998 cgv1_hierarchies[newentry] = new;
999}
1000
1001/* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
1002 * currently) include the controllers mounted into the hierarchy (e.g. memory,
1003 * pids, blkio), the mountpoint of that hierarchy (Should usually be
1004 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
1005 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
1006 * base cgroup of the current process gathered from /proc/self/cgroup, and the
1007 * init cgroup of PID1 gathered from /proc/1/cgroup.
1008 */
1009static void cgv2_add_controller(char **clist, char *mountpoint, char *base_cgroup, char *init_cgroup, bool systemd_user_slice)
1010{
1011 struct cgv2_hierarchy *new;
1012 int newentry;
1013
1014 new = must_alloc(sizeof(*new));
1015 new->controllers = clist;
1016 new->mountpoint = mountpoint;
1017 new->base_cgroup = base_cgroup;
1018 new->fullcgpath = NULL;
1019 new->create_rw_cgroup = false;
1020 new->init_cgroup = init_cgroup;
1021 new->systemd_user_slice = systemd_user_slice;
1022
1023 newentry = append_null_to_list((void ***)&cgv2_hierarchies);
1024 cgv2_hierarchies[newentry] = new;
1025}
1026
/* In Ubuntu 14.04, the paths created for us were
 * '/user/$uid.user/$something.session'. Return true if @in ends in such a
 * pair of path elements and the number in front of ".user" equals @uid.
 *
 * The last path element must end in ".session" and the element before it must
 * start with a number. Trailing '/' are ignored. @in is inspected in place;
 * nothing is allocated and nothing in front of the string is ever read.
 */
static bool cg_systemd_under_user_slice_1(const char *in, uid_t uid)
{
	static const char suffix[] = ".session";
	const size_t suffix_len = sizeof(suffix) - 1;
	size_t len, last, prev;
	int id = 0;

	len = strlen(in);
	if (len < strlen("/user/1.user/1.session"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	while (len > 0 && in[len - 1] == '/')
		len--;
	if (len == 0)
		return false;

	/* Get the last path element: it spans in[last] .. in[len - 1]. */
	last = len;
	while (last > 0 && in[last - 1] != '/')
		last--;
	if (last == 0)
		return false;

	/* Make sure it is something.session. */
	if (len - last < strlen("1.session") ||
	    strncmp(in + len - suffix_len, suffix, suffix_len) != 0)
		return false;

	/* The last path piece checks out, now find the start of the second to
	 * last one. It ends at the '/' at in[last - 1] and starts either after
	 * the previous '/' or at the very beginning of the string.
	 */
	prev = last - 1;
	while (prev > 0 && in[prev - 1] != '/')
		prev--;

	if (sscanf(in + prev, "%d.user/", &id) != 1)
		return false;

	return id == (int)uid;
}
1078
/* So long as our path relative to init starts with /user.slice/user-$uid.slice,
 * assume it belongs to $uid and chown it.
 *
 * Returns true if @base_cgroup is longer than @init_cgroup, starts with
 * @init_cgroup and continues with "/user.slice/user-<uid>.slice/". An
 * @init_cgroup of length one (the root cgroup "/") is not skipped, so that the
 * comparison starts at the leading '/' of @base_cgroup.
 *
 * The uid is formatted as an unsigned number, so uids above INT_MAX are
 * matched correctly too.
 */
static bool cg_systemd_under_user_slice_2(const char *base_cgroup,
					  const char *init_cgroup, uid_t uid)
{
	int ret;
	char buf[100];
	size_t curlen, initlen;

	curlen = strlen(base_cgroup);
	initlen = strlen(init_cgroup);
	if (curlen <= initlen)
		return false;

	if (strncmp(base_cgroup, init_cgroup, initlen) != 0)
		return false;

	ret = snprintf(buf, sizeof(buf), "/user.slice/user-%u.slice/", (unsigned int)uid);
	if (ret < 0 || (size_t)ret >= sizeof(buf))
		return false;

	if (initlen == 1)
		initlen = 0; /* skip the '/' */

	return strncmp(base_cgroup + initlen, buf, (size_t)ret) == 0;
}
1106
/* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
 * is not the end of our systemd path, then we're not part of the PAM call that
 * created that path.
 *
 * The last piece is chowned to $uid, the user- part not.
 * Note: If the user creates paths that look like what we're looking for to
 * 'fool' us, either
 *  - they fool us, we create new cgroups, and they get auto-logged-out.
 *  - they fool a root sudo, systemd cgroup is not changed but chowned, and they
 *    lose ownership of their cgroups
 *
 * @base_cgroup, @init_cgroup : handed to cg_systemd_under_user_slice_2().
 * @in                        : full path to inspect; only read.
 * @uid                       : uid the path has to belong to.
 *
 * Returns true if any of the three recognised layouts matches.
 */
static bool cg_systemd_created_user_slice(const char *base_cgroup,
					  const char *init_cgroup,
					  const char *in, uid_t uid)
{
	const char *end, *last, *prev;
	size_t len;
	int id, consumed = 0;

	/* An old version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_1(in, uid))
		return true;

	/* A new version of systemd has already created a cgroup for us. */
	if (cg_systemd_under_user_slice_2(base_cgroup, init_cgroup, uid))
		return true;

	len = strlen(in);
	if (len < strlen("/user-0.slice/session-0.scope"))
		return false;

	/* Skip any trailing '/' (shouldn't be any, but be sure). */
	end = in + len;
	while (end > in && end[-1] == '/')
		end--;
	if (end == in)
		return false;

	/* Find the first character of the last path element. */
	last = end;
	while (last > in && last[-1] != '/')
		last--;
	if (last == in)
		return false;

	/* Make sure it is session-something.scope. A matching "session-"
	 * prefix guarantees at least 8 characters, so end - 6 stays inside the
	 * element.
	 */
	if (strncmp(last, "session-", strlen("session-")) != 0 ||
	    strncmp(end - 6, ".scope", 6) != 0)
		return false;

	/* Find the first character of the second to last element. last[-1]
	 * is the separating '/'. Never step in front of @in: a path without a
	 * parent element simply does not match.
	 */
	prev = last - 1;
	while (prev > in && prev[-1] != '/')
		prev--;
	if (prev == in)
		return false;

	/* sscanf() reports success as soon as %d converted, so use %n to make
	 * sure the literal ".slice/" was matched as well and that it ends
	 * exactly where the last element begins.
	 */
	if (sscanf(prev, "user-%d.slice/%n", &id, &consumed) != 1)
		return false;
	if (consumed <= 0 || prev + consumed != last)
		return false;

	return id == (int)uid;
}
1179
/* Chown existing cgroup that systemd has already created for us.
 *
 * Returns false without touching anything when @systemd_user_slice is false,
 * i.e. when there is no systemd-created cgroup to reuse. Otherwise
 * @mountpoint/@base_cgroup is chowned to @uid:@gid and true is returned. A
 * failing chown() is only logged: the cgroup is still the one to use.
 */
static bool cg_systemd_chown_existing_cgroup(const char *mountpoint,
					     const char *base_cgroup, uid_t uid,
					     gid_t gid, bool systemd_user_slice)
{
	char *path;

	if (!systemd_user_slice)
		return false;

	path = must_make_path(mountpoint, base_cgroup, NULL);

	/* A cgroup within name=systemd has already been created. So we only
	 * need to chown it.
	 */
	if (chown(path, uid, gid) < 0) {
		mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
			 path, (int)uid, (int)gid, strerror(errno), NULL);
	} else {
		/* Only claim success when chown() actually succeeded. */
		lxcfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
	}

	free(path);
	return true;
}
1203
1204/* Detect and store information about cgroupfs v1 hierarchies. */
1205static bool cgv1_init(uid_t uid, gid_t gid)
df54106a
SH
1206{
1207 FILE *f;
e65cfafc
CB
1208 struct cgv1_hierarchy **it;
1209 char *basecginfo;
df54106a 1210 char *line = NULL;
e65cfafc 1211 char **klist = NULL, **nlist = NULL;
df54106a 1212 size_t len = 0;
df54106a 1213
e65cfafc
CB
1214 basecginfo = read_file("/proc/self/cgroup");
1215 if (!basecginfo)
1216 return false;
1217
1218 f = fopen("/proc/self/mountinfo", "r");
a6e9ec7d
CB
1219 if (!f) {
1220 free(basecginfo);
df54106a 1221 return false;
a6e9ec7d 1222 }
e65cfafc
CB
1223
1224 cgv1_get_controllers(&klist, &nlist);
1225
df54106a 1226 while (getline(&line, &len, f) != -1) {
e65cfafc
CB
1227 char **controller_list = NULL;
1228 char *mountpoint, *base_cgroup;
1229
a6e9ec7d 1230 if (is_lxcfs(line) || !is_cgv1(line))
e65cfafc
CB
1231 continue;
1232
1233 controller_list = cgv1_get_proc_mountinfo_controllers(klist, nlist, line);
1234 if (!controller_list)
1235 continue;
1236
1237 if (cgv1_controller_list_is_dup(cgv1_hierarchies,
1238 controller_list)) {
1239 free(controller_list);
1240 continue;
df54106a 1241 }
e65cfafc
CB
1242
1243 mountpoint = get_mountpoint(line);
1244 if (!mountpoint) {
1245 free_string_list(controller_list);
1246 continue;
df54106a 1247 }
e65cfafc
CB
1248
1249 base_cgroup = cgv1_get_current_cgroup(basecginfo, controller_list[0]);
1250 if (!base_cgroup) {
1251 free_string_list(controller_list);
1252 free(mountpoint);
1253 continue;
edd25678 1254 }
e65cfafc
CB
1255 trim(base_cgroup);
1256 lxcfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1257 "mountpoint \"%s\" and cgroup \"%s\".\n",
1258 controller_list[0], mountpoint, base_cgroup);
1259 cgv1_add_controller(controller_list, mountpoint, base_cgroup,
1260 NULL);
df54106a 1261 }
e65cfafc
CB
1262 free_string_list(klist);
1263 free_string_list(nlist);
1264 free(basecginfo);
df54106a 1265 fclose(f);
c65c5956 1266 free(line);
df54106a 1267
e65cfafc
CB
1268 /* Retrieve init cgroup path for all controllers. */
1269 basecginfo = read_file("/proc/1/cgroup");
1270 if (!basecginfo)
1271 return false;
df54106a 1272
e65cfafc
CB
1273 for (it = cgv1_hierarchies; it && *it; it++) {
1274 if ((*it)->controllers) {
1275 char *init_cgroup, *user_slice;
a6e9ec7d
CB
1276 /* We've already stored the controller and received its
1277 * current cgroup. If we now fail to retrieve its init
1278 * cgroup, we should probably fail.
1279 */
e65cfafc 1280 init_cgroup = cgv1_get_current_cgroup(basecginfo, (*it)->controllers[0]);
a6e9ec7d
CB
1281 if (!init_cgroup) {
1282 free(basecginfo);
1283 return false;
1284 }
e65cfafc
CB
1285 cg_systemd_prune_init_scope(init_cgroup);
1286 (*it)->init_cgroup = init_cgroup;
1287 lxcfs_debug("cgroupfs v1 controller \"%s\" has init "
1288 "cgroup \"%s\".\n",
1289 (*(*it)->controllers), init_cgroup);
1290 /* Check whether systemd has already created a cgroup
1291 * for us.
1292 */
1293 user_slice = must_make_path((*it)->mountpoint, (*it)->base_cgroup, NULL);
1294 if (cg_systemd_created_user_slice((*it)->base_cgroup, (*it)->init_cgroup, user_slice, uid))
1295 (*it)->systemd_user_slice = true;
df54106a
SH
1296 }
1297 }
e65cfafc
CB
1298 free(basecginfo);
1299
1300 return true;
df54106a 1301}
e65cfafc
CB
1302
/* __typeof__ should be safe to use with all compilers. */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;

/* Report whether the filesystem described by @fs carries the superblock magic
 * number @magic_val.
 */
static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
{
	const fs_type_magic found = fs->f_type;

	return found == magic_val;
}
1310
1311/* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1312 * statfs fails. If @path is null /sys/fs/cgroup is checked.
df54106a 1313 */
e65cfafc 1314static int cg_get_version_of_mntpt(const char *path)
df54106a 1315{
e65cfafc
CB
1316 int ret;
1317 struct statfs sb;
1318
1319 if (path)
1320 ret = statfs(path, &sb);
1321 else
1322 ret = statfs("/sys/fs/cgroup", &sb);
1323
1324 if (ret < 0)
1325 return -1;
1326
1327 if (has_fs_type(&sb, CGROUP_SUPER_MAGIC))
1328 return 1;
1329 else if (has_fs_type(&sb, CGROUP2_SUPER_MAGIC))
1330 return 2;
1331
1332 return 0;
1333}
1334
/* Detect and store information about the cgroupfs v2 hierarchy. Currently only
 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
 *
 * Returns true when there is no v2 hierarchy at all or when it was registered
 * via cgv2_add_controller(). Returns false when we are in a v2 cgroup but
 * either init's cgroup or a v2 mountpoint cannot be determined. @gid is not
 * used.
 */
static bool cgv2_init(uid_t uid, gid_t gid)
{
	char *mountpoint = NULL;
	FILE *f = NULL;
	char *current_cgroup = NULL, *init_cgroup = NULL;
	char *user_slice;
	char *line = NULL;
	size_t len = 0;
	bool has_user_slice;
	bool ret = false;

	current_cgroup = cgv2_get_current_cgroup(getpid());
	if (!current_cgroup) {
		/* No v2 hierarchy present. We're done. */
		ret = true;
		goto cleanup;
	}

	init_cgroup = cgv2_get_current_cgroup(1);
	if (!init_cgroup) {
		/* If we're here and didn't fail already above, then something's
		 * certainly wrong, so error this time.
		 */
		goto cleanup;
	}
	cg_systemd_prune_init_scope(init_cgroup);

	/* Check if the v2 hierarchy is mounted at its standard location.
	 * If so we can skip the rest of the work here. Although the unified
	 * hierarchy can be mounted multiple times, each of those mountpoints
	 * will expose identical information.
	 */
	if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
		mountpoint = must_copy_string("/sys/fs/cgroup");
	} else {
		f = fopen("/proc/self/mountinfo", "r");
		if (!f)
			goto cleanup;

		/* we support simple cgroup mounts and lxcfs mounts */
		/* Let the first v2 mountpoint we find, win. */
		while (!mountpoint && getline(&line, &len, f) != -1) {
			if (is_cgv2(line))
				mountpoint = get_mountpoint(line);
		}
	}

	/* Without a mountpoint there is nothing to register and nothing
	 * meaningful to log.
	 */
	if (!mountpoint)
		goto cleanup;

	user_slice = must_make_path(mountpoint, current_cgroup, NULL);
	has_user_slice = cg_systemd_created_user_slice(current_cgroup, init_cgroup, user_slice, uid);
	free(user_slice);

	lxcfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
		    "current cgroup \"%s\" and init cgroup \"%s\".\n",
		    mountpoint, current_cgroup, init_cgroup);

	/* cgv2_add_controller() stores the three pointers it is given, so
	 * they must not be released below.
	 */
	cgv2_add_controller(NULL, mountpoint, current_cgroup, init_cgroup, has_user_slice);
	current_cgroup = NULL;
	init_cgroup = NULL;
	ret = true;

cleanup:
	if (f)
		fclose(f);
	free(line);
	/* Still owned by us on every path that did not register them. */
	free(current_cgroup);
	free(init_cgroup);

	return ret;
}
1427
e65cfafc
CB
1428/* Detect and store information about mounted cgroupfs v1 hierarchies and the
1429 * cgroupfs v2 hierarchy.
1430 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1431 * where some controllers are mounted into their standard cgroupfs v1 locations
1432 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1433 * hierarchy (/sys/fs/cgroup).
4deb6092 1434 */
e65cfafc 1435static bool cg_init(uid_t uid, gid_t gid)
4deb6092 1436{
e65cfafc 1437 if (!cgv1_init(uid, gid))
4deb6092
SH
1438 return false;
1439
e65cfafc 1440 if (!cgv2_init(uid, gid))
4deb6092
SH
1441 return false;
1442
e65cfafc
CB
1443 if (cgv1_hierarchies && cgv2_hierarchies) {
1444 cg_mount_mode = CGROUP_MIXED;
1445 lxcfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies.");
1446 } else if (cgv1_hierarchies && !cgv2_hierarchies) {
1447 cg_mount_mode = CGROUP_PURE_V1;
1448 lxcfs_debug("%s\n", "Detected cgroupfs v1 hierarchies.");
1449 } else if (cgv2_hierarchies && !cgv1_hierarchies) {
1450 cg_mount_mode = CGROUP_PURE_V2;
1451 lxcfs_debug("%s\n", "Detected cgroupfs v2 hierarchies.");
1452 } else {
1453 cg_mount_mode = CGROUP_UNKNOWN;
1454 mysyslog(LOG_ERR, "Could not detect cgroupfs hierarchy.\n", NULL);
1455 }
4deb6092 1456
e65cfafc 1457 if (cg_mount_mode == CGROUP_UNKNOWN)
4deb6092
SH
1458 return false;
1459
1460 return true;
1461}
1462
e65cfafc
CB
1463/* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1464static bool cgv1_enter(const char *cgroup)
475a859c 1465{
e65cfafc
CB
1466 struct cgv1_hierarchy **it;
1467
1468 for (it = cgv1_hierarchies; it && *it; it++) {
e65cfafc
CB
1469 char **controller;
1470 bool entered = false;
1471
1472 if (!(*it)->controllers || !(*it)->mountpoint ||
1473 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
1474 continue;
1475
1476 for (controller = (*it)->controllers; controller && *controller;
1477 controller++) {
a6e9ec7d 1478 char *path;
e65cfafc 1479
a403c004
CB
1480 /* We've already been placed in a user slice, so we
1481 * don't need to enter the cgroup again.
1482 */
1483 if ((*it)->systemd_user_slice) {
1484 entered = true;
1485 break;
1486 }
e65cfafc
CB
1487
1488 path = must_make_path((*it)->mountpoint,
1489 (*it)->init_cgroup,
1490 cgroup,
1491 "/cgroup.procs",
1492 NULL);
1493 if (!file_exists(path)) {
1494 free(path);
1495 path = must_make_path((*it)->mountpoint,
1496 (*it)->init_cgroup,
1497 cgroup,
1498 "/tasks",
1499 NULL);
1500 }
1501 lxcfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
1502 entered = write_int(path, (int)getpid());
a6e9ec7d
CB
1503 if (entered) {
1504 free(path);
e65cfafc 1505 break;
a6e9ec7d 1506 }
e65cfafc 1507 lxcfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup.\n", path);
a6e9ec7d 1508 free(path);
e65cfafc
CB
1509 }
1510 if (!entered)
1511 return false;
1512 }
475a859c 1513
e65cfafc 1514 return true;
475a859c
SH
1515}
1516
e65cfafc
CB
1517/* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1518static bool cgv2_enter(const char *cgroup)
78a2a9f3 1519{
e65cfafc
CB
1520 struct cgv2_hierarchy *v2;
1521 char *path;
1522 bool entered = false;
78a2a9f3 1523
e65cfafc 1524 if (!cgv2_hierarchies)
4deb6092
SH
1525 return true;
1526
e65cfafc 1527 v2 = *cgv2_hierarchies;
475a859c 1528
e65cfafc 1529 if (!v2->mountpoint || !v2->base_cgroup)
78a2a9f3
SH
1530 return false;
1531
e65cfafc
CB
1532 if (!v2->create_rw_cgroup || v2->systemd_user_slice)
1533 return true;
78a2a9f3 1534
423a3b4f 1535 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, "/cgroup.procs", NULL);
e65cfafc
CB
1536 lxcfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1537 entered = write_int(path, (int)getpid());
1538 if (!entered) {
1539 lxcfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\".\n", path);
1540 free(path);
78a2a9f3 1541 return false;
e65cfafc 1542 }
78a2a9f3 1543
e65cfafc 1544 free(path);
78a2a9f3
SH
1545
1546 return true;
1547}
edd25678 1548
e65cfafc
CB
/* Enter @cgroup in the cgroupfs v1 hierarchies first and in the cgroupfs v2
 * hierarchy second. The v2 step is not attempted when the v1 step failed.
 * Each failure is reported through syslog.
 */
static bool cg_enter(const char *cgroup)
{
	if (cgv1_enter(cgroup)) {
		if (cgv2_enter(cgroup))
			return true;

		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to enter cgroups.\n", NULL);
	} else {
		mysyslog(LOG_WARNING, "cgroupfs v1: Failed to enter cgroups.\n", NULL);
	}

	return false;
}
1564
e65cfafc
CB
1565/* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1566static void cgv1_escape(void)
df54106a 1567{
04742595
CB
1568 struct cgv1_hierarchy **it;
1569
1570 /* In case systemd hasn't already placed us in a user slice for the
1571 * cpuset v1 controller we will reside in the root cgroup. This means
1572 * that cgroup.clone_children will not have been initialized for us so
1573 * we need to do it.
1574 */
1575 for (it = cgv1_hierarchies; it && *it; it++)
1576 if (!cgv1_handle_root_cpuset_hierarchy(*it))
1577 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to initialize cpuset.\n", NULL);
1578
e65cfafc
CB
1579 if (!cgv1_enter("/"))
1580 mysyslog(LOG_WARNING, "cgroupfs v1: Failed to escape to init's cgroup.\n", NULL);
1581}
edd25678 1582
e65cfafc
CB
/* Escape to the "/" cgroup of the cgroupfs v2 hierarchy by entering it. A
 * failure is only logged.
 */
static void cgv2_escape(void)
{
	const bool escaped = cgv2_enter("/");

	if (!escaped)
		mysyslog(LOG_WARNING, "cgroupfs v2: Failed to escape to init's cgroup.\n", NULL);
}
edd25678 1589
e65cfafc
CB
/* Escape in both cgroup flavours: cgroupfs v1 first, then cgroupfs v2. */
static void cg_escape(void)
{
	/* v1 hierarchies ... */
	cgv1_escape();

	/* ... followed by the unified hierarchy. */
	cgv2_escape();
}
1596
e65cfafc
CB
/* Look @user up in the password database and store its uid in @uid and its
 * primary gid in @gid.
 *
 * Returns false, leaving @uid and @gid untouched, if getpwnam() does not
 * return an entry.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	const struct passwd *entry = getpwnam(user);

	if (entry) {
		*uid = entry->pw_uid;
		*gid = entry->pw_gid;
	}

	return entry != NULL;
}
1611
3145d497
CB
/* Report whether @path exists and is owned by exactly @uid and @gid. Such a
 * cgroup can be reused. Returns false if stat() fails.
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat info;

	return stat(path, &info) == 0 &&
	       info.st_uid == uid &&
	       info.st_gid == gid;
}
1625
04742595
CB
/* Create cpumask from cpulist aka turn:
 *
 *	0,2-3
 *
 * into bit array
 *
 *	1 0 1 1
 *
 * @buf is split in place with strtok_r(), so it is modified. @nbits is the
 * number of bits the mask has to hold; an entry naming a cpu >= @nbits or a
 * descending range makes the function fail.
 *
 * Returns a calloc()ed mask the caller has to free(), or NULL on failure.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *state = NULL;
	char *entry;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	entry = strtok_r(buf, ",", &state);
	while (entry) {
		unsigned first, last, cpu;
		char *dash;

		errno = 0;
		first = strtoul(entry, NULL, 0);

		/* "a-b" names a range, a lone number names a single cpu. */
		dash = strchr(entry, '-');
		last = dash ? strtoul(dash + 1, NULL, 0) : first;

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (cpu = first; cpu <= last; cpu++)
			set_bit(cpu, mask);

		entry = strtok_r(NULL, ",", &state);
	}

	return mask;
}
1667
/* Concatenate the NULL-terminated string array @parts, placing @sep between
 * consecutive elements and, if @use_as_prefix is true, also in front of the
 * first one.
 *
 * Returns a newly allocated string the caller has to free(), or NULL if
 * @parts is NULL or the allocation fails. An empty array yields "" (or just
 * @sep when @use_as_prefix is set).
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **p;
	char *result, *cursor;
	size_t sep_len, result_len;

	if (!parts)
		return NULL;

	sep_len = strlen(sep);

	/* calculate new string length */
	result_len = use_as_prefix ? sep_len : 0;
	for (p = parts; *p; p++)
		result_len += (p > parts ? sep_len : 0) + strlen(*p);

	result = calloc(result_len + 1, sizeof(char));
	if (!result)
		return NULL;

	/* Append through a moving cursor instead of strcat() so that the
	 * result is not rescanned for every element.
	 */
	cursor = result;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}
	for (p = parts; *p; p++) {
		size_t part_len = strlen(*p);

		if (p > parts) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}
		memcpy(cursor, *p, part_len);
		cursor += part_len;
	}
	*cursor = '\0';

	return result;
}
1696
/* The largest value a 64-bit unsigned integer can hold is 2^64 - 1. This is a
 * 20-digit number, so 21 bytes hold it including the terminating NUL.
 */
#define __IN_TO_STR_LEN 21
/* Turn cpumask into simple, comma-separated cpulist.
 *
 * @bitarr : mask to convert.
 * @nbits  : number of valid bits in @bitarr; bits 0 .. @nbits - 1 are read.
 *
 * Returns a newly allocated string the caller has to free(). Returns NULL on
 * failure and also when no bit is set, because string_join() is then handed a
 * NULL list.
 */
static char *cg_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
{
	size_t i;
	int ret;
	char numstr[__IN_TO_STR_LEN] = {0};
	char **cpulist = NULL;
	char *joined;

	/* Bit @nbits itself lies beyond the mask, so stop before it. */
	for (i = 0; i < nbits; i++) {
		if (!is_set(i, bitarr))
			continue;

		ret = snprintf(numstr, __IN_TO_STR_LEN, "%zu", i);
		if (ret < 0 || (size_t)ret >= __IN_TO_STR_LEN) {
			free_string_list(cpulist);
			return NULL;
		}
		must_append_string(&cpulist, numstr);
	}

	joined = string_join(",", (const char **)cpulist, false);

	/* string_join() returned a fresh string; the list is no longer
	 * needed.
	 */
	if (cpulist)
		free_string_list(cpulist);

	return joined;
}
1721
/* Return the number that follows the last ',' or '-' in @cpulist, or the
 * number at the start of @cpulist if it contains neither. For a kernel
 * formatted, ascending cpu list such as "0,2-3" this is the highest cpu.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *comma, *dash, *last;
	size_t cpus = 0;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/* Pick whichever separator comes later. Pointers are only compared
	 * when both exist: relational comparison against a null pointer is
	 * undefined.
	 */
	if (comma && dash)
		last = (comma > dash ? comma : dash) + 1;
	else if (comma)
		last = comma + 1;
	else if (dash)
		last = dash + 1;
	else
		last = cpulist;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
1752
/* write() wrapper that transparently restarts the call whenever it fails with
 * EINTR. Returns whatever the final write() returned.
 */
static ssize_t write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
1762
808fd1ef 1763static int write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
04742595
CB
1764{
1765 int fd, saved_errno;
1766 ssize_t ret;
1767
1768 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
1769 if (fd < 0)
1770 return -1;
1771 ret = write_nointr(fd, buf, count);
1772 if (ret < 0)
1773 goto out_error;
1774 if ((size_t)ret != count)
1775 goto out_error;
1776 if (add_newline) {
1777 ret = write_nointr(fd, "\n", 1);
1778 if (ret != 1)
1779 goto out_error;
1780 }
1781 close(fd);
1782 return 0;
1783
1784out_error:
1785 saved_errno = errno;
1786 close(fd);
1787 errno = saved_errno;
1788 return -1;
1789}
1790
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
/* Write a cpuset.cpus value for the cgroup directory @path that is derived
 * from the parent directory's cpuset.cpus with all cpus listed in
 * __ISOL_CPUS removed.
 *
 * @path           : cgroup directory; only its parent component is derived
 *                   from it and the string is left unchanged on return.
 * @am_initialized : true if somebody already initialized the cpuset of
 *                   @path. Without isolated cpus nothing is written then;
 *                   otherwise the parent's value is copied verbatim.
 *
 * Returns true on success, including the cases where nothing had to be
 * written, and false on any read, parse or write failure.
 */
static bool cg_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		lxcfs_debug("Invalid path: %s.\n", path);
		return bret;
	}

	/* Cut off the last component only for as long as it takes to build the
	 * parent's file name, so that @path is intact on every exit.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	*lastslash = oldv;

	posscpus = read_file(fpath);
	if (!posscpus) {
		lxcfs_debug("Could not read file: %s.\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = cg_get_max_cpus(posscpus);
	if (maxposs < 0)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		lxcfs_debug("%s", "Path: "__ISOL_CPUS" to read isolated cpus from does not exist.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			lxcfs_debug("%s", "Copying cpuset of parent cgroup.\n");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	isolcpus = read_file(__ISOL_CPUS);
	if (!isolcpus) {
		lxcfs_debug("%s", "Could not read file "__ISOL_CPUS"\n");
		goto on_error;
	}
	/* <ctype.h> functions need an unsigned char value. */
	if (!isdigit((unsigned char)isolcpus[0])) {
		lxcfs_debug("%s", "No isolated cpus detected.\n");
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			lxcfs_debug("%s", "Copying cpuset of parent cgroup.\n");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = cg_get_max_cpus(isolcpus);
	if (maxisol < 0)
		goto on_error;

	/* From here on maxposs is a bit count: highest cpu number plus one. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = cg_cpumask(posscpus, maxposs);
	if (!possmask) {
		lxcfs_debug("%s", "Could not create cpumask for all possible cpus.\n");
		goto on_error;
	}

	isolmask = cg_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		lxcfs_debug("%s", "Could not create cpumask for all isolated cpus.\n");
		goto on_error;
	}

	/* Both masks hold maxposs bits: valid indices are 0 .. maxposs - 1. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	/* NOTE(review): when no bit was flipped and !am_initialized, nothing
	 * is written to @path's cpuset.cpus, unlike in the two "no isolated
	 * cpus" branches above. Confirm that this is intended.
	 */
	if (!flipped_bit) {
		lxcfs_debug("%s", "No isolated cpus present in cpuset.\n");
		goto on_success;
	}
	lxcfs_debug("%s", "Removed isolated cpus from cpuset.\n");

	cpulist = cg_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		lxcfs_debug("%s", "Could not create cpu list.\n");
		goto on_error;
	}

copy_parent:
	/* Release the parent's file name before reusing fpath for ours. */
	free(fpath);
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = write_to_file(fpath, cpulist, strlen(cpulist), false);
	if (ret < 0) {
		lxcfs_debug("Could not write cpu list to: %s.\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	free(fpath);

	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
1927
1928int read_from_file(const char *filename, void* buf, size_t count)
1929{
1930 int fd = -1, saved_errno;
1931 ssize_t ret;
1932
1933 fd = open(filename, O_RDONLY | O_CLOEXEC);
1934 if (fd < 0)
1935 return -1;
1936
1937 if (!buf || !count) {
1938 char buf2[100];
1939 size_t count2 = 0;
1940 while ((ret = read(fd, buf2, 100)) > 0)
1941 count2 += ret;
1942 if (ret >= 0)
1943 ret = count2;
1944 } else {
1945 memset(buf, 0, count);
1946 ret = read(fd, buf, count);
1947 }
1948
1949 if (ret < 0)
1950 lxcfs_debug("read %s: %s", filename, strerror(errno));
1951
1952 saved_errno = errno;
1953 close(fd);
1954 errno = saved_errno;
1955 return ret;
1956}
1957
/* Copy contents of parent(@path)/@file to @path/@file.
 *
 * @path is only cut at its last '/' while the parent's file name is built and
 * is unchanged on return.
 *
 * Returns false if @path contains no '/', if the parent file is empty or
 * cannot be read, or if writing the copy fails.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug... this shouldn't be possible
		lxcfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* Restore @path right away so that it is intact on every exit. */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* The first call only determines how many bytes there are. */
	len = read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;

	value = must_alloc(len + 1);
	if (read_from_file(fpath, value, len) != len)
		goto bad;
	/* read_from_file() does not terminate the buffer, but it is printed
	 * with %s below.
	 */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = write_to_file(fpath, value, len, false);
	if (ret < 0)
		lxcfs_debug("Unable to write %s to %s", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

bad:
	lxcfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
1995
1996/* In case systemd hasn't already placed us in a user slice for the cpuset v1
1997 * controller we will reside in the root cgroup. This means that
1998 * cgroup.clone_children will not have been initialized for us so we need to do
1999 * it.
2000 */
2001static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy *h)
2002{
2003 char *clonechildrenpath, v;
2004
2005 if (!string_in_list(h->controllers, "cpuset"))
2006 return true;
2007
2008 clonechildrenpath = must_make_path(h->mountpoint, "cgroup.clone_children", NULL);
2009
2010 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
2011 lxcfs_debug("Failed to read '%s'", clonechildrenpath);
2012 free(clonechildrenpath);
2013 return false;
2014 }
2015
2016 if (v == '1') { /* already set for us by someone else */
2017 free(clonechildrenpath);
2018 return true;
2019 }
2020
808fd1ef 2021 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
04742595
CB
2022 /* Set clone_children so children inherit our settings */
2023 lxcfs_debug("Failed to write 1 to %s", clonechildrenpath);
2024 free(clonechildrenpath);
2025 return false;
2026 }
2027 free(clonechildrenpath);
2028 return true;
2029}
2030
2031/*
2032 * Initialize the cpuset hierarchy in first directory of @gname and
2033 * set cgroup.clone_children so that children inherit settings.
2034 * Since the h->base_path is populated by init or ourselves, we know
2035 * it is already initialized.
2036 */
2037static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy *h,
2038 const char *cgroup)
2039{
2040 char *cgpath, *clonechildrenpath, v, *slash;
2041
2042 if (!string_in_list(h->controllers, "cpuset"))
2043 return true;
2044
2045 if (*cgroup == '/')
2046 cgroup++;
2047 slash = strchr(cgroup, '/');
2048 if (slash)
2049 *slash = '\0';
2050
2051 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgroup, NULL);
2052 if (slash)
2053 *slash = '/';
2054 if (mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
2055 lxcfs_debug("Failed to create '%s'", cgpath);
2056 free(cgpath);
2057 return false;
2058 }
2059 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
2060 if (!file_exists(clonechildrenpath)) { /* unified hierarchy doesn't have clone_children */
2061 free(clonechildrenpath);
2062 free(cgpath);
2063 return true;
2064 }
2065 if (read_from_file(clonechildrenpath, &v, 1) < 0) {
2066 lxcfs_debug("Failed to read '%s'", clonechildrenpath);
2067 free(clonechildrenpath);
2068 free(cgpath);
2069 return false;
2070 }
2071
2072 /* Make sure any isolated cpus are removed from cpuset.cpus. */
7c029b0f
CB
2073 if (!cg_filter_and_set_cpus(cgpath, v == '1')) {
2074 lxcfs_debug("%s", "Failed to remove isolated cpus.\n");
2075 free(clonechildrenpath);
2076 free(cgpath);
04742595 2077 return false;
7c029b0f 2078 }
04742595
CB
2079
2080 if (v == '1') { /* already set for us by someone else */
808fd1ef 2081 lxcfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\".\n");
04742595
CB
2082 free(clonechildrenpath);
2083 free(cgpath);
2084 return true;
2085 }
2086
2087 /* copy parent's settings */
2088 if (!cg_copy_parent_file(cgpath, "cpuset.mems")) {
808fd1ef 2089 lxcfs_debug("%s", "Failed to copy \"cpuset.mems\" settings.\n");
04742595
CB
2090 free(cgpath);
2091 free(clonechildrenpath);
2092 return false;
2093 }
2094 free(cgpath);
2095
808fd1ef 2096 if (write_to_file(clonechildrenpath, "1", 1, false) < 0) {
04742595
CB
2097 /* Set clone_children so children inherit our settings */
2098 lxcfs_debug("Failed to write 1 to %s", clonechildrenpath);
2099 free(clonechildrenpath);
2100 return false;
2101 }
2102 free(clonechildrenpath);
2103 return true;
2104}
2105
e65cfafc
CB
2106/* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2107 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2108 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2109 * to the caller in @existed.
df54106a 2110 */
e65cfafc
CB
2111#define __PAM_CGFS_USER "/user/"
2112#define __PAM_CGFS_USER_LEN 6
2113static bool cgv1_create_one(struct cgv1_hierarchy *h, const char *cgroup, uid_t uid, gid_t gid, bool *existed)
df54106a 2114{
e65cfafc
CB
2115 char *clean_base_cgroup, *path;
2116 char **controller;
2117 struct cgv1_hierarchy *it;
2118 bool created = false;
2119
a403c004 2120 *existed = false;
e65cfafc
CB
2121 it = h;
2122 for (controller = it->controllers; controller && *controller;
2123 controller++) {
04742595
CB
2124 if (!cgv1_handle_cpuset_hierarchy(it, cgroup))
2125 return false;
2126
e65cfafc
CB
2127 /* If systemd has already created a cgroup for us, keep using
2128 * it.
2129 */
2130 if (cg_systemd_chown_existing_cgroup(it->mountpoint,
2131 it->base_cgroup, uid, gid,
2132 it->systemd_user_slice)) {
2133 return true;
2134 }
2135
2136 /* We need to make sure that we do not create an endless chain
2137 * of sub-cgroups. So we check if we have already logged in
2138 * somehow (sudo -i, su, etc.) and have created a
2139 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2140 * cgroups this is unnecessary since we use the init_cgroup
2141 * anyway, but for controllers which have an existing systemd
2142 * cgroup that does not match the current uid, this is pretty
2143 * useful.
2144 */
2145 if (strncmp(it->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2146 free(it->base_cgroup);
2147 it->base_cgroup = must_copy_string("/");
2148 } else {
2149 clean_base_cgroup =
2150 strstr(it->base_cgroup, __PAM_CGFS_USER);
2151 if (clean_base_cgroup)
2152 *clean_base_cgroup = '\0';
2153 }
df54106a 2154
e65cfafc
CB
2155 path = must_make_path(it->mountpoint, it->init_cgroup, cgroup, NULL);
2156 lxcfs_debug("Constructing path: %s.\n", path);
2157 if (file_exists(path)) {
3145d497 2158 bool our_cg = cg_belongs_to_uid_gid(path, uid, gid);
b36273fa 2159 lxcfs_debug("%s existed and does %shave our uid: %d and gid: %d.\n", path, our_cg ? "" : "not ", uid, gid);
e65cfafc 2160 free(path);
3145d497
CB
2161 if (our_cg)
2162 *existed = false;
2163 else
2164 *existed = true;
2165 return our_cg;
e65cfafc
CB
2166 }
2167 created = mkdir_p(it->mountpoint, path);
2168 if (!created) {
df54106a 2169 free(path);
e65cfafc 2170 continue;
df54106a 2171 }
e65cfafc 2172 if (chown(path, uid, gid) < 0)
b36273fa
CB
2173 mysyslog(LOG_WARNING,
2174 "Failed to chown %s to %d:%d: %s.\n", path,
2175 (int)uid, (int)gid, strerror(errno), NULL);
2176 lxcfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
e65cfafc
CB
2177 free(path);
2178 break;
df54106a 2179 }
e65cfafc 2180
eb42e790 2181 return created;
df54106a
SH
2182}
2183
e65cfafc
CB
2184/* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2185 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2186 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2187 */
2188static bool cgv1_remove_one(struct cgv1_hierarchy *h, const char *cgroup)
df54106a 2189{
df54106a 2190
e65cfafc 2191 char *path;
df54106a 2192
e65cfafc
CB
2193 /* Better safe than sorry. */
2194 if (!h->controllers)
2195 return true;
df54106a 2196
e65cfafc
CB
2197 /* Cgroups created by systemd for us which we re-use won't be removed
2198 * here, since we're using init_cgroup + cgroup as path instead of
2199 * base_cgroup + cgroup.
2200 */
2201 path = must_make_path(h->mountpoint, h->init_cgroup, cgroup, NULL);
2202 (void)recursive_rmdir(path);
2203 free(path);
df54106a
SH
2204
2205 return true;
2206}
2207
e65cfafc
CB
2208/* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2209static bool cgv2_remove(const char *cgroup)
df54106a 2210{
e65cfafc
CB
2211 struct cgv2_hierarchy *v2;
2212 char *path;
e9597a70 2213
e65cfafc
CB
2214 if (!cgv2_hierarchies)
2215 return true;
2216
2217 v2 = *cgv2_hierarchies;
2218
2219 /* If we reused an already existing cgroup, don't bother trying to
2220 * remove (a potentially wrong)/the path.
2221 * Cgroups created by systemd for us which we re-use would be removed
2222 * here, since we're using base_cgroup + cgroup as path.
2223 */
2224 if (v2->systemd_user_slice)
2225 return true;
2226
2227 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2228 (void)recursive_rmdir(path);
2229 free(path);
2230
2231 return true;
df54106a
SH
2232}
2233
e65cfafc
CB
2234/* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2235 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2236 * back, to the caller if the creation failed due to @cgroup already existing
2237 * via @existed.
2238 */
2239static bool cgv1_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
df54106a 2240{
e65cfafc
CB
2241 struct cgv1_hierarchy **it, **rev_it;
2242 bool all_created = true;
df54106a 2243
e65cfafc
CB
2244 for (it = cgv1_hierarchies; it && *it; it++) {
2245 if (!(*it)->controllers || !(*it)->mountpoint ||
2246 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2247 continue;
2248
2249 if (!cgv1_create_one(*it, cgroup, uid, gid, existed)) {
2250 all_created = false;
2251 break;
df54106a 2252 }
df54106a
SH
2253 }
2254
e65cfafc
CB
2255 if (all_created)
2256 return true;
2257
2258 for (rev_it = cgv1_hierarchies; rev_it && *rev_it && (*rev_it != *it);
2259 rev_it++)
2260 cgv1_remove_one(*rev_it, cgroup);
2261
df54106a
SH
2262 return false;
2263}
2264
e65cfafc
CB
2265/* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2266 * the creation failed due to @cgroup already existing via @existed.
2267 */
2268static bool cgv2_create(const char *cgroup, uid_t uid, gid_t gid, bool *existed)
df54106a 2269{
b5b0232c 2270 int ret;
e65cfafc
CB
2271 char *clean_base_cgroup;
2272 char *path;
2273 struct cgv2_hierarchy *v2;
cb083287 2274 bool our_cg = false, created = false;
df54106a 2275
a403c004
CB
2276 *existed = false;
2277
e65cfafc
CB
2278 if (!cgv2_hierarchies || !(*cgv2_hierarchies)->create_rw_cgroup)
2279 return true;
df54106a 2280
e65cfafc 2281 v2 = *cgv2_hierarchies;
df54106a 2282
e65cfafc
CB
2283 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2284 * to be placed under our current cgroup.
2285 */
cb083287
CB
2286 if (cg_systemd_chown_existing_cgroup(v2->mountpoint, v2->base_cgroup,
2287 uid, gid, v2->systemd_user_slice))
b5b0232c 2288 goto delegate_files;
edd25678 2289
423a3b4f 2290 /* We need to make sure that we do not create an endless chain of
e65cfafc
CB
2291 * sub-cgroups. So we check if we have already logged in somehow (sudo
2292 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2293 * skip that part.
2294 */
2295 if (strncmp(v2->base_cgroup, __PAM_CGFS_USER, __PAM_CGFS_USER_LEN) == 0) {
2296 free(v2->base_cgroup);
2297 v2->base_cgroup = must_copy_string("/");
2298 } else {
2299 clean_base_cgroup = strstr(v2->base_cgroup, __PAM_CGFS_USER);
2300 if (clean_base_cgroup)
2301 *clean_base_cgroup = '\0';
df54106a
SH
2302 }
2303
e65cfafc
CB
2304 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup, NULL);
2305 lxcfs_debug("Constructing path \"%s\".\n", path);
2306 if (file_exists(path)) {
cb083287
CB
2307 our_cg = cg_belongs_to_uid_gid(path, uid, gid);
2308 lxcfs_debug(
2309 "%s existed and does %shave our uid: %d and gid: %d.\n",
2310 path, our_cg ? "" : "not ", uid, gid);
e65cfafc 2311 free(path);
cb083287 2312 if (our_cg) {
3145d497 2313 *existed = false;
b5b0232c 2314 goto delegate_files;
cb083287 2315 } else {
3145d497 2316 *existed = true;
cb083287
CB
2317 return false;
2318 }
df54106a 2319 }
df54106a 2320
e65cfafc
CB
2321 created = mkdir_p(v2->mountpoint, path);
2322 if (!created) {
2323 free(path);
df54106a 2324 return false;
e65cfafc
CB
2325 }
2326
cb083287 2327 /* chown cgroup to user */
e65cfafc 2328 if (chown(path, uid, gid) < 0)
b36273fa
CB
2329 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2330 path, (int)uid, (int)gid, strerror(errno), NULL);
cb083287
CB
2331 else
2332 lxcfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
2333 free(path);
2334
b5b0232c 2335delegate_files:
cb083287
CB
2336 /* chown cgroup.procs to user */
2337 if (v2->systemd_user_slice)
2338 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2339 "/cgroup.procs", NULL);
2340 else
2341 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2342 "/cgroup.procs", NULL);
b5b0232c
CB
2343 ret = chown(path, uid, gid);
2344 if (ret < 0)
cb083287
CB
2345 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2346 path, (int)uid, (int)gid, strerror(errno), NULL);
2347 else
2348 lxcfs_debug("Chowned %s to %d:%d.\n", path, (int)uid, (int)gid);
e65cfafc 2349 free(path);
df54106a 2350
b5b0232c
CB
2351 /* chown cgroup.subtree_control to user */
2352 if (v2->systemd_user_slice)
2353 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2354 "/cgroup.subtree_control", NULL);
2355 else
2356 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2357 "/cgroup.subtree_control", NULL);
2358 ret = chown(path, uid, gid);
2359 if (ret < 0)
2360 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2361 path, (int)uid, (int)gid, strerror(errno), NULL);
2362 free(path);
2363
2364 /* chown cgroup.threads to user */
2365 if (v2->systemd_user_slice)
2366 path = must_make_path(v2->mountpoint, v2->base_cgroup,
2367 "/cgroup.threads", NULL);
2368 else
2369 path = must_make_path(v2->mountpoint, v2->base_cgroup, cgroup,
2370 "/cgroup.threads", NULL);
2371 ret = chown(path, uid, gid);
2372 if (ret < 0 && errno != ENOENT)
2373 mysyslog(LOG_WARNING, "Failed to chown %s to %d:%d: %s.\n",
2374 path, (int)uid, (int)gid, strerror(errno), NULL);
2375 free(path);
2376
df54106a
SH
2377 return true;
2378}
2379
e65cfafc
CB
2380/* Create writeable cgroups for @user at login. Details can be found in the
2381 * preamble/license at the top of this file.
2382 */
2383static int handle_login(const char *user, uid_t uid, gid_t gid)
df54106a
SH
2384{
2385 int idx = 0, ret;
2386 bool existed;
df54106a 2387 char cg[MAXPATHLEN];
78a2a9f3 2388
e65cfafc 2389 cg_escape();
df54106a
SH
2390
2391 while (idx >= 0) {
2392 ret = snprintf(cg, MAXPATHLEN, "/user/%s/%d", user, idx);
2393 if (ret < 0 || ret >= MAXPATHLEN) {
e65cfafc
CB
2394 mysyslog(LOG_ERR, "Username too long.\n", NULL);
2395 return PAM_SESSION_ERR;
2396 }
2397
2398 existed = false;
2399 if (!cgv2_create(cg, uid, gid, &existed)) {
2400 if (existed) {
2401 cgv2_remove(cg);
2402 idx++;
2403 continue;
2404 }
2405 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
df54106a
SH
2406 return PAM_SESSION_ERR;
2407 }
2408
56ee748c 2409 existed = false;
e65cfafc 2410 if (!cgv1_create(cg, uid, gid, &existed)) {
56ee748c 2411 if (existed) {
e65cfafc 2412 cgv2_remove(cg);
56ee748c
SH
2413 idx++;
2414 continue;
2415 }
e65cfafc 2416 mysyslog(LOG_ERR, "Failed to create a cgroup for user %s.\n", user, NULL);
df54106a
SH
2417 return PAM_SESSION_ERR;
2418 }
2419
e65cfafc
CB
2420 if (!cg_enter(cg)) {
2421 mysyslog( LOG_ERR, "Failed to enter user cgroup %s for user %s.\n", cg, user, NULL);
df54106a
SH
2422 return PAM_SESSION_ERR;
2423 }
2424 break;
2425 }
2426
2427 return PAM_SUCCESS;
2428}
2429
e65cfafc
CB
2430/* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2431 * hierarchies.
2432 */
2433static bool cgv1_prune_empty_cgroups(const char *user)
2434{
2435 bool controller_removed = true;
2436 bool all_removed = true;
2437 struct cgv1_hierarchy **it;
2438
2439 for (it = cgv1_hierarchies; it && *it; it++) {
2440 int ret;
2441 char *path_base, *path_init;
2442 char **controller;
2443
2444 if (!(*it)->controllers || !(*it)->mountpoint ||
2445 !(*it)->init_cgroup || !(*it)->create_rw_cgroup)
2446 continue;
2447
2448 for (controller = (*it)->controllers; controller && *controller;
2449 controller++) {
2450 bool path_base_rm, path_init_rm;
2451
2452 path_base = must_make_path((*it)->mountpoint, (*it)->base_cgroup, "/user", user, NULL);
2453 lxcfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_base);
2454 ret = recursive_rmdir(path_base);
2455 if (ret == -ENOENT || ret >= 0)
2456 path_base_rm = true;
2457 else
2458 path_base_rm = false;
2459 free(path_base);
2460
2461 path_init = must_make_path((*it)->mountpoint, (*it)->init_cgroup, "/user", user, NULL);
2462 lxcfs_debug("cgroupfs v1: Trying to prune \"%s\".\n", path_init);
2463 ret = recursive_rmdir(path_init);
2464 if (ret == -ENOENT || ret >= 0)
2465 path_init_rm = true;
2466 else
2467 path_init_rm = false;
2468 free(path_init);
2469
2470 if (!path_base_rm && !path_init_rm) {
2471 controller_removed = false;
2472 continue;
2473 }
2474
2475 controller_removed = true;
2476 break;
2477 }
2478 if (!controller_removed)
2479 all_removed = false;
2480 }
2481
2482 return all_removed;
2483}
2484
2485/* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2486 * hierarchy.
2487 */
2488static bool cgv2_prune_empty_cgroups(const char *user)
df54106a 2489{
df54106a 2490 int ret;
e65cfafc
CB
2491 struct cgv2_hierarchy *v2;
2492 char *path_base, *path_init;
2493 bool path_base_rm, path_init_rm;
df54106a 2494
e65cfafc
CB
2495 if (!cgv2_hierarchies)
2496 return true;
2497
2498 v2 = *cgv2_hierarchies;
2499
2500 path_base = must_make_path(v2->mountpoint, v2->base_cgroup, "/user", user, NULL);
2501 lxcfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_base);
2502 ret = recursive_rmdir(path_base);
2503 if (ret == -ENOENT || ret >= 0)
2504 path_base_rm = true;
2505 else
2506 path_base_rm = false;
2507 free(path_base);
2508
2509 path_init = must_make_path(v2->mountpoint, v2->init_cgroup, "/user", user, NULL);
2510 lxcfs_debug("cgroupfs v2: Trying to prune \"%s\".\n", path_init);
2511 ret = recursive_rmdir(path_init);
2512 if (ret == -ENOENT || ret >= 0)
2513 path_init_rm = true;
2514 else
2515 path_init_rm = false;
2516 free(path_init);
2517
2518 if (!path_base_rm && !path_init_rm)
2519 return false;
2520
2521 return true;
2522}
2523
/* Prune the cgroups of @user from the cgroupfs v1 hierarchies first and from
 * the cgroupfs v2 hierarchy second. The results of both calls are
 * deliberately discarded.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);

	(void)cgv2_prune_empty_cgroups(user);
}
2530
2531/* Free allocated information for detected cgroupfs v1 hierarchies. */
2532static void cgv1_free_hierarchies(void)
2533{
2534 struct cgv1_hierarchy **it;
2535
2536 if (!cgv1_hierarchies)
2537 return;
2538
2539 for (it = cgv1_hierarchies; it && *it; it++) {
2540 if ((*it)->controllers) {
2541 char **tmp;
2542 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2543 free(*tmp);
2544
2545 free((*it)->controllers);
2546 }
2547 free((*it)->mountpoint);
2548 free((*it)->base_cgroup);
2549 free((*it)->fullcgpath);
2550 free((*it)->init_cgroup);
df54106a 2551 }
e65cfafc
CB
2552 free(cgv1_hierarchies);
2553}
df54106a 2554
e65cfafc
CB
2555/* Free allocated information for the detected cgroupfs v2 hierarchy. */
2556static void cgv2_free_hierarchies(void)
2557{
2558 struct cgv2_hierarchy **it;
2559
2560 if (!cgv2_hierarchies)
2561 return;
2562
2563 for (it = cgv2_hierarchies; it && *it; it++) {
2564 if ((*it)->controllers) {
2565 char **tmp;
2566 for (tmp = (*it)->controllers; tmp && *tmp; tmp++)
2567 free(*tmp);
2568
2569 free((*it)->controllers);
2570 }
2571 free((*it)->mountpoint);
2572 free((*it)->base_cgroup);
2573 free((*it)->fullcgpath);
2574 free((*it)->init_cgroup);
2575 }
2576 free(cgv2_hierarchies);
2577}
2578
/* Release the bookkeeping for the cgroupfs v1 hierarchies first and for the
 * cgroupfs v2 hierarchy second.
 */
static void cg_exit(void)
{
	cgv1_free_hierarchies();

	cgv2_free_hierarchies();
}
2585
2586int pam_sm_open_session(pam_handle_t *pamh, int flags, int argc,
2587 const char **argv)
2588{
2589 int ret;
2590 uid_t uid = 0;
2591 gid_t gid = 0;
2592 const char *PAM_user = NULL;
df54106a
SH
2593
2594 ret = pam_get_user(pamh, &PAM_user, NULL);
2595 if (ret != PAM_SUCCESS) {
e65cfafc 2596 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
df54106a
SH
2597 return PAM_SESSION_ERR;
2598 }
2599
e65cfafc
CB
2600 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2601 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
2602 return PAM_SESSION_ERR;
2603 }
df54106a 2604
e65cfafc
CB
2605 if (!cg_init(uid, gid)) {
2606 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2607 return PAM_SESSION_ERR;
df54106a 2608 }
df54106a 2609
e65cfafc
CB
2610 /* Try to prune cgroups, that are actually empty but were still marked
2611 * as busy by the kernel so we couldn't remove them on session close.
2612 */
2613 cg_prune_empty_cgroups(PAM_user);
2614
2615 if (cg_mount_mode == CGROUP_UNKNOWN)
2616 return PAM_SESSION_ERR;
2617
7559c0b6
AS
2618 if (argc > 1 && !strcmp(argv[0], "-c")) {
2619 char **clist = make_string_list(argv[1], ",");
2620
2621 /*
2622 * We don't allow using "all" and other controllers explicitly because
2623 * that simply doesn't make any sense.
2624 */
2625 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2626 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'.\n", NULL);
2627 free_string_list(clist);
2628 return PAM_SESSION_ERR;
2629 }
2630
2631 cg_mark_to_make_rw(clist);
2632 free_string_list(clist);
2633 }
df54106a 2634
e65cfafc 2635 return handle_login(PAM_user, uid, gid);
df54106a
SH
2636}
2637
2638int pam_sm_close_session(pam_handle_t *pamh, int flags, int argc,
2639 const char **argv)
2640{
e65cfafc
CB
2641 int ret;
2642 uid_t uid = 0;
2643 gid_t gid = 0;
df54106a 2644 const char *PAM_user = NULL;
df54106a 2645
e65cfafc 2646 ret = pam_get_user(pamh, &PAM_user, NULL);
df54106a 2647 if (ret != PAM_SUCCESS) {
e65cfafc
CB
2648 mysyslog(LOG_ERR, "PAM-CGFS: couldn't get user\n", NULL);
2649 return PAM_SESSION_ERR;
2650 }
2651
2652 if (!get_uid_gid(PAM_user, &uid, &gid)) {
2653 mysyslog(LOG_ERR, "Failed to get uid and gid for %s.\n", PAM_user, NULL);
df54106a
SH
2654 return PAM_SESSION_ERR;
2655 }
2656
e65cfafc
CB
2657 if (cg_mount_mode == CGROUP_UNINITIALIZED) {
2658 if (!cg_init(uid, gid))
2659 mysyslog(LOG_ERR, "Failed to get list of controllers\n", NULL);
2660
7559c0b6
AS
2661 if (argc > 1 && !strcmp(argv[0], "-c")) {
2662 char **clist = make_string_list(argv[1], ",");
2663
2664 /*
2665 * We don't allow using "all" and other controllers explicitly because
2666 * that simply doesn't make any sense.
2667 */
2668 if (string_list_length(clist) > 1 && string_in_list(clist, "all")) {
2669 mysyslog(LOG_ERR, "Invalid -c option, cannot specify individual controllers alongside 'all'.\n", NULL);
2670 free_string_list(clist);
2671 return PAM_SESSION_ERR;
2672 }
2673
2674 cg_mark_to_make_rw(clist);
2675 free_string_list(clist);
2676 }
df54106a
SH
2677 }
2678
e65cfafc
CB
2679 cg_prune_empty_cgroups(PAM_user);
2680 cg_exit();
2681
df54106a
SH
2682 return PAM_SUCCESS;
2683}