]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
3 * Copyright © 2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 * Author: Christian Brauner <christian.brauner@ubuntu.com>
7 * When a user logs in, this pam module will create cgroups which the user may
8 * administer. It handles both pure cgroupfs v1 and pure cgroupfs v2, as well as
9 * mixed mounts, where some controllers are mounted in a standard cgroupfs v1
10 * hierarchy location (/sys/fs/cgroup/<controller>) and others are in the
11 * cgroupfs v2 hierarchy.
12 * Writeable cgroups are either created for all controllers or, if specified,
13 * for any controllers listed on the command line.
14 * The cgroup created will be "user/$user/0" for the first session,
15 * "user/$user/1" for the second, etc.
17 * Systems with a systemd init system are treated specially, both with respect
18 * to cgroupfs v1 and cgroupfs v2. For both cgroupfs v1 and cgroupfs v2, we
19 * check whether systemd already placed us in a cgroup it created:
21 * user.slice/user-uid.slice/session-n.scope
23 * by checking whether uid == our uid. If it did, we simply chown the last
24 * part (session-n.scope). If it did not we create a cgroup as outlined above
25 * (user/$user/n) and chown it to our uid.
26 * The same holds for cgroupfs v2, where this assumption becomes crucial:
27 * We __have to__ be placed under the cgroup systemd created for us on
28 * login, otherwise things like starting an xserver or similar will not work.
30 * All requested cgroups must be mounted under /sys/fs/cgroup/$controller,
31 * no messing around with finding mountpoints.
33 * See COPYING file for details.
43 #include <linux/unistd.h>
51 #include <sys/mount.h>
52 #include <sys/param.h>
54 #include <sys/types.h>
63 #define PAM_SM_SESSION
64 #include <security/_pam_macros.h>
65 #include <security/pam_modules.h>
68 #include "include/strlcpy.h"
72 #include "include/strlcat.h"
75 #define pam_cgfs_debug_stream(stream, format, ...) \
77 fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
78 __func__, __VA_ARGS__); \
81 #define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)
84 #define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
86 #define pam_cgfs_debug(format, ...)
89 /* Adapted from the kernel sources. */
90 #define NBITS 32 /* bits in uint32_t */
91 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
92 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)
94 static enum cg_mount_mode
{
99 CGROUP_UNINITIALIZED
= 3,
100 } cg_mount_mode
= CGROUP_UNINITIALIZED
;
102 /* Common helper functions. Most of these have been taken from LXC. */
103 static void append_line(char **dest
, size_t oldlen
, char *new, size_t newlen
);
104 static int append_null_to_list(void ***list
);
105 static void batch_realloc(char **mem
, size_t oldlen
, size_t newlen
);
106 static inline void clear_bit(unsigned bit
, uint32_t *bitarr
)
108 bitarr
[bit
/ NBITS
] &= ~(1 << (bit
% NBITS
));
110 static char *copy_to_eol(char *s
);
111 static void free_string_list(char **list
);
112 static char *get_mountpoint(char *line
);
113 static bool get_uid_gid(const char *user
, uid_t
*uid
, gid_t
*gid
);
114 static int handle_login(const char *user
, uid_t uid
, gid_t gid
);
115 static inline bool is_set(unsigned bit
, uint32_t *bitarr
)
117 return (bitarr
[bit
/ NBITS
] & (1 << (bit
% NBITS
))) != 0;
119 static bool is_lxcfs(const char *line
);
120 static bool is_cgv1(char *line
);
121 static bool is_cgv2(char *line
);
122 static void *must_alloc(size_t sz
);
123 static void must_add_to_list(char ***clist
, char *entry
);
124 static void must_append_controller(char **klist
, char **nlist
, char ***clist
,
126 static void must_append_string(char ***list
, char *entry
);
127 static void mysyslog(int err
, const char *format
, ...) __attribute__((sentinel
));
128 static char *read_file(char *fnam
);
129 static int read_from_file(const char *filename
, void* buf
, size_t count
);
130 static int recursive_rmdir(char *dirname
);
131 static inline void set_bit(unsigned bit
, uint32_t *bitarr
)
133 bitarr
[bit
/ NBITS
] |= (1 << (bit
% NBITS
));
135 static bool string_in_list(char **list
, const char *entry
);
136 static char *string_join(const char *sep
, const char **parts
, bool use_as_prefix
);
137 static void trim(char *s
);
138 static bool write_int(char *path
, int v
);
139 static ssize_t
write_nointr(int fd
, const void* buf
, size_t count
);
140 static int write_to_file(const char *filename
, const void *buf
, size_t count
,
143 /* cgroupfs prototypes. */
144 static bool cg_belongs_to_uid_gid(const char *path
, uid_t uid
, gid_t gid
);
145 static uint32_t *cg_cpumask(char *buf
, size_t nbits
);
146 static bool cg_copy_parent_file(char *path
, char *file
);
147 static char *cg_cpumask_to_cpulist(uint32_t *bitarr
, size_t nbits
);
148 static bool cg_enter(const char *cgroup
);
149 static void cg_escape(void);
150 static bool cg_filter_and_set_cpus(char *path
, bool am_initialized
);
151 static ssize_t
cg_get_max_cpus(char *cpulist
);
152 static int cg_get_version_of_mntpt(const char *path
);
153 static bool cg_init(uid_t uid
, gid_t gid
);
154 static void cg_mark_to_make_rw(char **list
);
155 static void cg_prune_empty_cgroups(const char *user
);
156 static bool cg_systemd_created_user_slice(const char *base_cgroup
,
157 const char *init_cgroup
,
158 const char *in
, uid_t uid
);
159 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint
,
160 const char *base_cgroup
, uid_t uid
,
162 bool systemd_user_slice
);
163 static bool cg_systemd_under_user_slice_1(const char *in
, uid_t uid
);
164 static bool cg_systemd_under_user_slice_2(const char *base_cgroup
,
165 const char *init_cgroup
, uid_t uid
);
166 static void cg_systemd_prune_init_scope(char *cg
);
167 static bool is_lxcfs(const char *line
);
169 /* cgroupfs v1 prototypes. */
170 struct cgv1_hierarchy
{
176 bool create_rw_cgroup
;
177 bool systemd_user_slice
;
180 static struct cgv1_hierarchy
**cgv1_hierarchies
;
182 static void cgv1_add_controller(char **clist
, char *mountpoint
,
183 char *base_cgroup
, char *init_cgroup
);
184 static bool cgv1_controller_in_clist(char *cgline
, char *c
);
185 static bool cgv1_controller_lists_intersect(char **l1
, char **l2
);
186 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy
**hlist
,
188 static bool cgv1_create(const char *cgroup
, uid_t uid
, gid_t gid
,
190 static bool cgv1_create_one(struct cgv1_hierarchy
*h
, const char *cgroup
,
191 uid_t uid
, gid_t gid
, bool *existed
);
192 static bool cgv1_enter(const char *cgroup
);
193 static void cgv1_escape(void);
194 static bool cgv1_get_controllers(char ***klist
, char ***nlist
);
195 static char *cgv1_get_current_cgroup(char *basecginfo
, char *controller
);
196 static char **cgv1_get_proc_mountinfo_controllers(char **klist
, char **nlist
,
198 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy
*h
,
200 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy
*h
);
201 static bool cgv1_init(uid_t uid
, gid_t gid
);
202 static void cgv1_mark_to_make_rw(char **clist
);
203 static char *cgv1_must_prefix_named(char *entry
);
204 static bool cgv1_prune_empty_cgroups(const char *user
);
205 static bool cgv1_remove_one(struct cgv1_hierarchy
*h
, const char *cgroup
);
206 static bool is_cgv1(char *line
);
208 /* cgroupfs v2 prototypes. */
209 struct cgv2_hierarchy
{
215 bool create_rw_cgroup
;
216 bool systemd_user_slice
;
219 /* Actually this should only be a single hierarchy. But for the sake of
220 * parallelism and because the layout of the cgroupfs v2 is still somewhat
221 * changing, we'll leave it as an array of structs.
223 static struct cgv2_hierarchy
**cgv2_hierarchies
;
225 static void cgv2_add_controller(char **clist
, char *mountpoint
,
226 char *base_cgroup
, char *init_cgroup
,
227 bool systemd_user_slice
);
228 static bool cgv2_create(const char *cgroup
, uid_t uid
, gid_t gid
,
230 static bool cgv2_enter(const char *cgroup
);
231 static void cgv2_escape(void);
232 static char *cgv2_get_current_cgroup(int pid
);
233 static bool cgv2_init(uid_t uid
, gid_t gid
);
234 static void cgv2_mark_to_make_rw(char **clist
);
235 static bool cgv2_prune_empty_cgroups(const char *user
);
236 static bool cgv2_remove(const char *cgroup
);
237 static bool is_cgv2(char *line
);
239 static int do_mkdir(const char *path
, mode_t mode
)
246 r
= mkdir(path
, mode
);
253 /* Create directory and (if necessary) its parents. */
254 static bool mkdir_parent(const char *root
, char *path
)
258 if (strlen(path
) < strlen(root
))
261 if (strlen(path
) == strlen(root
))
264 b
= path
+ strlen(root
) + 1;
266 while (*b
&& (*b
== '/'))
272 while (*e
&& *e
!= '/')
279 if (file_exists(path
))
282 if (do_mkdir(path
, 0755) < 0) {
283 pam_cgfs_debug("Failed to create %s: %s\n", path
, strerror(errno
));
298 /* Common helper functions. Most of these have been taken from LXC. */
299 static void mysyslog(int err
, const char *format
, ...)
303 va_start(args
, format
);
304 openlog("PAM-CGFS", LOG_CONS
| LOG_PID
, LOG_AUTH
);
305 vsyslog(err
, format
, args
);
/* Grow *@mem in steps of BATCH_SIZE bytes so that it can hold @newlen bytes;
 * do not fail.
 *
 * @mem    : address of the buffer pointer; *@mem may be NULL.
 * @oldlen : length the buffer was previously sized for.
 * @newlen : length the buffer must now be able to hold.
 *
 * The buffer is only reallocated when it has not been allocated yet or when
 * @newlen falls into a later batch than @oldlen. Allocation is delegated to
 * must_realloc().
 */
#define BATCH_SIZE 50
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	/* Keep the batch counts in size_t: storing them in int would truncate
	 * large lengths and let the multiplication below overflow.
	 */
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches)
		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
}
/* Append @new unmodified to the buffer *@dest; do not fail.
 *
 * @dest   : address of the destination buffer pointer (grown as needed).
 * @oldlen : number of bytes already used in *@dest.
 * @new    : data to append.
 * @newlen : length of @new; @newlen + 1 bytes are copied, so the byte that
 *           follows the data in @new is carried over as well.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	size_t total = oldlen + newlen;
	char *tail;

	/* Make room for the combined data plus one trailing byte. */
	batch_realloc(dest, oldlen, total + 1);

	tail = *dest + oldlen;
	memcpy(tail, new, newlen + 1);
}
331 /* Read in whole file and return allocated pointer. */
332 static char *read_file(char *fnam
)
336 char *line
= NULL
, *buf
= NULL
;
337 size_t len
= 0, fulllen
= 0;
339 f
= fopen(fnam
, "r");
343 while ((linelen
= getline(&line
, &len
, f
)) != -1) {
344 append_line(&buf
, fulllen
, line
, linelen
);
354 /* Given a pointer to a null-terminated array of pointers, realloc to add one
355 * entry, and point the new entry to NULL. Do not fail. Return the index to the
356 * second-to-last entry - that is, the one which is now available for use
357 * (keeping the list null-terminated).
359 static int append_null_to_list(void ***list
)
364 for (; (*list
)[newentry
]; newentry
++)
367 *list
= must_realloc(*list
, (newentry
+ 2) * sizeof(void **));
368 (*list
)[newentry
+ 1] = NULL
;
/* Append a copy of @entry to the null-terminated array of pointers *@list,
 * keeping the array null-terminated.
 *
 * The free slot is obtained from append_null_to_list() and filled with the
 * result of must_copy_string(@entry).
 */
static void must_append_string(char ***list, char *entry)
{
	int slot;
	char *dup;

	slot = append_null_to_list((void ***)list);
	dup = must_copy_string(entry);

	(*list)[slot] = dup;
}
386 /* Remove newlines from string. */
387 static void trim(char *s
)
389 size_t len
= strlen(s
);
391 while ((len
> 0) && s
[len
- 1] == '\n')
/* Allocate @sz bytes by handing a NULL pointer to must_realloc(); do not
 * fail.
 */
static void *must_alloc(size_t sz)
{
	void *fresh = must_realloc(NULL, sz);

	return fresh;
}
401 /* Make allocated copy of string. End of string is taken to be '\n'. */
402 static char *copy_to_eol(char *s
)
404 char *newline
, *sret
;
407 newline
= strchr(s
, '\n');
412 sret
= must_alloc(len
+ 1);
413 memcpy(sret
, s
, len
);
419 /* Check if given entry under /proc/<pid>/mountinfo is a fuse.lxcfs mount. */
420 static bool is_lxcfs(const char *line
)
422 char *p
= strstr(line
, " - ");
426 return strncmp(p
, " - fuse.lxcfs ", 14) == 0;
429 /* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v1 mount. */
430 static bool is_cgv1(char *line
)
432 char *p
= strstr(line
, " - ");
436 return strncmp(p
, " - cgroup ", 10) == 0;
439 /* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v2 mount. */
440 static bool is_cgv2(char *line
)
442 char *p
= strstr(line
, " - ");
446 return strncmp(p
, " - cgroup2 ", 11) == 0;
449 /* Given a null-terminated array of strings, check whether @entry is one of the
452 static bool string_in_list(char **list
, const char *entry
)
456 for (it
= list
; it
&& *it
; it
++)
457 if (strcmp(*it
, entry
) == 0)
464 * Creates a null-terminated array of strings, made by splitting the entries in
465 * @str on each @sep. Caller is responsible for calling free_string_list.
467 static char **make_string_list(const char *str
, const char *sep
)
470 char *saveptr
= NULL
;
473 copy
= must_copy_string(str
);
475 for (tok
= strtok_r(copy
, sep
, &saveptr
); tok
;
476 tok
= strtok_r(NULL
, sep
, &saveptr
))
477 must_add_to_list(&clist
, tok
);
484 /* Gets the length of a null-terminated array of strings. */
485 static size_t string_list_length(char **list
)
490 for (it
= list
; it
&& *it
; it
++)
496 /* Free null-terminated array of strings. */
497 static void free_string_list(char **list
)
501 for (it
= list
; it
&& *it
; it
++)
506 /* Write single integer to file. */
507 static bool write_int(char *path
, int v
)
512 f
= fopen(path
, "w");
516 if (fprintf(f
, "%d\n", v
) < 0)
525 /* Recursively remove directory and its subdirectories. */
526 static int recursive_rmdir(char *dirname
)
528 struct dirent
*direntp
;
532 dir
= opendir(dirname
);
536 while ((direntp
= readdir(dir
))) {
540 if (!strcmp(direntp
->d_name
, ".") ||
541 !strcmp(direntp
->d_name
, ".."))
544 pathname
= must_make_path(dirname
, direntp
->d_name
, NULL
);
546 if (lstat(pathname
, &st
)) {
548 pam_cgfs_debug("Failed to stat %s\n", pathname
);
553 if (!S_ISDIR(st
.st_mode
))
556 if (recursive_rmdir(pathname
) < 0)
563 if (rmdir(dirname
) < 0) {
565 pam_cgfs_debug("Failed to delete %s: %s\n", dirname
, strerror(errno
));
569 if (closedir(dir
) < 0) {
571 pam_cgfs_debug("Failed to delete %s: %s\n", dirname
, strerror(errno
));
/* Add a copy of @entry to the null-terminated array of pointers *@clist,
 * keeping the array null-terminated.
 *
 * The free slot is obtained from append_null_to_list() and filled with the
 * result of must_copy_string(@entry).
 */
static void must_add_to_list(char ***clist, char *entry)
{
	int slot = append_null_to_list((void ***)clist);
	char *dup = must_copy_string(entry);

	(*clist)[slot] = dup;
}
589 /* Get mountpoint from a /proc/<pid>/mountinfo line. */
590 static char *get_mountpoint(char *line
)
598 for (i
= 0; i
< 4; i
++) {
610 sret
= must_alloc(len
+ 1);
611 memcpy(sret
, p
, len
);
617 /* Create list of cgroupfs v1 controller found under /proc/self/cgroup. Skips
618 * the 0::/some/path cgroupfs v2 hierarchy listed. Splits controllers into
619 * kernel controllers (@klist) and named controllers (@nlist).
621 static bool cgv1_get_controllers(char ***klist
, char ***nlist
)
627 f
= fopen("/proc/self/cgroup", "r");
631 while (getline(&line
, &len
, f
) != -1) {
633 char *saveptr
= NULL
;
635 p
= strchr(line
, ':');
645 /* Skip the v2 hierarchy. */
649 for (tok
= strtok_r(p
, ",", &saveptr
); tok
;
650 tok
= strtok_r(NULL
, ",", &saveptr
)) {
651 if (strncmp(tok
, "name=", 5) == 0)
652 must_append_string(nlist
, tok
);
654 must_append_string(klist
, tok
);
664 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
665 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
666 static bool cgv2_get_controllers(char ***klist)
672 /* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
673 static char *cgv2_get_current_cgroup(int pid
)
677 char *current_cgroup
;
679 /* The largest integer that can fit into long int is 2^64. This is a
680 * 20-digit number. */
681 #define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
684 ret
= snprintf(path
, __PIDLEN
, "/proc/%d/cgroup", pid
);
685 if (ret
< 0 || ret
>= __PIDLEN
)
688 cgroups_v2
= read_file(path
);
692 current_cgroup
= strstr(cgroups_v2
, "0::/");
696 current_cgroup
= current_cgroup
+ 3;
697 copy
= copy_to_eol(current_cgroup
);
709 /* Given two null-terminated lists of strings, return true if any string is in
712 static bool cgv1_controller_lists_intersect(char **l1
, char **l2
)
719 for (it
= l1
; it
&& *it
; it
++)
720 if (string_in_list(l2
, *it
))
726 /* For a null-terminated list of controllers @clist, return true if any of those
727 * controllers is already listed in the null-terminated list of hierarchies @hlist.
728 * Realistically, if one is present, all must be present.
730 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy
**hlist
, char **clist
)
732 struct cgv1_hierarchy
**it
;
734 for (it
= hlist
; it
&& *it
; it
++)
735 if ((*it
)->controllers
)
736 if (cgv1_controller_lists_intersect((*it
)->controllers
, clist
))
743 /* Set boolean to mark controllers under which we are supposed create a
746 static void cgv1_mark_to_make_rw(char **clist
)
748 struct cgv1_hierarchy
**it
;
750 for (it
= cgv1_hierarchies
; it
&& *it
; it
++)
751 if ((*it
)->controllers
)
752 if (cgv1_controller_lists_intersect((*it
)->controllers
, clist
) ||
753 string_in_list(clist
, "all"))
754 (*it
)->create_rw_cgroup
= true;
757 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
758 * the cgroupfs v2 hierarchy.
760 static void cgv2_mark_to_make_rw(char **clist
)
762 if (string_in_list(clist
, "unified") || string_in_list(clist
, "all"))
763 if (cgv2_hierarchies
)
764 (*cgv2_hierarchies
)->create_rw_cgroup
= true;
/* Mark the hierarchies selected by @clist as writeable: first the cgroupfs v1
 * hierarchies via cgv1_mark_to_make_rw(), then the cgroupfs v2 hierarchy via
 * cgv2_mark_to_make_rw().
 */
static void cg_mark_to_make_rw(char **clist)
{
	cgv1_mark_to_make_rw(clist);
	cgv2_mark_to_make_rw(clist);
}
774 /* Prefix any named controllers with "name=", e.g. "name=systemd". */
775 static char *cgv1_must_prefix_named(char *entry
)
782 s
= must_alloc(len
+ 6);
784 ret
= snprintf(s
, len
+ 6, "name=%s", entry
);
785 if (ret
< 0 || (size_t)ret
>= (len
+ 6)) {
793 /* Append kernel controller in @klist or named controller in @nlist to @clist */
794 static void must_append_controller(char **klist
, char **nlist
, char ***clist
, char *entry
)
799 if (string_in_list(klist
, entry
) && string_in_list(nlist
, entry
))
802 newentry
= append_null_to_list((void ***)clist
);
804 if (strncmp(entry
, "name=", 5) == 0)
805 copy
= must_copy_string(entry
);
806 else if (string_in_list(klist
, entry
))
807 copy
= must_copy_string(entry
);
809 copy
= cgv1_must_prefix_named(entry
);
811 (*clist
)[newentry
] = copy
;
814 /* Get the controllers from a mountinfo line. There are other ways we could get
815 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
816 * could parse the mount options. But we simply assume that the mountpoint must
817 * be /sys/fs/cgroup/controller-list
819 static char **cgv1_get_proc_mountinfo_controllers(char **klist
, char **nlist
, char *line
)
823 char *saveptr
= NULL
;
828 for (i
= 0; i
< 4; i
++) {
835 if (strncmp(p
, "/sys/fs/cgroup/", 15) != 0)
845 for (tok
= strtok_r(p
, ",", &saveptr
); tok
;
846 tok
= strtok_r(NULL
, ",", &saveptr
))
847 must_append_controller(klist
, nlist
, &aret
, tok
);
852 /* Check if a cgroupfs v1 controller is present in the string @cgline. */
853 static bool cgv1_controller_in_clist(char *cgline
, char *c
)
856 char *tok
, *eol
, *tmp
;
857 char *saveptr
= NULL
;
859 eol
= strchr(cgline
, ':');
864 tmp
= alloca(len
+ 1);
865 memcpy(tmp
, cgline
, len
);
868 for (tok
= strtok_r(tmp
, ",", &saveptr
); tok
;
869 tok
= strtok_r(NULL
, ",", &saveptr
)) {
870 if (strcmp(tok
, c
) == 0)
877 /* Get current cgroup from the /proc/<pid>/cgroup file passed in via @basecginfo
878 * of a given cgv1 controller passed in via @controller.
880 static char *cgv1_get_current_cgroup(char *basecginfo
, char *controller
)
892 if (cgv1_controller_in_clist(p
, controller
)) {
898 return copy_to_eol(p
);
910 /* Remove /init.scope from string @cg. This will mostly affect systemd-based
913 #define INIT_SCOPE "/init.scope"
914 static void cg_systemd_prune_init_scope(char *cg
)
921 point
= cg
+ strlen(cg
) - strlen(INIT_SCOPE
);
925 if (strcmp(point
, INIT_SCOPE
) == 0) {
933 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
934 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
935 * hierarchy (/sys/fs/cgroup/<controller>, the base cgroup of the current
936 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
937 * from /proc/1/cgroup.
939 static void cgv1_add_controller(char **clist
, char *mountpoint
, char *base_cgroup
, char *init_cgroup
)
941 struct cgv1_hierarchy
*new;
944 new = must_alloc(sizeof(*new));
946 new->controllers
= clist
;
947 new->mountpoint
= mountpoint
;
948 new->base_cgroup
= base_cgroup
;
949 new->fullcgpath
= NULL
;
950 new->create_rw_cgroup
= false;
951 new->init_cgroup
= init_cgroup
;
952 new->systemd_user_slice
= false;
954 newentry
= append_null_to_list((void ***)&cgv1_hierarchies
);
955 cgv1_hierarchies
[newentry
] = new;
958 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
959 * currently) include the controllers mounted into the hierarchy (e.g. memory,
960 * pids, blkio), the mountpoint of that hierarchy (Should usually be
961 * /sys/fs/cgroup but some init systems seems to think it might be a good idea
962 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
963 * base cgroup of the current process gathered from /proc/self/cgroup, and the
964 * init cgroup of PID1 gathered from /proc/1/cgroup.
966 static void cgv2_add_controller(char **clist
, char *mountpoint
, char *base_cgroup
, char *init_cgroup
, bool systemd_user_slice
)
968 struct cgv2_hierarchy
*new;
971 new = must_alloc(sizeof(*new));
973 new->controllers
= clist
;
974 new->mountpoint
= mountpoint
;
975 new->base_cgroup
= base_cgroup
;
976 new->fullcgpath
= NULL
;
977 new->create_rw_cgroup
= false;
978 new->init_cgroup
= init_cgroup
;
979 new->systemd_user_slice
= systemd_user_slice
;
981 newentry
= append_null_to_list((void ***)&cgv2_hierarchies
);
982 cgv2_hierarchies
[newentry
] = new;
985 /* In Ubuntu 14.04, the paths created for us were
986 * '/user/$uid.user/$something.session' This can be merged better with
987 * systemd_created_slice_for_us(), but keeping it separate makes it easier to
988 * reason about the correctness.
990 static bool cg_systemd_under_user_slice_1(const char *in
, uid_t uid
)
998 copy
= must_copy_string(in
);
999 if (strlen(copy
) < strlen("/user/1.user/1.session"))
1001 p
= copy
+ strlen(copy
) - 1;
1003 /* skip any trailing '/' (shouldn't be any, but be sure) */
1004 while (p
>= copy
&& *p
== '/')
1009 /* Get last path element */
1010 while (p
>= copy
&& *p
!= '/')
1015 /* make sure it is something.session */
1016 len
= strlen(p
+ 1);
1017 if (len
< strlen("1.session") ||
1018 strncmp(p
+ 1 + len
- 8, ".session", 8) != 0)
1021 /* ok last path piece checks out, now check the second to last */
1023 while (p
>= copy
&& *(--p
) != '/')
1026 if (sscanf(p
+ 1, "%d.user/", &id
) != 1)
1039 /* So long as our path relative to init starts with /user.slice/user-$uid.slice,
1040 * assume it belongs to $uid and chown it
1042 static bool cg_systemd_under_user_slice_2(const char *base_cgroup
,
1043 const char *init_cgroup
, uid_t uid
)
1047 size_t curlen
, initlen
;
1049 curlen
= strlen(base_cgroup
);
1050 initlen
= strlen(init_cgroup
);
1051 if (curlen
<= initlen
)
1054 if (strncmp(base_cgroup
, init_cgroup
, initlen
) != 0)
1057 ret
= snprintf(buf
, 100, "/user.slice/user-%d.slice/", (int)uid
);
1058 if (ret
< 0 || ret
>= 100)
1062 initlen
= 0; // skip the '/'
1064 return strncmp(base_cgroup
+ initlen
, buf
, strlen(buf
)) == 0;
1067 /* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
1068 * is not the end of our systemd path, then we're not part of the PAM call that
1069 * created that path.
1071 * The last piece is chowned to $uid, the user- part not.
1072 * Note: If the user creates paths that look like what we're looking for to
1074 * - they fool us, we create new cgroups, and they get auto-logged-out.
1075 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
1076 * lose ownership of their cgroups
1078 static bool cg_systemd_created_user_slice(const char *base_cgroup
,
1079 const char *init_cgroup
,
1080 const char *in
, uid_t uid
)
1088 copy
= must_copy_string(in
);
1090 /* An old version of systemd has already created a cgroup for us. */
1091 if (cg_systemd_under_user_slice_1(in
, uid
))
1094 /* A new version of systemd has already created a cgroup for us. */
1095 if (cg_systemd_under_user_slice_2(base_cgroup
, init_cgroup
, uid
))
1098 if (strlen(copy
) < strlen("/user-0.slice/session-0.scope"))
1101 p
= copy
+ strlen(copy
) - 1;
1102 /* Skip any trailing '/' (shouldn't be any, but be sure). */
1103 while (p
>= copy
&& *p
== '/')
1109 /* Get last path element */
1110 while (p
>= copy
&& *p
!= '/')
1116 /* Make sure it is session-something.scope. */
1117 len
= strlen(p
+ 1);
1118 if (strncmp(p
+ 1, "session-", strlen("session-")) != 0 ||
1119 strncmp(p
+ 1 + len
- 6, ".scope", 6) != 0)
1122 /* Ok last path piece checks out, now check the second to last. */
1124 while (p
>= copy
&& *(--p
) != '/')
1127 if (sscanf(p
+ 1, "user-%d.slice/", &id
) != 1)
1141 /* Chown existing cgroup that systemd has already created for us. */
1142 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint
,
1143 const char *base_cgroup
, uid_t uid
,
1144 gid_t gid
, bool systemd_user_slice
)
1148 if (!systemd_user_slice
)
1151 path
= must_make_path(mountpoint
, base_cgroup
, NULL
);
1153 /* A cgroup within name=systemd has already been created. So we only
1156 if (chown(path
, uid
, gid
) < 0)
1157 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
1158 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
1159 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
1165 /* Detect and store information about cgroupfs v1 hierarchies. */
1166 static bool cgv1_init(uid_t uid
, gid_t gid
)
1169 struct cgv1_hierarchy
**it
;
1172 char **klist
= NULL
, **nlist
= NULL
;
1175 basecginfo
= read_file("/proc/self/cgroup");
1179 f
= fopen("/proc/self/mountinfo", "r");
1185 cgv1_get_controllers(&klist
, &nlist
);
1187 while (getline(&line
, &len
, f
) != -1) {
1188 char **controller_list
= NULL
;
1189 char *mountpoint
, *base_cgroup
;
1191 if (is_lxcfs(line
) || !is_cgv1(line
))
1194 controller_list
= cgv1_get_proc_mountinfo_controllers(klist
, nlist
, line
);
1195 if (!controller_list
)
1198 if (cgv1_controller_list_is_dup(cgv1_hierarchies
, controller_list
)) {
1199 free(controller_list
);
1203 mountpoint
= get_mountpoint(line
);
1205 free_string_list(controller_list
);
1209 base_cgroup
= cgv1_get_current_cgroup(basecginfo
, controller_list
[0]);
1211 free_string_list(controller_list
);
1217 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1218 "mountpoint \"%s\" and cgroup \"%s\"\n",
1219 controller_list
[0], mountpoint
, base_cgroup
);
1220 cgv1_add_controller(controller_list
, mountpoint
, base_cgroup
, NULL
);
1223 free_string_list(klist
);
1224 free_string_list(nlist
);
1229 /* Retrieve init cgroup path for all controllers. */
1230 basecginfo
= read_file("/proc/1/cgroup");
1234 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
1235 if ((*it
)->controllers
) {
1236 char *init_cgroup
, *user_slice
;
1238 /* We've already stored the controller and received its
1239 * current cgroup. If we now fail to retrieve its init
1240 * cgroup, we should probably fail.
1242 init_cgroup
= cgv1_get_current_cgroup(basecginfo
, (*it
)->controllers
[0]);
1248 cg_systemd_prune_init_scope(init_cgroup
);
1249 (*it
)->init_cgroup
= init_cgroup
;
1250 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1252 (*(*it
)->controllers
), init_cgroup
);
1254 /* Check whether systemd has already created a cgroup
1257 user_slice
= must_make_path((*it
)->mountpoint
, (*it
)->base_cgroup
, NULL
);
1258 if (cg_systemd_created_user_slice((*it
)->base_cgroup
, (*it
)->init_cgroup
, user_slice
, uid
))
1259 (*it
)->systemd_user_slice
= true;
1269 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1270 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1272 static inline int cg_get_version_of_mntpt(const char *path
)
1274 if (has_fs_type(path
, CGROUP_SUPER_MAGIC
))
1277 if (has_fs_type(path
, CGROUP2_SUPER_MAGIC
))
1283 /* Detect and store information about the cgroupfs v2 hierarchy. Currently only
1284 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
1286 static bool cgv2_init(uid_t uid
, gid_t gid
)
1290 char *current_cgroup
= NULL
, *init_cgroup
= NULL
;
1295 current_cgroup
= cgv2_get_current_cgroup(getpid());
1296 if (!current_cgroup
) {
1297 /* No v2 hierarchy present. We're done. */
1302 init_cgroup
= cgv2_get_current_cgroup(1);
1304 /* If we're here and didn't fail already above, then something's
1305 * certainly wrong, so error this time.
1310 cg_systemd_prune_init_scope(init_cgroup
);
1312 /* Check if the v2 hierarchy is mounted at its standard location.
1313 * If so we can skip the rest of the work here. Although the unified
1314 * hierarchy can be mounted multiple times, each of those mountpoints
1315 * will expose identical information.
1317 if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
1319 bool has_user_slice
= false;
1321 mountpoint
= must_copy_string("/sys/fs/cgroup");
1325 user_slice
= must_make_path(mountpoint
, current_cgroup
, NULL
);
1326 if (cg_systemd_created_user_slice(current_cgroup
, init_cgroup
, user_slice
, uid
))
1327 has_user_slice
= true;
1330 cgv2_add_controller(NULL
, mountpoint
, current_cgroup
, init_cgroup
, has_user_slice
);
1336 f
= fopen("/proc/self/mountinfo", "r");
1340 /* we support simple cgroup mounts and lxcfs mounts */
1341 while (getline(&line
, &len
, f
) != -1) {
1343 bool has_user_slice
= false;
1348 mountpoint
= get_mountpoint(line
);
1352 user_slice
= must_make_path(mountpoint
, current_cgroup
, NULL
);
1353 if (cg_systemd_created_user_slice(current_cgroup
, init_cgroup
, user_slice
, uid
))
1354 has_user_slice
= true;
1357 cgv2_add_controller(NULL
, mountpoint
, current_cgroup
, init_cgroup
, has_user_slice
);
1359 /* Although the unified hierarchy can be mounted multiple times,
1360 * each of those mountpoints will expose identical information.
1361 * So let the first mountpoint we find, win.
1367 pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
1368 "current cgroup \"%s\" and init cgroup \"%s\"\n",
1369 mountpoint
, current_cgroup
, init_cgroup
);
1378 free(current_cgroup
);
1384 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1385 * cgroupfs v2 hierarchy.
1386 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1387 * where some controllers are mounted into their standard cgroupfs v1 locations
1388 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1389 * hierarchy (/sys/fs/cgroup).
1391 static bool cg_init(uid_t uid
, gid_t gid
)
1393 if (!cgv1_init(uid
, gid
))
1396 if (!cgv2_init(uid
, gid
))
1399 if (cgv1_hierarchies
&& cgv2_hierarchies
) {
1400 cg_mount_mode
= CGROUP_MIXED
;
1401 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1402 } else if (cgv1_hierarchies
&& !cgv2_hierarchies
) {
1403 cg_mount_mode
= CGROUP_PURE_V1
;
1404 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1405 } else if (cgv2_hierarchies
&& !cgv1_hierarchies
) {
1406 cg_mount_mode
= CGROUP_PURE_V2
;
1407 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1409 cg_mount_mode
= CGROUP_UNKNOWN
;
1410 mysyslog(LOG_ERR
, "Could not detect cgroupfs hierarchy\n", NULL
);
1413 if (cg_mount_mode
== CGROUP_UNKNOWN
)
1419 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1420 static bool cgv1_enter(const char *cgroup
)
1422 struct cgv1_hierarchy
**it
;
1424 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
1426 bool entered
= false;
1428 if (!(*it
)->controllers
|| !(*it
)->mountpoint
||
1429 !(*it
)->init_cgroup
|| !(*it
)->create_rw_cgroup
)
1432 for (controller
= (*it
)->controllers
; controller
&& *controller
;
1436 /* We've already been placed in a user slice, so we
1437 * don't need to enter the cgroup again.
1439 if ((*it
)->systemd_user_slice
) {
1444 path
= must_make_path((*it
)->mountpoint
,
1449 if (!file_exists(path
)) {
1451 path
= must_make_path((*it
)->mountpoint
,
1458 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path
);
1459 entered
= write_int(path
, (int)getpid());
1465 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path
);
1476 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1477 static bool cgv2_enter(const char *cgroup
)
1479 struct cgv2_hierarchy
*v2
;
1481 bool entered
= false;
1483 if (!cgv2_hierarchies
)
1486 v2
= *cgv2_hierarchies
;
1488 if (!v2
->mountpoint
|| !v2
->base_cgroup
)
1491 if (!v2
->create_rw_cgroup
|| v2
->systemd_user_slice
)
1494 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
, "/cgroup.procs", NULL
);
1495 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path
);
1497 entered
= write_int(path
, (int)getpid());
1499 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path
);
1509 /* Wrapper around cgv{1,2}_enter(). */
1510 static bool cg_enter(const char *cgroup
)
1512 if (!cgv1_enter(cgroup
)) {
1513 mysyslog(LOG_WARNING
, "cgroupfs v1: Failed to enter cgroups\n", NULL
);
1517 if (!cgv2_enter(cgroup
)) {
1518 mysyslog(LOG_WARNING
, "cgroupfs v2: Failed to enter cgroups\n", NULL
);
1525 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1526 static void cgv1_escape(void)
1528 struct cgv1_hierarchy
**it
;
1530 /* In case systemd hasn't already placed us in a user slice for the
1531 * cpuset v1 controller we will reside in the root cgroup. This means
1532 * that cgroup.clone_children will not have been initialized for us so
1535 for (it
= cgv1_hierarchies
; it
&& *it
; it
++)
1536 if (!cgv1_handle_root_cpuset_hierarchy(*it
))
1537 mysyslog(LOG_WARNING
, "cgroupfs v1: Failed to initialize cpuset\n", NULL
);
1539 if (!cgv1_enter("/"))
1540 mysyslog(LOG_WARNING
, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL
);
1543 /* Escape to root cgroup in the cgroupfs v2 hierarchy. */
1544 static void cgv2_escape(void)
1546 if (!cgv2_enter("/"))
1547 mysyslog(LOG_WARNING
, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL
);
1550 /* Wrapper around cgv{1,2}_escape(). */
1551 static void cg_escape(void)
1557 /* Get uid and gid for @user. */
1558 static bool get_uid_gid(const char *user
, uid_t
*uid
, gid_t
*gid
)
1560 struct passwd pwent
;
1561 struct passwd
*pwentp
= NULL
;
1566 bufsize
= sysconf(_SC_GETPW_R_SIZE_MAX
);
1570 buf
= malloc(bufsize
);
1574 ret
= getpwnam_r(user
, &pwent
, buf
, bufsize
, &pwentp
);
1578 "Could not find matched password record\n", NULL
);
1584 *uid
= pwent
.pw_uid
;
1585 *gid
= pwent
.pw_gid
;
1591 /* Check if cgroup belongs to our uid and gid. If so, reuse it. */
1592 static bool cg_belongs_to_uid_gid(const char *path
, uid_t uid
, gid_t gid
)
1594 struct stat statbuf
;
1596 if (stat(path
, &statbuf
) < 0)
1599 if (!(statbuf
.st_uid
== uid
) || !(statbuf
.st_gid
== gid
))
1605 /* Create cpumask from cpulist aka turn:
1613 static uint32_t *cg_cpumask(char *buf
, size_t nbits
)
1616 char *saveptr
= NULL
;
1617 size_t arrlen
= BITS_TO_LONGS(nbits
);
1618 uint32_t *bitarr
= calloc(arrlen
, sizeof(uint32_t));
1622 for (; (token
= strtok_r(buf
, ",", &saveptr
)); buf
= NULL
) {
1624 unsigned start
= strtoul(token
, NULL
, 0);
1625 unsigned end
= start
;
1627 char *range
= strchr(token
, '-');
1629 end
= strtoul(range
+ 1, NULL
, 0);
1631 if (!(start
<= end
)) {
1641 while (start
<= end
)
1642 set_bit(start
++, bitarr
);
1648 static char *string_join(const char *sep
, const char **parts
, bool use_as_prefix
)
1652 size_t sep_len
= strlen(sep
);
1653 size_t result_len
= use_as_prefix
* sep_len
;
1659 /* calculate new string length */
1660 for (p
= (char **)parts
; *p
; p
++)
1661 result_len
+= (p
> (char **)parts
) * sep_len
+ strlen(*p
);
1663 buf_len
= result_len
+ 1;
1664 result
= calloc(buf_len
, sizeof(char));
1669 (void)strlcpy(result
, sep
, buf_len
* sizeof(char));
1671 for (p
= (char **)parts
; *p
; p
++) {
1672 if (p
> (char **)parts
)
1673 (void)strlcat(result
, sep
, buf_len
* sizeof(char));
1675 (void)strlcat(result
, *p
, buf_len
* sizeof(char));
1681 /* The largest value that fits into an unsigned 64-bit integer is 2^64 - 1. This is a
1684 #define __IN_TO_STR_LEN 21
1685 /* Turn cpumask into simple, comma-separated cpulist. */
1686 static char *cg_cpumask_to_cpulist(uint32_t *bitarr
, size_t nbits
)
1690 char numstr
[__IN_TO_STR_LEN
] = {0};
1691 char **cpulist
= NULL
;
1693 for (i
= 0; i
<= nbits
; i
++) {
1694 if (is_set(i
, bitarr
)) {
1695 ret
= snprintf(numstr
, __IN_TO_STR_LEN
, "%zu", i
);
1696 if (ret
< 0 || (size_t)ret
>= __IN_TO_STR_LEN
) {
1697 free_string_list(cpulist
);
1701 must_append_string(&cpulist
, numstr
);
1705 return string_join(",", (const char **)cpulist
, false);
1708 static ssize_t
cg_get_max_cpus(char *cpulist
)
1711 char *maxcpus
= cpulist
;
1714 c1
= strrchr(maxcpus
, ',');
1718 c2
= strrchr(maxcpus
, '-');
1730 /* If the above logic is correct, c1 should always hold a valid string
1734 cpus
= strtoul(c1
, NULL
, 0);
1741 static ssize_t
write_nointr(int fd
, const void* buf
, size_t count
)
1746 ret
= write(fd
, buf
, count
);
1747 if (ret
< 0 && errno
== EINTR
)
1753 static int write_to_file(const char *filename
, const void* buf
, size_t count
, bool add_newline
)
1755 int fd
, saved_errno
;
1758 fd
= open(filename
, O_WRONLY
| O_TRUNC
| O_CREAT
| O_CLOEXEC
, 0666);
1762 ret
= write_nointr(fd
, buf
, count
);
1765 if ((size_t)ret
!= count
)
1769 ret
= write_nointr(fd
, "\n", 1);
1778 saved_errno
= errno
;
1780 errno
= saved_errno
;
1784 #define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
1785 static bool cg_filter_and_set_cpus(char *path
, bool am_initialized
)
1787 char *lastslash
, *fpath
, oldv
;
1791 ssize_t maxposs
= 0, maxisol
= 0;
1792 char *cpulist
= NULL
, *posscpus
= NULL
, *isolcpus
= NULL
;
1793 uint32_t *possmask
= NULL
, *isolmask
= NULL
;
1794 bool bret
= false, flipped_bit
= false;
1796 lastslash
= strrchr(path
, '/');
1797 if (!lastslash
) { // bug... this shouldn't be possible
1798 pam_cgfs_debug("Invalid path: %s\n", path
);
1805 fpath
= must_make_path(path
, "cpuset.cpus", NULL
);
1806 posscpus
= read_file(fpath
);
1808 pam_cgfs_debug("Could not read file: %s\n", fpath
);
1812 /* Get maximum number of cpus found in possible cpuset. */
1813 maxposs
= cg_get_max_cpus(posscpus
);
1814 if (maxposs
< 0 || maxposs
>= INT_MAX
- 1)
1817 if (!file_exists(__ISOL_CPUS
)) {
1818 /* This system doesn't expose isolated cpus. */
1819 pam_cgfs_debug("%s", "Path: "__ISOL_CPUS
" to read isolated cpus from does not exist\n");
1822 /* No isolated cpus but we weren't already initialized by
1823 * someone. We should simply copy the parent's cpuset.cpus
1826 if (!am_initialized
) {
1827 pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
1831 /* No isolated cpus but we were already initialized by someone.
1832 * Nothing more to do for us.
1837 isolcpus
= read_file(__ISOL_CPUS
);
1839 pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS
"\n");
1843 if (!isdigit(isolcpus
[0])) {
1844 pam_cgfs_debug("%s", "No isolated cpus detected\n");
1847 /* No isolated cpus but we weren't already initialized by
1848 * someone. We should simply copy the parent's cpuset.cpus
1851 if (!am_initialized
) {
1852 pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
1856 /* No isolated cpus but we were already initialized by someone.
1857 * Nothing more to do for us.
1862 /* Get maximum number of cpus found in isolated cpuset. */
1863 maxisol
= cg_get_max_cpus(isolcpus
);
1864 if (maxisol
< 0 || maxisol
>= INT_MAX
- 1)
1867 if (maxposs
< maxisol
)
1871 possmask
= cg_cpumask(posscpus
, maxposs
);
1873 pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
1877 isolmask
= cg_cpumask(isolcpus
, maxposs
);
1879 pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
1883 for (i
= 0; i
<= maxposs
; i
++) {
1884 if (is_set(i
, isolmask
) && is_set(i
, possmask
)) {
1886 clear_bit(i
, possmask
);
1891 pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
1894 pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");
1896 cpulist
= cg_cpumask_to_cpulist(possmask
, maxposs
);
1898 pam_cgfs_debug("%s", "Could not create cpu list\n");
1907 fpath
= must_make_path(path
, "cpuset.cpus", NULL
);
1908 ret
= write_to_file(fpath
, cpulist
, strlen(cpulist
), false);
1910 pam_cgfs_debug("Could not write cpu list to: %s\n", fpath
);
1924 if (posscpus
!= cpulist
)
1932 int read_from_file(const char *filename
, void* buf
, size_t count
)
1934 int fd
= -1, saved_errno
;
1937 fd
= open(filename
, O_RDONLY
| O_CLOEXEC
);
1941 if (!buf
|| !count
) {
1945 while ((ret
= read(fd
, buf2
, 100)) > 0)
1950 memset(buf
, 0, count
);
1951 ret
= read(fd
, buf
, count
);
1955 pam_cgfs_debug("read %s: %s", filename
, strerror(errno
));
1957 saved_errno
= errno
;
1959 errno
= saved_errno
;
1963 /* Copy contents of parent(@path)/@file to @path/@file */
1964 static bool cg_copy_parent_file(char *path
, char *file
)
1966 char *lastslash
, *value
= NULL
, *fpath
, oldv
;
1970 lastslash
= strrchr(path
, '/');
1971 if (!lastslash
) { // bug... this shouldn't be possible
1972 pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path
);
1979 fpath
= must_make_path(path
, file
, NULL
);
1980 len
= read_from_file(fpath
, NULL
, 0);
1984 value
= must_alloc(len
+ 1);
1985 if (read_from_file(fpath
, value
, len
) != len
)
1991 fpath
= must_make_path(path
, file
, NULL
);
1992 ret
= write_to_file(fpath
, value
, len
, false);
1994 pam_cgfs_debug("Unable to write %s to %s", value
, fpath
);
2001 pam_cgfs_debug("Error reading '%s'", fpath
);
2007 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
2008 * controller we will reside in the root cgroup. This means that
2009 * cgroup.clone_children will not have been initialized for us so we need to do
2012 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy
*h
)
2014 char *clonechildrenpath
, v
;
2016 if (!string_in_list(h
->controllers
, "cpuset"))
2019 clonechildrenpath
= must_make_path(h
->mountpoint
, "cgroup.clone_children", NULL
);
2021 if (read_from_file(clonechildrenpath
, &v
, 1) < 0) {
2022 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath
);
2023 free(clonechildrenpath
);
2027 if (v
== '1') { /* already set for us by someone else */
2028 free(clonechildrenpath
);
2032 if (write_to_file(clonechildrenpath
, "1", 1, false) < 0) {
2033 /* Set clone_children so children inherit our settings */
2034 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath
);
2035 free(clonechildrenpath
);
2039 free(clonechildrenpath
);
2044 * Initialize the cpuset hierarchy in the first directory of @cgroup and
2045 * set cgroup.clone_children so that children inherit settings.
2046 * Since h->base_cgroup is populated by init or ourselves, we know
2047 * it is already initialized.
2049 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy
*h
,
2052 char *cgpath
, *clonechildrenpath
, v
, *slash
;
2054 if (!string_in_list(h
->controllers
, "cpuset"))
2059 slash
= strchr(cgroup
, '/');
2063 cgpath
= must_make_path(h
->mountpoint
, h
->base_cgroup
, cgroup
, NULL
);
2067 if (do_mkdir(cgpath
, 0755) < 0 && errno
!= EEXIST
) {
2068 pam_cgfs_debug("Failed to create '%s'", cgpath
);
2073 clonechildrenpath
= must_make_path(cgpath
, "cgroup.clone_children", NULL
);
2074 if (!file_exists(clonechildrenpath
)) { /* unified hierarchy doesn't have clone_children */
2075 free(clonechildrenpath
);
2080 if (read_from_file(clonechildrenpath
, &v
, 1) < 0) {
2081 pam_cgfs_debug("Failed to read '%s'", clonechildrenpath
);
2082 free(clonechildrenpath
);
2087 /* Make sure any isolated cpus are removed from cpuset.cpus. */
2088 if (!cg_filter_and_set_cpus(cgpath
, v
== '1')) {
2089 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
2090 free(clonechildrenpath
);
2095 if (v
== '1') { /* already set for us by someone else */
2096 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
2097 free(clonechildrenpath
);
2102 /* copy parent's settings */
2103 if (!cg_copy_parent_file(cgpath
, "cpuset.mems")) {
2104 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
2106 free(clonechildrenpath
);
2111 if (write_to_file(clonechildrenpath
, "1", 1, false) < 0) {
2112 /* Set clone_children so children inherit our settings */
2113 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath
);
2114 free(clonechildrenpath
);
2117 free(clonechildrenpath
);
2121 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
2122 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
2123 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
2124 * to the caller in @existed.
2126 #define __PAM_CGFS_USER "/user/"
2127 #define __PAM_CGFS_USER_LEN 6
2128 static bool cgv1_create_one(struct cgv1_hierarchy
*h
, const char *cgroup
, uid_t uid
, gid_t gid
, bool *existed
)
2130 char *clean_base_cgroup
, *path
;
2132 struct cgv1_hierarchy
*it
;
2133 bool created
= false;
2138 for (controller
= it
->controllers
; controller
&& *controller
;
2140 if (!cgv1_handle_cpuset_hierarchy(it
, cgroup
))
2143 /* If systemd has already created a cgroup for us, keep using
2146 if (cg_systemd_chown_existing_cgroup(it
->mountpoint
,
2147 it
->base_cgroup
, uid
, gid
,
2148 it
->systemd_user_slice
))
2151 /* We need to make sure that we do not create an endless chain
2152 * of sub-cgroups. So we check if we have already logged in
2153 * somehow (sudo -i, su, etc.) and have created a
2154 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2155 * cgroups this is unnecessary since we use the init_cgroup
2156 * anyway, but for controllers which have an existing systemd
2157 * cgroup that does not match the current uid, this is pretty
2160 if (strncmp(it
->base_cgroup
, __PAM_CGFS_USER
, __PAM_CGFS_USER_LEN
) == 0) {
2161 free(it
->base_cgroup
);
2162 it
->base_cgroup
= must_copy_string("/");
2165 strstr(it
->base_cgroup
, __PAM_CGFS_USER
);
2166 if (clean_base_cgroup
)
2167 *clean_base_cgroup
= '\0';
2170 path
= must_make_path(it
->mountpoint
, it
->init_cgroup
, cgroup
, NULL
);
2171 pam_cgfs_debug("Constructing path: %s\n", path
);
2173 if (file_exists(path
)) {
2174 bool our_cg
= cg_belongs_to_uid_gid(path
, uid
, gid
);
2180 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2181 path
, our_cg
? "" : "not ", uid
, gid
);
2187 created
= mkdir_parent(it
->mountpoint
, path
);
2193 if (chown(path
, uid
, gid
) < 0)
2194 mysyslog(LOG_WARNING
,
2195 "Failed to chown %s to %d:%d: %s\n", path
,
2196 (int)uid
, (int)gid
, strerror(errno
), NULL
);
2198 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
2206 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2207 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2208 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2210 static bool cgv1_remove_one(struct cgv1_hierarchy
*h
, const char *cgroup
)
2215 /* Better safe than sorry. */
2216 if (!h
->controllers
)
2219 /* Cgroups created by systemd for us which we re-use won't be removed
2220 * here, since we're using init_cgroup + cgroup as path instead of
2221 * base_cgroup + cgroup.
2223 path
= must_make_path(h
->mountpoint
, h
->init_cgroup
, cgroup
, NULL
);
2224 (void)recursive_rmdir(path
);
2230 /* Try to remove @cgroup from the cgroupfs v2 hierarchy. */
2231 static bool cgv2_remove(const char *cgroup
)
2233 struct cgv2_hierarchy
*v2
;
2236 if (!cgv2_hierarchies
)
2239 v2
= *cgv2_hierarchies
;
2241 /* If we reused an already existing cgroup, don't bother trying to
2242 * remove (a potentially wrong)/the path.
2243 * Cgroups created by systemd for us which we re-use would be removed
2244 * here, since we're using base_cgroup + cgroup as path.
2246 if (v2
->systemd_user_slice
)
2249 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
, NULL
);
2250 (void)recursive_rmdir(path
);
2256 /* Create @cgroup in all detected cgroupfs v1 hierarchies. If the creation fails
2257 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2258 * back to the caller if the creation failed due to @cgroup already existing
2261 static bool cgv1_create(const char *cgroup
, uid_t uid
, gid_t gid
, bool *existed
)
2263 struct cgv1_hierarchy
**it
, **rev_it
;
2264 bool all_created
= true;
2266 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
2267 if (!(*it
)->controllers
|| !(*it
)->mountpoint
||
2268 !(*it
)->init_cgroup
|| !(*it
)->create_rw_cgroup
)
2271 if (!cgv1_create_one(*it
, cgroup
, uid
, gid
, existed
)) {
2272 all_created
= false;
2280 for (rev_it
= cgv1_hierarchies
; rev_it
&& *rev_it
&& (*rev_it
!= *it
);
2282 cgv1_remove_one(*rev_it
, cgroup
);
2287 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back to the caller if
2288 * the creation failed due to @cgroup already existing via @existed.
2290 static bool cgv2_create(const char *cgroup
, uid_t uid
, gid_t gid
, bool *existed
)
2293 char *clean_base_cgroup
;
2295 struct cgv2_hierarchy
*v2
;
2296 bool our_cg
= false, created
= false;
2300 if (!cgv2_hierarchies
|| !(*cgv2_hierarchies
)->create_rw_cgroup
)
2303 v2
= *cgv2_hierarchies
;
2305 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2306 * to be placed under our current cgroup.
2308 if (cg_systemd_chown_existing_cgroup(v2
->mountpoint
, v2
->base_cgroup
,
2309 uid
, gid
, v2
->systemd_user_slice
))
2310 goto delegate_files
;
2312 /* We need to make sure that we do not create an endless chain of
2313 * sub-cgroups. So we check if we have already logged in somehow (sudo
2314 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2317 if (strncmp(v2
->base_cgroup
, __PAM_CGFS_USER
, __PAM_CGFS_USER_LEN
) == 0) {
2318 free(v2
->base_cgroup
);
2319 v2
->base_cgroup
= must_copy_string("/");
2321 clean_base_cgroup
= strstr(v2
->base_cgroup
, __PAM_CGFS_USER
);
2322 if (clean_base_cgroup
)
2323 *clean_base_cgroup
= '\0';
2326 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
, NULL
);
2327 pam_cgfs_debug("Constructing path \"%s\"\n", path
);
2329 if (file_exists(path
)) {
2330 our_cg
= cg_belongs_to_uid_gid(path
, uid
, gid
);
2331 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2332 path
, our_cg
? "" : "not ", uid
, gid
);
2336 goto delegate_files
;
2343 created
= mkdir_parent(v2
->mountpoint
, path
);
2349 /* chown cgroup to user */
2350 if (chown(path
, uid
, gid
) < 0)
2351 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2352 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2354 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
2358 /* chown cgroup.procs to user */
2359 if (v2
->systemd_user_slice
)
2360 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
,
2361 "/cgroup.procs", NULL
);
2363 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
,
2364 "/cgroup.procs", NULL
);
2366 ret
= chown(path
, uid
, gid
);
2368 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2369 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2371 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
2374 /* chown cgroup.subtree_control to user */
2375 if (v2
->systemd_user_slice
)
2376 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
,
2377 "/cgroup.subtree_control", NULL
);
2379 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
,
2380 "/cgroup.subtree_control", NULL
);
2382 ret
= chown(path
, uid
, gid
);
2384 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2385 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2388 /* chown cgroup.threads to user */
2389 if (v2
->systemd_user_slice
)
2390 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
,
2391 "/cgroup.threads", NULL
);
2393 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
,
2394 "/cgroup.threads", NULL
);
2395 ret
= chown(path
, uid
, gid
);
2396 if (ret
< 0 && errno
!= ENOENT
)
2397 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2398 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2404 /* Create writeable cgroups for @user at login. Details can be found in the
2405 * preamble/license at the top of this file.
2407 static int handle_login(const char *user
, uid_t uid
, gid_t gid
)
2411 char cg
[MAXPATHLEN
];
2416 ret
= snprintf(cg
, MAXPATHLEN
, "/user/%s/%d", user
, idx
);
2417 if (ret
< 0 || ret
>= MAXPATHLEN
) {
2418 mysyslog(LOG_ERR
, "Username too long\n", NULL
);
2419 return PAM_SESSION_ERR
;
2423 if (!cgv2_create(cg
, uid
, gid
, &existed
)) {
2430 mysyslog(LOG_ERR
, "Failed to create a cgroup for user %s\n", user
, NULL
);
2431 return PAM_SESSION_ERR
;
2435 if (!cgv1_create(cg
, uid
, gid
, &existed
)) {
2442 mysyslog(LOG_ERR
, "Failed to create a cgroup for user %s\n", user
, NULL
);
2443 return PAM_SESSION_ERR
;
2446 if (!cg_enter(cg
)) {
2447 mysyslog( LOG_ERR
, "Failed to enter user cgroup %s for user %s\n", cg
, user
, NULL
);
2448 return PAM_SESSION_ERR
;
2457 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2460 static bool cgv1_prune_empty_cgroups(const char *user
)
2462 bool controller_removed
= true;
2463 bool all_removed
= true;
2464 struct cgv1_hierarchy
**it
;
2466 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
2468 char *path_base
, *path_init
;
2471 if (!(*it
)->controllers
|| !(*it
)->mountpoint
||
2472 !(*it
)->init_cgroup
|| !(*it
)->create_rw_cgroup
)
2475 for (controller
= (*it
)->controllers
; controller
&& *controller
;
2477 bool path_base_rm
, path_init_rm
;
2479 path_base
= must_make_path((*it
)->mountpoint
, (*it
)->base_cgroup
, "/user", user
, NULL
);
2480 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base
);
2482 ret
= recursive_rmdir(path_base
);
2483 if (ret
== -ENOENT
|| ret
>= 0)
2484 path_base_rm
= true;
2486 path_base_rm
= false;
2489 path_init
= must_make_path((*it
)->mountpoint
, (*it
)->init_cgroup
, "/user", user
, NULL
);
2490 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init
);
2492 ret
= recursive_rmdir(path_init
);
2493 if (ret
== -ENOENT
|| ret
>= 0)
2494 path_init_rm
= true;
2496 path_init_rm
= false;
2499 if (!path_base_rm
&& !path_init_rm
) {
2500 controller_removed
= false;
2504 controller_removed
= true;
2508 if (!controller_removed
)
2509 all_removed
= false;
2515 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2518 static bool cgv2_prune_empty_cgroups(const char *user
)
2521 struct cgv2_hierarchy
*v2
;
2522 char *path_base
, *path_init
;
2523 bool path_base_rm
, path_init_rm
;
2525 if (!cgv2_hierarchies
)
2528 v2
= *cgv2_hierarchies
;
2530 path_base
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, "/user", user
, NULL
);
2531 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base
);
2533 ret
= recursive_rmdir(path_base
);
2534 if (ret
== -ENOENT
|| ret
>= 0)
2535 path_base_rm
= true;
2537 path_base_rm
= false;
2540 path_init
= must_make_path(v2
->mountpoint
, v2
->init_cgroup
, "/user", user
, NULL
);
2541 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init
);
2543 ret
= recursive_rmdir(path_init
);
2544 if (ret
== -ENOENT
|| ret
>= 0)
2545 path_init_rm
= true;
2547 path_init_rm
= false;
2550 if (!path_base_rm
&& !path_init_rm
)
2556 /* Wrapper around cgv{1,2}_prune_empty_cgroups(). */
2557 static void cg_prune_empty_cgroups(const char *user
)
2559 (void)cgv1_prune_empty_cgroups(user
);
2560 (void)cgv2_prune_empty_cgroups(user
);
2563 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2564 static void cgv1_free_hierarchies(void)
2566 struct cgv1_hierarchy
**it
;
2568 if (!cgv1_hierarchies
)
2571 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
2572 if ((*it
)->controllers
) {
2574 for (tmp
= (*it
)->controllers
; tmp
&& *tmp
; tmp
++)
2577 free((*it
)->controllers
);
2580 free((*it
)->mountpoint
);
2581 free((*it
)->base_cgroup
);
2582 free((*it
)->fullcgpath
);
2583 free((*it
)->init_cgroup
);
2586 free(cgv1_hierarchies
);
2589 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2590 static void cgv2_free_hierarchies(void)
2592 struct cgv2_hierarchy
**it
;
2594 if (!cgv2_hierarchies
)
2597 for (it
= cgv2_hierarchies
; it
&& *it
; it
++) {
2598 if ((*it
)->controllers
) {
2601 for (tmp
= (*it
)->controllers
; tmp
&& *tmp
; tmp
++)
2604 free((*it
)->controllers
);
2607 free((*it
)->mountpoint
);
2608 free((*it
)->base_cgroup
);
2609 free((*it
)->fullcgpath
);
2610 free((*it
)->init_cgroup
);
2613 free(cgv2_hierarchies
);
2616 /* Wrapper around cgv{1,2}_free_hierarchies(). */
2617 static void cg_exit(void)
2619 cgv1_free_hierarchies();
2620 cgv2_free_hierarchies();
2623 int pam_sm_open_session(pam_handle_t
*pamh
, int flags
, int argc
,
2629 const char *PAM_user
= NULL
;
2631 ret
= pam_get_user(pamh
, &PAM_user
, NULL
);
2632 if (ret
!= PAM_SUCCESS
) {
2633 mysyslog(LOG_ERR
, "PAM-CGFS: couldn't get user\n", NULL
);
2634 return PAM_SESSION_ERR
;
2637 if (!get_uid_gid(PAM_user
, &uid
, &gid
)) {
2638 mysyslog(LOG_ERR
, "Failed to get uid and gid for %s\n", PAM_user
, NULL
);
2639 return PAM_SESSION_ERR
;
2642 if (!cg_init(uid
, gid
)) {
2643 mysyslog(LOG_ERR
, "Failed to get list of controllers\n", NULL
);
2644 return PAM_SESSION_ERR
;
2647 /* Try to prune cgroups that are actually empty but were still marked
2648 * as busy by the kernel so we couldn't remove them on session close.
2650 cg_prune_empty_cgroups(PAM_user
);
2652 if (cg_mount_mode
== CGROUP_UNKNOWN
)
2653 return PAM_SESSION_ERR
;
2655 if (argc
> 1 && !strcmp(argv
[0], "-c")) {
2656 char **clist
= make_string_list(argv
[1], ",");
2659 * We don't allow using "all" and other controllers explicitly because
2660 * that simply doesn't make any sense.
2662 if (string_list_length(clist
) > 1 && string_in_list(clist
, "all")) {
2663 mysyslog(LOG_ERR
, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL
);
2664 free_string_list(clist
);
2665 return PAM_SESSION_ERR
;
2668 cg_mark_to_make_rw(clist
);
2669 free_string_list(clist
);
2672 return handle_login(PAM_user
, uid
, gid
);
2675 int pam_sm_close_session(pam_handle_t
*pamh
, int flags
, int argc
,
2681 const char *PAM_user
= NULL
;
2683 ret
= pam_get_user(pamh
, &PAM_user
, NULL
);
2684 if (ret
!= PAM_SUCCESS
) {
2685 mysyslog(LOG_ERR
, "PAM-CGFS: couldn't get user\n", NULL
);
2686 return PAM_SESSION_ERR
;
2689 if (!get_uid_gid(PAM_user
, &uid
, &gid
)) {
2690 mysyslog(LOG_ERR
, "Failed to get uid and gid for %s\n", PAM_user
, NULL
);
2691 return PAM_SESSION_ERR
;
2694 if (cg_mount_mode
== CGROUP_UNINITIALIZED
) {
2695 if (!cg_init(uid
, gid
))
2696 mysyslog(LOG_ERR
, "Failed to get list of controllers\n", NULL
);
2698 if (argc
> 1 && !strcmp(argv
[0], "-c")) {
2699 char **clist
= make_string_list(argv
[1], ",");
2702 * We don't allow using "all" and other controllers explicitly because
2703 * that simply doesn't make any sense.
2705 if (string_list_length(clist
) > 1 && string_in_list(clist
, "all")) {
2706 mysyslog(LOG_ERR
, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL
);
2707 free_string_list(clist
);
2708 return PAM_SESSION_ERR
;
2711 cg_mark_to_make_rw(clist
);
2712 free_string_list(clist
);
2716 cg_prune_empty_cgroups(PAM_user
);