]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/pam/pam_cgfs.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #include <linux/unistd.h>
17 #include <sys/mount.h>
18 #include <sys/param.h>
20 #include <sys/types.h>
25 #include "file_utils.h"
27 #include "memory_utils.h"
28 #include "string_utils.h"
30 #define PAM_SM_SESSION
31 #include <security/_pam_macros.h>
32 #include <security/pam_modules.h>
42 #define pam_cgfs_debug_stream(stream, format, ...) \
44 fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__, \
45 __func__, __VA_ARGS__); \
48 #define pam_cgfs_error(format, ...) pam_cgfs_debug_stream(stderr, format, __VA_ARGS__)
51 #define pam_cgfs_debug(format, ...) pam_cgfs_error(format, __VA_ARGS__)
53 #define pam_cgfs_debug(format, ...) \
58 static enum cg_mount_mode
{
63 CGROUP_UNINITIALIZED
= 3,
64 } cg_mount_mode
= CGROUP_UNINITIALIZED
;
66 /* Common helper functions. Most of these have been taken from LXC. */
67 static void append_line(char **dest
, size_t oldlen
, char *new, size_t newlen
);
68 static int append_null_to_list(void ***list
);
69 static void batch_realloc(char **mem
, size_t oldlen
, size_t newlen
);
70 static char *copy_to_eol(char *s
);
71 static char *get_mountpoint(char *line
);
72 static bool get_uid_gid(const char *user
, uid_t
*uid
, gid_t
*gid
);
73 static int handle_login(const char *user
, uid_t uid
, gid_t gid
);
74 static bool is_lxcfs(const char *line
);
75 static bool is_cgv1(char *line
);
76 static bool is_cgv2(char *line
);
77 static void must_add_to_list(char ***clist
, char *entry
);
78 static void must_append_controller(char **klist
, char **nlist
, char ***clist
,
80 static void must_append_string(char ***list
, char *entry
);
81 static void mysyslog(int err
, const char *format
, ...) __attribute__((sentinel
));
82 static char *read_file(char *fnam
);
83 static int recursive_rmdir(char *dirname
);
84 static bool string_in_list(char **list
, const char *entry
);
85 static char *string_join(const char *sep
, const char **parts
, bool use_as_prefix
);
86 static void trim(char *s
);
87 static bool write_int(char *path
, int v
);
89 /* cgroupfs prototypes. */
90 static bool cg_belongs_to_uid_gid(const char *path
, uid_t uid
, gid_t gid
);
91 static uint32_t *cg_cpumask(char *buf
, size_t nbits
);
92 static bool cg_copy_parent_file(char *path
, char *file
);
93 static char *cg_cpumask_to_cpulist(uint32_t *bitarr
, size_t nbits
);
94 static bool cg_enter(const char *cgroup
);
95 static void cg_escape(void);
96 static bool cg_filter_and_set_cpus(char *path
, bool am_initialized
);
97 static ssize_t
cg_get_max_cpus(char *cpulist
);
98 static int cg_get_version_of_mntpt(const char *path
);
99 static bool cg_init(uid_t uid
, gid_t gid
);
100 static void cg_mark_to_make_rw(char **list
);
101 static void cg_prune_empty_cgroups(const char *user
);
102 static bool cg_systemd_created_user_slice(const char *base_cgroup
,
103 const char *init_cgroup
,
104 const char *in
, uid_t uid
);
105 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint
,
106 const char *base_cgroup
, uid_t uid
,
108 bool systemd_user_slice
);
109 static bool cg_systemd_under_user_slice_1(const char *in
, uid_t uid
);
110 static bool cg_systemd_under_user_slice_2(const char *base_cgroup
,
111 const char *init_cgroup
, uid_t uid
);
112 static void cg_systemd_prune_init_scope(char *cg
);
113 static bool is_lxcfs(const char *line
);
115 /* cgroupfs v1 prototypes. */
116 struct cgv1_hierarchy
{
122 bool create_rw_cgroup
;
123 bool systemd_user_slice
;
126 static struct cgv1_hierarchy
**cgv1_hierarchies
;
128 static void cgv1_add_controller(char **clist
, char *mountpoint
,
129 char *base_cgroup
, char *init_cgroup
);
130 static bool cgv1_controller_in_clist(char *cgline
, char *c
);
131 static bool cgv1_controller_lists_intersect(char **l1
, char **l2
);
132 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy
**hlist
,
134 static bool cgv1_create(const char *cgroup
, uid_t uid
, gid_t gid
,
136 static bool cgv1_create_one(struct cgv1_hierarchy
*h
, const char *cgroup
,
137 uid_t uid
, gid_t gid
, bool *existed
);
138 static bool cgv1_enter(const char *cgroup
);
139 static void cgv1_escape(void);
140 static bool cgv1_get_controllers(char ***klist
, char ***nlist
);
141 static char *cgv1_get_current_cgroup(char *basecginfo
, char *controller
);
142 static char **cgv1_get_proc_mountinfo_controllers(char **klist
, char **nlist
,
144 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy
*h
,
146 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy
*h
);
147 static bool cgv1_init(uid_t uid
, gid_t gid
);
148 static void cgv1_mark_to_make_rw(char **clist
);
149 static char *cgv1_must_prefix_named(char *entry
);
150 static bool cgv1_prune_empty_cgroups(const char *user
);
151 static bool cgv1_remove_one(struct cgv1_hierarchy
*h
, const char *cgroup
);
152 static bool is_cgv1(char *line
);
154 /* cgroupfs v2 prototypes. */
155 struct cgv2_hierarchy
{
161 bool create_rw_cgroup
;
162 bool systemd_user_slice
;
165 /* Actually this should only be a single hierarchy. But for the sake of
166 * parallelism and because the layout of the cgroupfs v2 is still somewhat
167 * changing, we'll leave it as an array of structs.
169 static struct cgv2_hierarchy
**cgv2_hierarchies
;
171 static void cgv2_add_controller(char **clist
, char *mountpoint
,
172 char *base_cgroup
, char *init_cgroup
,
173 bool systemd_user_slice
);
174 static bool cgv2_create(const char *cgroup
, uid_t uid
, gid_t gid
,
176 static bool cgv2_enter(const char *cgroup
);
177 static void cgv2_escape(void);
178 static char *cgv2_get_current_cgroup(int pid
);
179 static bool cgv2_init(uid_t uid
, gid_t gid
);
180 static void cgv2_mark_to_make_rw(char **clist
);
181 static bool cgv2_prune_empty_cgroups(const char *user
);
182 static bool cgv2_remove(const char *cgroup
);
183 static bool is_cgv2(char *line
);
185 static int do_mkdir(const char *path
, mode_t mode
)
192 r
= mkdir(path
, mode
);
199 /* Create directory and (if necessary) its parents. */
200 static bool lxc_mkdir_parent(const char *root
, char *path
)
204 if (strlen(path
) < strlen(root
))
207 if (strlen(path
) == strlen(root
))
210 b
= path
+ strlen(root
) + 1;
212 while (*b
&& (*b
== '/'))
218 while (*e
&& *e
!= '/')
225 if (file_exists(path
))
228 if (do_mkdir(path
, 0755) < 0) {
229 pam_cgfs_debug("Failed to create %s: %s\n", path
, strerror(errno
));
244 /* Common helper functions. Most of these have been taken from LXC. */
245 static void mysyslog(int err
, const char *format
, ...)
249 va_start(args
, format
);
250 #pragma GCC diagnostic push
251 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
252 openlog("PAM-CGFS", LOG_CONS
| LOG_PID
, LOG_AUTH
);
253 vsyslog(err
, format
, args
);
254 #pragma GCC diagnostic pop
259 /* realloc() pointer in batch sizes; do not fail. */
260 #define BATCH_SIZE 50
261 static void batch_realloc(char **mem
, size_t oldlen
, size_t newlen
)
263 int newbatches
= (newlen
/ BATCH_SIZE
) + 1;
264 int oldbatches
= (oldlen
/ BATCH_SIZE
) + 1;
266 if (!*mem
|| newbatches
> oldbatches
)
267 *mem
= must_realloc(*mem
, newbatches
* BATCH_SIZE
);
270 /* Append lines as is to pointer; do not fail. */
271 static void append_line(char **dest
, size_t oldlen
, char *new, size_t newlen
)
273 size_t full
= oldlen
+ newlen
;
275 batch_realloc(dest
, oldlen
, full
+ 1);
277 memcpy(*dest
+ oldlen
, new, newlen
+ 1);
280 /* Read in whole file and return allocated pointer. */
281 static char *read_file(char *fnam
)
285 char *line
= NULL
, *buf
= NULL
;
286 size_t len
= 0, fulllen
= 0;
288 f
= fopen(fnam
, "r");
292 while ((linelen
= getline(&line
, &len
, f
)) != -1) {
293 append_line(&buf
, fulllen
, line
, linelen
);
303 /* Given a pointer to a null-terminated array of pointers, realloc to add one
304 * entry, and point the new entry to NULL. Do not fail. Return the index to the
305 * second-to-last entry - that is, the one which is now available for use
306 * (keeping the list null-terminated).
308 static int append_null_to_list(void ***list
)
313 for (; (*list
)[newentry
]; newentry
++)
316 *list
= must_realloc(*list
, (newentry
+ 2) * sizeof(void **));
317 (*list
)[newentry
+ 1] = NULL
;
322 /* Append new entry to null-terminated array of pointers; make sure that array of
323 * pointers will still be null-terminated.
325 static void must_append_string(char ***list
, char *entry
)
330 newentry
= append_null_to_list((void ***)list
);
331 copy
= must_copy_string(entry
);
332 (*list
)[newentry
] = copy
;
335 /* Remove newlines from string. */
336 static void trim(char *s
)
338 size_t len
= strlen(s
);
340 while ((len
> 0) && s
[len
- 1] == '\n')
344 /* Make allocated copy of string. End of string is taken to be '\n'. */
345 static char *copy_to_eol(char *s
)
347 char *newline
, *sret
;
350 newline
= strchr(s
, '\n');
355 sret
= must_realloc(NULL
, len
+ 1);
356 memcpy(sret
, s
, len
);
362 /* Check if given entry under /proc/<pid>/mountinfo is a fuse.lxcfs mount. */
363 static bool is_lxcfs(const char *line
)
365 char *p
= strstr(line
, " - ");
369 return strncmp(p
, " - fuse.lxcfs ", 14) == 0;
372 /* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v1 mount. */
373 static bool is_cgv1(char *line
)
375 char *p
= strstr(line
, " - ");
379 return strncmp(p
, " - cgroup ", 10) == 0;
382 /* Check if given entry under /proc/<pid>/mountinfo is a cgroupfs v2 mount. */
383 static bool is_cgv2(char *line
)
385 char *p
= strstr(line
, " - ");
389 return strncmp(p
, " - cgroup2 ", 11) == 0;
392 /* Given a null-terminated array of strings, check whether @entry is one of the
395 static bool string_in_list(char **list
, const char *entry
)
399 for (it
= list
; it
&& *it
; it
++)
400 if (strcmp(*it
, entry
) == 0)
407 * Creates a null-terminated array of strings, made by splitting the entries in
408 * @str on each @sep. Caller is responsible for calling free_string_list.
410 static char **make_string_list(const char *str
, const char *sep
)
413 char *saveptr
= NULL
;
416 copy
= must_copy_string(str
);
418 for (tok
= strtok_r(copy
, sep
, &saveptr
); tok
;
419 tok
= strtok_r(NULL
, sep
, &saveptr
))
420 must_add_to_list(&clist
, tok
);
427 /* Gets the length of a null-terminated array of strings. */
428 static size_t string_list_length(char **list
)
433 for (it
= list
; it
&& *it
; it
++)
439 /* Write single integer to file. */
440 static bool write_int(char *path
, int v
)
445 f
= fopen(path
, "w");
449 if (fprintf(f
, "%d\n", v
) < 0)
458 /* Recursively remove directory and all of its subdirectories. */
459 static int recursive_rmdir(char *dirname
)
461 __do_closedir
DIR *dir
= NULL
;
462 struct dirent
*direntp
;
465 dir
= opendir(dirname
);
469 while ((direntp
= readdir(dir
))) {
473 if (!strcmp(direntp
->d_name
, ".") ||
474 !strcmp(direntp
->d_name
, ".."))
477 pathname
= must_make_path(dirname
, direntp
->d_name
, NULL
);
479 if (lstat(pathname
, &st
)) {
481 pam_cgfs_debug("Failed to stat %s\n", pathname
);
486 if (!S_ISDIR(st
.st_mode
))
489 if (recursive_rmdir(pathname
) < 0)
496 if (rmdir(dirname
) < 0) {
498 pam_cgfs_debug("Failed to delete %s: %s\n", dirname
, strerror(errno
));
505 /* Add new entry to null-terminated array of pointers. Make sure array is still
508 static void must_add_to_list(char ***clist
, char *entry
)
512 newentry
= append_null_to_list((void ***)clist
);
513 (*clist
)[newentry
] = must_copy_string(entry
);
516 /* Get mountpoint from a /proc/<pid>/mountinfo line. */
517 static char *get_mountpoint(char *line
)
525 for (i
= 0; i
< 4; i
++) {
537 sret
= must_realloc(NULL
, len
+ 1);
538 memcpy(sret
, p
, len
);
544 /* Create list of cgroupfs v1 controllers found under /proc/self/cgroup. Skips
545 * the 0::/some/path cgroupfs v2 hierarchy listed. Splits controllers into
546 * kernel controllers (@klist) and named controllers (@nlist).
548 static bool cgv1_get_controllers(char ***klist
, char ***nlist
)
554 f
= fopen("/proc/self/cgroup", "r");
558 while (getline(&line
, &len
, f
) != -1) {
560 char *saveptr
= NULL
;
562 p
= strchr(line
, ':');
572 /* Skip the v2 hierarchy. */
576 for (tok
= strtok_r(p
, ",", &saveptr
); tok
;
577 tok
= strtok_r(NULL
, ",", &saveptr
)) {
578 if (strncmp(tok
, "name=", 5) == 0)
579 must_append_string(nlist
, tok
);
581 must_append_string(klist
, tok
);
591 /* Get list of controllers for cgroupfs v2 hierarchy by looking at
592 * cgroup.controllers and/or cgroup.subtree_control of a given (parent) cgroup.
593 static bool cgv2_get_controllers(char ***klist)
599 /* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
600 static char *cgv2_get_current_cgroup(int pid
)
604 char *current_cgroup
;
606 /* The largest value that can fit into a 64-bit integer is 2^64 - 1. This is a
607 * 20-digit number. */
608 #define __PIDLEN /* /proc */ 5 + /* /pid-to-str */ 21 + /* /cgroup */ 7 + /* \0 */ 1
611 ret
= snprintf(path
, __PIDLEN
, "/proc/%d/cgroup", pid
);
612 if (ret
< 0 || ret
>= __PIDLEN
)
615 cgroups_v2
= read_file(path
);
619 current_cgroup
= strstr(cgroups_v2
, "0::/");
623 current_cgroup
= current_cgroup
+ 3;
624 copy
= copy_to_eol(current_cgroup
);
636 /* Given two null-terminated lists of strings, return true if any string is in
639 static bool cgv1_controller_lists_intersect(char **l1
, char **l2
)
646 for (it
= l1
; it
&& *it
; it
++)
647 if (string_in_list(l2
, *it
))
653 /* For a null-terminated list of controllers @clist, return true if any of those
654 * controllers is already listed in the null-terminated list of hierarchies @hlist.
655 * Realistically, if one is present, all must be present.
657 static bool cgv1_controller_list_is_dup(struct cgv1_hierarchy
**hlist
, char **clist
)
659 struct cgv1_hierarchy
**it
;
661 for (it
= hlist
; it
&& *it
; it
++)
662 if ((*it
)->controllers
)
663 if (cgv1_controller_lists_intersect((*it
)->controllers
, clist
))
670 /* Set boolean to mark controllers under which we are supposed to create a
673 static void cgv1_mark_to_make_rw(char **clist
)
675 struct cgv1_hierarchy
**it
;
677 for (it
= cgv1_hierarchies
; it
&& *it
; it
++)
678 if ((*it
)->controllers
)
679 if (cgv1_controller_lists_intersect((*it
)->controllers
, clist
) ||
680 string_in_list(clist
, "all"))
681 (*it
)->create_rw_cgroup
= true;
684 /* Set boolean to mark whether we are supposed to create a writeable cgroup in
685 * the cgroupfs v2 hierarchy.
687 static void cgv2_mark_to_make_rw(char **clist
)
689 if (string_in_list(clist
, "unified") || string_in_list(clist
, "all"))
690 if (cgv2_hierarchies
)
691 (*cgv2_hierarchies
)->create_rw_cgroup
= true;
694 /* Wrapper around cgv{1,2}_mark_to_make_rw(). */
695 static void cg_mark_to_make_rw(char **clist
)
697 cgv1_mark_to_make_rw(clist
);
698 cgv2_mark_to_make_rw(clist
);
701 /* Prefix any named controllers with "name=", e.g. "name=systemd". */
702 static char *cgv1_must_prefix_named(char *entry
)
709 s
= must_realloc(NULL
, len
+ 6);
711 ret
= snprintf(s
, len
+ 6, "name=%s", entry
);
712 if (ret
< 0 || (size_t)ret
>= (len
+ 6)) {
720 /* Append kernel controller in @klist or named controller in @nlist to @clist */
721 static void must_append_controller(char **klist
, char **nlist
, char ***clist
, char *entry
)
726 if (string_in_list(klist
, entry
) && string_in_list(nlist
, entry
))
729 newentry
= append_null_to_list((void ***)clist
);
731 if (strncmp(entry
, "name=", 5) == 0)
732 copy
= must_copy_string(entry
);
733 else if (string_in_list(klist
, entry
))
734 copy
= must_copy_string(entry
);
736 copy
= cgv1_must_prefix_named(entry
);
738 (*clist
)[newentry
] = copy
;
741 /* Get the controllers from a mountinfo line. There are other ways we could get
742 * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
743 * could parse the mount options. But we simply assume that the mountpoint must
744 * be /sys/fs/cgroup/controller-list
746 static char **cgv1_get_proc_mountinfo_controllers(char **klist
, char **nlist
, char *line
)
750 char *saveptr
= NULL
;
755 for (i
= 0; i
< 4; i
++) {
762 if (strncmp(p
, "/sys/fs/cgroup/", 15) != 0)
772 for (tok
= strtok_r(p
, ",", &saveptr
); tok
;
773 tok
= strtok_r(NULL
, ",", &saveptr
))
774 must_append_controller(klist
, nlist
, &aret
, tok
);
779 /* Check if a cgroupfs v1 controller is present in the string @cgline. */
780 static bool cgv1_controller_in_clist(char *cgline
, char *c
)
782 __do_free
char *tmp
= NULL
;
785 char *saveptr
= NULL
;
787 eol
= strchr(cgline
, ':');
792 tmp
= must_realloc(NULL
, len
+ 1);
793 memcpy(tmp
, cgline
, len
);
796 for (tok
= strtok_r(tmp
, ",", &saveptr
); tok
;
797 tok
= strtok_r(NULL
, ",", &saveptr
)) {
798 if (strcmp(tok
, c
) == 0)
805 /* Get current cgroup from the /proc/<pid>/cgroup file passed in via @basecginfo
806 * of a given cgv1 controller passed in via @controller.
808 static char *cgv1_get_current_cgroup(char *basecginfo
, char *controller
)
820 if (cgv1_controller_in_clist(p
, controller
)) {
826 return copy_to_eol(p
);
838 /* Remove /init.scope from string @cg. This will mostly affect systemd-based
841 #define INIT_SCOPE "/init.scope"
842 static void cg_systemd_prune_init_scope(char *cg
)
849 point
= cg
+ strlen(cg
) - strlen(INIT_SCOPE
);
853 if (strcmp(point
, INIT_SCOPE
) == 0) {
861 /* Add new info about a mounted cgroupfs v1 hierarchy. Includes the controllers
862 * mounted into that hierarchy (e.g. cpu,cpuacct), the mountpoint of that
863 * hierarchy (/sys/fs/cgroup/<controller>), the base cgroup of the current
864 * process gathered from /proc/self/cgroup, and the init cgroup of PID1 gathered
865 * from /proc/1/cgroup.
867 static void cgv1_add_controller(char **clist
, char *mountpoint
, char *base_cgroup
, char *init_cgroup
)
869 struct cgv1_hierarchy
*new;
872 new = must_realloc(NULL
, sizeof(*new));
874 new->controllers
= clist
;
875 new->mountpoint
= mountpoint
;
876 new->base_cgroup
= base_cgroup
;
877 new->fullcgpath
= NULL
;
878 new->create_rw_cgroup
= false;
879 new->init_cgroup
= init_cgroup
;
880 new->systemd_user_slice
= false;
882 newentry
= append_null_to_list((void ***)&cgv1_hierarchies
);
883 cgv1_hierarchies
[newentry
] = new;
886 /* Add new info about the mounted cgroupfs v2 hierarchy. Can (but doesn't
887 * currently) include the controllers mounted into the hierarchy (e.g. memory,
888 * pids, blkio), the mountpoint of that hierarchy (Should usually be
889 * /sys/fs/cgroup but some init systems seem to think it might be a good idea
890 * to also mount empty cgroupfs v2 hierarchies at /sys/fs/cgroup/systemd.), the
891 * base cgroup of the current process gathered from /proc/self/cgroup, and the
892 * init cgroup of PID1 gathered from /proc/1/cgroup.
894 static void cgv2_add_controller(char **clist
, char *mountpoint
, char *base_cgroup
, char *init_cgroup
, bool systemd_user_slice
)
896 struct cgv2_hierarchy
*new;
899 new = must_realloc(NULL
, sizeof(*new));
901 new->controllers
= clist
;
902 new->mountpoint
= mountpoint
;
903 new->base_cgroup
= base_cgroup
;
904 new->fullcgpath
= NULL
;
905 new->create_rw_cgroup
= false;
906 new->init_cgroup
= init_cgroup
;
907 new->systemd_user_slice
= systemd_user_slice
;
909 newentry
= append_null_to_list((void ***)&cgv2_hierarchies
);
910 cgv2_hierarchies
[newentry
] = new;
913 /* In Ubuntu 14.04, the paths created for us were
914 * '/user/$uid.user/$something.session'. This can be merged better with
915 * cg_systemd_created_user_slice(), but keeping it separate makes it easier to
916 * reason about the correctness.
918 static bool cg_systemd_under_user_slice_1(const char *in
, uid_t uid
)
926 copy
= must_copy_string(in
);
927 if (strlen(copy
) < strlen("/user/1.user/1.session"))
929 p
= copy
+ strlen(copy
) - 1;
931 /* skip any trailing '/' (shouldn't be any, but be sure) */
932 while (p
>= copy
&& *p
== '/')
937 /* Get last path element */
938 while (p
>= copy
&& *p
!= '/')
943 /* make sure it is something.session */
945 if (len
< strlen("1.session") ||
946 strncmp(p
+ 1 + len
- 8, ".session", 8) != 0)
949 /* ok last path piece checks out, now check the second to last */
951 while (p
>= copy
&& *(--p
) != '/')
954 if (sscanf(p
+ 1, "%d.user/", &id
) != 1)
967 /* So long as our path relative to init starts with /user.slice/user-$uid.slice,
968 * assume it belongs to $uid and chown it
970 static bool cg_systemd_under_user_slice_2(const char *base_cgroup
,
971 const char *init_cgroup
, uid_t uid
)
975 size_t curlen
, initlen
;
977 curlen
= strlen(base_cgroup
);
978 initlen
= strlen(init_cgroup
);
979 if (curlen
<= initlen
)
982 if (strncmp(base_cgroup
, init_cgroup
, initlen
) != 0)
985 ret
= snprintf(buf
, 100, "/user.slice/user-%d.slice/", (int)uid
);
986 if (ret
< 0 || ret
>= 100)
990 initlen
= 0; // skip the '/'
992 return strncmp(base_cgroup
+ initlen
, buf
, strlen(buf
)) == 0;
995 /* The systemd-created path is: user-$uid.slice/session-c$session.scope. If that
996 * is not the end of our systemd path, then we're not part of the PAM call that
999 * The last piece is chowned to $uid, the user- part not.
1000 * Note: If the user creates paths that look like what we're looking for to
1002 * - they fool us, we create new cgroups, and they get auto-logged-out.
1003 * - they fool a root sudo, systemd cgroup is not changed but chowned, and they
1004 * lose ownership of their cgroups
1006 static bool cg_systemd_created_user_slice(const char *base_cgroup
,
1007 const char *init_cgroup
,
1008 const char *in
, uid_t uid
)
1016 copy
= must_copy_string(in
);
1018 /* An old version of systemd has already created a cgroup for us. */
1019 if (cg_systemd_under_user_slice_1(in
, uid
))
1022 /* A new version of systemd has already created a cgroup for us. */
1023 if (cg_systemd_under_user_slice_2(base_cgroup
, init_cgroup
, uid
))
1026 if (strlen(copy
) < strlen("/user-0.slice/session-0.scope"))
1029 p
= copy
+ strlen(copy
) - 1;
1030 /* Skip any trailing '/' (shouldn't be any, but be sure). */
1031 while (p
>= copy
&& *p
== '/')
1037 /* Get last path element */
1038 while (p
>= copy
&& *p
!= '/')
1044 /* Make sure it is session-something.scope. */
1045 len
= strlen(p
+ 1);
1046 if (strncmp(p
+ 1, "session-", strlen("session-")) != 0 ||
1047 strncmp(p
+ 1 + len
- 6, ".scope", 6) != 0)
1050 /* Ok last path piece checks out, now check the second to last. */
1052 while (p
>= copy
&& *(--p
) != '/')
1055 if (sscanf(p
+ 1, "user-%d.slice/", &id
) != 1)
1069 /* Chown existing cgroup that systemd has already created for us. */
1070 static bool cg_systemd_chown_existing_cgroup(const char *mountpoint
,
1071 const char *base_cgroup
, uid_t uid
,
1072 gid_t gid
, bool systemd_user_slice
)
1076 if (!systemd_user_slice
)
1079 path
= must_make_path(mountpoint
, base_cgroup
, NULL
);
1081 /* A cgroup within name=systemd has already been created. So we only
1084 if (chown(path
, uid
, gid
) < 0)
1085 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
1086 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
1087 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
1093 /* Detect and store information about cgroupfs v1 hierarchies. */
1094 static bool cgv1_init(uid_t uid
, gid_t gid
)
1097 struct cgv1_hierarchy
**it
;
1100 char **klist
= NULL
, **nlist
= NULL
;
1103 basecginfo
= read_file("/proc/self/cgroup");
1107 f
= fopen("/proc/self/mountinfo", "r");
1113 cgv1_get_controllers(&klist
, &nlist
);
1115 while (getline(&line
, &len
, f
) != -1) {
1116 char **controller_list
= NULL
;
1117 char *mountpoint
, *base_cgroup
;
1119 if (is_lxcfs(line
) || !is_cgv1(line
))
1122 controller_list
= cgv1_get_proc_mountinfo_controllers(klist
, nlist
, line
);
1123 if (!controller_list
)
1126 if (cgv1_controller_list_is_dup(cgv1_hierarchies
, controller_list
)) {
1127 free(controller_list
);
1131 mountpoint
= get_mountpoint(line
);
1133 free_string_list(controller_list
);
1137 base_cgroup
= cgv1_get_current_cgroup(basecginfo
, controller_list
[0]);
1139 free_string_list(controller_list
);
1145 pam_cgfs_debug("Detected cgroupfs v1 controller \"%s\" with "
1146 "mountpoint \"%s\" and cgroup \"%s\"\n",
1147 controller_list
[0], mountpoint
, base_cgroup
);
1148 cgv1_add_controller(controller_list
, mountpoint
, base_cgroup
, NULL
);
1151 free_string_list(klist
);
1152 free_string_list(nlist
);
1157 /* Retrieve init cgroup path for all controllers. */
1158 basecginfo
= read_file("/proc/1/cgroup");
1162 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
1163 if ((*it
)->controllers
) {
1164 char *init_cgroup
, *user_slice
;
1166 /* We've already stored the controller and received its
1167 * current cgroup. If we now fail to retrieve its init
1168 * cgroup, we should probably fail.
1170 init_cgroup
= cgv1_get_current_cgroup(basecginfo
, (*it
)->controllers
[0]);
1176 cg_systemd_prune_init_scope(init_cgroup
);
1177 (*it
)->init_cgroup
= init_cgroup
;
1178 pam_cgfs_debug("cgroupfs v1 controller \"%s\" has init "
1180 (*(*it
)->controllers
), init_cgroup
);
1182 /* Check whether systemd has already created a cgroup
1185 user_slice
= must_make_path((*it
)->mountpoint
, (*it
)->base_cgroup
, NULL
);
1186 if (cg_systemd_created_user_slice((*it
)->base_cgroup
, (*it
)->init_cgroup
, user_slice
, uid
))
1187 (*it
)->systemd_user_slice
= true;
1197 /* Check whether @path is a cgroupfs v1 or cgroupfs v2 mount. Returns -1 if
1198 * statfs fails. If @path is null /sys/fs/cgroup is checked.
1200 static inline int cg_get_version_of_mntpt(const char *path
)
1202 if (has_fs_type(path
, CGROUP_SUPER_MAGIC
))
1205 if (has_fs_type(path
, CGROUP2_SUPER_MAGIC
))
1211 /* Detect and store information about the cgroupfs v2 hierarchy. Currently only
1212 * deals with the empty v2 hierarchy as we do not retrieve enabled controllers.
1214 static bool cgv2_init(uid_t uid
, gid_t gid
)
1218 char *current_cgroup
= NULL
, *init_cgroup
= NULL
;
1223 current_cgroup
= cgv2_get_current_cgroup(getpid());
1224 if (!current_cgroup
) {
1225 /* No v2 hierarchy present. We're done. */
1230 init_cgroup
= cgv2_get_current_cgroup(1);
1232 /* If we're here and didn't fail already above, then something's
1233 * certainly wrong, so error this time.
1238 cg_systemd_prune_init_scope(init_cgroup
);
1240 /* Check if the v2 hierarchy is mounted at its standard location.
1241 * If so we can skip the rest of the work here. Although the unified
1242 * hierarchy can be mounted multiple times, each of those mountpoints
1243 * will expose identical information.
1245 if (cg_get_version_of_mntpt("/sys/fs/cgroup") == 2) {
1247 bool has_user_slice
= false;
1249 mountpoint
= must_copy_string("/sys/fs/cgroup");
1253 user_slice
= must_make_path(mountpoint
, current_cgroup
, NULL
);
1254 if (cg_systemd_created_user_slice(current_cgroup
, init_cgroup
, user_slice
, uid
))
1255 has_user_slice
= true;
1258 cgv2_add_controller(NULL
, mountpoint
, current_cgroup
, init_cgroup
, has_user_slice
);
1264 f
= fopen("/proc/self/mountinfo", "r");
1268 /* we support simple cgroup mounts and lxcfs mounts */
1269 while (getline(&line
, &len
, f
) != -1) {
1271 bool has_user_slice
= false;
1276 mountpoint
= get_mountpoint(line
);
1280 user_slice
= must_make_path(mountpoint
, current_cgroup
, NULL
);
1281 if (cg_systemd_created_user_slice(current_cgroup
, init_cgroup
, user_slice
, uid
))
1282 has_user_slice
= true;
1285 cgv2_add_controller(NULL
, mountpoint
, current_cgroup
, init_cgroup
, has_user_slice
);
1287 /* Although the unified hierarchy can be mounted multiple times,
1288 * each of those mountpoints will expose identical information.
1289 * So let the first mountpoint we find, win.
1295 pam_cgfs_debug("Detected cgroupfs v2 hierarchy at mountpoint \"%s\" with "
1296 "current cgroup \"%s\" and init cgroup \"%s\"\n",
1297 mountpoint
, current_cgroup
, init_cgroup
);
1306 free(current_cgroup
);
1312 /* Detect and store information about mounted cgroupfs v1 hierarchies and the
1313 * cgroupfs v2 hierarchy.
1314 * Detect whether we are on a pure cgroupfs v1, cgroupfs v2, or mixed system,
1315 * where some controllers are mounted into their standard cgroupfs v1 locations
1316 * (/sys/fs/cgroup/<controller>) and others are mounted into the cgroupfs v2
1317 * hierarchy (/sys/fs/cgroup).
1319 static bool cg_init(uid_t uid
, gid_t gid
)
1321 if (!cgv1_init(uid
, gid
))
1324 if (!cgv2_init(uid
, gid
))
1327 if (cgv1_hierarchies
&& cgv2_hierarchies
) {
1328 cg_mount_mode
= CGROUP_MIXED
;
1329 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 and v2 hierarchies");
1330 } else if (cgv1_hierarchies
&& !cgv2_hierarchies
) {
1331 cg_mount_mode
= CGROUP_PURE_V1
;
1332 pam_cgfs_debug("%s\n", "Detected cgroupfs v1 hierarchies");
1333 } else if (cgv2_hierarchies
&& !cgv1_hierarchies
) {
1334 cg_mount_mode
= CGROUP_PURE_V2
;
1335 pam_cgfs_debug("%s\n", "Detected cgroupfs v2 hierarchies");
1337 cg_mount_mode
= CGROUP_UNKNOWN
;
1338 mysyslog(LOG_ERR
, "Could not detect cgroupfs hierarchy\n", NULL
);
1341 if (cg_mount_mode
== CGROUP_UNKNOWN
)
1347 /* Try to move/migrate us into @cgroup in a cgroupfs v1 hierarchy. */
1348 static bool cgv1_enter(const char *cgroup
)
1350 struct cgv1_hierarchy
**it
;
1352 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
1354 bool entered
= false;
1356 if (!(*it
)->controllers
|| !(*it
)->mountpoint
||
1357 !(*it
)->init_cgroup
|| !(*it
)->create_rw_cgroup
)
1360 for (controller
= (*it
)->controllers
; controller
&& *controller
;
1364 /* We've already been placed in a user slice, so we
1365 * don't need to enter the cgroup again.
1367 if ((*it
)->systemd_user_slice
) {
1372 path
= must_make_path((*it
)->mountpoint
,
1377 if (!file_exists(path
)) {
1379 path
= must_make_path((*it
)->mountpoint
,
1386 pam_cgfs_debug("Attempting to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path
);
1387 entered
= write_int(path
, (int)getpid());
1393 pam_cgfs_debug("Failed to enter cgroupfs v1 hierarchy in \"%s\" cgroup\n", path
);
1404 /* Try to move/migrate us into @cgroup in the cgroupfs v2 hierarchy. */
1405 static bool cgv2_enter(const char *cgroup
)
1407 struct cgv2_hierarchy
*v2
;
1409 bool entered
= false;
1411 if (!cgv2_hierarchies
)
1414 v2
= *cgv2_hierarchies
;
1416 if (!v2
->mountpoint
|| !v2
->base_cgroup
)
1419 if (!v2
->create_rw_cgroup
|| v2
->systemd_user_slice
)
1422 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
, "/cgroup.procs", NULL
);
1423 pam_cgfs_debug("Attempting to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path
);
1425 entered
= write_int(path
, (int)getpid());
1427 pam_cgfs_debug("Failed to enter cgroupfs v2 hierarchy in cgroup \"%s\"\n", path
);
1437 /* Wrapper around cgv{1,2}_enter(). */
1438 static bool cg_enter(const char *cgroup
)
1440 if (!cgv1_enter(cgroup
)) {
1441 mysyslog(LOG_WARNING
, "cgroupfs v1: Failed to enter cgroups\n", NULL
);
1445 if (!cgv2_enter(cgroup
)) {
1446 mysyslog(LOG_WARNING
, "cgroupfs v2: Failed to enter cgroups\n", NULL
);
1453 /* Escape to root cgroup in all detected cgroupfs v1 hierarchies. */
1454 static void cgv1_escape(void)
1456 struct cgv1_hierarchy
**it
;
1458 /* In case systemd hasn't already placed us in a user slice for the
1459 * cpuset v1 controller we will reside in the root cgroup. This means
1460 * that cgroup.clone_children will not have been initialized for us so
1463 for (it
= cgv1_hierarchies
; it
&& *it
; it
++)
1464 if (!cgv1_handle_root_cpuset_hierarchy(*it
))
1465 mysyslog(LOG_WARNING
, "cgroupfs v1: Failed to initialize cpuset\n", NULL
);
1467 if (!cgv1_enter("/"))
1468 mysyslog(LOG_WARNING
, "cgroupfs v1: Failed to escape to init's cgroup\n", NULL
);
1471 /* Escape to root cgroup in the cgroupfs v2 hierarchy. */
1472 static void cgv2_escape(void)
1474 if (!cgv2_enter("/"))
1475 mysyslog(LOG_WARNING
, "cgroupfs v2: Failed to escape to init's cgroup\n", NULL
);
1478 /* Wrapper around cgv{1,2}_escape(). */
1479 static void cg_escape(void)
/* Get uid and gid for @user.
 *
 * Looks @user up with getpwnam_r() and stores the result in @uid and @gid.
 * Returns true on success and false if the lookup buffer cannot be allocated
 * or no password entry is returned. A missing entry (lookup succeeded but
 * found nothing) is additionally logged.
 */
static bool get_uid_gid(const char *user, uid_t *uid, gid_t *gid)
{
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *buf;
	long bufsize;
	int ret;

	/* sysconf() reports "no limit / unknown" as -1; keep the value signed
	 * so that case is detected directly, and fall back to a fixed size.
	 */
	bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
	if (bufsize < 1)
		bufsize = 1024;

	buf = malloc(bufsize);
	if (!buf)
		return false;

	ret = getpwnam_r(user, &pwent, buf, bufsize, &pwentp);
	if (!pwentp) {
		/* ret == 0 with no entry means "user not found". */
		if (ret == 0)
			mysyslog(LOG_ERR,
				 "Could not find matched password record\n", NULL);

		free(buf);
		return false;
	}

	*uid = pwent.pw_uid;
	*gid = pwent.pw_gid;
	free(buf);

	return true;
}

/* Check if cgroup belongs to our uid and gid. If so, reuse it.
 *
 * Returns true only if @path can be stat()ed and is owned by exactly @uid and
 * @gid; any stat() failure counts as "not ours".
 */
static bool cg_belongs_to_uid_gid(const char *path, uid_t uid, gid_t gid)
{
	struct stat statbuf;

	if (stat(path, &statbuf) < 0)
		return false;

	return statbuf.st_uid == uid && statbuf.st_gid == gid;
}

/* Create cpumask from cpulist aka turn:
 *
 *	0,2-3
 *
 * into a bit array
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place with strtok_r() and is therefore modified.
 * @nbits is the number of bits the mask must hold; every cpu number in @buf
 * has to be smaller than @nbits.
 *
 * Returns a calloc()ed array of BITS_TO_LONGS(@nbits) words that the caller
 * must free(), or NULL on allocation failure, on a reversed range ("3-1") or
 * on a cpu number that does not fit into the mask.
 */
static uint32_t *cg_cpumask(char *buf, size_t nbits)
{
	char *token;
	char *saveptr = NULL;
	size_t arrlen = BITS_TO_LONGS(nbits);
	uint32_t *bitarr = calloc(arrlen, sizeof(uint32_t));
	if (!bitarr)
		return NULL;

	for (; (token = strtok_r(buf, ",", &saveptr)); buf = NULL) {
		/* Parse into the full width strtoul() returns so that huge
		 * values are rejected below instead of being truncated.
		 */
		unsigned long start = strtoul(token, NULL, 0);
		unsigned long end = start;

		char *range = strchr(token, '-');
		if (range)
			end = strtoul(range + 1, NULL, 0);

		/* Reject reversed ranges and anything outside the mask; the
		 * latter would make set_bit() write past the allocation.
		 */
		if (start > end || end >= nbits) {
			free(bitarr);
			return NULL;
		}

		while (start <= end)
			set_bit((unsigned)start++, bitarr);
	}

	return bitarr;
}

/* Join the NULL-terminated string array @parts into one newly allocated
 * string, placing @sep between consecutive elements. If @use_as_prefix is
 * true, @sep is additionally placed in front of the first element.
 *
 * Returns NULL if @parts is NULL or the allocation fails; otherwise the
 * caller owns the result and must free() it. An empty array yields "" (or
 * just @sep when @use_as_prefix is set).
 */
static char *string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	size_t i;
	char *result, *cursor;

	if (!parts)
		return NULL;

	/* calculate new string length */
	for (i = 0; parts[i]; i++)
		result_len += (i > 0 ? sep_len : 0) + strlen(parts[i]);

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Append at a moving cursor: each piece is copied exactly once instead
	 * of rescanning the result for its end on every append.
	 */
	cursor = result;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (i = 0; parts[i]; i++) {
		size_t part_len = strlen(parts[i]);

		if (i > 0) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}

		memcpy(cursor, parts[i], part_len);
		cursor += part_len;
	}
	*cursor = '\0';

	return result;
}

1609 /* The largest integer that can fit into long int is 2^64. This is a
1612 #define __IN_TO_STR_LEN 21
1613 /* Turn cpumask into simple, comma-separated cpulist. */
1614 static char *cg_cpumask_to_cpulist(uint32_t *bitarr
, size_t nbits
)
1618 char numstr
[__IN_TO_STR_LEN
] = {0};
1619 char **cpulist
= NULL
;
1621 for (i
= 0; i
<= nbits
; i
++) {
1622 if (is_set(i
, bitarr
)) {
1623 ret
= snprintf(numstr
, __IN_TO_STR_LEN
, "%zu", i
);
1624 if (ret
< 0 || (size_t)ret
>= __IN_TO_STR_LEN
) {
1625 free_string_list(cpulist
);
1629 must_append_string(&cpulist
, numstr
);
1633 return string_join(",", (const char **)cpulist
, false);
/* Return the last cpu number named in @cpulist.
 *
 * @cpulist is a kernel-style cpu list such as "0-3,8-11". The number that
 * follows the last ',' or '-' (or the whole string if neither occurs) is
 * parsed and returned, so lists are expected to be in ascending order.
 *
 * Returns -1 if @cpulist is NULL or strtoul() reports an error.
 */
static ssize_t cg_get_max_cpus(char *cpulist)
{
	char *last_comma, *last_dash, *start;
	size_t cpus = 0;

	if (!cpulist)
		return -1;

	last_comma = strrchr(cpulist, ',');
	last_dash = strrchr(cpulist, '-');

	/* Pick whichever separator comes last. Pointers are only compared
	 * with each other when both point into @cpulist.
	 */
	if (last_comma && last_dash)
		start = (last_comma > last_dash ? last_comma : last_dash) + 1;
	else if (last_comma)
		start = last_comma + 1;
	else if (last_dash)
		start = last_dash + 1;
	else
		start = cpulist;

	errno = 0;
	cpus = strtoul(start, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}

1669 #define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
1670 static bool cg_filter_and_set_cpus(char *path
, bool am_initialized
)
1672 char *lastslash
, *fpath
, oldv
;
1676 ssize_t maxposs
= 0, maxisol
= 0;
1677 char *cpulist
= NULL
, *posscpus
= NULL
, *isolcpus
= NULL
;
1678 uint32_t *possmask
= NULL
, *isolmask
= NULL
;
1679 bool bret
= false, flipped_bit
= false;
1681 lastslash
= strrchr(path
, '/');
1682 if (!lastslash
) { // bug... this shouldn't be possible
1683 pam_cgfs_debug("Invalid path: %s\n", path
);
1690 fpath
= must_make_path(path
, "cpuset.cpus", NULL
);
1691 posscpus
= read_file(fpath
);
1693 pam_cgfs_debug("Could not read file: %s\n", fpath
);
1697 /* Get maximum number of cpus found in possible cpuset. */
1698 maxposs
= cg_get_max_cpus(posscpus
);
1699 if (maxposs
< 0 || maxposs
>= INT_MAX
- 1)
1702 if (!file_exists(__ISOL_CPUS
)) {
1703 /* This system doesn't expose isolated cpus. */
1704 pam_cgfs_debug("%s", "Path: "__ISOL_CPUS
" to read isolated cpus from does not exist\n");
1707 /* No isolated cpus but we weren't already initialized by
1708 * someone. We should simply copy the parents cpuset.cpus
1711 if (!am_initialized
) {
1712 pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
1716 /* No isolated cpus but we were already initialized by someone.
1717 * Nothing more to do for us.
1722 isolcpus
= read_file(__ISOL_CPUS
);
1724 pam_cgfs_debug("%s", "Could not read file "__ISOL_CPUS
"\n");
1728 if (!isdigit(isolcpus
[0])) {
1729 pam_cgfs_debug("%s", "No isolated cpus detected\n");
1732 /* No isolated cpus but we weren't already initialized by
1733 * someone. We should simply copy the parents cpuset.cpus
1736 if (!am_initialized
) {
1737 pam_cgfs_debug("%s", "Copying cpuset of parent cgroup\n");
1741 /* No isolated cpus but we were already initialized by someone.
1742 * Nothing more to do for us.
1747 /* Get maximum number of cpus found in isolated cpuset. */
1748 maxisol
= cg_get_max_cpus(isolcpus
);
1749 if (maxisol
< 0 || maxisol
>= INT_MAX
- 1)
1752 if (maxposs
< maxisol
)
1756 possmask
= cg_cpumask(posscpus
, maxposs
);
1758 pam_cgfs_debug("%s", "Could not create cpumask for all possible cpus\n");
1762 isolmask
= cg_cpumask(isolcpus
, maxposs
);
1764 pam_cgfs_debug("%s", "Could not create cpumask for all isolated cpus\n");
1768 for (i
= 0; i
<= maxposs
; i
++) {
1769 if (is_set(i
, isolmask
) && is_set(i
, possmask
)) {
1771 clear_bit(i
, possmask
);
1776 pam_cgfs_debug("%s", "No isolated cpus present in cpuset\n");
1779 pam_cgfs_debug("%s", "Removed isolated cpus from cpuset\n");
1781 cpulist
= cg_cpumask_to_cpulist(possmask
, maxposs
);
1783 pam_cgfs_debug("%s", "Could not create cpu list\n");
1792 fpath
= must_make_path(path
, "cpuset.cpus", NULL
);
1793 ret
= lxc_write_to_file(fpath
, cpulist
, strlen(cpulist
), false, 0660);
1795 pam_cgfs_debug("Could not write cpu list to: %s\n", fpath
);
1809 if (posscpus
!= cpulist
)
/* Copy contents of parent(@path)/@file to @path/@file
 *
 * @path is briefly truncated at its last '/' to build the parent's file name
 * and restored right away, so it is unchanged when the function returns,
 * whether it succeeds or fails.
 *
 * Returns true if the value was read from the parent and written to the
 * child, false otherwise.
 */
static bool cg_copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { // bug...  this shouldn't be possible
		pam_cgfs_debug("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* Address the parent directory, then undo the truncation immediately
	 * so that no exit path can leave @path shortened for the caller.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* A NULL buffer asks only for the length of the file's content. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}

	value = must_realloc(NULL, len + 1);
	if (lxc_read_from_file(fpath, value, len) != len) {
		pam_cgfs_debug("Failed to read %s: %s", fpath, strerror(errno));
		goto bad;
	}
	/* Terminate the buffer: it is printed with %s below. */
	value[len] = '\0';
	free(fpath);

	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false, 0660);
	if (ret < 0)
		pam_cgfs_debug("Unable to write %s to %s", value, fpath);

	free(fpath);
	free(value);
	return ret >= 0;

bad:
	pam_cgfs_debug("Error reading '%s'", fpath);
	free(fpath);
	free(value);

	return false;
}

1865 /* In case systemd hasn't already placed us in a user slice for the cpuset v1
1866 * controller we will reside in the root cgroup. This means that
1867 * cgroup.clone_children will not have been initialized for us so we need to do
1870 static bool cgv1_handle_root_cpuset_hierarchy(struct cgv1_hierarchy
*h
)
1872 char *clonechildrenpath
, v
;
1874 if (!string_in_list(h
->controllers
, "cpuset"))
1877 clonechildrenpath
= must_make_path(h
->mountpoint
, "cgroup.clone_children", NULL
);
1879 if (lxc_read_from_file(clonechildrenpath
, &v
, 1) < 0) {
1880 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath
, strerror(errno
));
1881 free(clonechildrenpath
);
1885 if (v
== '1') { /* already set for us by someone else */
1886 free(clonechildrenpath
);
1890 if (lxc_write_to_file(clonechildrenpath
, "1", 1, false, 0660) < 0) {
1891 /* Set clone_children so children inherit our settings */
1892 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath
);
1893 free(clonechildrenpath
);
1897 free(clonechildrenpath
);
1902 * Initialize the cpuset hierarchy in first directory of @gname and
1903 * set cgroup.clone_children so that children inherit settings.
1904 * Since the h->base_path is populated by init or ourselves, we know
1905 * it is already initialized.
1907 static bool cgv1_handle_cpuset_hierarchy(struct cgv1_hierarchy
*h
,
1910 char *cgpath
, *clonechildrenpath
, v
, *slash
;
1912 if (!string_in_list(h
->controllers
, "cpuset"))
1917 slash
= strchr(cgroup
, '/');
1921 cgpath
= must_make_path(h
->mountpoint
, h
->base_cgroup
, cgroup
, NULL
);
1925 if (do_mkdir(cgpath
, 0755) < 0 && errno
!= EEXIST
) {
1926 pam_cgfs_debug("Failed to create '%s'", cgpath
);
1931 clonechildrenpath
= must_make_path(cgpath
, "cgroup.clone_children", NULL
);
1932 if (!file_exists(clonechildrenpath
)) { /* unified hierarchy doesn't have clone_children */
1933 free(clonechildrenpath
);
1938 if (lxc_read_from_file(clonechildrenpath
, &v
, 1) < 0) {
1939 pam_cgfs_debug("Failed to read %s: %s", clonechildrenpath
, strerror(errno
));
1940 free(clonechildrenpath
);
1945 /* Make sure any isolated cpus are removed from cpuset.cpus. */
1946 if (!cg_filter_and_set_cpus(cgpath
, v
== '1')) {
1947 pam_cgfs_debug("%s", "Failed to remove isolated cpus\n");
1948 free(clonechildrenpath
);
1953 if (v
== '1') { /* already set for us by someone else */
1954 pam_cgfs_debug("%s", "\"cgroup.clone_children\" was already set to \"1\"\n");
1955 free(clonechildrenpath
);
1960 /* copy parent's settings */
1961 if (!cg_copy_parent_file(cgpath
, "cpuset.mems")) {
1962 pam_cgfs_debug("%s", "Failed to copy \"cpuset.mems\" settings\n");
1964 free(clonechildrenpath
);
1969 if (lxc_write_to_file(clonechildrenpath
, "1", 1, false, 0660) < 0) {
1970 /* Set clone_children so children inherit our settings */
1971 pam_cgfs_debug("Failed to write 1 to %s", clonechildrenpath
);
1972 free(clonechildrenpath
);
1975 free(clonechildrenpath
);
1979 /* Create and chown @cgroup for all given controllers in a cgroupfs v1 hierarchy
1980 * (For example, create @cgroup for the cpu and cpuacct controller mounted into
1981 * /sys/fs/cgroup/cpu,cpuacct). Check if the path already exists and report back
1982 * to the caller in @existed.
1984 #define __PAM_CGFS_USER "/user/"
1985 #define __PAM_CGFS_USER_LEN 6
1986 static bool cgv1_create_one(struct cgv1_hierarchy
*h
, const char *cgroup
, uid_t uid
, gid_t gid
, bool *existed
)
1988 char *clean_base_cgroup
, *path
;
1990 struct cgv1_hierarchy
*it
;
1991 bool created
= false;
1996 for (controller
= it
->controllers
; controller
&& *controller
;
1998 if (!cgv1_handle_cpuset_hierarchy(it
, cgroup
))
2001 /* If systemd has already created a cgroup for us, keep using
2004 if (cg_systemd_chown_existing_cgroup(it
->mountpoint
,
2005 it
->base_cgroup
, uid
, gid
,
2006 it
->systemd_user_slice
))
2009 /* We need to make sure that we do not create an endless chain
2010 * of sub-cgroups. So we check if we have already logged in
2011 * somehow (sudo -i, su, etc.) and have created a
2012 * /user/PAM_user/idx cgroup. If so, we skip that part. For most
2013 * cgroups this is unnecessary since we use the init_cgroup
2014 * anyway, but for controllers which have an existing systemd
2015 * cgroup that does not match the current uid, this is pretty
2018 if (strncmp(it
->base_cgroup
, __PAM_CGFS_USER
, __PAM_CGFS_USER_LEN
) == 0) {
2019 free(it
->base_cgroup
);
2020 it
->base_cgroup
= must_copy_string("/");
2023 strstr(it
->base_cgroup
, __PAM_CGFS_USER
);
2024 if (clean_base_cgroup
)
2025 *clean_base_cgroup
= '\0';
2028 path
= must_make_path(it
->mountpoint
, it
->init_cgroup
, cgroup
, NULL
);
2029 pam_cgfs_debug("Constructing path: %s\n", path
);
2031 if (file_exists(path
)) {
2032 bool our_cg
= cg_belongs_to_uid_gid(path
, uid
, gid
);
2038 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2039 path
, our_cg
? "" : "not ", uid
, gid
);
2045 created
= lxc_mkdir_parent(it
->mountpoint
, path
);
2051 if (chown(path
, uid
, gid
) < 0)
2052 mysyslog(LOG_WARNING
,
2053 "Failed to chown %s to %d:%d: %s\n", path
,
2054 (int)uid
, (int)gid
, strerror(errno
), NULL
);
2056 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
2064 /* Try to remove @cgroup for all given controllers in a cgroupfs v1 hierarchy
2065 * (For example, try to remove @cgroup for the cpu and cpuacct controller
2066 * mounted into /sys/fs/cgroup/cpu,cpuacct). Ignores failures.
2068 static bool cgv1_remove_one(struct cgv1_hierarchy
*h
, const char *cgroup
)
2073 /* Better safe than sorry. */
2074 if (!h
->controllers
)
2077 /* Cgroups created by systemd for us which we re-use won't be removed
2078 * here, since we're using init_cgroup + cgroup as path instead of
2079 * base_cgroup + cgroup.
2081 path
= must_make_path(h
->mountpoint
, h
->init_cgroup
, cgroup
, NULL
);
2082 (void)recursive_rmdir(path
);
2088 /* Try to remove @cgroup the cgroupfs v2 hierarchy. */
2089 static bool cgv2_remove(const char *cgroup
)
2091 struct cgv2_hierarchy
*v2
;
2094 if (!cgv2_hierarchies
)
2097 v2
= *cgv2_hierarchies
;
2099 /* If we reused an already existing cgroup, don't bother trying to
2100 * remove (a potentially wrong)/the path.
2101 * Cgroups created by systemd for us which we re-use would be removed
2102 * here, since we're using base_cgroup + cgroup as path.
2104 if (v2
->systemd_user_slice
)
2107 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
, NULL
);
2108 (void)recursive_rmdir(path
);
2114 /* Create @cgroup in all detected cgroupfs v1 hierarchy. If the creation fails
2115 * for any cgroupfs v1 hierarchy, remove all we have created so far. Report
2116 * back, to the caller if the creation failed due to @cgroup already existing
2119 static bool cgv1_create(const char *cgroup
, uid_t uid
, gid_t gid
, bool *existed
)
2121 struct cgv1_hierarchy
**it
, **rev_it
;
2122 bool all_created
= true;
2124 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
2125 if (!(*it
)->controllers
|| !(*it
)->mountpoint
||
2126 !(*it
)->init_cgroup
|| !(*it
)->create_rw_cgroup
)
2129 if (!cgv1_create_one(*it
, cgroup
, uid
, gid
, existed
)) {
2130 all_created
= false;
2138 for (rev_it
= cgv1_hierarchies
; rev_it
&& *rev_it
&& (*rev_it
!= *it
);
2140 cgv1_remove_one(*rev_it
, cgroup
);
2145 /* Create @cgroup in the cgroupfs v2 hierarchy. Report back, to the caller if
2146 * the creation failed due to @cgroup already existing via @existed.
2148 static bool cgv2_create(const char *cgroup
, uid_t uid
, gid_t gid
, bool *existed
)
2151 char *clean_base_cgroup
;
2153 struct cgv2_hierarchy
*v2
;
2154 bool our_cg
= false, created
= false;
2158 if (!cgv2_hierarchies
|| !(*cgv2_hierarchies
)->create_rw_cgroup
)
2161 v2
= *cgv2_hierarchies
;
2163 /* We can't be placed under init's cgroup for the v2 hierarchy. We need
2164 * to be placed under our current cgroup.
2166 if (cg_systemd_chown_existing_cgroup(v2
->mountpoint
, v2
->base_cgroup
,
2167 uid
, gid
, v2
->systemd_user_slice
))
2168 goto delegate_files
;
2170 /* We need to make sure that we do not create an endless chain of
2171 * sub-cgroups. So we check if we have already logged in somehow (sudo
2172 * -i, su, etc.) and have created a /user/PAM_user/idx cgroup. If so, we
2175 if (strncmp(v2
->base_cgroup
, __PAM_CGFS_USER
, __PAM_CGFS_USER_LEN
) == 0) {
2176 free(v2
->base_cgroup
);
2177 v2
->base_cgroup
= must_copy_string("/");
2179 clean_base_cgroup
= strstr(v2
->base_cgroup
, __PAM_CGFS_USER
);
2180 if (clean_base_cgroup
)
2181 *clean_base_cgroup
= '\0';
2184 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
, NULL
);
2185 pam_cgfs_debug("Constructing path \"%s\"\n", path
);
2187 if (file_exists(path
)) {
2188 our_cg
= cg_belongs_to_uid_gid(path
, uid
, gid
);
2189 pam_cgfs_debug("%s existed and does %shave our uid: %d and gid: %d\n",
2190 path
, our_cg
? "" : "not ", uid
, gid
);
2194 goto delegate_files
;
2201 created
= lxc_mkdir_parent(v2
->mountpoint
, path
);
2207 /* chown cgroup to user */
2208 if (chown(path
, uid
, gid
) < 0)
2209 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2210 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2212 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
2216 /* chown cgroup.procs to user */
2217 if (v2
->systemd_user_slice
)
2218 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
,
2219 "/cgroup.procs", NULL
);
2221 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
,
2222 "/cgroup.procs", NULL
);
2224 ret
= chown(path
, uid
, gid
);
2226 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2227 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2229 pam_cgfs_debug("Chowned %s to %d:%d\n", path
, (int)uid
, (int)gid
);
2232 /* chown cgroup.subtree_control to user */
2233 if (v2
->systemd_user_slice
)
2234 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
,
2235 "/cgroup.subtree_control", NULL
);
2237 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
,
2238 "/cgroup.subtree_control", NULL
);
2240 ret
= chown(path
, uid
, gid
);
2242 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2243 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2246 /* chown cgroup.threads to user */
2247 if (v2
->systemd_user_slice
)
2248 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
,
2249 "/cgroup.threads", NULL
);
2251 path
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, cgroup
,
2252 "/cgroup.threads", NULL
);
2253 ret
= chown(path
, uid
, gid
);
2254 if (ret
< 0 && errno
!= ENOENT
)
2255 mysyslog(LOG_WARNING
, "Failed to chown %s to %d:%d: %s\n",
2256 path
, (int)uid
, (int)gid
, strerror(errno
), NULL
);
2262 /* Create writeable cgroups for @user at login. Details can be found in the
2263 * preamble/license at the top of this file.
2265 static int handle_login(const char *user
, uid_t uid
, gid_t gid
)
2274 ret
= snprintf(cg
, PATH_MAX
, "/user/%s/%d", user
, idx
);
2275 if (ret
< 0 || ret
>= PATH_MAX
) {
2276 mysyslog(LOG_ERR
, "Username too long\n", NULL
);
2277 return PAM_SESSION_ERR
;
2281 if (!cgv2_create(cg
, uid
, gid
, &existed
)) {
2288 mysyslog(LOG_ERR
, "Failed to create a cgroup for user %s\n", user
, NULL
);
2289 return PAM_SESSION_ERR
;
2293 if (!cgv1_create(cg
, uid
, gid
, &existed
)) {
2300 mysyslog(LOG_ERR
, "Failed to create a cgroup for user %s\n", user
, NULL
);
2301 return PAM_SESSION_ERR
;
2304 if (!cg_enter(cg
)) {
2305 mysyslog( LOG_ERR
, "Failed to enter user cgroup %s for user %s\n", cg
, user
, NULL
);
2306 return PAM_SESSION_ERR
;
2315 /* Try to prune cgroups we created and that now are empty from all cgroupfs v1
2318 static bool cgv1_prune_empty_cgroups(const char *user
)
2320 bool controller_removed
= true;
2321 bool all_removed
= true;
2322 struct cgv1_hierarchy
**it
;
2324 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
2326 char *path_base
, *path_init
;
2329 if (!(*it
)->controllers
|| !(*it
)->mountpoint
||
2330 !(*it
)->init_cgroup
|| !(*it
)->create_rw_cgroup
)
2333 for (controller
= (*it
)->controllers
; controller
&& *controller
;
2335 bool path_base_rm
, path_init_rm
;
2337 path_base
= must_make_path((*it
)->mountpoint
, (*it
)->base_cgroup
, "/user", user
, NULL
);
2338 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_base
);
2340 ret
= recursive_rmdir(path_base
);
2341 if (ret
== -ENOENT
|| ret
>= 0)
2342 path_base_rm
= true;
2344 path_base_rm
= false;
2347 path_init
= must_make_path((*it
)->mountpoint
, (*it
)->init_cgroup
, "/user", user
, NULL
);
2348 pam_cgfs_debug("cgroupfs v1: Trying to prune \"%s\"\n", path_init
);
2350 ret
= recursive_rmdir(path_init
);
2351 if (ret
== -ENOENT
|| ret
>= 0)
2352 path_init_rm
= true;
2354 path_init_rm
= false;
2357 if (!path_base_rm
&& !path_init_rm
) {
2358 controller_removed
= false;
2362 controller_removed
= true;
2366 if (!controller_removed
)
2367 all_removed
= false;
2373 /* Try to prune cgroup we created and that now is empty from the cgroupfs v2
2376 static bool cgv2_prune_empty_cgroups(const char *user
)
2379 struct cgv2_hierarchy
*v2
;
2380 char *path_base
, *path_init
;
2381 bool path_base_rm
, path_init_rm
;
2383 if (!cgv2_hierarchies
)
2386 v2
= *cgv2_hierarchies
;
2388 path_base
= must_make_path(v2
->mountpoint
, v2
->base_cgroup
, "/user", user
, NULL
);
2389 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_base
);
2391 ret
= recursive_rmdir(path_base
);
2392 if (ret
== -ENOENT
|| ret
>= 0)
2393 path_base_rm
= true;
2395 path_base_rm
= false;
2398 path_init
= must_make_path(v2
->mountpoint
, v2
->init_cgroup
, "/user", user
, NULL
);
2399 pam_cgfs_debug("cgroupfs v2: Trying to prune \"%s\"\n", path_init
);
2401 ret
= recursive_rmdir(path_init
);
2402 if (ret
== -ENOENT
|| ret
>= 0)
2403 path_init_rm
= true;
2405 path_init_rm
= false;
2408 if (!path_base_rm
&& !path_init_rm
)
/* Wrapper around cgv{1,2}_prune_empty_cgroups().
 *
 * Pruning is best effort: both results are deliberately ignored.
 */
static void cg_prune_empty_cgroups(const char *user)
{
	(void)cgv1_prune_empty_cgroups(user);
	(void)cgv2_prune_empty_cgroups(user);
}

2421 /* Free allocated information for detected cgroupfs v1 hierarchies. */
2422 static void cgv1_free_hierarchies(void)
2424 struct cgv1_hierarchy
**it
;
2426 if (!cgv1_hierarchies
)
2429 for (it
= cgv1_hierarchies
; it
&& *it
; it
++) {
2430 if ((*it
)->controllers
) {
2432 for (tmp
= (*it
)->controllers
; tmp
&& *tmp
; tmp
++)
2435 free((*it
)->controllers
);
2438 free((*it
)->mountpoint
);
2439 free((*it
)->base_cgroup
);
2440 free((*it
)->fullcgpath
);
2441 free((*it
)->init_cgroup
);
2444 free(cgv1_hierarchies
);
2447 /* Free allocated information for the detected cgroupfs v2 hierarchy. */
2448 static void cgv2_free_hierarchies(void)
2450 struct cgv2_hierarchy
**it
;
2452 if (!cgv2_hierarchies
)
2455 for (it
= cgv2_hierarchies
; it
&& *it
; it
++) {
2456 if ((*it
)->controllers
) {
2459 for (tmp
= (*it
)->controllers
; tmp
&& *tmp
; tmp
++)
2462 free((*it
)->controllers
);
2465 free((*it
)->mountpoint
);
2466 free((*it
)->base_cgroup
);
2467 free((*it
)->fullcgpath
);
2468 free((*it
)->init_cgroup
);
2471 free(cgv2_hierarchies
);
/* Wrapper around cgv{1,2}_free_hierarchies(). */
static void cg_exit(void)
{
	cgv1_free_hierarchies();
	cgv2_free_hierarchies();
}

2481 int pam_sm_open_session(pam_handle_t
*pamh
, int flags
, int argc
,
2487 const char *PAM_user
= NULL
;
2489 ret
= pam_get_user(pamh
, &PAM_user
, NULL
);
2490 if (ret
!= PAM_SUCCESS
) {
2491 mysyslog(LOG_ERR
, "PAM-CGFS: couldn't get user\n", NULL
);
2492 return PAM_SESSION_ERR
;
2495 if (!get_uid_gid(PAM_user
, &uid
, &gid
)) {
2496 mysyslog(LOG_ERR
, "Failed to get uid and gid for %s\n", PAM_user
, NULL
);
2497 return PAM_SESSION_ERR
;
2500 if (!cg_init(uid
, gid
)) {
2501 mysyslog(LOG_ERR
, "Failed to get list of controllers\n", NULL
);
2502 return PAM_SESSION_ERR
;
2505 /* Try to prune cgroups, that are actually empty but were still marked
2506 * as busy by the kernel so we couldn't remove them on session close.
2508 cg_prune_empty_cgroups(PAM_user
);
2510 if (cg_mount_mode
== CGROUP_UNKNOWN
)
2511 return PAM_SESSION_ERR
;
2513 if (argc
> 1 && !strcmp(argv
[0], "-c")) {
2514 char **clist
= make_string_list(argv
[1], ",");
2517 * We don't allow using "all" and other controllers explicitly because
2518 * that simply doesn't make any sense.
2520 if (string_list_length(clist
) > 1 && string_in_list(clist
, "all")) {
2521 mysyslog(LOG_ERR
, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL
);
2522 free_string_list(clist
);
2523 return PAM_SESSION_ERR
;
2526 cg_mark_to_make_rw(clist
);
2527 free_string_list(clist
);
2530 return handle_login(PAM_user
, uid
, gid
);
2533 int pam_sm_close_session(pam_handle_t
*pamh
, int flags
, int argc
,
2539 const char *PAM_user
= NULL
;
2541 ret
= pam_get_user(pamh
, &PAM_user
, NULL
);
2542 if (ret
!= PAM_SUCCESS
) {
2543 mysyslog(LOG_ERR
, "PAM-CGFS: couldn't get user\n", NULL
);
2544 return PAM_SESSION_ERR
;
2547 if (!get_uid_gid(PAM_user
, &uid
, &gid
)) {
2548 mysyslog(LOG_ERR
, "Failed to get uid and gid for %s\n", PAM_user
, NULL
);
2549 return PAM_SESSION_ERR
;
2552 if (cg_mount_mode
== CGROUP_UNINITIALIZED
) {
2553 if (!cg_init(uid
, gid
))
2554 mysyslog(LOG_ERR
, "Failed to get list of controllers\n", NULL
);
2556 if (argc
> 1 && !strcmp(argv
[0], "-c")) {
2557 char **clist
= make_string_list(argv
[1], ",");
2560 * We don't allow using "all" and other controllers explicitly because
2561 * that simply doesn't make any sense.
2563 if (string_list_length(clist
) > 1 && string_in_list(clist
, "all")) {
2564 mysyslog(LOG_ERR
, "Invalid -c option, cannot specify individual controllers alongside 'all'\n", NULL
);
2565 free_string_list(clist
);
2566 return PAM_SESSION_ERR
;
2569 cg_mark_to_make_rw(clist
);
2570 free_string_list(clist
);
2574 cg_prune_empty_cgroups(PAM_user
);