2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/inotify.h>
38 #include <sys/mount.h>
39 #include <netinet/in.h>
54 #include <../include/lxcmntent.h>
/* Forward declarations: these structures refer to one another by pointer. */
struct cgroup_hierarchy;
struct cgroup_meta_data;
struct cgroup_mount_point;
64 * cgroup_meta_data: the metadata about the cgroup infrastructure on this
67 struct cgroup_meta_data
{
68 ptrdiff_t ref
; /* simple refcount */
69 struct cgroup_hierarchy
**hierarchies
;
70 struct cgroup_mount_point
**mount_points
;
71 int maximum_hierarchy
;
75 * cgroup_hierarchy: describes a single cgroup hierarchy
76 * (may have multiple mount points)
78 struct cgroup_hierarchy
{
80 bool used
; /* false if the hierarchy should be ignored by lxc */
82 struct cgroup_mount_point
*rw_absolute_mount_point
;
83 struct cgroup_mount_point
*ro_absolute_mount_point
;
84 struct cgroup_mount_point
**all_mount_points
;
85 size_t all_mount_point_capacity
;
89 * cgroup_mount_point: a mount point to where a hierarchy
92 struct cgroup_mount_point
{
93 struct cgroup_hierarchy
*hierarchy
;
97 bool need_cpuset_init
;
101 * cgroup_process_info: describes the membership of a
102 * process to the different cgroup
105 * Note this is the per-process info tracked by the cgfs_ops.
107 struct cgroup_process_info
{
108 struct cgroup_process_info
*next
;
109 struct cgroup_meta_data
*meta_ref
;
110 struct cgroup_hierarchy
*hierarchy
;
112 char *cgroup_path_sub
;
113 char **created_paths
;
114 size_t created_paths_capacity
;
115 size_t created_paths_count
;
116 struct cgroup_mount_point
*designated_mount_point
;
121 const char *cgroup_pattern
;
122 struct cgroup_meta_data
*meta
;
123 struct cgroup_process_info
*info
;
126 lxc_log_define(lxc_cgfs
, lxc
);
/*
 * Prototypes for file-local helpers, declared up front so that they can be
 * called before their definitions appear.
 */
static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
static bool is_valid_cgroup(const char *name);
static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse,
			 struct lxc_conf *conf);
static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
static bool cgroup_devices_has_allow_or_deny(struct cgfs_data *d, char *v, bool for_allow);
static int do_setup_cgroup_limits(struct cgfs_data *d, struct lxc_list *cgroup_settings, bool do_devices);
static int cgroup_recursive_task_count(const char *cgroup_path);
static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
static bool init_cpuset_if_needed(struct cgroup_mount_point *mp, const char *path);

/* metadata loading and reference counting */
static struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist);
static struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data);
static struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data);

/* free process membership information */
static void lxc_cgroup_process_info_free(struct cgroup_process_info *info);
static void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info,
						    struct lxc_conf *conf);
155 static struct cgroup_ops cgfs_ops
;
157 static int cgroup_rmdir(char *dirname
)
159 struct dirent
*direntp
;
163 char pathname
[MAXPATHLEN
];
165 dir
= opendir(dirname
);
167 ERROR("Failed to open %s", dirname
);
171 while ((direntp
= readdir(dir
))) {
178 if (!strcmp(direntp
->d_name
, ".") ||
179 !strcmp(direntp
->d_name
, ".."))
182 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
183 if (rc
< 0 || rc
>= MAXPATHLEN
) {
184 ERROR("pathname too long");
187 saved_errno
= -ENOMEM
;
190 ret
= lstat(pathname
, &mystat
);
192 SYSERROR("Failed to stat %s", pathname
);
198 if (S_ISDIR(mystat
.st_mode
)) {
199 if (cgroup_rmdir(pathname
) < 0) {
207 if (rmdir(dirname
) < 0) {
208 SYSERROR("Failed to delete %s", dirname
);
216 SYSERROR("Failed to close directory %s", dirname
);
223 return failed
? -1 : 0;
/*
 * rmdir_wrapper: set the real/effective/saved gid and uid to 0, clear the
 * supplementary group list, then return the result of cgroup_rmdir().
 *
 * A failure of setresgid(), setresuid() or setgroups() is only logged with
 * SYSERROR; the removal is still attempted afterwards.
 *
 * NOTE(review): the declaration of `path` is on a line not visible here --
 * presumably it is obtained from `data`; confirm against the full file.
 */
226 static int rmdir_wrapper(void *data
)
230 if (setresgid(0,0,0) < 0)
231 SYSERROR("Failed to setgid to 0");
232 if (setresuid(0,0,0) < 0)
233 SYSERROR("Failed to setuid to 0");
234 if (setgroups(0, NULL
) < 0)
235 SYSERROR("Failed to clear groups");
237 return cgroup_rmdir(path
);
/*
 * lxc_cgroup_load_meta: load the cgroup metadata using the global
 * "lxc.cgroup.use" configuration value as the subsystem whitelist.
 *
 * The configuration string is split on ',' (and trimmed) into a string
 * array, which is handed to lxc_cgroup_load_meta2() and released with
 * lxc_free_array() afterwards.
 *
 * NOTE(review): the statements taken when the config lookup fails with errno
 * set, or when the split fails, as well as the final return, are on lines
 * not visible here -- presumably NULL / md are returned; confirm against the
 * full file.
 */
240 static struct cgroup_meta_data
*lxc_cgroup_load_meta()
242 const char *cgroup_use
= NULL
;
243 char **cgroup_use_list
= NULL
;
244 struct cgroup_meta_data
*md
= NULL
;
248 cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
249 if (!cgroup_use
&& errno
!= 0)
252 cgroup_use_list
= lxc_string_split_and_trim(cgroup_use
, ',');
253 if (!cgroup_use_list
)
257 md
= lxc_cgroup_load_meta2((const char **)cgroup_use_list
);
259 lxc_free_array((void **)cgroup_use_list
, free
);
264 /* Step 1: determine all kernel subsystems */
265 static bool find_cgroup_subsystems(char ***kernel_subsystems
)
271 size_t kernel_subsystems_count
= 0;
272 size_t kernel_subsystems_capacity
= 0;
275 proc_cgroups
= fopen_cloexec("/proc/cgroups", "r");
279 while (getline(&line
, &sz
, proc_cgroups
) != -1) {
282 int hierarchy_number
;
289 tab1
= strchr(line
, '\t');
293 tab2
= strchr(tab1
, '\t');
299 hierarchy_number
= strtoul(tab1
, &tab2
, 10);
302 (void)hierarchy_number
;
304 r
= lxc_grow_array((void ***)kernel_subsystems
, &kernel_subsystems_capacity
, kernel_subsystems_count
+ 1, 12);
307 (*kernel_subsystems
)[kernel_subsystems_count
] = strdup(line
);
308 if (!(*kernel_subsystems
)[kernel_subsystems_count
])
310 kernel_subsystems_count
++;
315 fclose(proc_cgroups
);
320 /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
321 * since mount points don't specify hierarchy number and
322 * /proc/cgroups does not contain named hierarchies
324 static bool find_cgroup_hierarchies(struct cgroup_meta_data
*meta_data
,
325 bool all_kernel_subsystems
, bool all_named_subsystems
,
326 const char **subsystem_whitelist
)
328 FILE *proc_self_cgroup
;
333 size_t hierarchy_capacity
= 0;
335 proc_self_cgroup
= fopen_cloexec("/proc/self/cgroup", "r");
336 /* if for some reason (because of setns() and pid namespace for example),
337 * /proc/self is not valid, we try /proc/1/cgroup... */
338 if (!proc_self_cgroup
)
339 proc_self_cgroup
= fopen_cloexec("/proc/1/cgroup", "r");
340 if (!proc_self_cgroup
)
343 while (getline(&line
, &sz
, proc_self_cgroup
) != -1) {
344 /* file format: hierarchy:subsystems:group,
345 * we only extract hierarchy and subsystems
349 int hierarchy_number
;
350 struct cgroup_hierarchy
*h
= NULL
;
356 colon1
= strchr(line
, ':');
360 colon2
= strchr(colon1
, ':');
367 /* With cgroupv2 /proc/self/cgroup can contain entries of the
369 * These entries need to be skipped.
371 if (!strcmp(colon1
, ""))
374 hierarchy_number
= strtoul(line
, &colon2
, 10);
375 if (!colon2
|| *colon2
)
378 if (hierarchy_number
> meta_data
->maximum_hierarchy
) {
379 /* lxc_grow_array will never shrink, so even if we find a lower
380 * hierarchy number here, the array will never be smaller
382 r
= lxc_grow_array((void ***)&meta_data
->hierarchies
, &hierarchy_capacity
, hierarchy_number
+ 1, 12);
386 meta_data
->maximum_hierarchy
= hierarchy_number
;
389 /* this shouldn't happen, we had this already */
390 if (meta_data
->hierarchies
[hierarchy_number
])
393 h
= calloc(1, sizeof(struct cgroup_hierarchy
));
397 meta_data
->hierarchies
[hierarchy_number
] = h
;
399 h
->index
= hierarchy_number
;
400 h
->subsystems
= lxc_string_split_and_trim(colon1
, ',');
403 /* see if this hierarchy should be considered */
404 if (!all_kernel_subsystems
|| !all_named_subsystems
) {
405 for (p
= h
->subsystems
; *p
; p
++) {
406 if (!strncmp(*p
, "name=", 5)) {
407 if (all_named_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
412 if (all_kernel_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
419 /* we want all hierarchy anyway */
426 fclose(proc_self_cgroup
);
431 /* Step 3: determine all mount points of each hierarchy */
432 static bool find_hierarchy_mountpts( struct cgroup_meta_data
*meta_data
, char **kernel_subsystems
)
435 FILE *proc_self_mountinfo
;
438 char **tokens
= NULL
;
439 size_t mount_point_count
= 0;
440 size_t mount_point_capacity
= 0;
441 size_t token_capacity
= 0;
443 bool is_cgns
= cgns_supported();
445 proc_self_mountinfo
= fopen_cloexec("/proc/self/mountinfo", "r");
446 /* if for some reason (because of setns() and pid namespace for example),
447 * /proc/self is not valid, we try /proc/1/mountinfo... */
448 if (!proc_self_mountinfo
)
449 proc_self_mountinfo
= fopen_cloexec("/proc/1/mountinfo", "r");
450 if (!proc_self_mountinfo
)
453 while (getline(&line
, &sz
, proc_self_mountinfo
) != -1) {
454 char *token
, *line_tok
, *saveptr
= NULL
;
456 struct cgroup_mount_point
*mount_point
;
457 struct cgroup_hierarchy
*h
;
459 bool is_lxcfs
= false;
461 if (line
[0] && line
[strlen(line
) - 1] == '\n')
462 line
[strlen(line
) - 1] = '\0';
464 for (i
= 0, line_tok
= line
; (token
= strtok_r(line_tok
, " ", &saveptr
)); line_tok
= NULL
) {
465 r
= lxc_grow_array((void ***)&tokens
, &token_capacity
, i
+ 1, 64);
471 /* layout of /proc/self/mountinfo:
474 * 2: device major:minor
477 * 5: per-mount options
478 * [optional X]: additional data
482 * X+10: per-superblock options
484 for (j
= 6; j
< i
&& tokens
[j
]; j
++)
485 if (!strcmp(tokens
[j
], "-"))
488 /* could not find separator */
489 if (j
>= i
|| !tokens
[j
])
491 /* there should be exactly three fields after
497 /* not a cgroup filesystem */
498 if (strcmp(tokens
[j
+ 1], "cgroup") != 0) {
499 if (strcmp(tokens
[j
+ 1], "fuse.lxcfs") != 0)
501 if (strncmp(tokens
[4], "/sys/fs/cgroup/", 15) != 0)
504 char *curtok
= tokens
[4] + 15;
505 subsystems
= subsystems_from_mount_options(curtok
,
508 subsystems
= subsystems_from_mount_options(tokens
[j
+ 3],
514 for (k
= 0; k
<= meta_data
->maximum_hierarchy
; k
++) {
515 if (meta_data
->hierarchies
[k
] &&
516 meta_data
->hierarchies
[k
]->subsystems
[0] &&
517 lxc_string_in_array(meta_data
->hierarchies
[k
]->subsystems
[0], (const char **)subsystems
)) {
518 /* TODO: we could also check if the lists really match completely,
519 * just to have an additional sanity check */
520 h
= meta_data
->hierarchies
[k
];
524 lxc_free_array((void **)subsystems
, free
);
528 r
= lxc_grow_array((void ***)&meta_data
->mount_points
, &mount_point_capacity
, mount_point_count
+ 1, 12);
532 /* create mount point object */
533 mount_point
= calloc(1, sizeof(*mount_point
));
537 meta_data
->mount_points
[mount_point_count
++] = mount_point
;
539 mount_point
->hierarchy
= h
;
540 if (is_lxcfs
|| is_cgns
)
541 mount_point
->mount_prefix
= strdup("/");
543 mount_point
->mount_prefix
= strdup(tokens
[3]);
544 mount_point
->mount_point
= strdup(tokens
[4]);
545 if (!mount_point
->mount_point
|| !mount_point
->mount_prefix
)
547 mount_point
->read_only
= !lxc_string_in_list("rw", tokens
[5], ',');
549 if (!strcmp(mount_point
->mount_prefix
, "/")) {
550 if (mount_point
->read_only
) {
551 if (!h
->ro_absolute_mount_point
)
552 h
->ro_absolute_mount_point
= mount_point
;
554 if (!h
->rw_absolute_mount_point
)
555 h
->rw_absolute_mount_point
= mount_point
;
560 k
= lxc_array_len((void **)h
->all_mount_points
);
563 r
= lxc_grow_array((void ***)&h
->all_mount_points
, &h
->all_mount_point_capacity
, k
+ 1, 4);
566 h
->all_mount_points
[k
] = mount_point
;
571 fclose(proc_self_mountinfo
);
577 static struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
)
579 bool all_kernel_subsystems
= true;
580 bool all_named_subsystems
= false;
581 struct cgroup_meta_data
*meta_data
= NULL
;
582 char **kernel_subsystems
= NULL
;
585 /* if the subsystem whitelist is not specified, include all
586 * hierarchies that contain kernel subsystems by default but
587 * no hierarchies that only contain named subsystems
589 * if it is specified, the specifier @all will select all
590 * hierarchies, @kernel will select all hierarchies with
591 * kernel subsystems and @named will select all named
594 all_kernel_subsystems
= subsystem_whitelist
?
595 (lxc_string_in_array("@kernel", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
597 all_named_subsystems
= subsystem_whitelist
?
598 (lxc_string_in_array("@named", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
601 meta_data
= calloc(1, sizeof(struct cgroup_meta_data
));
606 if (!find_cgroup_subsystems(&kernel_subsystems
))
609 if (!find_cgroup_hierarchies(meta_data
, all_kernel_subsystems
,
610 all_named_subsystems
, subsystem_whitelist
))
613 if (!find_hierarchy_mountpts(meta_data
, kernel_subsystems
))
616 /* oops, we couldn't find anything */
617 if (!meta_data
->hierarchies
|| !meta_data
->mount_points
) {
622 lxc_free_array((void **)kernel_subsystems
, free
);
627 lxc_free_array((void **)kernel_subsystems
, free
);
628 lxc_cgroup_put_meta(meta_data
);
633 static struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
)
639 static struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
)
644 if (--meta_data
->ref
> 0)
646 lxc_free_array((void **)meta_data
->mount_points
, (lxc_free_fn
)lxc_cgroup_mount_point_free
);
647 if (meta_data
->hierarchies
)
648 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++)
649 if (meta_data
->hierarchies
[i
])
650 lxc_cgroup_hierarchy_free(meta_data
->hierarchies
[i
]);
651 free(meta_data
->hierarchies
);
/*
 * lxc_cgroup_find_hierarchy: scan meta_data->hierarchies[0] through
 * meta_data->hierarchies[maximum_hierarchy] (inclusive) for a non-NULL entry
 * whose subsystems array contains `subsystem`.
 *
 * NOTE(review): the return statements are on lines not visible here --
 * presumably the matching hierarchy is returned, or NULL when there is none;
 * confirm against the full file.
 */
656 static struct cgroup_hierarchy
*lxc_cgroup_find_hierarchy(struct cgroup_meta_data
*meta_data
, const char *subsystem
)
659 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
660 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
663 if (h
&& lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
669 static bool mountpoint_is_accessible(struct cgroup_mount_point
*mp
)
671 return mp
&& access(mp
->mount_point
, F_OK
) == 0;
/*
 * lxc_cgroup_find_mount_point: choose a mount point of `hierarchy` through
 * which the cgroup `group` can be reached.
 *
 * The two absolute mount points recorded on the hierarchy are tried first:
 * rw_absolute_mount_point is returned whenever it is accessible, and
 * ro_absolute_mount_point is returned next if the caller does not need
 * write access (should_be_writable == false).
 *
 * Failing that, hierarchy->all_mount_points is walked. The visible tests
 * look at: a mount_prefix of exactly "/", accessibility of the mount point,
 * read_only versus should_be_writable, and whether mount_prefix is a leading
 * path component of `group` (the match has to end at '\0' or '/').
 * `quality` holds the shortest matching prefix length seen so far (-1 while
 * there is none) and the function finally returns current_result.
 *
 * NOTE(review): the statements controlled by several of these tests are on
 * lines not visible here -- presumably `continue` for rejected candidates
 * and an assignment to current_result for a better match; confirm against
 * the full file.
 */
674 static struct cgroup_mount_point
*lxc_cgroup_find_mount_point(struct cgroup_hierarchy
*hierarchy
, const char *group
, bool should_be_writable
)
676 struct cgroup_mount_point
**mps
;
677 struct cgroup_mount_point
*current_result
= NULL
;
678 ssize_t quality
= -1;
681 if (mountpoint_is_accessible(hierarchy
->rw_absolute_mount_point
))
682 return hierarchy
->rw_absolute_mount_point
;
683 if (!should_be_writable
&& mountpoint_is_accessible(hierarchy
->ro_absolute_mount_point
))
684 return hierarchy
->ro_absolute_mount_point
;
686 for (mps
= hierarchy
->all_mount_points
; mps
&& *mps
; mps
++) {
687 struct cgroup_mount_point
*mp
= *mps
;
688 size_t prefix_len
= mp
->mount_prefix
? strlen(mp
->mount_prefix
) : 0;
690 if (prefix_len
== 1 && mp
->mount_prefix
[0] == '/')
693 if (!mountpoint_is_accessible(mp
))
696 if (should_be_writable
&& mp
->read_only
)
700 (strncmp(group
, mp
->mount_prefix
, prefix_len
) == 0 &&
701 (group
[prefix_len
] == '\0' || group
[prefix_len
] == '/'))) {
702 /* search for the best quality match, i.e. the match with the
703 * shortest prefix where this group is still contained
705 if (quality
== -1 || prefix_len
< quality
) {
707 quality
= prefix_len
;
714 return current_result
;
717 static char *lxc_cgroup_find_abs_path(const char *subsystem
, const char *group
, bool should_be_writable
, const char *suffix
)
719 struct cgroup_meta_data
*meta_data
;
720 struct cgroup_hierarchy
*h
;
721 struct cgroup_mount_point
*mp
;
725 meta_data
= lxc_cgroup_load_meta();
729 h
= lxc_cgroup_find_hierarchy(meta_data
, subsystem
);
733 mp
= lxc_cgroup_find_mount_point(h
, group
, should_be_writable
);
737 result
= cgroup_to_absolute_path(mp
, group
, suffix
);
741 lxc_cgroup_put_meta(meta_data
);
746 lxc_cgroup_put_meta(meta_data
);
/*
 * lxc_cgroup_process_info_get: obtain the cgroup membership of process `pid`.
 *
 * Formats "/proc/<pid>/cgroup" into pid_buf (output bounded to 32 bytes) and
 * returns whatever lxc_cgroup_process_info_getx() yields for that path and
 * `meta`.
 *
 * NOTE(review): the snprintf() result is not checked in the visible lines,
 * and the declaration of pid_buf is on a line not visible here.
 */
751 static struct cgroup_process_info
*lxc_cgroup_process_info_get(pid_t pid
, struct cgroup_meta_data
*meta
)
754 snprintf(pid_buf
, 32, "/proc/%lu/cgroup", (unsigned long)pid
);
755 return lxc_cgroup_process_info_getx(pid_buf
, meta
);
/*
 * lxc_cgroup_process_info_get_init: obtain the cgroup membership of pid 1.
 *
 * Thin wrapper: returns the result of lxc_cgroup_process_info_get(1, meta)
 * unchanged.
 */
static struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	return lxc_cgroup_process_info_get(1, meta);
}
/*
 * lxc_cgroup_process_info_get_self: obtain the cgroup membership of the
 * calling process.
 *
 * Two lookups are visible: lxc_cgroup_process_info_getx() on
 * "/proc/self/cgroup", and lxc_cgroup_process_info_get() with the pid
 * reported by lxc_raw_getpid().
 *
 * NOTE(review): the condition guarding the second lookup and the return
 * statement are on lines not visible here -- presumably the pid-based lookup
 * is a fallback used when the first one yields NULL; confirm against the
 * full file.
 */
763 static struct cgroup_process_info
*lxc_cgroup_process_info_get_self(struct cgroup_meta_data
*meta
)
765 struct cgroup_process_info
*i
;
766 i
= lxc_cgroup_process_info_getx("/proc/self/cgroup", meta
);
768 i
= lxc_cgroup_process_info_get(lxc_raw_getpid(), meta
);
773 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
774 * is already in a new cgroup named after the pid. 'mnt' is passed in as
775 * the full current cgroup. Say that is /sys/fs/cgroup/lxc/2975 and the container
776 * name is c1. We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1,
777 * and return the string /sys/fs/cgroup/lxc/c1.
779 static char *cgroup_rename_nsgroup(const char *mountpath
, const char *oldname
, pid_t pid
, const char *name
)
781 char *dir
, *fulloldpath
;
782 char *newname
, *fullnewpath
;
783 int len
, newlen
, ret
;
786 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
789 * fulloldpath = /cgroup/ab/2375
790 * fullnewpath = /cgroup/ab/c1
793 dir
= alloca(strlen(oldname
) + 1);
794 strcpy(dir
, oldname
);
796 len
= strlen(oldname
) + strlen(mountpath
) + 22;
797 fulloldpath
= alloca(len
);
798 ret
= snprintf(fulloldpath
, len
, "%s/%s/%lu", mountpath
, oldname
, (unsigned long)pid
);
799 if (ret
< 0 || ret
>= len
)
802 len
= strlen(dir
) + strlen(name
) + 2;
803 newname
= malloc(len
);
805 SYSERROR("Out of memory");
808 ret
= snprintf(newname
, len
, "%s/%s", dir
, name
);
809 if (ret
< 0 || ret
>= len
) {
814 newlen
= strlen(mountpath
) + len
+ 2;
815 fullnewpath
= alloca(newlen
);
816 ret
= snprintf(fullnewpath
, newlen
, "%s/%s", mountpath
, newname
);
817 if (ret
< 0 || ret
>= newlen
) {
822 if (access(fullnewpath
, F_OK
) == 0) {
823 if (rmdir(fullnewpath
) != 0) {
824 SYSERROR("container cgroup %s already exists.", fullnewpath
);
829 if (rename(fulloldpath
, fullnewpath
)) {
830 SYSERROR("failed to rename cgroup %s->%s", fulloldpath
, fullnewpath
);
835 DEBUG("'%s' renamed to '%s'", oldname
, newname
);
/*
 * is_crucial_hierarchy: walk the NULL-terminated h->subsystems list and test
 * every entry with is_crucial_cgroup_subsystem().
 *
 * NOTE(review): the return statements are on lines not visible here --
 * presumably true is returned on the first crucial subsystem and false if
 * none is found; confirm against the full file.
 */
840 static bool is_crucial_hierarchy(struct cgroup_hierarchy
*h
)
844 for (p
= h
->subsystems
; *p
; p
++) {
845 if (is_crucial_cgroup_subsystem(*p
))
851 /* create a new cgroup */
852 static struct cgroup_process_info
*lxc_cgroupfs_create(const char *name
, const char *path_pattern
, struct cgroup_meta_data
*meta_data
, const char *sub_pattern
)
854 char **cgroup_path_components
= NULL
;
856 char *path_so_far
= NULL
;
857 char **new_cgroup_paths
= NULL
;
858 char **new_cgroup_paths_sub
= NULL
;
859 struct cgroup_mount_point
*mp
;
860 struct cgroup_hierarchy
*h
;
861 struct cgroup_process_info
*base_info
= NULL
;
862 struct cgroup_process_info
*info_ptr
;
866 bool had_sub_pattern
= false;
869 if (!is_valid_cgroup(name
)) {
870 ERROR("Invalid cgroup name: '%s'", name
);
875 if (!strstr(path_pattern
, "%n")) {
876 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern
);
881 /* we will modify the result of this operation directly,
882 * so we don't have to copy the data structure
884 base_info
= (path_pattern
[0] == '/') ?
885 lxc_cgroup_process_info_get_init(meta_data
) :
886 lxc_cgroup_process_info_get_self(meta_data
);
890 new_cgroup_paths
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
891 if (!new_cgroup_paths
)
892 goto out_initial_error
;
894 new_cgroup_paths_sub
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
895 if (!new_cgroup_paths_sub
)
896 goto out_initial_error
;
898 /* find mount points we can use */
899 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
900 h
= info_ptr
->hierarchy
;
903 mp
= lxc_cgroup_find_mount_point(h
, info_ptr
->cgroup_path
, true);
905 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h
->index
);
906 goto out_initial_error
;
908 info_ptr
->designated_mount_point
= mp
;
910 if (lxc_string_in_array("ns", (const char **)h
->subsystems
))
912 if (handle_cgroup_settings(mp
, info_ptr
->cgroup_path
) < 0) {
913 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
914 goto out_initial_error
;
918 /* normalize the path */
919 cgroup_path_components
= lxc_normalize_path(path_pattern
);
920 if (!cgroup_path_components
)
921 goto out_initial_error
;
923 /* go through the path components to see if we can create them */
924 for (p
= cgroup_path_components
; *p
|| (sub_pattern
&& !had_sub_pattern
); p
++) {
925 /* we only want to create the same component with -1, -2, etc.
926 * if the component contains the container name itself, otherwise
927 * it's not an error if it already exists
929 char *p_eff
= *p
? *p
: (char *)sub_pattern
;
930 bool contains_name
= strstr(p_eff
, "%n");
931 char *current_component
= NULL
;
932 char *current_subpath
= NULL
;
933 char *current_entire_path
= NULL
;
938 /* if we are processing the subpattern, we want to make sure
939 * loop is ended the next time around
942 had_sub_pattern
= true;
946 goto find_name_on_this_level
;
948 cleanup_name_on_this_level
:
949 /* This is reached if we found a name clash.
950 * In that case, remove the cgroup from all previous hierarchies
952 for (j
= 0, info_ptr
= base_info
; j
< i
&& info_ptr
; info_ptr
= info_ptr
->next
, j
++) {
953 if (info_ptr
->created_paths_count
< 1)
955 r
= remove_cgroup(info_ptr
->designated_mount_point
, info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1], false, NULL
);
957 WARN("could not clean up cgroup we created when trying to create container");
958 free(info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1]);
959 info_ptr
->created_paths
[--info_ptr
->created_paths_count
] = NULL
;
961 if (current_component
!= current_subpath
)
962 free(current_subpath
);
963 if (current_component
!= p_eff
)
964 free(current_component
);
965 current_component
= current_subpath
= NULL
;
966 /* try again with another suffix */
969 find_name_on_this_level
:
970 /* determine name of the path component we should create */
971 if (contains_name
&& suffix
> 0) {
972 char *buf
= calloc(strlen(name
) + 32, 1);
974 goto out_initial_error
;
975 snprintf(buf
, strlen(name
) + 32, "%s-%u", name
, suffix
);
976 current_component
= lxc_string_replace("%n", buf
, p_eff
);
979 current_component
= contains_name
? lxc_string_replace("%n", name
, p_eff
) : p_eff
;
981 parts
[0] = path_so_far
;
982 parts
[1] = current_component
;
984 current_subpath
= path_so_far
? lxc_string_join("/", (const char **)parts
, false) : current_component
;
986 /* Now go through each hierarchy and try to create the
987 * corresponding cgroup
989 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
992 if (!info_ptr
->hierarchy
)
995 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
997 current_entire_path
= NULL
;
999 parts2
[0] = !strcmp(info_ptr
->cgroup_path
, "/") ? "" : info_ptr
->cgroup_path
;
1000 parts2
[1] = current_subpath
;
1002 current_entire_path
= lxc_string_join("/", (const char **)parts2
, false);
1005 /* we are processing the subpath, so only update that one */
1006 free(new_cgroup_paths_sub
[i
]);
1007 new_cgroup_paths_sub
[i
] = strdup(current_entire_path
);
1008 if (!new_cgroup_paths_sub
[i
])
1009 goto cleanup_from_error
;
1011 /* remember which path was used on this controller */
1012 free(new_cgroup_paths
[i
]);
1013 new_cgroup_paths
[i
] = strdup(current_entire_path
);
1014 if (!new_cgroup_paths
[i
])
1015 goto cleanup_from_error
;
1018 r
= create_cgroup(info_ptr
->designated_mount_point
, current_entire_path
);
1019 if (r
< 0 && errno
== EEXIST
&& contains_name
) {
1020 /* name clash => try new name with new suffix */
1021 free(current_entire_path
);
1022 current_entire_path
= NULL
;
1023 goto cleanup_name_on_this_level
;
1024 } else if (r
< 0 && errno
!= EEXIST
) {
1025 if (is_crucial_hierarchy(info_ptr
->hierarchy
)) {
1026 SYSERROR("Could not create cgroup '%s' in '%s'.", current_entire_path
, info_ptr
->designated_mount_point
->mount_point
);
1027 goto cleanup_from_error
;
1030 } else if (r
== 0) {
1031 /* successfully created */
1032 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1034 goto cleanup_from_error
;
1035 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, current_entire_path
)) {
1036 ERROR("Failed to initialize cpuset for '%s' in '%s'.", current_entire_path
, info_ptr
->designated_mount_point
->mount_point
);
1037 goto cleanup_from_error
;
1039 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = current_entire_path
;
1041 /* if we didn't create the cgroup, then we have to make sure that
1042 * further cgroups will be created properly
1044 if (handle_cgroup_settings(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
) < 0) {
1045 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
1046 goto cleanup_from_error
;
1048 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
)) {
1049 ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr
->cgroup_path
);
1050 goto cleanup_from_error
;
1054 /* already existed but path component of pattern didn't contain '%n',
1055 * so this is not an error; but then we don't need current_entire_path
1058 free(current_entire_path
);
1059 current_entire_path
= NULL
;
1063 /* save path so far */
1065 path_so_far
= strdup(current_subpath
);
1067 goto cleanup_from_error
;
1070 if (current_component
!= current_subpath
)
1071 free(current_subpath
);
1072 if (current_component
!= p_eff
)
1073 free(current_component
);
1074 current_component
= current_subpath
= NULL
;
1078 /* called if an error occurred in the loop, so we
1079 * do some additional cleanup here
1081 saved_errno
= errno
;
1082 if (current_component
!= current_subpath
)
1083 free(current_subpath
);
1084 if (current_component
!= p_eff
)
1085 free(current_component
);
1086 free(current_entire_path
);
1087 errno
= saved_errno
;
1088 goto out_initial_error
;
1091 /* we're done, now update the paths */
1092 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
1093 if (!info_ptr
->hierarchy
)
1095 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
1096 * will take care of it
1097 * Since we do a continue in above loop, new_cgroup_paths[i] is
1098 * unset anyway, as is new_cgroup_paths_sub[i]
1100 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
1102 free(info_ptr
->cgroup_path
);
1103 info_ptr
->cgroup_path
= new_cgroup_paths
[i
];
1104 info_ptr
->cgroup_path_sub
= new_cgroup_paths_sub
[i
];
1106 /* don't use lxc_free_array since we used the array members
1107 * to store them in our result...
1109 free(new_cgroup_paths
);
1110 free(new_cgroup_paths_sub
);
1112 lxc_free_array((void **)cgroup_path_components
, free
);
1116 saved_errno
= errno
;
1118 lxc_cgroup_process_info_free_and_remove(base_info
, NULL
);
1119 lxc_free_array((void **)new_cgroup_paths
, free
);
1120 lxc_free_array((void **)new_cgroup_paths_sub
, free
);
1121 lxc_free_array((void **)cgroup_path_components
, free
);
1122 errno
= saved_errno
;
1126 static int lxc_cgroup_create_legacy(struct cgroup_process_info
*base_info
, const char *name
, pid_t pid
)
1128 struct cgroup_process_info
*info_ptr
;
1131 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1132 if (!info_ptr
->hierarchy
)
1135 if (!lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
1138 * For any path which has ns cgroup mounted, handler->pid is already
1139 * moved into a container called '%d % (handler->pid)'. Rename it to
1140 * the cgroup name and record that.
1142 char *tmp
= cgroup_rename_nsgroup((const char *)info_ptr
->designated_mount_point
->mount_point
,
1143 info_ptr
->cgroup_path
, pid
, name
);
1146 free(info_ptr
->cgroup_path
);
1147 info_ptr
->cgroup_path
= tmp
;
1148 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1154 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = tmp
;
1159 /* get the cgroup membership of a given container */
1160 static struct cgroup_process_info
*lxc_cgroup_get_container_info(const char *name
, const char *lxcpath
, struct cgroup_meta_data
*meta_data
)
1162 struct cgroup_process_info
*result
= NULL
;
1163 int saved_errno
= 0;
1165 struct cgroup_process_info
**cptr
= &result
;
1166 struct cgroup_process_info
*entry
= NULL
;
1169 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
1170 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
1174 /* use the command interface to look for the cgroup */
1175 path
= lxc_cmd_get_cgroup_path(name
, lxcpath
, h
->subsystems
[0]);
1181 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1184 entry
->meta_ref
= lxc_cgroup_get_meta(meta_data
);
1185 entry
->hierarchy
= h
;
1186 entry
->cgroup_path
= path
;
1189 /* it is not an error if we don't find anything here,
1190 * it is up to the caller to decide what to do in that
1192 entry
->designated_mount_point
= lxc_cgroup_find_mount_point(h
, entry
->cgroup_path
, true);
1195 cptr
= &entry
->next
;
1201 saved_errno
= errno
;
1203 lxc_cgroup_process_info_free(result
);
1204 lxc_cgroup_process_info_free(entry
);
1205 errno
= saved_errno
;
1209 /* move a process to the cgroups specified by the membership */
1210 static int lxc_cgroupfs_enter(struct cgroup_process_info
*info
, pid_t pid
, bool enter_sub
)
1213 char *cgroup_tasks_fn
;
1215 struct cgroup_process_info
*info_ptr
;
1217 snprintf(pid_buf
, 32, "%lu", (unsigned long)pid
);
1218 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1219 if (!info_ptr
->hierarchy
)
1222 char *cgroup_path
= (enter_sub
&& info_ptr
->cgroup_path_sub
) ?
1223 info_ptr
->cgroup_path_sub
:
1224 info_ptr
->cgroup_path
;
1226 if (!info_ptr
->designated_mount_point
) {
1227 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, cgroup_path
, true);
1228 if (!info_ptr
->designated_mount_point
) {
1229 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid
, cgroup_path
);
1234 cgroup_tasks_fn
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, cgroup_path
, "/tasks");
1235 if (!cgroup_tasks_fn
) {
1236 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1240 r
= lxc_write_to_file(cgroup_tasks_fn
, pid_buf
, strlen(pid_buf
), false);
1241 free(cgroup_tasks_fn
);
1242 if (r
< 0 && is_crucial_hierarchy(info_ptr
->hierarchy
)) {
1243 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1251 /* free process membership information */
1252 void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
)
1254 struct cgroup_process_info
*next
;
1258 lxc_cgroup_put_meta(info
->meta_ref
);
1259 free(info
->cgroup_path
);
1260 free(info
->cgroup_path_sub
);
1261 lxc_free_array((void **)info
->created_paths
, free
);
1263 lxc_cgroup_process_info_free(next
);
1266 /* free process membership information and remove cgroups that were created */
1267 void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
, struct lxc_conf
*conf
)
1269 struct cgroup_process_info
*next
;
1275 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1277 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1279 /* ignore return value here, perhaps we created the
1280 * '/lxc' cgroup in this container but another container
1281 * is still running (for example)
1283 (void)remove_cgroup(mp
, info
->cgroup_path
, true, conf
);
1285 for (pp
= info
->created_paths
; pp
&& *pp
; pp
++);
1286 for ((void)(pp
&& --pp
); info
->created_paths
&& pp
>= info
->created_paths
; --pp
) {
1289 free(info
->created_paths
);
1290 lxc_cgroup_put_meta(info
->meta_ref
);
1291 free(info
->cgroup_path
);
1292 free(info
->cgroup_path_sub
);
1294 lxc_cgroup_process_info_free_and_remove(next
, conf
);
1297 static char *lxc_cgroup_get_hierarchy_path_data(const char *subsystem
, struct cgfs_data
*d
)
1299 struct cgroup_process_info
*info
= d
->info
;
1300 info
= find_info_for_subsystem(info
, subsystem
);
1303 prune_init_scope(info
->cgroup_path
);
1304 return info
->cgroup_path
;
1307 static char *lxc_cgroup_get_hierarchy_abs_path_data(const char *subsystem
, struct cgfs_data
*d
)
1309 struct cgroup_process_info
*info
= d
->info
;
1310 struct cgroup_mount_point
*mp
= NULL
;
1312 info
= find_info_for_subsystem(info
, subsystem
);
1315 if (info
->designated_mount_point
) {
1316 mp
= info
->designated_mount_point
;
1318 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1322 return cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1325 static char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1327 struct cgroup_meta_data
*meta
;
1328 struct cgroup_process_info
*base_info
, *info
;
1329 struct cgroup_mount_point
*mp
;
1330 char *result
= NULL
;
1332 meta
= lxc_cgroup_load_meta();
1335 base_info
= lxc_cgroup_get_container_info(name
, lxcpath
, meta
);
1338 info
= find_info_for_subsystem(base_info
, subsystem
);
1341 if (info
->designated_mount_point
) {
1342 mp
= info
->designated_mount_point
;
1344 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1348 result
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1351 lxc_cgroup_process_info_free(base_info
);
1353 lxc_cgroup_put_meta(meta
);
/*
 * Write @value to the cgroup control file @filename (e.g.
 * "memory.limit_in_bytes") of the container described by @d.
 *
 * The subsystem is the part of @filename before the first '.'. errno is
 * preset to ENOENT before the path lookup, and the errno left by the
 * write is preserved across the cleanup.
 *
 * Returns the result of do_cgroup_set(), or -1 if no path could be found.
 */
static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfs_data *d)
{
	char *subsystem = NULL, *p, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	if ((p = strchr(subsystem, '.')) != NULL)
		*p = '\0';

	errno = ENOENT;
	path = lxc_cgroup_get_hierarchy_abs_path_data(subsystem, d);
	if (path) {
		int saved_errno;

		ret = do_cgroup_set(path, filename, value);
		saved_errno = errno;
		free(path);
		errno = saved_errno;
	}
	return ret;
}
/*
 * Write @value to the cgroup control file @filename of container @name
 * (under @lxcpath).
 *
 * The subsystem is the part of @filename before the first '.'. The errno
 * left by the write is preserved across the cleanup, matching
 * lxc_cgroup_set_data().
 *
 * Returns the result of do_cgroup_set(), or -1 if no path could be found.
 */
static int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	char *subsystem = NULL, *p, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	if ((p = strchr(subsystem, '.')) != NULL)
		*p = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		int saved_errno;

		ret = do_cgroup_set(path, filename, value);
		/* free() is not guaranteed to leave errno alone */
		saved_errno = errno;
		free(path);
		errno = saved_errno;
	}
	return ret;
}
/*
 * Read the cgroup control file @filename of container @name (under
 * @lxcpath) into @value, which has room for @len bytes.
 *
 * The subsystem is the part of @filename before the first '.'. The errno
 * left by the read is preserved across the cleanup.
 *
 * Returns the result of do_cgroup_get(), or -1 if no path could be found.
 */
static int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	char *subsystem = NULL, *p, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	if ((p = strchr(subsystem, '.')) != NULL)
		*p = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		int saved_errno;

		ret = do_cgroup_get(path, filename, value, len);
		/* free() is not guaranteed to leave errno alone */
		saved_errno = errno;
		free(path);
		errno = saved_errno;
	}
	return ret;
}
1414 static bool cgroupfs_mount_cgroup(void *hdata
, const char *root
, int type
)
1416 size_t bufsz
= strlen(root
) + sizeof("/sys/fs/cgroup");
1418 char **parts
= NULL
;
1419 char *dirname
= NULL
;
1420 char *abs_path
= NULL
;
1421 char *abs_path2
= NULL
;
1422 struct cgfs_data
*cgfs_d
;
1423 struct cgroup_process_info
*info
, *base_info
;
1424 int r
, saved_errno
= 0;
1425 struct lxc_handler
*handler
= hdata
;
1427 if (cgns_supported())
1430 cgfs_d
= handler
->cgroup_data
;
1433 base_info
= cgfs_d
->info
;
1435 /* If we get passed the _NOSPEC types, we default to _MIXED, since we don't
1436 * have access to the lxc_conf object at this point. It really should be up
1437 * to the caller to fix this, but this doesn't really hurt.
1439 if (type
== LXC_AUTO_CGROUP_FULL_NOSPEC
)
1440 type
= LXC_AUTO_CGROUP_FULL_MIXED
;
1441 else if (type
== LXC_AUTO_CGROUP_NOSPEC
)
1442 type
= LXC_AUTO_CGROUP_MIXED
;
1444 if (type
< LXC_AUTO_CGROUP_RO
|| type
> LXC_AUTO_CGROUP_FULL_MIXED
) {
1445 ERROR("could not mount cgroups into container: invalid type specified internally");
1450 path
= calloc(1, bufsz
);
1453 snprintf(path
, bufsz
, "%s/sys/fs/cgroup", root
);
1454 r
= safe_mount("cgroup_root", path
, "tmpfs",
1455 MS_NOSUID
|MS_NODEV
|MS_NOEXEC
|MS_RELATIME
,
1456 "size=10240k,mode=755",
1459 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1463 /* now mount all the hierarchies we care about */
1464 for (info
= base_info
; info
; info
= info
->next
) {
1465 size_t subsystem_count
, i
;
1466 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1468 if (!info
->hierarchy
)
1471 if (!mountpoint_is_accessible(mp
))
1472 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1475 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1479 subsystem_count
= lxc_array_len((void **)info
->hierarchy
->subsystems
);
1480 parts
= calloc(subsystem_count
+ 1, sizeof(char *));
1484 for (i
= 0; i
< subsystem_count
; i
++) {
1485 if (!strncmp(info
->hierarchy
->subsystems
[i
], "name=", 5))
1486 parts
[i
] = info
->hierarchy
->subsystems
[i
] + 5;
1488 parts
[i
] = info
->hierarchy
->subsystems
[i
];
1490 dirname
= lxc_string_join(",", (const char **)parts
, false);
1494 /* create subsystem directory */
1495 abs_path
= lxc_append_paths(path
, dirname
);
1498 r
= mkdir_p(abs_path
, 0755);
1499 if (r
< 0 && errno
!= EEXIST
) {
1500 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname
);
1504 abs_path2
= lxc_append_paths(abs_path
, info
->cgroup_path
);
1508 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_RW
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1509 /* bind-mount the cgroup entire filesystem there */
1510 if (strcmp(mp
->mount_prefix
, "/") != 0) {
1511 /* FIXME: maybe we should just try to remount the entire hierarchy
1512 * with a regular mount command? may that works? */
1513 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname
);
1516 r
= mount(mp
->mount_point
, abs_path
, "none", MS_BIND
, 0);
1518 SYSERROR("error bind-mounting %s to %s", mp
->mount_point
, abs_path
);
1521 /* main cgroup path should be read-only */
1522 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1523 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1525 SYSERROR("error re-mounting %s readonly", abs_path
);
1529 /* own cgroup should be read-write */
1530 if (type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1531 r
= mount(abs_path2
, abs_path2
, NULL
, MS_BIND
, NULL
);
1533 SYSERROR("error bind-mounting %s onto itself", abs_path2
);
1536 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
, NULL
);
1538 SYSERROR("error re-mounting %s readwrite", abs_path2
);
1543 /* create path for container's cgroup */
1544 r
= mkdir_p(abs_path2
, 0755);
1545 if (r
< 0 && errno
!= EEXIST
) {
1546 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname
, info
->cgroup_path
);
1550 /* for read-only and mixed cases, we have to bind-mount the tmpfs directory
1551 * that points to the hierarchy itself (i.e. /sys/fs/cgroup/cpu etc.) onto
1552 * itself and then bind-mount it read-only, since we keep the tmpfs itself
1553 * read-write (see comment below)
1555 if (type
== LXC_AUTO_CGROUP_MIXED
|| type
== LXC_AUTO_CGROUP_RO
) {
1556 r
= mount(abs_path
, abs_path
, NULL
, MS_BIND
, NULL
);
1558 SYSERROR("error bind-mounting %s onto itself", abs_path
);
1561 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1563 SYSERROR("error re-mounting %s readonly", abs_path
);
1571 /* bind-mount container's cgroup to that directory */
1572 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1575 r
= mount(abs_path
, abs_path2
, "none", MS_BIND
, 0);
1576 if (r
< 0 && is_crucial_hierarchy(info
->hierarchy
)) {
1577 SYSERROR("error bind-mounting %s to %s", abs_path
, abs_path2
);
1580 if (type
== LXC_AUTO_CGROUP_RO
) {
1581 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1583 SYSERROR("error re-mounting %s readonly", abs_path2
);
1594 /* add symlinks for every single subsystem */
1595 if (subsystem_count
> 1) {
1596 for (i
= 0; i
< subsystem_count
; i
++) {
1597 abs_path
= lxc_append_paths(path
, parts
[i
]);
1600 r
= symlink(dirname
, abs_path
);
1602 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts
[i
], dirname
);
1613 /* We used to remount the entire tmpfs readonly if any :ro or
1614 * :mixed mode was specified. However, Ubuntu's mountall has the
1615 * unfortunate behavior to block bootup if /sys/fs/cgroup is
1616 * mounted read-only and cannot be remounted read-write.
1617 * (mountall reads /lib/init/fstab and tries to (re-)mount all of
1618 * these if they are not already mounted with the right options;
1619 * it contains an entry for /sys/fs/cgroup. In case it can't do
1620 * that, it prompts for the user to either manually fix it or
1621 * boot anyway. But without user input, booting of the container
1624 * Instead of remounting the entire tmpfs readonly, we only
1625 * remount the paths readonly that are part of the cgroup
1634 saved_errno
= errno
;
1640 errno
= saved_errno
;
1644 static int cgfs_nrtasks(void *hdata
)
1646 struct cgfs_data
*d
= hdata
;
1647 struct cgroup_process_info
*info
;
1648 struct cgroup_mount_point
*mp
= NULL
;
1649 char *abs_path
= NULL
;
1663 if (info
->designated_mount_point
) {
1664 mp
= info
->designated_mount_point
;
1666 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, false);
1671 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1675 ret
= cgroup_recursive_task_count(abs_path
);
1680 static struct cgroup_process_info
*
1681 lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
,
1682 struct cgroup_meta_data
*meta
)
1684 struct cgroup_process_info
*result
= NULL
;
1685 FILE *proc_pid_cgroup
= NULL
;
1688 int saved_errno
= 0;
1689 struct cgroup_process_info
**cptr
= &result
;
1690 struct cgroup_process_info
*entry
= NULL
;
1692 proc_pid_cgroup
= fopen_cloexec(proc_pid_cgroup_str
, "r");
1693 if (!proc_pid_cgroup
)
1696 while (getline(&line
, &sz
, proc_pid_cgroup
) != -1) {
1697 /* file format: hierarchy:subsystems:group */
1701 int hierarchy_number
;
1702 struct cgroup_hierarchy
*h
= NULL
;
1707 if (line
[strlen(line
) - 1] == '\n')
1708 line
[strlen(line
) - 1] = '\0';
1710 colon1
= strchr(line
, ':');
1714 colon2
= strchr(colon1
, ':');
1721 /* With cgroupv2 /proc/self/cgroup can contain entries of the
1723 * These entries need to be skipped.
1725 if (!strcmp(colon1
, ""))
1728 hierarchy_number
= strtoul(line
, &endptr
, 10);
1729 if (!endptr
|| *endptr
)
1732 if (hierarchy_number
> meta
->maximum_hierarchy
) {
1733 /* we encountered a hierarchy we didn't have before,
1734 * so probably somebody remounted some stuff in the
1741 h
= meta
->hierarchies
[hierarchy_number
];
1743 /* we encountered a hierarchy that was thought to be
1744 * dead before, so probably somebody remounted some
1745 * stuff in the mean time...
1751 /* we are told that we should ignore this hierarchy */
1755 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1759 entry
->meta_ref
= lxc_cgroup_get_meta(meta
);
1760 entry
->hierarchy
= h
;
1761 entry
->cgroup_path
= strdup(colon2
);
1762 if (!entry
->cgroup_path
)
1764 prune_init_scope(entry
->cgroup_path
);
1767 cptr
= &entry
->next
;
1771 fclose(proc_pid_cgroup
);
1776 saved_errno
= errno
;
1777 if (proc_pid_cgroup
)
1778 fclose(proc_pid_cgroup
);
1779 lxc_cgroup_process_info_free(result
);
1780 lxc_cgroup_process_info_free(entry
);
1782 errno
= saved_errno
;
/*
 * Turn a comma separated option string into a NULL terminated array of
 * subsystem names.
 *
 * @mount_options : comma separated list (not modified; a stack copy is
 *                  tokenized)
 * @kernel_list   : NULL terminated list of subsystems known to the kernel
 *
 * A token is copied verbatim when it starts with "name=" or is found in
 * @kernel_list; any other token is stored with a "name=" prefix.
 *
 * Returns the newly allocated array (NULL when the string holds no token),
 * or NULL with errno preserved on allocation failure. The caller owns the
 * array and its strings.
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	char *token, *str, *saveptr = NULL;
	char **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int saved_errno;
	int r;

	str = alloca(strlen(mount_options)+1);
	strcpy(str, mount_options);
	for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) {
		/* we have a subsystem if it's either in the list of
		 * subsystems provided by the kernel OR if it starts
		 * with name= for named hierarchies
		 */
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 12);
		if (r < 0)
			goto out_free;
		/* keep the array terminated so it can be freed at any point */
		result[result_count + 1] = NULL;
		if (strncmp(token, "name=", 5) && !lxc_string_in_array(token, (const char **)kernel_list)) {
			/* this is eg 'systemd' but the mount will be
			 * 'name=systemd'
			 */
			size_t need = strlen(token) + 6;

			result[result_count] = malloc(need);
			if (result[result_count])
				snprintf(result[result_count], need, "name=%s", token);
		} else {
			result[result_count] = strdup(token);
		}
		if (!result[result_count])
			goto out_free;
		result_count++;
	}

	return result;

out_free:
	saved_errno = errno;
	lxc_free_array((void**)result, free);
	errno = saved_errno;
	return NULL;
}
1830 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
)
1834 free(mp
->mount_point
);
1835 free(mp
->mount_prefix
);
1839 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
)
1843 if (h
->subsystems
) {
1844 lxc_free_array((void **)h
->subsystems
, free
);
1845 h
->subsystems
= NULL
;
1847 if (h
->all_mount_points
) {
1848 free(h
->all_mount_points
);
1849 h
->all_mount_points
= NULL
;
/*
 * Check whether @name is acceptable as a single cgroup path component.
 *
 * Rejected are names containing '/', a space, a control character, DEL or
 * any byte outside 7-bit ASCII, as well as the names "." and "..".
 */
static bool is_valid_cgroup(const char *name)
{
	const char *p;

	for (p = name; *p; p++) {
		/* Use the ASCII printable characters range(32 - 127)
		 * is reasonable, we kick out 32(SPACE) because it'll
		 * break legacy lxc-ls
		 */
		unsigned char c = (unsigned char)*p;

		if (c <= 32 || c >= 127 || c == '/')
			return false;
	}

	return strcmp(name, ".") != 0 && strcmp(name, "..") != 0;
}
1869 static int create_or_remove_cgroup(bool do_remove
,
1870 struct cgroup_mount_point
*mp
, const char *path
, int recurse
,
1871 struct lxc_conf
*conf
)
1873 int r
, saved_errno
= 0;
1874 char *buf
= cgroup_to_absolute_path(mp
, path
, NULL
);
1878 /* create or remove directory */
1880 if (!dir_exists(buf
))
1883 if (conf
&& !lxc_list_empty(&conf
->id_map
))
1884 r
= userns_exec_1(conf
, rmdir_wrapper
, buf
,
1887 r
= cgroup_rmdir(buf
);
1891 r
= mkdir_p(buf
, 0777);
1892 saved_errno
= errno
;
1894 errno
= saved_errno
;
/*
 * Create the directory of cgroup @path below mount point @mp.
 * Thin wrapper around create_or_remove_cgroup(); returns its result.
 */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	return create_or_remove_cgroup(false, mp, path, false, NULL);
}
/*
 * Remove the directory of cgroup @path below mount point @mp, recursively
 * if @recurse is set. @conf is passed on so that the removal can run in a
 * user namespace when an id mapping is configured.
 * Thin wrapper around create_or_remove_cgroup(); returns its result.
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse, struct lxc_conf *conf)
{
	return create_or_remove_cgroup(true, mp, path, recurse, conf);
}
1909 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
,
1910 const char *path
, const char *suffix
)
1912 /* first we have to make sure we subtract the mount point's prefix */
1913 char *prefix
= mp
->mount_prefix
;
1917 /* we want to make sure only absolute paths to cgroups are passed to us */
1918 if (path
[0] != '/') {
1923 if (prefix
&& !strcmp(prefix
, "/"))
1926 /* prefix doesn't match */
1927 if (prefix
&& strncmp(prefix
, path
, strlen(prefix
)) != 0) {
1931 /* if prefix is /foo and path is /foobar */
1932 if (prefix
&& path
[strlen(prefix
)] != '/' && path
[strlen(prefix
)] != '\0') {
1937 /* remove prefix from path */
1938 path
+= prefix
? strlen(prefix
) : 0;
1940 len
= strlen(mp
->mount_point
) + strlen(path
) + (suffix
? strlen(suffix
) : 0);
1941 buf
= calloc(len
+ 1, 1);
1944 rv
= snprintf(buf
, len
+ 1, "%s%s%s", mp
->mount_point
, path
, suffix
? suffix
: "");
1954 static struct cgroup_process_info
*
1955 find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
)
1957 struct cgroup_process_info
*info_ptr
;
1958 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1959 struct cgroup_hierarchy
*h
= info_ptr
->hierarchy
;
1962 if (lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
/*
 * Read the file @sub_filename inside directory @cgroup_path into @value
 * (at most @len bytes).
 *
 * Returns the result of lxc_read_from_file() with its errno preserved, or
 * -1 if the file name could not be built.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *parts[3] = {
		cgroup_path,
		sub_filename,
		NULL
	};
	char *filename;
	int ret, saved_errno;

	filename = lxc_string_join("/", parts, false);
	if (!filename)
		return -1;

	ret = lxc_read_from_file(filename, value, len);
	saved_errno = errno;
	free(filename);
	errno = saved_errno;
	return ret;
}
/*
 * Write the string @value to the file @sub_filename inside directory
 * @cgroup_path.
 *
 * Returns the result of lxc_write_to_file() with its errno preserved, or
 * -1 if the file name could not be built.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *parts[3] = {
		cgroup_path,
		sub_filename,
		NULL
	};
	char *filename;
	int ret, saved_errno;

	filename = lxc_string_join("/", parts, false);
	if (!filename)
		return -1;

	ret = lxc_write_to_file(filename, value, strlen(value), false);
	saved_errno = errno;
	free(filename);
	errno = saved_errno;
	return ret;
}
2013 static int do_setup_cgroup_limits(struct cgfs_data
*d
,
2014 struct lxc_list
*cgroup_settings
, bool do_devices
)
2016 struct lxc_list
*iterator
, *sorted_cgroup_settings
, *next
;
2017 struct lxc_cgroup
*cg
;
2020 if (lxc_list_empty(cgroup_settings
))
2023 sorted_cgroup_settings
= sort_cgroup_settings(cgroup_settings
);
2024 if (!sorted_cgroup_settings
) {
2028 lxc_list_for_each(iterator
, sorted_cgroup_settings
) {
2029 cg
= iterator
->elem
;
2031 if (do_devices
== !strncmp("devices", cg
->subsystem
, 7)) {
2032 if (strcmp(cg
->subsystem
, "devices.deny") == 0 &&
2033 cgroup_devices_has_allow_or_deny(d
, cg
->value
, false))
2035 if (strcmp(cg
->subsystem
, "devices.allow") == 0 &&
2036 cgroup_devices_has_allow_or_deny(d
, cg
->value
, true))
2038 if (lxc_cgroup_set_data(cg
->subsystem
, cg
->value
, d
)) {
2039 if (do_devices
&& (errno
== EACCES
|| errno
== EPERM
)) {
2040 WARN("Error setting %s to %s for %s",
2041 cg
->subsystem
, cg
->value
, d
->name
);
2044 SYSERROR("Error setting %s to %s for %s",
2045 cg
->subsystem
, cg
->value
, d
->name
);
2050 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
2054 INFO("cgroup has been setup");
2056 lxc_list_for_each_safe(iterator
, sorted_cgroup_settings
, next
) {
2057 lxc_list_del(iterator
);
2060 free(sorted_cgroup_settings
);
/*
 * Check devices.list of the container's devices cgroup to find out whether
 * writing @v to devices.allow (@for_allow true) or devices.deny
 * (@for_allow false) would change anything.
 *
 * For deny only the values "a" and "a *:* rwm" are examined: the result
 * is true unless the list contains "a *:* rwm". For allow the result is
 * true when the list contains "a *:* rwm" or a line equal to @v.
 * Any failure to locate or open the list yields false.
 */
static bool cgroup_devices_has_allow_or_deny(struct cgfs_data *d,
					     char *v, bool for_allow)
{
	char *path;
	char *cgpath;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	/* XXX FIXME: users could write something other than 'a' to
	 * devices.deny to deny everything; only the two spellings below are
	 * recognised here.
	 */
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	cgpath = lxc_cgroup_get_hierarchy_abs_path_data("devices", d);
	if (!cgpath)
		return false;
	parts[0] = cgpath;
	path = lxc_string_join("/", parts, false);
	/* the directory path is only needed to build the file name */
	free(cgpath);
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);
		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			ret = for_allow;
			goto out;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			goto out;
		}
	}

out:
	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
/*
 * Count the lines of every "tasks" file found in @cgroup_path and in all
 * directories below it.
 *
 * Returns the sum, 0 when @cgroup_path cannot be opened as a directory,
 * and -1 when a path cannot be built or an entry cannot be stat()ed.
 * A negative count from a subdirectory or a tasks file is ignored rather
 * than propagated.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d;
	struct dirent *dent;
	int n = 0, r;

	d = opendir(cgroup_path);
	if (!d)
		return 0;

	while ((dent = readdir(d))) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;
		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			closedir(d);
			return -1;
		}
		r = stat(sub_path, &st);
		if (r < 0) {
			closedir(d);
			free(sub_path);
			return -1;
		}
		if (S_ISDIR(st.st_mode)) {
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			r = lxc_count_file_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}
	closedir(d);

	return n;
}
2168 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
,
2171 int r
, saved_errno
= 0;
2174 mp
->need_cpuset_init
= false;
2176 /* If this is the memory cgroup, we want to enforce hierarchy.
2177 * But don't fail if for some reason we can't.
2179 if (lxc_string_in_array("memory", (const char **)mp
->hierarchy
->subsystems
)) {
2180 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/memory.use_hierarchy");
2182 r
= lxc_read_from_file(cc_path
, buf
, 1);
2183 if (r
< 1 || buf
[0] != '1') {
2184 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2186 SYSERROR("failed to set memory.use_hierarchy to 1; continuing");
2192 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2193 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2194 * and cpuset.cpus and then
2196 if (lxc_string_in_array("cpuset", (const char **)mp
->hierarchy
->subsystems
)) {
2197 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/cgroup.clone_children");
2202 /* cgroup.clone_children is not available when running under
2203 * older kernel versions; in this case, we'll initialize
2204 * cpuset.cpus and cpuset.mems later, after the new cgroup
2207 if (stat(cc_path
, &sb
) != 0 && errno
== ENOENT
) {
2208 mp
->need_cpuset_init
= true;
2212 r
= lxc_read_from_file(cc_path
, buf
, 1);
2213 if (r
== 1 && buf
[0] == '1') {
2217 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2218 saved_errno
= errno
;
2220 errno
= saved_errno
;
2221 return r
< 0 ? -1 : 0;
/*
 * Read file @fn into @buf (capacity @bufsize) and NUL terminate the
 * result.
 *
 * Returns the value reported by lxc_read_from_file(). When that value
 * equals @bufsize the buffer was filled completely and its last byte is
 * overwritten with the terminator, so callers can detect truncation by
 * comparing the return value with @bufsize. A negative value is returned
 * on read failure and -1 when @bufsize is 0.
 */
static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
{
	int ret = lxc_read_from_file(fn, buf, bufsize);
	if (ret < 0) {
		SYSERROR("failed to read %s", fn);
		return ret;
	}
	if ((size_t)ret == bufsize) {
		if (bufsize > 0) {
			/* obviously this wasn't empty */
			buf[bufsize-1] = '\0';
			return ret;
		}
		/* Callers don't do this, but regression/sanity check */
		ERROR("was not expecting 0 bufsize");
		return -1;
	}
	buf[ret] = '\0';
	return ret;
}
/*
 * Initialise one cpuset file of a child cgroup from its parent.
 *
 * @mp   : mount point of the cpuset hierarchy
 * @path : cgroup path of the child
 * @name : file name including the leading '/', e.g. "/cpuset.cpus"
 *
 * If the child's file already holds a value nothing is written.
 * Otherwise the value of the same file in the parent cgroup (@path with
 * its last component removed) is copied into it.
 *
 * Returns true if the child file has (or now has) a value, false on any
 * failure.
 */
static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
				const char *path, const char *name)
{
	char value[1024];
	char *childfile, *parentfile = NULL, *tmp;
	int ret;
	bool ok = false;

	childfile = cgroup_to_absolute_path(mp, path, name);
	if (!childfile)
		return false;

	/* don't overwrite a non-empty value in the file */
	ret = cgroup_read_from_file(childfile, value, sizeof(value));
	if (ret < 0)
		goto out;
	if (value[0] != '\0' && value[0] != '\n') {
		ok = true;
		goto out;
	}

	/* path to the same name in the parent cgroup */
	parentfile = strdup(path);
	if (!parentfile)
		goto out;

	tmp = strrchr(parentfile, '/');
	if (!tmp)
		goto out;
	if (tmp == parentfile)
		tmp++; /* keep the '/' at the start */
	*tmp = '\0';
	/* parentfile now holds the parent cgroup path; swap it for the
	 * absolute file name and release the intermediate copy
	 */
	tmp = parentfile;
	parentfile = cgroup_to_absolute_path(mp, tmp, name);
	free(tmp);
	if (!parentfile)
		goto out;

	/* copy from parent to child cgroup */
	ret = cgroup_read_from_file(parentfile, value, sizeof(value));
	if (ret < 0)
		goto out;
	if (ret == sizeof(value)) {
		/* If anyone actually sees this error, we can address it */
		ERROR("parent cpuset value too long");
		goto out;
	}
	ok = (lxc_write_to_file(childfile, value, strlen(value), false) >= 0);
	if (!ok)
		SYSERROR("failed writing %s", childfile);

out:
	free(parentfile);
	free(childfile);
	return ok;
}
2304 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
,
2307 /* the files we have to handle here are only in cpuset hierarchies */
2308 if (!lxc_string_in_array("cpuset",
2309 (const char **)mp
->hierarchy
->subsystems
))
2312 if (!mp
->need_cpuset_init
)
2315 return (do_init_cpuset_file(mp
, path
, "/cpuset.cpus") &&
2316 do_init_cpuset_file(mp
, path
, "/cpuset.mems") );
2319 static void print_cgfs_init_debuginfo(struct cgfs_data
*d
)
2323 if (!getenv("LXC_DEBUG_CGFS"))
2326 DEBUG("Cgroup information:");
2327 DEBUG(" container name: %s", d
->name
);
2328 if (!d
->meta
|| !d
->meta
->hierarchies
) {
2329 DEBUG(" No hierarchies found.");
2332 DEBUG(" Controllers:");
2333 for (i
= 0; i
<= d
->meta
->maximum_hierarchy
; i
++) {
2335 struct cgroup_hierarchy
*h
= d
->meta
->hierarchies
[i
];
2337 DEBUG(" Empty hierarchy number %d.", i
);
2340 for (p
= h
->subsystems
; p
&& *p
; p
++) {
2341 DEBUG(" %2d: %s", i
, *p
);
2346 struct cgroup_ops
*cgfs_ops_init(void)
2351 static void *cgfs_init(struct lxc_handler
*handler
)
2353 struct cgfs_data
*d
;
2355 d
= malloc(sizeof(*d
));
2359 memset(d
, 0, sizeof(*d
));
2360 d
->name
= strdup(handler
->name
);
2364 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
2366 d
->meta
= lxc_cgroup_load_meta();
2368 ERROR("cgroupfs failed to detect cgroup metadata");
2372 print_cgfs_init_debuginfo(d
);
2383 static void cgfs_destroy(void *hdata
, struct lxc_conf
*conf
)
2385 struct cgfs_data
*d
= hdata
;
2390 lxc_cgroup_process_info_free_and_remove(d
->info
, conf
);
2391 lxc_cgroup_put_meta(d
->meta
);
2395 static inline bool cgfs_create(void *hdata
)
2397 struct cgfs_data
*d
= hdata
;
2398 struct cgroup_process_info
*i
;
2399 struct cgroup_meta_data
*md
;
2404 i
= lxc_cgroupfs_create(d
->name
, d
->cgroup_pattern
, md
, NULL
);
2411 static inline bool cgfs_enter(void *hdata
, pid_t pid
)
2413 struct cgfs_data
*d
= hdata
;
2414 struct cgroup_process_info
*i
;
2420 ret
= lxc_cgroupfs_enter(i
, pid
, false);
2425 static inline bool cgfs_create_legacy(void *hdata
, pid_t pid
)
2427 struct cgfs_data
*d
= hdata
;
2428 struct cgroup_process_info
*i
;
2433 if (lxc_cgroup_create_legacy(i
, d
->name
, pid
) < 0) {
2434 ERROR("failed to create legacy ns cgroups for '%s'", d
->name
);
/*
 * Return the container's cgroup path in the hierarchy carrying
 * @subsystem, or NULL if @hdata is NULL or nothing matches. The string is
 * owned by the cgfs data and must not be freed.
 */
static const char *cgfs_get_cgroup(void *hdata, const char *subsystem)
{
	struct cgfs_data *d = hdata;

	if (!d)
		return NULL;
	return lxc_cgroup_get_hierarchy_path_data(subsystem, d);
}
2449 static bool cgfs_escape(void *hdata
)
2451 struct cgroup_meta_data
*md
;
2455 md
= lxc_cgroup_load_meta();
2459 for (i
= 0; i
<= md
->maximum_hierarchy
; i
++) {
2460 struct cgroup_hierarchy
*h
= md
->hierarchies
[i
];
2461 struct cgroup_mount_point
*mp
;
2467 WARN("not escaping hierarchy %d", i
);
2471 mp
= lxc_cgroup_find_mount_point(h
, "/", true);
2475 tasks
= cgroup_to_absolute_path(mp
, "/", "tasks");
2479 f
= fopen(tasks
, "a");
2484 written
= fprintf(f
, "%d\n", lxc_raw_getpid());
2487 SYSERROR("writing tasks failed\n");
2494 lxc_cgroup_put_meta(md
);
/*
 * Number of hierarchies: not implemented by this backend, always
 * returns -1.
 */
static int cgfs_num_hierarchies(void)
{
	/* not implemented */
	return -1;
}
/*
 * Subsystems of hierarchy @i: not implemented by this backend, always
 * returns false and leaves @out untouched.
 */
static bool cgfs_get_hierarchies(int i, char ***out)
{
	/* not implemented */
	return false;
}
/*
 * Thaw the container by writing "THAWED" to freezer.state of its freezer
 * cgroup.
 *
 * Returns true on success, false if @hdata is NULL, the container has no
 * freezer cgroup, its path cannot be resolved or the write fails.
 */
static bool cgfs_unfreeze(void *hdata)
{
	struct cgfs_data *d = hdata;
	char *cgabspath, *cgrelpath;
	int ret;

	if (!d)
		return false;

	cgrelpath = lxc_cgroup_get_hierarchy_path_data("freezer", d);
	/* no freezer hierarchy known for this container */
	if (!cgrelpath)
		return false;

	cgabspath = lxc_cgroup_find_abs_path("freezer", cgrelpath, true, NULL);
	if (!cgabspath)
		return false;

	ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
	free(cgabspath);
	return ret == 0;
}
2529 static bool cgroupfs_setup_limits(void *hdata
, struct lxc_conf
*conf
,
2532 struct cgfs_data
*d
= hdata
;
2536 return do_setup_cgroup_limits(d
, &conf
->cgroup
, with_devices
) == 0;
/*
 * Move process @pid into the cgroups of the running container @name
 * (under @lxcpath).
 *
 * The metadata and the container's membership are loaded for the
 * duration of the call only. Returns true on success, false otherwise.
 */
static bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
{
	struct cgroup_meta_data *meta_data;
	struct cgroup_process_info *container_info;
	int ret;

	meta_data = lxc_cgroup_load_meta();
	if (!meta_data) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	container_info = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
	/* our own reference on the metadata is no longer needed */
	lxc_cgroup_put_meta(meta_data);
	if (!container_info) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	ret = lxc_cgroupfs_enter(container_info, pid, false);
	lxc_cgroup_process_info_free(container_info);
	if (ret < 0) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}
	return true;
}
/*
 * Argument block handed to chown_cgroup_wrapper().
 */
struct chown_data {
	const char *cgroup_path;	/* absolute path of the cgroup directory */
	uid_t origuid;			/* effective uid of the calling process */
};
2573 * TODO - someone should refactor this to unshare once passing all the paths
2574 * to be chowned in one go
2576 static int chown_cgroup_wrapper(void *data
)
2578 struct chown_data
*arg
= data
;
2582 if (setresgid(0,0,0) < 0)
2583 SYSERROR("Failed to setgid to 0");
2584 if (setresuid(0,0,0) < 0)
2585 SYSERROR("Failed to setuid to 0");
2586 if (setgroups(0, NULL
) < 0)
2587 SYSERROR("Failed to clear groups");
2588 destuid
= get_ns_uid(arg
->origuid
);
2590 if (chown(arg
->cgroup_path
, destuid
, 0) < 0)
2591 SYSERROR("Failed chowning %s to %d", arg
->cgroup_path
, (int)destuid
);
2593 fpath
= lxc_append_paths(arg
->cgroup_path
, "tasks");
2596 if (chown(fpath
, destuid
, 0) < 0)
2597 SYSERROR("Error chowning %s\n", fpath
);
2600 fpath
= lxc_append_paths(arg
->cgroup_path
, "cgroup.procs");
2603 if (chown(fpath
, destuid
, 0) < 0)
2604 SYSERROR("Error chowning %s", fpath
);
2610 static bool do_cgfs_chown(char *cgroup_path
, struct lxc_conf
*conf
)
2612 struct chown_data data
;
2615 if (!dir_exists(cgroup_path
))
2618 if (lxc_list_empty(&conf
->id_map
))
2619 /* If there's no mapping then we don't need to chown */
2622 data
.cgroup_path
= cgroup_path
;
2623 data
.origuid
= geteuid();
2625 /* Unpriv users can't chown it themselves, so chown from
2626 * a child namespace mapping both our own and the target uid
2628 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
,
2629 "chown_cgroup_wrapper") < 0) {
2630 ERROR("Error requesting cgroup chown in new namespace");
2635 * Now chmod 775 the directory else the container cannot create cgroups.
2636 * This can't be done in the child namespace because it only group-owns
2639 if (chmod(cgroup_path
, 0775) < 0) {
2640 SYSERROR("Error chmoding %s\n", cgroup_path
);
2643 fpath
= lxc_append_paths(cgroup_path
, "tasks");
2646 if (chmod(fpath
, 0664) < 0)
2647 SYSERROR("Error chmoding %s\n", fpath
);
2649 fpath
= lxc_append_paths(cgroup_path
, "cgroup.procs");
2652 if (chmod(fpath
, 0664) < 0)
2653 SYSERROR("Error chmoding %s\n", fpath
);
2659 static bool cgfs_chown(void *hdata
, struct lxc_conf
*conf
)
2661 struct cgfs_data
*d
= hdata
;
2662 struct cgroup_process_info
*info_ptr
;
2669 for (info_ptr
= d
->info
; info_ptr
; info_ptr
= info_ptr
->next
) {
2670 if (!info_ptr
->hierarchy
)
2673 if (!info_ptr
->designated_mount_point
) {
2674 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, info_ptr
->cgroup_path
, true);
2675 if (!info_ptr
->designated_mount_point
) {
2676 SYSERROR("Could not chown cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", info_ptr
->cgroup_path
);
2681 cgpath
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
, NULL
);
2683 SYSERROR("Could not chown cgroup %s: internal error", info_ptr
->cgroup_path
);
2686 r
= do_cgfs_chown(cgpath
, conf
);
2687 if (!r
&& is_crucial_hierarchy(info_ptr
->hierarchy
)) {
2688 ERROR("Failed chowning %s\n", cgpath
);
2698 static struct cgroup_ops cgfs_ops
= {
2700 .destroy
= cgfs_destroy
,
2701 .create
= cgfs_create
,
2702 .enter
= cgfs_enter
,
2703 .create_legacy
= cgfs_create_legacy
,
2704 .get_cgroup
= cgfs_get_cgroup
,
2705 .escape
= cgfs_escape
,
2706 .num_hierarchies
= cgfs_num_hierarchies
,
2707 .get_hierarchies
= cgfs_get_hierarchies
,
2708 .get
= lxc_cgroupfs_get
,
2709 .set
= lxc_cgroupfs_set
,
2710 .unfreeze
= cgfs_unfreeze
,
2711 .setup_limits
= cgroupfs_setup_limits
,
2713 .attach
= lxc_cgroupfs_attach
,
2714 .chown
= cgfs_chown
,
2715 .mount_cgroup
= cgroupfs_mount_cgroup
,
2716 .nrtasks
= cgfs_nrtasks
,