2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/inotify.h>
38 #include <sys/mount.h>
39 #include <netinet/in.h>
54 #include <../include/lxcmntent.h>
59 struct cgroup_hierarchy
;
60 struct cgroup_meta_data
;
61 struct cgroup_mount_point
;
64 * cgroup_meta_data: the metadata about the cgroup infrastructure on this
67 struct cgroup_meta_data
{
68 ptrdiff_t ref
; /* simple refcount */
69 struct cgroup_hierarchy
**hierarchies
;
70 struct cgroup_mount_point
**mount_points
;
71 int maximum_hierarchy
;
75 * cgroup_hierarchy: describes a single cgroup hierarchy
76 * (may have multiple mount points)
78 struct cgroup_hierarchy
{
80 bool used
; /* false if the hierarchy should be ignored by lxc */
82 struct cgroup_mount_point
*rw_absolute_mount_point
;
83 struct cgroup_mount_point
*ro_absolute_mount_point
;
84 struct cgroup_mount_point
**all_mount_points
;
85 size_t all_mount_point_capacity
;
89 * cgroup_mount_point: a mount point to where a hierarchy
92 struct cgroup_mount_point
{
93 struct cgroup_hierarchy
*hierarchy
;
97 bool need_cpuset_init
;
101 * cgroup_process_info: describes the membership of a
102 * process to the different cgroup
105 * Note this is the per-process info tracked by the cgfs_ops.
106 * This is not used with cgmanager.
108 struct cgroup_process_info
{
109 struct cgroup_process_info
*next
;
110 struct cgroup_meta_data
*meta_ref
;
111 struct cgroup_hierarchy
*hierarchy
;
113 char *cgroup_path_sub
;
114 char **created_paths
;
115 size_t created_paths_capacity
;
116 size_t created_paths_count
;
117 struct cgroup_mount_point
*designated_mount_point
;
122 const char *cgroup_pattern
;
123 struct cgroup_meta_data
*meta
;
124 struct cgroup_process_info
*info
;
127 lxc_log_define(lxc_cgfs
, lxc
);
129 static struct cgroup_process_info
*lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
, struct cgroup_meta_data
*meta
);
130 static char **subsystems_from_mount_options(const char *mount_options
, char **kernel_list
);
131 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
);
132 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
);
133 static bool is_valid_cgroup(const char *name
);
134 static int create_cgroup(struct cgroup_mount_point
*mp
, const char *path
);
135 static int remove_cgroup(struct cgroup_mount_point
*mp
, const char *path
, bool recurse
,
136 struct lxc_conf
*conf
);
137 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
, const char *path
, const char *suffix
);
138 static struct cgroup_process_info
*find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
);
139 static int do_cgroup_get(const char *cgroup_path
, const char *sub_filename
, char *value
, size_t len
);
140 static int do_cgroup_set(const char *cgroup_path
, const char *sub_filename
, const char *value
);
141 static bool cgroup_devices_has_allow_or_deny(struct cgfs_data
*d
, char *v
, bool for_allow
);
142 static int do_setup_cgroup_limits(struct cgfs_data
*d
, struct lxc_list
*cgroup_settings
, bool do_devices
);
143 static int cgroup_recursive_task_count(const char *cgroup_path
);
144 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
, char *cgroup_path
);
145 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
, const char *path
);
147 static struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
);
148 static struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
);
149 static struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
);
151 /* free process membership information */
152 static void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
);
153 static void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
,
154 struct lxc_conf
*conf
);
156 static struct cgroup_ops cgfs_ops
;
158 static int cgroup_rmdir(char *dirname
)
160 struct dirent
*direntp
;
164 char pathname
[MAXPATHLEN
];
166 dir
= opendir(dirname
);
168 ERROR("Failed to open %s", dirname
);
172 while ((direntp
= readdir(dir
))) {
179 if (!strcmp(direntp
->d_name
, ".") ||
180 !strcmp(direntp
->d_name
, ".."))
183 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
184 if (rc
< 0 || rc
>= MAXPATHLEN
) {
185 ERROR("pathname too long");
188 saved_errno
= -ENOMEM
;
191 ret
= lstat(pathname
, &mystat
);
193 SYSERROR("Failed to stat %s", pathname
);
199 if (S_ISDIR(mystat
.st_mode
)) {
200 if (cgroup_rmdir(pathname
) < 0) {
208 if (rmdir(dirname
) < 0) {
209 SYSERROR("Failed to delete %s", dirname
);
217 SYSERROR("Failed to close directory %s", dirname
);
224 return failed
? -1 : 0;
227 static int rmdir_wrapper(void *data
)
231 if (setresgid(0,0,0) < 0)
232 SYSERROR("Failed to setgid to 0");
233 if (setresuid(0,0,0) < 0)
234 SYSERROR("Failed to setuid to 0");
235 if (setgroups(0, NULL
) < 0)
236 SYSERROR("Failed to clear groups");
238 return cgroup_rmdir(path
);
241 static struct cgroup_meta_data
*lxc_cgroup_load_meta()
243 const char *cgroup_use
= NULL
;
244 char **cgroup_use_list
= NULL
;
245 struct cgroup_meta_data
*md
= NULL
;
249 cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
250 if (!cgroup_use
&& errno
!= 0)
253 cgroup_use_list
= lxc_string_split_and_trim(cgroup_use
, ',');
254 if (!cgroup_use_list
)
258 md
= lxc_cgroup_load_meta2((const char **)cgroup_use_list
);
260 lxc_free_array((void **)cgroup_use_list
, free
);
265 /* Step 1: determine all kernel subsystems */
266 static bool find_cgroup_subsystems(char ***kernel_subsystems
)
272 size_t kernel_subsystems_count
= 0;
273 size_t kernel_subsystems_capacity
= 0;
276 proc_cgroups
= fopen_cloexec("/proc/cgroups", "r");
280 while (getline(&line
, &sz
, proc_cgroups
) != -1) {
283 int hierarchy_number
;
290 tab1
= strchr(line
, '\t');
294 tab2
= strchr(tab1
, '\t');
300 hierarchy_number
= strtoul(tab1
, &tab2
, 10);
303 (void)hierarchy_number
;
305 r
= lxc_grow_array((void ***)kernel_subsystems
, &kernel_subsystems_capacity
, kernel_subsystems_count
+ 1, 12);
308 (*kernel_subsystems
)[kernel_subsystems_count
] = strdup(line
);
309 if (!(*kernel_subsystems
)[kernel_subsystems_count
])
311 kernel_subsystems_count
++;
316 fclose(proc_cgroups
);
321 /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
322 * since mount points don't specify hierarchy number and
323 * /proc/cgroups does not contain named hierarchies
325 static bool find_cgroup_hierarchies(struct cgroup_meta_data
*meta_data
,
326 bool all_kernel_subsystems
, bool all_named_subsystems
,
327 const char **subsystem_whitelist
)
329 FILE *proc_self_cgroup
;
334 size_t hierarchy_capacity
= 0;
336 proc_self_cgroup
= fopen_cloexec("/proc/self/cgroup", "r");
337 /* if for some reason (because of setns() and pid namespace for example),
338 * /proc/self is not valid, we try /proc/1/cgroup... */
339 if (!proc_self_cgroup
)
340 proc_self_cgroup
= fopen_cloexec("/proc/1/cgroup", "r");
341 if (!proc_self_cgroup
)
344 while (getline(&line
, &sz
, proc_self_cgroup
) != -1) {
345 /* file format: hierarchy:subsystems:group,
346 * we only extract hierarchy and subsystems
350 int hierarchy_number
;
351 struct cgroup_hierarchy
*h
= NULL
;
357 colon1
= strchr(line
, ':');
361 colon2
= strchr(colon1
, ':');
368 /* With cgroupv2 /proc/self/cgroup can contain entries of the
370 * These entries need to be skipped.
372 if (!strcmp(colon1
, ""))
375 hierarchy_number
= strtoul(line
, &colon2
, 10);
376 if (!colon2
|| *colon2
)
379 if (hierarchy_number
> meta_data
->maximum_hierarchy
) {
380 /* lxc_grow_array will never shrink, so even if we find a lower
381 * hierarchy number here, the array will never be smaller
383 r
= lxc_grow_array((void ***)&meta_data
->hierarchies
, &hierarchy_capacity
, hierarchy_number
+ 1, 12);
387 meta_data
->maximum_hierarchy
= hierarchy_number
;
390 /* this shouldn't happen, we had this already */
391 if (meta_data
->hierarchies
[hierarchy_number
])
394 h
= calloc(1, sizeof(struct cgroup_hierarchy
));
398 meta_data
->hierarchies
[hierarchy_number
] = h
;
400 h
->index
= hierarchy_number
;
401 h
->subsystems
= lxc_string_split_and_trim(colon1
, ',');
404 /* see if this hierarchy should be considered */
405 if (!all_kernel_subsystems
|| !all_named_subsystems
) {
406 for (p
= h
->subsystems
; *p
; p
++) {
407 if (!strncmp(*p
, "name=", 5)) {
408 if (all_named_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
413 if (all_kernel_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
420 /* we want all hierarchy anyway */
427 fclose(proc_self_cgroup
);
432 /* Step 3: determine all mount points of each hierarchy */
433 static bool find_hierarchy_mountpts( struct cgroup_meta_data
*meta_data
, char **kernel_subsystems
)
436 FILE *proc_self_mountinfo
;
439 char **tokens
= NULL
;
440 size_t mount_point_count
= 0;
441 size_t mount_point_capacity
= 0;
442 size_t token_capacity
= 0;
444 bool is_cgns
= cgns_supported();
446 proc_self_mountinfo
= fopen_cloexec("/proc/self/mountinfo", "r");
447 /* if for some reason (because of setns() and pid namespace for example),
448 * /proc/self is not valid, we try /proc/1/cgroup... */
449 if (!proc_self_mountinfo
)
450 proc_self_mountinfo
= fopen_cloexec("/proc/1/mountinfo", "r");
451 if (!proc_self_mountinfo
)
454 while (getline(&line
, &sz
, proc_self_mountinfo
) != -1) {
455 char *token
, *line_tok
, *saveptr
= NULL
;
457 struct cgroup_mount_point
*mount_point
;
458 struct cgroup_hierarchy
*h
;
460 bool is_lxcfs
= false;
462 if (line
[0] && line
[strlen(line
) - 1] == '\n')
463 line
[strlen(line
) - 1] = '\0';
465 for (i
= 0, line_tok
= line
; (token
= strtok_r(line_tok
, " ", &saveptr
)); line_tok
= NULL
) {
466 r
= lxc_grow_array((void ***)&tokens
, &token_capacity
, i
+ 1, 64);
472 /* layout of /proc/self/mountinfo:
475 * 2: device major:minor
478 * 5: per-mount options
479 * [optional X]: additional data
483 * X+10: per-superblock options
485 for (j
= 6; j
< i
&& tokens
[j
]; j
++)
486 if (!strcmp(tokens
[j
], "-"))
489 /* could not find separator */
490 if (j
>= i
|| !tokens
[j
])
492 /* there should be exactly three fields after
498 /* not a cgroup filesystem */
499 if (strcmp(tokens
[j
+ 1], "cgroup") != 0) {
500 if (strcmp(tokens
[j
+ 1], "fuse.lxcfs") != 0)
502 if (strncmp(tokens
[4], "/sys/fs/cgroup/", 15) != 0)
505 char *curtok
= tokens
[4] + 15;
506 subsystems
= subsystems_from_mount_options(curtok
,
509 subsystems
= subsystems_from_mount_options(tokens
[j
+ 3],
515 for (k
= 0; k
<= meta_data
->maximum_hierarchy
; k
++) {
516 if (meta_data
->hierarchies
[k
] &&
517 meta_data
->hierarchies
[k
]->subsystems
[0] &&
518 lxc_string_in_array(meta_data
->hierarchies
[k
]->subsystems
[0], (const char **)subsystems
)) {
519 /* TODO: we could also check if the lists really match completely,
520 * just to have an additional sanity check */
521 h
= meta_data
->hierarchies
[k
];
525 lxc_free_array((void **)subsystems
, free
);
527 r
= lxc_grow_array((void ***)&meta_data
->mount_points
, &mount_point_capacity
, mount_point_count
+ 1, 12);
531 /* create mount point object */
532 mount_point
= calloc(1, sizeof(*mount_point
));
536 meta_data
->mount_points
[mount_point_count
++] = mount_point
;
538 mount_point
->hierarchy
= h
;
539 if (is_lxcfs
|| is_cgns
)
540 mount_point
->mount_prefix
= strdup("/");
542 mount_point
->mount_prefix
= strdup(tokens
[3]);
543 mount_point
->mount_point
= strdup(tokens
[4]);
544 if (!mount_point
->mount_point
|| !mount_point
->mount_prefix
)
546 mount_point
->read_only
= !lxc_string_in_list("rw", tokens
[5], ',');
548 if (!strcmp(mount_point
->mount_prefix
, "/")) {
549 if (mount_point
->read_only
) {
550 if (!h
->ro_absolute_mount_point
)
551 h
->ro_absolute_mount_point
= mount_point
;
553 if (!h
->rw_absolute_mount_point
)
554 h
->rw_absolute_mount_point
= mount_point
;
559 k
= lxc_array_len((void **)h
->all_mount_points
);
562 r
= lxc_grow_array((void ***)&h
->all_mount_points
, &h
->all_mount_point_capacity
, k
+ 1, 4);
565 h
->all_mount_points
[k
] = mount_point
;
570 fclose(proc_self_mountinfo
);
576 static struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
)
578 bool all_kernel_subsystems
= true;
579 bool all_named_subsystems
= false;
580 struct cgroup_meta_data
*meta_data
= NULL
;
581 char **kernel_subsystems
= NULL
;
584 /* if the subsystem whitelist is not specified, include all
585 * hierarchies that contain kernel subsystems by default but
586 * no hierarchies that only contain named subsystems
588 * if it is specified, the specifier @all will select all
589 * hierarchies, @kernel will select all hierarchies with
590 * kernel subsystems and @named will select all named
593 all_kernel_subsystems
= subsystem_whitelist
?
594 (lxc_string_in_array("@kernel", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
596 all_named_subsystems
= subsystem_whitelist
?
597 (lxc_string_in_array("@named", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
600 meta_data
= calloc(1, sizeof(struct cgroup_meta_data
));
605 if (!find_cgroup_subsystems(&kernel_subsystems
))
608 if (!find_cgroup_hierarchies(meta_data
, all_kernel_subsystems
,
609 all_named_subsystems
, subsystem_whitelist
))
612 if (!find_hierarchy_mountpts(meta_data
, kernel_subsystems
))
615 /* oops, we couldn't find anything */
616 if (!meta_data
->hierarchies
|| !meta_data
->mount_points
) {
621 lxc_free_array((void **)kernel_subsystems
, free
);
626 lxc_free_array((void **)kernel_subsystems
, free
);
627 lxc_cgroup_put_meta(meta_data
);
632 static struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
)
638 static struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
)
643 if (--meta_data
->ref
> 0)
645 lxc_free_array((void **)meta_data
->mount_points
, (lxc_free_fn
)lxc_cgroup_mount_point_free
);
646 if (meta_data
->hierarchies
)
647 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++)
648 if (meta_data
->hierarchies
[i
])
649 lxc_cgroup_hierarchy_free(meta_data
->hierarchies
[i
]);
650 free(meta_data
->hierarchies
);
655 static struct cgroup_hierarchy
*lxc_cgroup_find_hierarchy(struct cgroup_meta_data
*meta_data
, const char *subsystem
)
658 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
659 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
662 if (h
&& lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
668 static bool mountpoint_is_accessible(struct cgroup_mount_point
*mp
)
670 return mp
&& access(mp
->mount_point
, F_OK
) == 0;
673 static struct cgroup_mount_point
*lxc_cgroup_find_mount_point(struct cgroup_hierarchy
*hierarchy
, const char *group
, bool should_be_writable
)
675 struct cgroup_mount_point
**mps
;
676 struct cgroup_mount_point
*current_result
= NULL
;
677 ssize_t quality
= -1;
680 if (mountpoint_is_accessible(hierarchy
->rw_absolute_mount_point
))
681 return hierarchy
->rw_absolute_mount_point
;
682 if (!should_be_writable
&& mountpoint_is_accessible(hierarchy
->ro_absolute_mount_point
))
683 return hierarchy
->ro_absolute_mount_point
;
685 for (mps
= hierarchy
->all_mount_points
; mps
&& *mps
; mps
++) {
686 struct cgroup_mount_point
*mp
= *mps
;
687 size_t prefix_len
= mp
->mount_prefix
? strlen(mp
->mount_prefix
) : 0;
689 if (prefix_len
== 1 && mp
->mount_prefix
[0] == '/')
692 if (!mountpoint_is_accessible(mp
))
695 if (should_be_writable
&& mp
->read_only
)
699 (strncmp(group
, mp
->mount_prefix
, prefix_len
) == 0 &&
700 (group
[prefix_len
] == '\0' || group
[prefix_len
] == '/'))) {
701 /* search for the best quality match, i.e. the match with the
702 * shortest prefix where this group is still contained
704 if (quality
== -1 || prefix_len
< quality
) {
706 quality
= prefix_len
;
713 return current_result
;
716 static char *lxc_cgroup_find_abs_path(const char *subsystem
, const char *group
, bool should_be_writable
, const char *suffix
)
718 struct cgroup_meta_data
*meta_data
;
719 struct cgroup_hierarchy
*h
;
720 struct cgroup_mount_point
*mp
;
724 meta_data
= lxc_cgroup_load_meta();
728 h
= lxc_cgroup_find_hierarchy(meta_data
, subsystem
);
732 mp
= lxc_cgroup_find_mount_point(h
, group
, should_be_writable
);
736 result
= cgroup_to_absolute_path(mp
, group
, suffix
);
740 lxc_cgroup_put_meta(meta_data
);
745 lxc_cgroup_put_meta(meta_data
);
750 static struct cgroup_process_info
*lxc_cgroup_process_info_get(pid_t pid
, struct cgroup_meta_data
*meta
)
753 snprintf(pid_buf
, 32, "/proc/%lu/cgroup", (unsigned long)pid
);
754 return lxc_cgroup_process_info_getx(pid_buf
, meta
);
757 static struct cgroup_process_info
*lxc_cgroup_process_info_get_init(struct cgroup_meta_data
*meta
)
759 return lxc_cgroup_process_info_get(1, meta
);
762 static struct cgroup_process_info
*lxc_cgroup_process_info_get_self(struct cgroup_meta_data
*meta
)
764 struct cgroup_process_info
*i
;
765 i
= lxc_cgroup_process_info_getx("/proc/self/cgroup", meta
);
767 i
= lxc_cgroup_process_info_get(lxc_raw_getpid(), meta
);
772 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
773 * is already in a new cgroup named after the pid. 'mnt' is passed in as
774 * the full current cgroup. Say that is /sys/fs/cgroup/lxc/2975 and the container
775 * name is c1. . We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1,
776 * and return the string /sys/fs/cgroup/lxc/c1.
778 static char *cgroup_rename_nsgroup(const char *mountpath
, const char *oldname
, pid_t pid
, const char *name
)
780 char *dir
, *fulloldpath
;
781 char *newname
, *fullnewpath
;
782 int len
, newlen
, ret
;
785 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
788 * fulloldpath = /cgroup/ab/2375
789 * fullnewpath = /cgroup/ab/c1
792 dir
= alloca(strlen(oldname
) + 1);
793 strcpy(dir
, oldname
);
795 len
= strlen(oldname
) + strlen(mountpath
) + 22;
796 fulloldpath
= alloca(len
);
797 ret
= snprintf(fulloldpath
, len
, "%s/%s/%lu", mountpath
, oldname
, (unsigned long)pid
);
798 if (ret
< 0 || ret
>= len
)
801 len
= strlen(dir
) + strlen(name
) + 2;
802 newname
= malloc(len
);
804 SYSERROR("Out of memory");
807 ret
= snprintf(newname
, len
, "%s/%s", dir
, name
);
808 if (ret
< 0 || ret
>= len
) {
813 newlen
= strlen(mountpath
) + len
+ 2;
814 fullnewpath
= alloca(newlen
);
815 ret
= snprintf(fullnewpath
, newlen
, "%s/%s", mountpath
, newname
);
816 if (ret
< 0 || ret
>= newlen
) {
821 if (access(fullnewpath
, F_OK
) == 0) {
822 if (rmdir(fullnewpath
) != 0) {
823 SYSERROR("container cgroup %s already exists.", fullnewpath
);
828 if (rename(fulloldpath
, fullnewpath
)) {
829 SYSERROR("failed to rename cgroup %s->%s", fulloldpath
, fullnewpath
);
834 DEBUG("'%s' renamed to '%s'", oldname
, newname
);
839 static bool is_crucial_hierarchy(struct cgroup_hierarchy
*h
)
843 for (p
= h
->subsystems
; *p
; p
++) {
844 if (is_crucial_cgroup_subsystem(*p
))
850 /* create a new cgroup */
851 static struct cgroup_process_info
*lxc_cgroupfs_create(const char *name
, const char *path_pattern
, struct cgroup_meta_data
*meta_data
, const char *sub_pattern
)
853 char **cgroup_path_components
= NULL
;
855 char *path_so_far
= NULL
;
856 char **new_cgroup_paths
= NULL
;
857 char **new_cgroup_paths_sub
= NULL
;
858 struct cgroup_mount_point
*mp
;
859 struct cgroup_hierarchy
*h
;
860 struct cgroup_process_info
*base_info
= NULL
;
861 struct cgroup_process_info
*info_ptr
;
865 bool had_sub_pattern
= false;
868 if (!is_valid_cgroup(name
)) {
869 ERROR("Invalid cgroup name: '%s'", name
);
874 if (!strstr(path_pattern
, "%n")) {
875 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern
);
880 /* we will modify the result of this operation directly,
881 * so we don't have to copy the data structure
883 base_info
= (path_pattern
[0] == '/') ?
884 lxc_cgroup_process_info_get_init(meta_data
) :
885 lxc_cgroup_process_info_get_self(meta_data
);
889 new_cgroup_paths
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
890 if (!new_cgroup_paths
)
891 goto out_initial_error
;
893 new_cgroup_paths_sub
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
894 if (!new_cgroup_paths_sub
)
895 goto out_initial_error
;
897 /* find mount points we can use */
898 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
899 h
= info_ptr
->hierarchy
;
902 mp
= lxc_cgroup_find_mount_point(h
, info_ptr
->cgroup_path
, true);
904 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h
->index
);
905 goto out_initial_error
;
907 info_ptr
->designated_mount_point
= mp
;
909 if (lxc_string_in_array("ns", (const char **)h
->subsystems
))
911 if (handle_cgroup_settings(mp
, info_ptr
->cgroup_path
) < 0) {
912 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
913 goto out_initial_error
;
917 /* normalize the path */
918 cgroup_path_components
= lxc_normalize_path(path_pattern
);
919 if (!cgroup_path_components
)
920 goto out_initial_error
;
922 /* go through the path components to see if we can create them */
923 for (p
= cgroup_path_components
; *p
|| (sub_pattern
&& !had_sub_pattern
); p
++) {
924 /* we only want to create the same component with -1, -2, etc.
925 * if the component contains the container name itself, otherwise
926 * it's not an error if it already exists
928 char *p_eff
= *p
? *p
: (char *)sub_pattern
;
929 bool contains_name
= strstr(p_eff
, "%n");
930 char *current_component
= NULL
;
931 char *current_subpath
= NULL
;
932 char *current_entire_path
= NULL
;
937 /* if we are processing the subpattern, we want to make sure
938 * loop is ended the next time around
941 had_sub_pattern
= true;
945 goto find_name_on_this_level
;
947 cleanup_name_on_this_level
:
948 /* This is reached if we found a name clash.
949 * In that case, remove the cgroup from all previous hierarchies
951 for (j
= 0, info_ptr
= base_info
; j
< i
&& info_ptr
; info_ptr
= info_ptr
->next
, j
++) {
952 if (info_ptr
->created_paths_count
< 1)
954 r
= remove_cgroup(info_ptr
->designated_mount_point
, info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1], false, NULL
);
956 WARN("could not clean up cgroup we created when trying to create container");
957 free(info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1]);
958 info_ptr
->created_paths
[--info_ptr
->created_paths_count
] = NULL
;
960 if (current_component
!= current_subpath
)
961 free(current_subpath
);
962 if (current_component
!= p_eff
)
963 free(current_component
);
964 current_component
= current_subpath
= NULL
;
965 /* try again with another suffix */
968 find_name_on_this_level
:
969 /* determine name of the path component we should create */
970 if (contains_name
&& suffix
> 0) {
971 char *buf
= calloc(strlen(name
) + 32, 1);
973 goto out_initial_error
;
974 snprintf(buf
, strlen(name
) + 32, "%s-%u", name
, suffix
);
975 current_component
= lxc_string_replace("%n", buf
, p_eff
);
978 current_component
= contains_name
? lxc_string_replace("%n", name
, p_eff
) : p_eff
;
980 parts
[0] = path_so_far
;
981 parts
[1] = current_component
;
983 current_subpath
= path_so_far
? lxc_string_join("/", (const char **)parts
, false) : current_component
;
985 /* Now go through each hierarchy and try to create the
986 * corresponding cgroup
988 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
991 if (!info_ptr
->hierarchy
)
994 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
996 current_entire_path
= NULL
;
998 parts2
[0] = !strcmp(info_ptr
->cgroup_path
, "/") ? "" : info_ptr
->cgroup_path
;
999 parts2
[1] = current_subpath
;
1001 current_entire_path
= lxc_string_join("/", (const char **)parts2
, false);
1004 /* we are processing the subpath, so only update that one */
1005 free(new_cgroup_paths_sub
[i
]);
1006 new_cgroup_paths_sub
[i
] = strdup(current_entire_path
);
1007 if (!new_cgroup_paths_sub
[i
])
1008 goto cleanup_from_error
;
1010 /* remember which path was used on this controller */
1011 free(new_cgroup_paths
[i
]);
1012 new_cgroup_paths
[i
] = strdup(current_entire_path
);
1013 if (!new_cgroup_paths
[i
])
1014 goto cleanup_from_error
;
1017 r
= create_cgroup(info_ptr
->designated_mount_point
, current_entire_path
);
1018 if (r
< 0 && errno
== EEXIST
&& contains_name
) {
1019 /* name clash => try new name with new suffix */
1020 free(current_entire_path
);
1021 current_entire_path
= NULL
;
1022 goto cleanup_name_on_this_level
;
1023 } else if (r
< 0 && errno
!= EEXIST
) {
1024 if (is_crucial_hierarchy(info_ptr
->hierarchy
)) {
1025 SYSERROR("Could not create cgroup '%s' in '%s'.", current_entire_path
, info_ptr
->designated_mount_point
->mount_point
);
1026 goto cleanup_from_error
;
1029 } else if (r
== 0) {
1030 /* successfully created */
1031 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1033 goto cleanup_from_error
;
1034 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, current_entire_path
)) {
1035 ERROR("Failed to initialize cpuset for '%s' in '%s'.", current_entire_path
, info_ptr
->designated_mount_point
->mount_point
);
1036 goto cleanup_from_error
;
1038 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = current_entire_path
;
1040 /* if we didn't create the cgroup, then we have to make sure that
1041 * further cgroups will be created properly
1043 if (handle_cgroup_settings(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
) < 0) {
1044 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
1045 goto cleanup_from_error
;
1047 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
)) {
1048 ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr
->cgroup_path
);
1049 goto cleanup_from_error
;
1053 /* already existed but path component of pattern didn't contain '%n',
1054 * so this is not an error; but then we don't need current_entire_path
1057 free(current_entire_path
);
1058 current_entire_path
= NULL
;
1062 /* save path so far */
1064 path_so_far
= strdup(current_subpath
);
1066 goto cleanup_from_error
;
1069 if (current_component
!= current_subpath
)
1070 free(current_subpath
);
1071 if (current_component
!= p_eff
)
1072 free(current_component
);
1073 current_component
= current_subpath
= NULL
;
1077 /* called if an error occurred in the loop, so we
1078 * do some additional cleanup here
1080 saved_errno
= errno
;
1081 if (current_component
!= current_subpath
)
1082 free(current_subpath
);
1083 if (current_component
!= p_eff
)
1084 free(current_component
);
1085 free(current_entire_path
);
1086 errno
= saved_errno
;
1087 goto out_initial_error
;
1090 /* we're done, now update the paths */
1091 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
1092 if (!info_ptr
->hierarchy
)
1094 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
1095 * will take care of it
1096 * Since we do a continue in above loop, new_cgroup_paths[i] is
1097 * unset anyway, as is new_cgroup_paths_sub[i]
1099 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
1101 free(info_ptr
->cgroup_path
);
1102 info_ptr
->cgroup_path
= new_cgroup_paths
[i
];
1103 info_ptr
->cgroup_path_sub
= new_cgroup_paths_sub
[i
];
1105 /* don't use lxc_free_array since we used the array members
1106 * to store them in our result...
1108 free(new_cgroup_paths
);
1109 free(new_cgroup_paths_sub
);
1111 lxc_free_array((void **)cgroup_path_components
, free
);
1115 saved_errno
= errno
;
1117 lxc_cgroup_process_info_free_and_remove(base_info
, NULL
);
1118 lxc_free_array((void **)new_cgroup_paths
, free
);
1119 lxc_free_array((void **)new_cgroup_paths_sub
, free
);
1120 lxc_free_array((void **)cgroup_path_components
, free
);
1121 errno
= saved_errno
;
1125 static int lxc_cgroup_create_legacy(struct cgroup_process_info
*base_info
, const char *name
, pid_t pid
)
1127 struct cgroup_process_info
*info_ptr
;
1130 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1131 if (!info_ptr
->hierarchy
)
1134 if (!lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
1137 * For any path which has ns cgroup mounted, handler->pid is already
1138 * moved into a container called '%d % (handler->pid)'. Rename it to
1139 * the cgroup name and record that.
1141 char *tmp
= cgroup_rename_nsgroup((const char *)info_ptr
->designated_mount_point
->mount_point
,
1142 info_ptr
->cgroup_path
, pid
, name
);
1145 free(info_ptr
->cgroup_path
);
1146 info_ptr
->cgroup_path
= tmp
;
1147 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1153 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = tmp
;
1158 /* get the cgroup membership of a given container */
1159 static struct cgroup_process_info
*lxc_cgroup_get_container_info(const char *name
, const char *lxcpath
, struct cgroup_meta_data
*meta_data
)
1161 struct cgroup_process_info
*result
= NULL
;
1162 int saved_errno
= 0;
1164 struct cgroup_process_info
**cptr
= &result
;
1165 struct cgroup_process_info
*entry
= NULL
;
1168 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
1169 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
1173 /* use the command interface to look for the cgroup */
1174 path
= lxc_cmd_get_cgroup_path(name
, lxcpath
, h
->subsystems
[0]);
1180 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1183 entry
->meta_ref
= lxc_cgroup_get_meta(meta_data
);
1184 entry
->hierarchy
= h
;
1185 entry
->cgroup_path
= path
;
1188 /* it is not an error if we don't find anything here,
1189 * it is up to the caller to decide what to do in that
1191 entry
->designated_mount_point
= lxc_cgroup_find_mount_point(h
, entry
->cgroup_path
, true);
1194 cptr
= &entry
->next
;
1200 saved_errno
= errno
;
1202 lxc_cgroup_process_info_free(result
);
1203 lxc_cgroup_process_info_free(entry
);
1204 errno
= saved_errno
;
1208 /* move a processs to the cgroups specified by the membership */
1209 static int lxc_cgroupfs_enter(struct cgroup_process_info
*info
, pid_t pid
, bool enter_sub
)
1212 char *cgroup_tasks_fn
;
1214 struct cgroup_process_info
*info_ptr
;
1216 snprintf(pid_buf
, 32, "%lu", (unsigned long)pid
);
1217 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1218 if (!info_ptr
->hierarchy
)
1221 char *cgroup_path
= (enter_sub
&& info_ptr
->cgroup_path_sub
) ?
1222 info_ptr
->cgroup_path_sub
:
1223 info_ptr
->cgroup_path
;
1225 if (!info_ptr
->designated_mount_point
) {
1226 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, cgroup_path
, true);
1227 if (!info_ptr
->designated_mount_point
) {
1228 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid
, cgroup_path
);
1233 cgroup_tasks_fn
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, cgroup_path
, "/tasks");
1234 if (!cgroup_tasks_fn
) {
1235 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1239 r
= lxc_write_to_file(cgroup_tasks_fn
, pid_buf
, strlen(pid_buf
), false);
1240 free(cgroup_tasks_fn
);
1241 if (r
< 0 && is_crucial_hierarchy(info_ptr
->hierarchy
)) {
1242 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1250 /* free process membership information */
1251 void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
)
1253 struct cgroup_process_info
*next
;
1257 lxc_cgroup_put_meta(info
->meta_ref
);
1258 free(info
->cgroup_path
);
1259 free(info
->cgroup_path_sub
);
1260 lxc_free_array((void **)info
->created_paths
, free
);
1262 lxc_cgroup_process_info_free(next
);
1265 /* free process membership information and remove cgroups that were created */
1266 void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
, struct lxc_conf
*conf
)
1268 struct cgroup_process_info
*next
;
1274 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1276 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1278 /* ignore return value here, perhaps we created the
1279 * '/lxc' cgroup in this container but another container
1280 * is still running (for example)
1282 (void)remove_cgroup(mp
, info
->cgroup_path
, true, conf
);
1284 for (pp
= info
->created_paths
; pp
&& *pp
; pp
++);
1285 for ((void)(pp
&& --pp
); info
->created_paths
&& pp
>= info
->created_paths
; --pp
) {
1288 free(info
->created_paths
);
1289 lxc_cgroup_put_meta(info
->meta_ref
);
1290 free(info
->cgroup_path
);
1291 free(info
->cgroup_path_sub
);
1293 lxc_cgroup_process_info_free_and_remove(next
, conf
);
1296 static char *lxc_cgroup_get_hierarchy_path_data(const char *subsystem
, struct cgfs_data
*d
)
1298 struct cgroup_process_info
*info
= d
->info
;
1299 info
= find_info_for_subsystem(info
, subsystem
);
1302 prune_init_scope(info
->cgroup_path
);
1303 return info
->cgroup_path
;
1306 static char *lxc_cgroup_get_hierarchy_abs_path_data(const char *subsystem
, struct cgfs_data
*d
)
1308 struct cgroup_process_info
*info
= d
->info
;
1309 struct cgroup_mount_point
*mp
= NULL
;
1311 info
= find_info_for_subsystem(info
, subsystem
);
1314 if (info
->designated_mount_point
) {
1315 mp
= info
->designated_mount_point
;
1317 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1321 return cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1324 static char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1326 struct cgroup_meta_data
*meta
;
1327 struct cgroup_process_info
*base_info
, *info
;
1328 struct cgroup_mount_point
*mp
;
1329 char *result
= NULL
;
1331 meta
= lxc_cgroup_load_meta();
1334 base_info
= lxc_cgroup_get_container_info(name
, lxcpath
, meta
);
1337 info
= find_info_for_subsystem(base_info
, subsystem
);
1340 if (info
->designated_mount_point
) {
1341 mp
= info
->designated_mount_point
;
1343 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1347 result
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1350 lxc_cgroup_process_info_free(base_info
);
1352 lxc_cgroup_put_meta(meta
);
/*
 * Write a value to a cgroup control file, using the handler data to locate
 * the cgroup.
 *
 * filename: control file name such as "<subsystem>.<key>"; everything
 *           before the first '.' selects the subsystem
 * value:    string to write
 *
 * errno is set to ENOENT before the path lookup and the errno left by
 * do_cgroup_set() is preserved across the cleanup.
 *
 * Returns the result of do_cgroup_set(), or -1 if no path could be built.
 */
static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfs_data *d)
{
	size_t name_size = strlen(filename) + 1;
	char *subsystem = alloca(name_size);
	char *dot, *path;
	int ret, saved_errno;

	/* private copy of the name, cut at the first '.' */
	memcpy(subsystem, filename, name_size);
	dot = strchr(subsystem, '.');
	if (dot != NULL)
		*dot = '\0';

	errno = ENOENT;
	path = lxc_cgroup_get_hierarchy_abs_path_data(subsystem, d);
	if (!path)
		return -1;

	ret = do_cgroup_set(path, filename, value);
	saved_errno = errno;
	free(path);
	errno = saved_errno;

	return ret;
}
/*
 * Write a value to a cgroup control file of the container identified by
 * name and lxcpath.
 *
 * filename: control file name such as "<subsystem>.<key>"; everything
 *           before the first '.' selects the subsystem
 * value:    string to write
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup path could not
 * be determined. The errno left by do_cgroup_set() is preserved across the
 * cleanup, as lxc_cgroup_set_data() does.
 */
static int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	char *subsystem = NULL, *p, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	if ((p = strchr(subsystem, '.')) != NULL)
		*p = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		int saved_errno;

		ret = do_cgroup_set(path, filename, value);
		/* free() may modify errno; callers should see the write's */
		saved_errno = errno;
		free(path);
		errno = saved_errno;
	}
	return ret;
}
/*
 * Read a cgroup control file of the container identified by name and
 * lxcpath.
 *
 * filename: control file name such as "<subsystem>.<key>"; everything
 *           before the first '.' selects the subsystem
 * value:    destination buffer, handed through to do_cgroup_get()
 * len:      size of the destination buffer
 *
 * Returns the result of do_cgroup_get(), or -1 if the cgroup path could not
 * be determined. The errno left by do_cgroup_get() is preserved across the
 * cleanup, as lxc_cgroup_set_data() does for writes.
 */
static int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	char *subsystem = NULL, *p, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	if ((p = strchr(subsystem, '.')) != NULL)
		*p = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		int saved_errno;

		ret = do_cgroup_get(path, filename, value, len);
		/* free() may modify errno; callers should see the read's */
		saved_errno = errno;
		free(path);
		errno = saved_errno;
	}
	return ret;
}
1413 static bool cgroupfs_mount_cgroup(void *hdata
, const char *root
, int type
)
1415 size_t bufsz
= strlen(root
) + sizeof("/sys/fs/cgroup");
1417 char **parts
= NULL
;
1418 char *dirname
= NULL
;
1419 char *abs_path
= NULL
;
1420 char *abs_path2
= NULL
;
1421 struct cgfs_data
*cgfs_d
;
1422 struct cgroup_process_info
*info
, *base_info
;
1423 int r
, saved_errno
= 0;
1424 struct lxc_handler
*handler
= hdata
;
1426 if (cgns_supported())
1429 cgfs_d
= handler
->cgroup_data
;
1432 base_info
= cgfs_d
->info
;
1434 /* If we get passed the _NOSPEC types, we default to _MIXED, since we don't
1435 * have access to the lxc_conf object at this point. It really should be up
1436 * to the caller to fix this, but this doesn't really hurt.
1438 if (type
== LXC_AUTO_CGROUP_FULL_NOSPEC
)
1439 type
= LXC_AUTO_CGROUP_FULL_MIXED
;
1440 else if (type
== LXC_AUTO_CGROUP_NOSPEC
)
1441 type
= LXC_AUTO_CGROUP_MIXED
;
1443 if (type
< LXC_AUTO_CGROUP_RO
|| type
> LXC_AUTO_CGROUP_FULL_MIXED
) {
1444 ERROR("could not mount cgroups into container: invalid type specified internally");
1449 path
= calloc(1, bufsz
);
1452 snprintf(path
, bufsz
, "%s/sys/fs/cgroup", root
);
1453 r
= safe_mount("cgroup_root", path
, "tmpfs",
1454 MS_NOSUID
|MS_NODEV
|MS_NOEXEC
|MS_RELATIME
,
1455 "size=10240k,mode=755",
1458 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1462 /* now mount all the hierarchies we care about */
1463 for (info
= base_info
; info
; info
= info
->next
) {
1464 size_t subsystem_count
, i
;
1465 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1467 if (!info
->hierarchy
)
1470 if (!mountpoint_is_accessible(mp
))
1471 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1474 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1478 subsystem_count
= lxc_array_len((void **)info
->hierarchy
->subsystems
);
1479 parts
= calloc(subsystem_count
+ 1, sizeof(char *));
1483 for (i
= 0; i
< subsystem_count
; i
++) {
1484 if (!strncmp(info
->hierarchy
->subsystems
[i
], "name=", 5))
1485 parts
[i
] = info
->hierarchy
->subsystems
[i
] + 5;
1487 parts
[i
] = info
->hierarchy
->subsystems
[i
];
1489 dirname
= lxc_string_join(",", (const char **)parts
, false);
1493 /* create subsystem directory */
1494 abs_path
= lxc_append_paths(path
, dirname
);
1497 r
= mkdir_p(abs_path
, 0755);
1498 if (r
< 0 && errno
!= EEXIST
) {
1499 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname
);
1503 abs_path2
= lxc_append_paths(abs_path
, info
->cgroup_path
);
1507 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_RW
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1508 /* bind-mount the cgroup entire filesystem there */
1509 if (strcmp(mp
->mount_prefix
, "/") != 0) {
1510 /* FIXME: maybe we should just try to remount the entire hierarchy
1511 * with a regular mount command? may that works? */
1512 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname
);
1515 r
= mount(mp
->mount_point
, abs_path
, "none", MS_BIND
, 0);
1517 SYSERROR("error bind-mounting %s to %s", mp
->mount_point
, abs_path
);
1520 /* main cgroup path should be read-only */
1521 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1522 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1524 SYSERROR("error re-mounting %s readonly", abs_path
);
1528 /* own cgroup should be read-write */
1529 if (type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1530 r
= mount(abs_path2
, abs_path2
, NULL
, MS_BIND
, NULL
);
1532 SYSERROR("error bind-mounting %s onto itself", abs_path2
);
1535 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
, NULL
);
1537 SYSERROR("error re-mounting %s readwrite", abs_path2
);
1542 /* create path for container's cgroup */
1543 r
= mkdir_p(abs_path2
, 0755);
1544 if (r
< 0 && errno
!= EEXIST
) {
1545 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname
, info
->cgroup_path
);
1549 /* for read-only and mixed cases, we have to bind-mount the tmpfs directory
1550 * that points to the hierarchy itself (i.e. /sys/fs/cgroup/cpu etc.) onto
1551 * itself and then bind-mount it read-only, since we keep the tmpfs itself
1552 * read-write (see comment below)
1554 if (type
== LXC_AUTO_CGROUP_MIXED
|| type
== LXC_AUTO_CGROUP_RO
) {
1555 r
= mount(abs_path
, abs_path
, NULL
, MS_BIND
, NULL
);
1557 SYSERROR("error bind-mounting %s onto itself", abs_path
);
1560 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1562 SYSERROR("error re-mounting %s readonly", abs_path
);
1570 /* bind-mount container's cgroup to that directory */
1571 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1574 r
= mount(abs_path
, abs_path2
, "none", MS_BIND
, 0);
1575 if (r
< 0 && is_crucial_hierarchy(info
->hierarchy
)) {
1576 SYSERROR("error bind-mounting %s to %s", abs_path
, abs_path2
);
1579 if (type
== LXC_AUTO_CGROUP_RO
) {
1580 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1582 SYSERROR("error re-mounting %s readonly", abs_path2
);
1593 /* add symlinks for every single subsystem */
1594 if (subsystem_count
> 1) {
1595 for (i
= 0; i
< subsystem_count
; i
++) {
1596 abs_path
= lxc_append_paths(path
, parts
[i
]);
1599 r
= symlink(dirname
, abs_path
);
1601 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts
[i
], dirname
);
1612 /* We used to remount the entire tmpfs readonly if any :ro or
1613 * :mixed mode was specified. However, Ubuntu's mountall has the
1614 * unfortunate behavior to block bootup if /sys/fs/cgroup is
1615 * mounted read-only and cannot be remounted read-write.
1616 * (mountall reads /lib/init/fstab and tries to (re-)mount all of
1617 * these if they are not already mounted with the right options;
1618 * it contains an entry for /sys/fs/cgroup. In case it can't do
1619 * that, it prompts for the user to either manually fix it or
1620 * boot anyway. But without user input, booting of the container
1623 * Instead of remounting the entire tmpfs readonly, we only
1624 * remount the paths readonly that are part of the cgroup
1633 saved_errno
= errno
;
1639 errno
= saved_errno
;
1643 static int cgfs_nrtasks(void *hdata
)
1645 struct cgfs_data
*d
= hdata
;
1646 struct cgroup_process_info
*info
;
1647 struct cgroup_mount_point
*mp
= NULL
;
1648 char *abs_path
= NULL
;
1662 if (info
->designated_mount_point
) {
1663 mp
= info
->designated_mount_point
;
1665 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, false);
1670 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1674 ret
= cgroup_recursive_task_count(abs_path
);
1679 static struct cgroup_process_info
*
1680 lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
,
1681 struct cgroup_meta_data
*meta
)
1683 struct cgroup_process_info
*result
= NULL
;
1684 FILE *proc_pid_cgroup
= NULL
;
1687 int saved_errno
= 0;
1688 struct cgroup_process_info
**cptr
= &result
;
1689 struct cgroup_process_info
*entry
= NULL
;
1691 proc_pid_cgroup
= fopen_cloexec(proc_pid_cgroup_str
, "r");
1692 if (!proc_pid_cgroup
)
1695 while (getline(&line
, &sz
, proc_pid_cgroup
) != -1) {
1696 /* file format: hierarchy:subsystems:group */
1700 int hierarchy_number
;
1701 struct cgroup_hierarchy
*h
= NULL
;
1706 if (line
[strlen(line
) - 1] == '\n')
1707 line
[strlen(line
) - 1] = '\0';
1709 colon1
= strchr(line
, ':');
1713 colon2
= strchr(colon1
, ':');
1720 /* With cgroupv2 /proc/self/cgroup can contain entries of the
1722 * These entries need to be skipped.
1724 if (!strcmp(colon1
, ""))
1727 hierarchy_number
= strtoul(line
, &endptr
, 10);
1728 if (!endptr
|| *endptr
)
1731 if (hierarchy_number
> meta
->maximum_hierarchy
) {
1732 /* we encountered a hierarchy we didn't have before,
1733 * so probably somebody remounted some stuff in the
1740 h
= meta
->hierarchies
[hierarchy_number
];
1742 /* we encountered a hierarchy that was thought to be
1743 * dead before, so probably somebody remounted some
1744 * stuff in the mean time...
1750 /* we are told that we should ignore this hierarchy */
1754 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1758 entry
->meta_ref
= lxc_cgroup_get_meta(meta
);
1759 entry
->hierarchy
= h
;
1760 entry
->cgroup_path
= strdup(colon2
);
1761 if (!entry
->cgroup_path
)
1763 prune_init_scope(entry
->cgroup_path
);
1766 cptr
= &entry
->next
;
1770 fclose(proc_pid_cgroup
);
1775 saved_errno
= errno
;
1776 if (proc_pid_cgroup
)
1777 fclose(proc_pid_cgroup
);
1778 lxc_cgroup_process_info_free(result
);
1779 lxc_cgroup_process_info_free(entry
);
1781 errno
= saved_errno
;
1785 static char **subsystems_from_mount_options(const char *mount_options
,
1788 char *token
, *str
, *saveptr
= NULL
;
1789 char **result
= NULL
;
1790 size_t result_capacity
= 0;
1791 size_t result_count
= 0;
1795 str
= alloca(strlen(mount_options
)+1);
1796 strcpy(str
, mount_options
);
1797 for (; (token
= strtok_r(str
, ",", &saveptr
)); str
= NULL
) {
1798 /* we have a subsystem if it's either in the list of
1799 * subsystems provided by the kernel OR if it starts
1800 * with name= for named hierarchies
1802 r
= lxc_grow_array((void ***)&result
, &result_capacity
, result_count
+ 1, 12);
1805 result
[result_count
+ 1] = NULL
;
1806 if (strncmp(token
, "name=", 5) && !lxc_string_in_array(token
, (const char **)kernel_list
)) {
1807 /* this is eg 'systemd' but the mount will be
1810 result
[result_count
] = malloc(strlen(token
) + 6);
1811 if (result
[result_count
])
1812 sprintf(result
[result_count
], "name=%s", token
);
1814 result
[result_count
] = strdup(token
);
1815 if (!result
[result_count
])
1823 saved_errno
= errno
;
1824 lxc_free_array((void**)result
, free
);
1825 errno
= saved_errno
;
1829 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
)
1833 free(mp
->mount_point
);
1834 free(mp
->mount_prefix
);
1838 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
)
1842 if (h
->subsystems
) {
1843 lxc_free_array((void **)h
->subsystems
, free
);
1844 h
->subsystems
= NULL
;
1846 if (h
->all_mount_points
) {
1847 free(h
->all_mount_points
);
1848 h
->all_mount_points
= NULL
;
/*
 * Check whether a string is acceptable as a single cgroup name.
 *
 * Every character has to be a printable ASCII character other than SPACE
 * (space is rejected because it would break legacy lxc-ls) and other than
 * '/'. The special directory names "." and ".." are rejected as well.
 */
static bool is_valid_cgroup(const char *name)
{
	const char *cursor = name;

	while (*cursor != '\0') {
		const char c = *cursor;

		if (c <= 32 || c >= 127 || c == '/')
			return false;
		cursor++;
	}

	if (strcmp(name, ".") == 0)
		return false;
	if (strcmp(name, "..") == 0)
		return false;
	return true;
}
1868 static int create_or_remove_cgroup(bool do_remove
,
1869 struct cgroup_mount_point
*mp
, const char *path
, int recurse
,
1870 struct lxc_conf
*conf
)
1872 int r
, saved_errno
= 0;
1873 char *buf
= cgroup_to_absolute_path(mp
, path
, NULL
);
1877 /* create or remove directory */
1879 if (!dir_exists(buf
))
1882 if (conf
&& !lxc_list_empty(&conf
->id_map
))
1883 r
= userns_exec_1(conf
, rmdir_wrapper
, buf
,
1886 r
= cgroup_rmdir(buf
);
1890 r
= mkdir_p(buf
, 0777);
1891 saved_errno
= errno
;
1893 errno
= saved_errno
;
/*
 * Create the cgroup `path` below mount point `mp`.
 *
 * Thin wrapper around create_or_remove_cgroup() in "create" mode, without
 * recursion and without a container configuration. Returns its result.
 */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;
	const int recurse = false;

	return create_or_remove_cgroup(do_remove, mp, path, recurse, NULL);
}
/*
 * Remove the cgroup `path` below mount point `mp`.
 *
 * Thin wrapper around create_or_remove_cgroup() in "remove" mode; `recurse`
 * and `conf` are handed through unchanged. Returns its result.
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse, struct lxc_conf *conf)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path, recurse, conf);
}
1908 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
,
1909 const char *path
, const char *suffix
)
1911 /* first we have to make sure we subtract the mount point's prefix */
1912 char *prefix
= mp
->mount_prefix
;
1916 /* we want to make sure only absolute paths to cgroups are passed to us */
1917 if (path
[0] != '/') {
1922 if (prefix
&& !strcmp(prefix
, "/"))
1925 /* prefix doesn't match */
1926 if (prefix
&& strncmp(prefix
, path
, strlen(prefix
)) != 0) {
1930 /* if prefix is /foo and path is /foobar */
1931 if (prefix
&& path
[strlen(prefix
)] != '/' && path
[strlen(prefix
)] != '\0') {
1936 /* remove prefix from path */
1937 path
+= prefix
? strlen(prefix
) : 0;
1939 len
= strlen(mp
->mount_point
) + strlen(path
) + (suffix
? strlen(suffix
) : 0);
1940 buf
= calloc(len
+ 1, 1);
1943 rv
= snprintf(buf
, len
+ 1, "%s%s%s", mp
->mount_point
, path
, suffix
? suffix
: "");
1953 static struct cgroup_process_info
*
1954 find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
)
1956 struct cgroup_process_info
*info_ptr
;
1957 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1958 struct cgroup_hierarchy
*h
= info_ptr
->hierarchy
;
1961 if (lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
/*
 * Read the control file `sub_filename` inside the cgroup directory
 * `cgroup_path` into `value` (at most `len` bytes, as handled by
 * lxc_read_from_file()).
 *
 * Returns the result of lxc_read_from_file(), or -1 if the file name could
 * not be built. errno from the read survives the cleanup.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *full_name;
	int rc, read_errno;

	full_name = lxc_string_join("/", components, false);
	if (full_name == NULL)
		return -1;

	rc = lxc_read_from_file(full_name, value, len);
	read_errno = errno;
	free(full_name);
	errno = read_errno;

	return rc;
}
/*
 * Write the string `value` to the control file `sub_filename` inside the
 * cgroup directory `cgroup_path`.
 *
 * Returns the result of lxc_write_to_file(), or -1 if the file name could
 * not be built. errno from the write survives the cleanup.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *full_name;
	int rc, write_errno;

	full_name = lxc_string_join("/", components, false);
	if (full_name == NULL)
		return -1;

	rc = lxc_write_to_file(full_name, value, strlen(value), false);
	write_errno = errno;
	free(full_name);
	errno = write_errno;

	return rc;
}
2012 static int do_setup_cgroup_limits(struct cgfs_data
*d
,
2013 struct lxc_list
*cgroup_settings
, bool do_devices
)
2015 struct lxc_list
*iterator
, *sorted_cgroup_settings
, *next
;
2016 struct lxc_cgroup
*cg
;
2019 if (lxc_list_empty(cgroup_settings
))
2022 sorted_cgroup_settings
= sort_cgroup_settings(cgroup_settings
);
2023 if (!sorted_cgroup_settings
) {
2027 lxc_list_for_each(iterator
, sorted_cgroup_settings
) {
2028 cg
= iterator
->elem
;
2030 if (do_devices
== !strncmp("devices", cg
->subsystem
, 7)) {
2031 if (strcmp(cg
->subsystem
, "devices.deny") == 0 &&
2032 cgroup_devices_has_allow_or_deny(d
, cg
->value
, false))
2034 if (strcmp(cg
->subsystem
, "devices.allow") == 0 &&
2035 cgroup_devices_has_allow_or_deny(d
, cg
->value
, true))
2037 if (lxc_cgroup_set_data(cg
->subsystem
, cg
->value
, d
)) {
2038 if (do_devices
&& (errno
== EACCES
|| errno
== EPERM
)) {
2039 WARN("Error setting %s to %s for %s",
2040 cg
->subsystem
, cg
->value
, d
->name
);
2043 SYSERROR("Error setting %s to %s for %s",
2044 cg
->subsystem
, cg
->value
, d
->name
);
2049 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
2053 INFO("cgroup has been setup");
2055 lxc_list_for_each_safe(iterator
, sorted_cgroup_settings
, next
) {
2056 lxc_list_del(iterator
);
2059 free(sorted_cgroup_settings
);
2063 static bool cgroup_devices_has_allow_or_deny(struct cgfs_data
*d
,
2064 char *v
, bool for_allow
)
2070 bool ret
= !for_allow
;
2071 const char *parts
[3] = {
2077 /* XXX FIXME if users could use something other than 'lxc.devices.deny =
2078 * a'. not sure they ever do, but they *could* right now, I'm assuming
2081 if (!for_allow
&& strcmp(v
, "a") != 0 && strcmp(v
, "a *:* rwm") != 0)
2084 parts
[0] = (const char *)lxc_cgroup_get_hierarchy_abs_path_data("devices", d
);
2087 path
= lxc_string_join("/", parts
, false);
2089 free((void *)parts
[0]);
2093 devices_list
= fopen_cloexec(path
, "r");
2094 if (!devices_list
) {
2099 while (getline(&line
, &sz
, devices_list
) != -1) {
2100 size_t len
= strlen(line
);
2101 if (len
> 0 && line
[len
-1] == '\n')
2103 if (strcmp(line
, "a *:* rwm") == 0) {
2106 } else if (for_allow
&& strcmp(line
, v
) == 0) {
2113 fclose(devices_list
);
2119 static int cgroup_recursive_task_count(const char *cgroup_path
)
2122 struct dirent
*dent
;
2125 d
= opendir(cgroup_path
);
2129 while ((dent
= readdir(d
))) {
2130 const char *parts
[3] = {
2138 if (!strcmp(dent
->d_name
, ".") || !strcmp(dent
->d_name
, ".."))
2140 sub_path
= lxc_string_join("/", parts
, false);
2145 r
= stat(sub_path
, &st
);
2151 if (S_ISDIR(st
.st_mode
)) {
2152 r
= cgroup_recursive_task_count(sub_path
);
2155 } else if (!strcmp(dent
->d_name
, "tasks")) {
2156 r
= lxc_count_file_lines(sub_path
);
2167 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
,
2170 int r
, saved_errno
= 0;
2173 mp
->need_cpuset_init
= false;
2175 /* If this is the memory cgroup, we want to enforce hierarchy.
2176 * But don't fail if for some reason we can't.
2178 if (lxc_string_in_array("memory", (const char **)mp
->hierarchy
->subsystems
)) {
2179 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/memory.use_hierarchy");
2181 r
= lxc_read_from_file(cc_path
, buf
, 1);
2182 if (r
< 1 || buf
[0] != '1') {
2183 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2185 SYSERROR("failed to set memory.use_hierarchy to 1; continuing");
2191 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2192 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2193 * and cpuset.cpus and then
2195 if (lxc_string_in_array("cpuset", (const char **)mp
->hierarchy
->subsystems
)) {
2196 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/cgroup.clone_children");
2201 /* cgroup.clone_children is not available when running under
2202 * older kernel versions; in this case, we'll initialize
2203 * cpuset.cpus and cpuset.mems later, after the new cgroup
2206 if (stat(cc_path
, &sb
) != 0 && errno
== ENOENT
) {
2207 mp
->need_cpuset_init
= true;
2211 r
= lxc_read_from_file(cc_path
, buf
, 1);
2212 if (r
== 1 && buf
[0] == '1') {
2216 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2217 saved_errno
= errno
;
2219 errno
= saved_errno
;
2220 return r
< 0 ? -1 : 0;
2225 static int cgroup_read_from_file(const char *fn
, char buf
[], size_t bufsize
)
2227 int ret
= lxc_read_from_file(fn
, buf
, bufsize
);
2229 SYSERROR("failed to read %s", fn
);
2232 if (ret
== bufsize
) {
2234 /* obviously this wasn't empty */
2235 buf
[bufsize
-1] = '\0';
2238 /* Callers don't do this, but regression/sanity check */
2239 ERROR("was not expecting 0 bufsize");
2246 static bool do_init_cpuset_file(struct cgroup_mount_point
*mp
,
2247 const char *path
, const char *name
)
2250 char *childfile
, *parentfile
= NULL
, *tmp
;
2254 childfile
= cgroup_to_absolute_path(mp
, path
, name
);
2258 /* don't overwrite a non-empty value in the file */
2259 ret
= cgroup_read_from_file(childfile
, value
, sizeof(value
));
2262 if (value
[0] != '\0' && value
[0] != '\n') {
2267 /* path to the same name in the parent cgroup */
2268 parentfile
= strdup(path
);
2272 tmp
= strrchr(parentfile
, '/');
2275 if (tmp
== parentfile
)
2276 tmp
++; /* keep the '/' at the start */
2279 parentfile
= cgroup_to_absolute_path(mp
, tmp
, name
);
2284 /* copy from parent to child cgroup */
2285 ret
= cgroup_read_from_file(parentfile
, value
, sizeof(value
));
2288 if (ret
== sizeof(value
)) {
2289 /* If anyone actually sees this error, we can address it */
2290 ERROR("parent cpuset value too long");
2293 ok
= (lxc_write_to_file(childfile
, value
, strlen(value
), false) >= 0);
2295 SYSERROR("failed writing %s", childfile
);
2303 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
,
2306 /* the files we have to handle here are only in cpuset hierarchies */
2307 if (!lxc_string_in_array("cpuset",
2308 (const char **)mp
->hierarchy
->subsystems
))
2311 if (!mp
->need_cpuset_init
)
2314 return (do_init_cpuset_file(mp
, path
, "/cpuset.cpus") &&
2315 do_init_cpuset_file(mp
, path
, "/cpuset.mems") );
2318 static void print_cgfs_init_debuginfo(struct cgfs_data
*d
)
2322 if (!getenv("LXC_DEBUG_CGFS"))
2325 DEBUG("Cgroup information:");
2326 DEBUG(" container name: %s", d
->name
);
2327 if (!d
->meta
|| !d
->meta
->hierarchies
) {
2328 DEBUG(" No hierarchies found.");
2331 DEBUG(" Controllers:");
2332 for (i
= 0; i
<= d
->meta
->maximum_hierarchy
; i
++) {
2334 struct cgroup_hierarchy
*h
= d
->meta
->hierarchies
[i
];
2336 DEBUG(" Empty hierarchy number %d.", i
);
2339 for (p
= h
->subsystems
; p
&& *p
; p
++) {
2340 DEBUG(" %2d: %s", i
, *p
);
2345 struct cgroup_ops
*cgfs_ops_init(void)
2350 static void *cgfs_init(struct lxc_handler
*handler
)
2352 struct cgfs_data
*d
;
2354 d
= malloc(sizeof(*d
));
2358 memset(d
, 0, sizeof(*d
));
2359 d
->name
= strdup(handler
->name
);
2363 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
2365 d
->meta
= lxc_cgroup_load_meta();
2367 ERROR("cgroupfs failed to detect cgroup metadata");
2371 print_cgfs_init_debuginfo(d
);
2382 static void cgfs_destroy(void *hdata
, struct lxc_conf
*conf
)
2384 struct cgfs_data
*d
= hdata
;
2389 lxc_cgroup_process_info_free_and_remove(d
->info
, conf
);
2390 lxc_cgroup_put_meta(d
->meta
);
2394 static inline bool cgfs_create(void *hdata
)
2396 struct cgfs_data
*d
= hdata
;
2397 struct cgroup_process_info
*i
;
2398 struct cgroup_meta_data
*md
;
2403 i
= lxc_cgroupfs_create(d
->name
, d
->cgroup_pattern
, md
, NULL
);
2410 static inline bool cgfs_enter(void *hdata
, pid_t pid
)
2412 struct cgfs_data
*d
= hdata
;
2413 struct cgroup_process_info
*i
;
2419 ret
= lxc_cgroupfs_enter(i
, pid
, false);
2424 static inline bool cgfs_create_legacy(void *hdata
, pid_t pid
)
2426 struct cgfs_data
*d
= hdata
;
2427 struct cgroup_process_info
*i
;
2432 if (lxc_cgroup_create_legacy(i
, d
->name
, pid
) < 0) {
2433 ERROR("failed to create legacy ns cgroups for '%s'", d
->name
);
/*
 * Return the cgroup path recorded for `subsystem` in the handler data, or
 * NULL when there is no handler data or no matching entry. The returned
 * string stays owned by the handler data.
 */
static const char *cgfs_get_cgroup(void *hdata, const char *subsystem)
{
	struct cgfs_data *data = hdata;

	return data ? lxc_cgroup_get_hierarchy_path_data(subsystem, data) : NULL;
}
2448 static bool cgfs_escape(void *hdata
)
2450 struct cgroup_meta_data
*md
;
2454 md
= lxc_cgroup_load_meta();
2458 for (i
= 0; i
<= md
->maximum_hierarchy
; i
++) {
2459 struct cgroup_hierarchy
*h
= md
->hierarchies
[i
];
2460 struct cgroup_mount_point
*mp
;
2466 WARN("not escaping hierarchy %d", i
);
2470 mp
= lxc_cgroup_find_mount_point(h
, "/", true);
2474 tasks
= cgroup_to_absolute_path(mp
, "/", "tasks");
2478 f
= fopen(tasks
, "a");
2483 written
= fprintf(f
, "%d\n", lxc_raw_getpid());
2486 SYSERROR("writing tasks failed\n");
2493 lxc_cgroup_put_meta(md
);
/*
 * Number of hierarchies known to this backend.
 *
 * Not implemented for cgroupfs: always reports -1.
 */
static int cgfs_num_hierarchies(void)
{
	const int not_implemented = -1;

	return not_implemented;
}
/*
 * Fetch the controllers of hierarchy number `i`.
 *
 * Not implemented for cgroupfs: always reports failure and leaves `out`
 * untouched.
 */
static bool cgfs_get_hierarchies(int i, char ***out)
{
	(void)i;
	(void)out;

	return false;
}
/*
 * Thaw the container by writing "THAWED" to freezer.state of its freezer
 * cgroup.
 *
 * Returns true when the write succeeds; false when there is no handler
 * data, the absolute path cannot be determined, or the write fails.
 */
static bool cgfs_unfreeze(void *hdata)
{
	struct cgfs_data *data = hdata;
	char *relative, *absolute;
	bool thawed;

	if (data == NULL)
		return false;

	relative = lxc_cgroup_get_hierarchy_path_data("freezer", data);
	absolute = lxc_cgroup_find_abs_path("freezer", relative, true, NULL);
	if (absolute == NULL)
		return false;

	thawed = do_cgroup_set(absolute, "freezer.state", "THAWED") == 0;
	free(absolute);

	return thawed;
}
/*
 * Apply the configured cgroup settings through do_setup_cgroup_limits().
 *
 * `with_devices` is handed through and selects whether the devices settings
 * or all other settings are applied. Returns true on success, false on
 * failure or when there is no handler data.
 */
static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
				  bool with_devices)
{
	struct cgfs_data *data = hdata;

	return data != NULL &&
	       do_setup_cgroup_limits(data, cgroup_conf, with_devices) == 0;
}
/*
 * Move an attached process into the cgroups of a running container.
 *
 * The container's membership list is looked up from `name` and `lxcpath`,
 * `pid` is entered into it, and the list is released again.
 *
 * Returns true on success; on any failure an error is logged and false is
 * returned.
 */
static bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
{
	struct cgroup_meta_data *meta;
	struct cgroup_process_info *membership;
	int entered;

	meta = lxc_cgroup_load_meta();
	if (meta == NULL) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	/* the reference is dropped again right after the lookup */
	membership = lxc_cgroup_get_container_info(name, lxcpath, meta);
	lxc_cgroup_put_meta(meta);
	if (membership == NULL) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	entered = lxc_cgroupfs_enter(membership, pid, false);
	lxc_cgroup_process_info_free(membership);
	if (entered < 0) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	return true;
}
2567 const char *cgroup_path
;
2572 * TODO - someone should refactor this to unshare once passing all the paths
2573 * to be chowned in one go
2575 static int chown_cgroup_wrapper(void *data
)
2577 struct chown_data
*arg
= data
;
2581 if (setresgid(0,0,0) < 0)
2582 SYSERROR("Failed to setgid to 0");
2583 if (setresuid(0,0,0) < 0)
2584 SYSERROR("Failed to setuid to 0");
2585 if (setgroups(0, NULL
) < 0)
2586 SYSERROR("Failed to clear groups");
2587 destuid
= get_ns_uid(arg
->origuid
);
2589 if (chown(arg
->cgroup_path
, destuid
, 0) < 0)
2590 SYSERROR("Failed chowning %s to %d", arg
->cgroup_path
, (int)destuid
);
2592 fpath
= lxc_append_paths(arg
->cgroup_path
, "tasks");
2595 if (chown(fpath
, destuid
, 0) < 0)
2596 SYSERROR("Error chowning %s\n", fpath
);
2599 fpath
= lxc_append_paths(arg
->cgroup_path
, "cgroup.procs");
2602 if (chown(fpath
, destuid
, 0) < 0)
2603 SYSERROR("Error chowning %s", fpath
);
2609 static bool do_cgfs_chown(char *cgroup_path
, struct lxc_conf
*conf
)
2611 struct chown_data data
;
2614 if (!dir_exists(cgroup_path
))
2617 if (lxc_list_empty(&conf
->id_map
))
2618 /* If there's no mapping then we don't need to chown */
2621 data
.cgroup_path
= cgroup_path
;
2622 data
.origuid
= geteuid();
2624 /* Unpriv users can't chown it themselves, so chown from
2625 * a child namespace mapping both our own and the target uid
2627 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
,
2628 "chown_cgroup_wrapper") < 0) {
2629 ERROR("Error requesting cgroup chown in new namespace");
2634 * Now chmod 775 the directory else the container cannot create cgroups.
2635 * This can't be done in the child namespace because it only group-owns
2638 if (chmod(cgroup_path
, 0775) < 0) {
2639 SYSERROR("Error chmoding %s\n", cgroup_path
);
2642 fpath
= lxc_append_paths(cgroup_path
, "tasks");
2645 if (chmod(fpath
, 0664) < 0)
2646 SYSERROR("Error chmoding %s\n", fpath
);
2648 fpath
= lxc_append_paths(cgroup_path
, "cgroup.procs");
2651 if (chmod(fpath
, 0664) < 0)
2652 SYSERROR("Error chmoding %s\n", fpath
);
2658 static bool cgfs_chown(void *hdata
, struct lxc_conf
*conf
)
2660 struct cgfs_data
*d
= hdata
;
2661 struct cgroup_process_info
*info_ptr
;
2668 for (info_ptr
= d
->info
; info_ptr
; info_ptr
= info_ptr
->next
) {
2669 if (!info_ptr
->hierarchy
)
2672 if (!info_ptr
->designated_mount_point
) {
2673 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, info_ptr
->cgroup_path
, true);
2674 if (!info_ptr
->designated_mount_point
) {
2675 SYSERROR("Could not chown cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", info_ptr
->cgroup_path
);
2680 cgpath
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
, NULL
);
2682 SYSERROR("Could not chown cgroup %s: internal error", info_ptr
->cgroup_path
);
2685 r
= do_cgfs_chown(cgpath
, conf
);
2686 if (!r
&& is_crucial_hierarchy(info_ptr
->hierarchy
)) {
2687 ERROR("Failed chowning %s\n", cgpath
);
2697 static struct cgroup_ops cgfs_ops
= {
2699 .destroy
= cgfs_destroy
,
2700 .create
= cgfs_create
,
2701 .enter
= cgfs_enter
,
2702 .create_legacy
= cgfs_create_legacy
,
2703 .get_cgroup
= cgfs_get_cgroup
,
2704 .escape
= cgfs_escape
,
2705 .num_hierarchies
= cgfs_num_hierarchies
,
2706 .get_hierarchies
= cgfs_get_hierarchies
,
2707 .get
= lxc_cgroupfs_get
,
2708 .set
= lxc_cgroupfs_set
,
2709 .unfreeze
= cgfs_unfreeze
,
2710 .setup_limits
= cgroupfs_setup_limits
,
2712 .attach
= lxc_cgroupfs_attach
,
2713 .chown
= cgfs_chown
,
2714 .mount_cgroup
= cgroupfs_mount_cgroup
,
2715 .nrtasks
= cgfs_nrtasks
,