2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/inotify.h>
38 #include <sys/mount.h>
39 #include <netinet/in.h>
54 #include <../include/lxcmntent.h>
59 struct cgroup_hierarchy
;
60 struct cgroup_meta_data
;
61 struct cgroup_mount_point
;
64 * cgroup_meta_data: the metadata about the cgroup infrastructure on this
67 struct cgroup_meta_data
{
68 ptrdiff_t ref
; /* simple refcount */
69 struct cgroup_hierarchy
**hierarchies
;
70 struct cgroup_mount_point
**mount_points
;
71 int maximum_hierarchy
;
75 * cgroup_hierarchy: describes a single cgroup hierarchy
76 * (may have multiple mount points)
78 struct cgroup_hierarchy
{
80 bool used
; /* false if the hierarchy should be ignored by lxc */
82 struct cgroup_mount_point
*rw_absolute_mount_point
;
83 struct cgroup_mount_point
*ro_absolute_mount_point
;
84 struct cgroup_mount_point
**all_mount_points
;
85 size_t all_mount_point_capacity
;
89 * cgroup_mount_point: a mount point to where a hierarchy
92 struct cgroup_mount_point
{
93 struct cgroup_hierarchy
*hierarchy
;
97 bool need_cpuset_init
;
101 * cgroup_process_info: describes the membership of a
102 * process to the different cgroup
105 * Note this is the per-process info tracked by the cgfs_ops.
106 * This is not used with cgmanager.
108 struct cgroup_process_info
{
109 struct cgroup_process_info
*next
;
110 struct cgroup_meta_data
*meta_ref
;
111 struct cgroup_hierarchy
*hierarchy
;
113 char *cgroup_path_sub
;
114 char **created_paths
;
115 size_t created_paths_capacity
;
116 size_t created_paths_count
;
117 struct cgroup_mount_point
*designated_mount_point
;
122 const char *cgroup_pattern
;
123 struct cgroup_meta_data
*meta
;
124 struct cgroup_process_info
*info
;
127 lxc_log_define(lxc_cgfs
, lxc
);
129 static struct cgroup_process_info
*lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
, struct cgroup_meta_data
*meta
);
130 static char **subsystems_from_mount_options(const char *mount_options
, char **kernel_list
);
131 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
);
132 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
);
133 static bool is_valid_cgroup(const char *name
);
134 static int create_cgroup(struct cgroup_mount_point
*mp
, const char *path
);
135 static int remove_cgroup(struct cgroup_mount_point
*mp
, const char *path
, bool recurse
,
136 struct lxc_conf
*conf
);
137 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
, const char *path
, const char *suffix
);
138 static struct cgroup_process_info
*find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
);
139 static int do_cgroup_get(const char *cgroup_path
, const char *sub_filename
, char *value
, size_t len
);
140 static int do_cgroup_set(const char *cgroup_path
, const char *sub_filename
, const char *value
);
141 static bool cgroup_devices_has_allow_or_deny(struct cgfs_data
*d
, char *v
, bool for_allow
);
142 static int do_setup_cgroup_limits(struct cgfs_data
*d
, struct lxc_list
*cgroup_settings
, bool do_devices
);
143 static int cgroup_recursive_task_count(const char *cgroup_path
);
144 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
, char *cgroup_path
);
145 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
, const char *path
);
147 static struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
);
148 static struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
);
149 static struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
);
151 /* free process membership information */
152 static void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
);
153 static void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
,
154 struct lxc_conf
*conf
);
156 static struct cgroup_ops cgfs_ops
;
158 static int cgroup_rmdir(char *dirname
)
160 struct dirent
*direntp
;
164 char pathname
[MAXPATHLEN
];
166 dir
= opendir(dirname
);
168 ERROR("%s: failed to open %s", __func__
, dirname
);
172 while ((direntp
= readdir(dir
))) {
179 if (!strcmp(direntp
->d_name
, ".") ||
180 !strcmp(direntp
->d_name
, ".."))
183 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
184 if (rc
< 0 || rc
>= MAXPATHLEN
) {
185 ERROR("pathname too long");
188 saved_errno
= -ENOMEM
;
191 ret
= lstat(pathname
, &mystat
);
193 SYSERROR("%s: failed to stat %s", __func__
, pathname
);
199 if (S_ISDIR(mystat
.st_mode
)) {
200 if (cgroup_rmdir(pathname
) < 0) {
208 if (rmdir(dirname
) < 0) {
209 SYSERROR("%s: failed to delete %s", __func__
, dirname
);
217 SYSERROR("%s: failed to close directory %s", __func__
, dirname
);
224 return failed
? -1 : 0;
227 static int rmdir_wrapper(void *data
)
231 if (setresgid(0,0,0) < 0)
232 SYSERROR("Failed to setgid to 0");
233 if (setresuid(0,0,0) < 0)
234 SYSERROR("Failed to setuid to 0");
235 if (setgroups(0, NULL
) < 0)
236 SYSERROR("Failed to clear groups");
238 return cgroup_rmdir(path
);
241 static struct cgroup_meta_data
*lxc_cgroup_load_meta()
243 const char *cgroup_use
= NULL
;
244 char **cgroup_use_list
= NULL
;
245 struct cgroup_meta_data
*md
= NULL
;
249 cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
250 if (!cgroup_use
&& errno
!= 0)
253 cgroup_use_list
= lxc_string_split_and_trim(cgroup_use
, ',');
254 if (!cgroup_use_list
)
258 md
= lxc_cgroup_load_meta2((const char **)cgroup_use_list
);
260 lxc_free_array((void **)cgroup_use_list
, free
);
265 /* Step 1: determine all kernel subsystems */
266 static bool find_cgroup_subsystems(char ***kernel_subsystems
)
272 size_t kernel_subsystems_count
= 0;
273 size_t kernel_subsystems_capacity
= 0;
276 proc_cgroups
= fopen_cloexec("/proc/cgroups", "r");
280 while (getline(&line
, &sz
, proc_cgroups
) != -1) {
283 int hierarchy_number
;
290 tab1
= strchr(line
, '\t');
294 tab2
= strchr(tab1
, '\t');
300 hierarchy_number
= strtoul(tab1
, &tab2
, 10);
303 (void)hierarchy_number
;
305 r
= lxc_grow_array((void ***)kernel_subsystems
, &kernel_subsystems_capacity
, kernel_subsystems_count
+ 1, 12);
308 (*kernel_subsystems
)[kernel_subsystems_count
] = strdup(line
);
309 if (!(*kernel_subsystems
)[kernel_subsystems_count
])
311 kernel_subsystems_count
++;
316 fclose(proc_cgroups
);
321 /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
322 * since mount points don't specify hierarchy number and
323 * /proc/cgroups does not contain named hierarchies
325 static bool find_cgroup_hierarchies(struct cgroup_meta_data
*meta_data
,
326 bool all_kernel_subsystems
, bool all_named_subsystems
,
327 const char **subsystem_whitelist
)
329 FILE *proc_self_cgroup
;
334 size_t hierarchy_capacity
= 0;
336 proc_self_cgroup
= fopen_cloexec("/proc/self/cgroup", "r");
337 /* if for some reason (because of setns() and pid namespace for example),
338 * /proc/self is not valid, we try /proc/1/cgroup... */
339 if (!proc_self_cgroup
)
340 proc_self_cgroup
= fopen_cloexec("/proc/1/cgroup", "r");
341 if (!proc_self_cgroup
)
344 while (getline(&line
, &sz
, proc_self_cgroup
) != -1) {
345 /* file format: hierarchy:subsystems:group,
346 * we only extract hierarchy and subsystems
350 int hierarchy_number
;
351 struct cgroup_hierarchy
*h
= NULL
;
357 colon1
= strchr(line
, ':');
361 colon2
= strchr(colon1
, ':');
368 /* With cgroupv2 /proc/self/cgroup can contain entries of the
370 * These entries need to be skipped.
372 if (!strcmp(colon1
, ""))
375 hierarchy_number
= strtoul(line
, &colon2
, 10);
376 if (!colon2
|| *colon2
)
379 if (hierarchy_number
> meta_data
->maximum_hierarchy
) {
380 /* lxc_grow_array will never shrink, so even if we find a lower
381 * hierarchy number here, the array will never be smaller
383 r
= lxc_grow_array((void ***)&meta_data
->hierarchies
, &hierarchy_capacity
, hierarchy_number
+ 1, 12);
387 meta_data
->maximum_hierarchy
= hierarchy_number
;
390 /* this shouldn't happen, we had this already */
391 if (meta_data
->hierarchies
[hierarchy_number
])
394 h
= calloc(1, sizeof(struct cgroup_hierarchy
));
398 meta_data
->hierarchies
[hierarchy_number
] = h
;
400 h
->index
= hierarchy_number
;
401 h
->subsystems
= lxc_string_split_and_trim(colon1
, ',');
404 /* see if this hierarchy should be considered */
405 if (!all_kernel_subsystems
|| !all_named_subsystems
) {
406 for (p
= h
->subsystems
; *p
; p
++) {
407 if (!strncmp(*p
, "name=", 5)) {
408 if (all_named_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
413 if (all_kernel_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
420 /* we want all hierarchy anyway */
427 fclose(proc_self_cgroup
);
432 /* Step 3: determine all mount points of each hierarchy */
433 static bool find_hierarchy_mountpts( struct cgroup_meta_data
*meta_data
, char **kernel_subsystems
)
436 FILE *proc_self_mountinfo
;
439 char **tokens
= NULL
;
440 size_t mount_point_count
= 0;
441 size_t mount_point_capacity
= 0;
442 size_t token_capacity
= 0;
444 bool is_cgns
= cgns_supported();
446 proc_self_mountinfo
= fopen_cloexec("/proc/self/mountinfo", "r");
447 /* if for some reason (because of setns() and pid namespace for example),
448 * /proc/self is not valid, we try /proc/1/cgroup... */
449 if (!proc_self_mountinfo
)
450 proc_self_mountinfo
= fopen_cloexec("/proc/1/mountinfo", "r");
451 if (!proc_self_mountinfo
)
454 while (getline(&line
, &sz
, proc_self_mountinfo
) != -1) {
455 char *token
, *line_tok
, *saveptr
= NULL
;
457 struct cgroup_mount_point
*mount_point
;
458 struct cgroup_hierarchy
*h
;
460 bool is_lxcfs
= false;
462 if (line
[0] && line
[strlen(line
) - 1] == '\n')
463 line
[strlen(line
) - 1] = '\0';
465 for (i
= 0, line_tok
= line
; (token
= strtok_r(line_tok
, " ", &saveptr
)); line_tok
= NULL
) {
466 r
= lxc_grow_array((void ***)&tokens
, &token_capacity
, i
+ 1, 64);
472 /* layout of /proc/self/mountinfo:
475 * 2: device major:minor
478 * 5: per-mount options
479 * [optional X]: additional data
483 * X+10: per-superblock options
485 for (j
= 6; j
< i
&& tokens
[j
]; j
++)
486 if (!strcmp(tokens
[j
], "-"))
489 /* could not find separator */
490 if (j
>= i
|| !tokens
[j
])
492 /* there should be exactly three fields after
498 /* not a cgroup filesystem */
499 if (strcmp(tokens
[j
+ 1], "cgroup") != 0) {
500 if (strcmp(tokens
[j
+ 1], "fuse.lxcfs") != 0)
502 if (strncmp(tokens
[4], "/sys/fs/cgroup/", 15) != 0)
505 char *curtok
= tokens
[4] + 15;
506 subsystems
= subsystems_from_mount_options(curtok
,
509 subsystems
= subsystems_from_mount_options(tokens
[j
+ 3],
515 for (k
= 0; k
<= meta_data
->maximum_hierarchy
; k
++) {
516 if (meta_data
->hierarchies
[k
] &&
517 meta_data
->hierarchies
[k
]->subsystems
[0] &&
518 lxc_string_in_array(meta_data
->hierarchies
[k
]->subsystems
[0], (const char **)subsystems
)) {
519 /* TODO: we could also check if the lists really match completely,
520 * just to have an additional sanity check */
521 h
= meta_data
->hierarchies
[k
];
525 lxc_free_array((void **)subsystems
, free
);
527 r
= lxc_grow_array((void ***)&meta_data
->mount_points
, &mount_point_capacity
, mount_point_count
+ 1, 12);
531 /* create mount point object */
532 mount_point
= calloc(1, sizeof(*mount_point
));
536 meta_data
->mount_points
[mount_point_count
++] = mount_point
;
538 mount_point
->hierarchy
= h
;
539 if (is_lxcfs
|| is_cgns
)
540 mount_point
->mount_prefix
= strdup("/");
542 mount_point
->mount_prefix
= strdup(tokens
[3]);
543 mount_point
->mount_point
= strdup(tokens
[4]);
544 if (!mount_point
->mount_point
|| !mount_point
->mount_prefix
)
546 mount_point
->read_only
= !lxc_string_in_list("rw", tokens
[5], ',');
548 if (!strcmp(mount_point
->mount_prefix
, "/")) {
549 if (mount_point
->read_only
) {
550 if (!h
->ro_absolute_mount_point
)
551 h
->ro_absolute_mount_point
= mount_point
;
553 if (!h
->rw_absolute_mount_point
)
554 h
->rw_absolute_mount_point
= mount_point
;
558 k
= lxc_array_len((void **)h
->all_mount_points
);
559 r
= lxc_grow_array((void ***)&h
->all_mount_points
, &h
->all_mount_point_capacity
, k
+ 1, 4);
562 h
->all_mount_points
[k
] = mount_point
;
567 fclose(proc_self_mountinfo
);
573 static struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
)
575 bool all_kernel_subsystems
= true;
576 bool all_named_subsystems
= false;
577 struct cgroup_meta_data
*meta_data
= NULL
;
578 char **kernel_subsystems
= NULL
;
581 /* if the subsystem whitelist is not specified, include all
582 * hierarchies that contain kernel subsystems by default but
583 * no hierarchies that only contain named subsystems
585 * if it is specified, the specifier @all will select all
586 * hierarchies, @kernel will select all hierarchies with
587 * kernel subsystems and @named will select all named
590 all_kernel_subsystems
= subsystem_whitelist
?
591 (lxc_string_in_array("@kernel", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
593 all_named_subsystems
= subsystem_whitelist
?
594 (lxc_string_in_array("@named", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
597 meta_data
= calloc(1, sizeof(struct cgroup_meta_data
));
602 if (!find_cgroup_subsystems(&kernel_subsystems
))
605 if (!find_cgroup_hierarchies(meta_data
, all_kernel_subsystems
,
606 all_named_subsystems
, subsystem_whitelist
))
609 if (!find_hierarchy_mountpts(meta_data
, kernel_subsystems
))
612 /* oops, we couldn't find anything */
613 if (!meta_data
->hierarchies
|| !meta_data
->mount_points
) {
618 lxc_free_array((void **)kernel_subsystems
, free
);
623 lxc_free_array((void **)kernel_subsystems
, free
);
624 lxc_cgroup_put_meta(meta_data
);
629 static struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
)
635 static struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
)
640 if (--meta_data
->ref
> 0)
642 lxc_free_array((void **)meta_data
->mount_points
, (lxc_free_fn
)lxc_cgroup_mount_point_free
);
643 if (meta_data
->hierarchies
)
644 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++)
645 if (meta_data
->hierarchies
[i
])
646 lxc_cgroup_hierarchy_free(meta_data
->hierarchies
[i
]);
647 free(meta_data
->hierarchies
);
652 static struct cgroup_hierarchy
*lxc_cgroup_find_hierarchy(struct cgroup_meta_data
*meta_data
, const char *subsystem
)
655 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
656 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
659 if (h
&& lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
665 static bool mountpoint_is_accessible(struct cgroup_mount_point
*mp
)
667 return mp
&& access(mp
->mount_point
, F_OK
) == 0;
670 static struct cgroup_mount_point
*lxc_cgroup_find_mount_point(struct cgroup_hierarchy
*hierarchy
, const char *group
, bool should_be_writable
)
672 struct cgroup_mount_point
**mps
;
673 struct cgroup_mount_point
*current_result
= NULL
;
674 ssize_t quality
= -1;
677 if (mountpoint_is_accessible(hierarchy
->rw_absolute_mount_point
))
678 return hierarchy
->rw_absolute_mount_point
;
679 if (!should_be_writable
&& mountpoint_is_accessible(hierarchy
->ro_absolute_mount_point
))
680 return hierarchy
->ro_absolute_mount_point
;
682 for (mps
= hierarchy
->all_mount_points
; mps
&& *mps
; mps
++) {
683 struct cgroup_mount_point
*mp
= *mps
;
684 size_t prefix_len
= mp
->mount_prefix
? strlen(mp
->mount_prefix
) : 0;
686 if (prefix_len
== 1 && mp
->mount_prefix
[0] == '/')
689 if (!mountpoint_is_accessible(mp
))
692 if (should_be_writable
&& mp
->read_only
)
696 (strncmp(group
, mp
->mount_prefix
, prefix_len
) == 0 &&
697 (group
[prefix_len
] == '\0' || group
[prefix_len
] == '/'))) {
698 /* search for the best quality match, i.e. the match with the
699 * shortest prefix where this group is still contained
701 if (quality
== -1 || prefix_len
< quality
) {
703 quality
= prefix_len
;
710 return current_result
;
713 static char *lxc_cgroup_find_abs_path(const char *subsystem
, const char *group
, bool should_be_writable
, const char *suffix
)
715 struct cgroup_meta_data
*meta_data
;
716 struct cgroup_hierarchy
*h
;
717 struct cgroup_mount_point
*mp
;
721 meta_data
= lxc_cgroup_load_meta();
725 h
= lxc_cgroup_find_hierarchy(meta_data
, subsystem
);
729 mp
= lxc_cgroup_find_mount_point(h
, group
, should_be_writable
);
733 result
= cgroup_to_absolute_path(mp
, group
, suffix
);
737 lxc_cgroup_put_meta(meta_data
);
742 lxc_cgroup_put_meta(meta_data
);
747 static struct cgroup_process_info
*lxc_cgroup_process_info_get(pid_t pid
, struct cgroup_meta_data
*meta
)
750 snprintf(pid_buf
, 32, "/proc/%lu/cgroup", (unsigned long)pid
);
751 return lxc_cgroup_process_info_getx(pid_buf
, meta
);
754 static struct cgroup_process_info
*lxc_cgroup_process_info_get_init(struct cgroup_meta_data
*meta
)
756 return lxc_cgroup_process_info_get(1, meta
);
759 static struct cgroup_process_info
*lxc_cgroup_process_info_get_self(struct cgroup_meta_data
*meta
)
761 struct cgroup_process_info
*i
;
762 i
= lxc_cgroup_process_info_getx("/proc/self/cgroup", meta
);
764 i
= lxc_cgroup_process_info_get(getpid(), meta
);
769 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
770 * is already in a new cgroup named after the pid. 'mnt' is passed in as
771 * the full current cgroup. Say that is /sys/fs/cgroup/lxc/2975 and the container
772 * name is c1. . We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1,
773 * and return the string /sys/fs/cgroup/lxc/c1.
775 static char *cgroup_rename_nsgroup(const char *mountpath
, const char *oldname
, pid_t pid
, const char *name
)
777 char *dir
, *fulloldpath
;
778 char *newname
, *fullnewpath
;
779 int len
, newlen
, ret
;
782 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
785 * fulloldpath = /cgroup/ab/2375
786 * fullnewpath = /cgroup/ab/c1
789 dir
= alloca(strlen(oldname
) + 1);
790 strcpy(dir
, oldname
);
792 len
= strlen(oldname
) + strlen(mountpath
) + 22;
793 fulloldpath
= alloca(len
);
794 ret
= snprintf(fulloldpath
, len
, "%s/%s/%ld", mountpath
, oldname
, (unsigned long)pid
);
795 if (ret
< 0 || ret
>= len
)
798 len
= strlen(dir
) + strlen(name
) + 2;
799 newname
= malloc(len
);
801 SYSERROR("Out of memory");
804 ret
= snprintf(newname
, len
, "%s/%s", dir
, name
);
805 if (ret
< 0 || ret
>= len
) {
810 newlen
= strlen(mountpath
) + len
+ 2;
811 fullnewpath
= alloca(newlen
);
812 ret
= snprintf(fullnewpath
, newlen
, "%s/%s", mountpath
, newname
);
813 if (ret
< 0 || ret
>= newlen
) {
818 if (access(fullnewpath
, F_OK
) == 0) {
819 if (rmdir(fullnewpath
) != 0) {
820 SYSERROR("container cgroup %s already exists.", fullnewpath
);
825 if (rename(fulloldpath
, fullnewpath
)) {
826 SYSERROR("failed to rename cgroup %s->%s", fulloldpath
, fullnewpath
);
831 DEBUG("'%s' renamed to '%s'", oldname
, newname
);
836 static bool is_crucial_hierarchy(struct cgroup_hierarchy
*h
)
840 for (p
= h
->subsystems
; *p
; p
++) {
841 if (is_crucial_cgroup_subsystem(*p
))
847 /* create a new cgroup */
848 static struct cgroup_process_info
*lxc_cgroupfs_create(const char *name
, const char *path_pattern
, struct cgroup_meta_data
*meta_data
, const char *sub_pattern
)
850 char **cgroup_path_components
= NULL
;
852 char *path_so_far
= NULL
;
853 char **new_cgroup_paths
= NULL
;
854 char **new_cgroup_paths_sub
= NULL
;
855 struct cgroup_mount_point
*mp
;
856 struct cgroup_hierarchy
*h
;
857 struct cgroup_process_info
*base_info
= NULL
;
858 struct cgroup_process_info
*info_ptr
;
862 bool had_sub_pattern
= false;
865 if (!is_valid_cgroup(name
)) {
866 ERROR("Invalid cgroup name: '%s'", name
);
871 if (!strstr(path_pattern
, "%n")) {
872 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern
);
877 /* we will modify the result of this operation directly,
878 * so we don't have to copy the data structure
880 base_info
= (path_pattern
[0] == '/') ?
881 lxc_cgroup_process_info_get_init(meta_data
) :
882 lxc_cgroup_process_info_get_self(meta_data
);
886 new_cgroup_paths
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
887 if (!new_cgroup_paths
)
888 goto out_initial_error
;
890 new_cgroup_paths_sub
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
891 if (!new_cgroup_paths_sub
)
892 goto out_initial_error
;
894 /* find mount points we can use */
895 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
896 h
= info_ptr
->hierarchy
;
899 mp
= lxc_cgroup_find_mount_point(h
, info_ptr
->cgroup_path
, true);
901 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h
->index
);
902 goto out_initial_error
;
904 info_ptr
->designated_mount_point
= mp
;
906 if (lxc_string_in_array("ns", (const char **)h
->subsystems
))
908 if (handle_cgroup_settings(mp
, info_ptr
->cgroup_path
) < 0) {
909 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
910 goto out_initial_error
;
914 /* normalize the path */
915 cgroup_path_components
= lxc_normalize_path(path_pattern
);
916 if (!cgroup_path_components
)
917 goto out_initial_error
;
919 /* go through the path components to see if we can create them */
920 for (p
= cgroup_path_components
; *p
|| (sub_pattern
&& !had_sub_pattern
); p
++) {
921 /* we only want to create the same component with -1, -2, etc.
922 * if the component contains the container name itself, otherwise
923 * it's not an error if it already exists
925 char *p_eff
= *p
? *p
: (char *)sub_pattern
;
926 bool contains_name
= strstr(p_eff
, "%n");
927 char *current_component
= NULL
;
928 char *current_subpath
= NULL
;
929 char *current_entire_path
= NULL
;
934 /* if we are processing the subpattern, we want to make sure
935 * loop is ended the next time around
938 had_sub_pattern
= true;
942 goto find_name_on_this_level
;
944 cleanup_name_on_this_level
:
945 /* This is reached if we found a name clash.
946 * In that case, remove the cgroup from all previous hierarchies
948 for (j
= 0, info_ptr
= base_info
; j
< i
&& info_ptr
; info_ptr
= info_ptr
->next
, j
++) {
949 if (info_ptr
->created_paths_count
< 1)
951 r
= remove_cgroup(info_ptr
->designated_mount_point
, info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1], false, NULL
);
953 WARN("could not clean up cgroup we created when trying to create container");
954 free(info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1]);
955 info_ptr
->created_paths
[--info_ptr
->created_paths_count
] = NULL
;
957 if (current_component
!= current_subpath
)
958 free(current_subpath
);
959 if (current_component
!= p_eff
)
960 free(current_component
);
961 current_component
= current_subpath
= NULL
;
962 /* try again with another suffix */
965 find_name_on_this_level
:
966 /* determine name of the path component we should create */
967 if (contains_name
&& suffix
> 0) {
968 char *buf
= calloc(strlen(name
) + 32, 1);
970 goto out_initial_error
;
971 snprintf(buf
, strlen(name
) + 32, "%s-%u", name
, suffix
);
972 current_component
= lxc_string_replace("%n", buf
, p_eff
);
975 current_component
= contains_name
? lxc_string_replace("%n", name
, p_eff
) : p_eff
;
977 parts
[0] = path_so_far
;
978 parts
[1] = current_component
;
980 current_subpath
= path_so_far
? lxc_string_join("/", (const char **)parts
, false) : current_component
;
982 /* Now go through each hierarchy and try to create the
983 * corresponding cgroup
985 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
988 if (!info_ptr
->hierarchy
)
991 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
993 current_entire_path
= NULL
;
995 parts2
[0] = !strcmp(info_ptr
->cgroup_path
, "/") ? "" : info_ptr
->cgroup_path
;
996 parts2
[1] = current_subpath
;
998 current_entire_path
= lxc_string_join("/", (const char **)parts2
, false);
1001 /* we are processing the subpath, so only update that one */
1002 free(new_cgroup_paths_sub
[i
]);
1003 new_cgroup_paths_sub
[i
] = strdup(current_entire_path
);
1004 if (!new_cgroup_paths_sub
[i
])
1005 goto cleanup_from_error
;
1007 /* remember which path was used on this controller */
1008 free(new_cgroup_paths
[i
]);
1009 new_cgroup_paths
[i
] = strdup(current_entire_path
);
1010 if (!new_cgroup_paths
[i
])
1011 goto cleanup_from_error
;
1014 r
= create_cgroup(info_ptr
->designated_mount_point
, current_entire_path
);
1015 if (r
< 0 && errno
== EEXIST
&& contains_name
) {
1016 /* name clash => try new name with new suffix */
1017 free(current_entire_path
);
1018 current_entire_path
= NULL
;
1019 goto cleanup_name_on_this_level
;
1020 } else if (r
< 0 && errno
!= EEXIST
) {
1021 if (is_crucial_hierarchy(info_ptr
->hierarchy
)) {
1022 SYSERROR("Could not create cgroup '%s' in '%s'.", current_entire_path
, info_ptr
->designated_mount_point
->mount_point
);
1023 goto cleanup_from_error
;
1026 } else if (r
== 0) {
1027 /* successfully created */
1028 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1030 goto cleanup_from_error
;
1031 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, current_entire_path
)) {
1032 ERROR("Failed to initialize cpuset for '%s' in '%s'.", current_entire_path
, info_ptr
->designated_mount_point
->mount_point
);
1033 goto cleanup_from_error
;
1035 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = current_entire_path
;
1037 /* if we didn't create the cgroup, then we have to make sure that
1038 * further cgroups will be created properly
1040 if (handle_cgroup_settings(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
) < 0) {
1041 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
1042 goto cleanup_from_error
;
1044 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
)) {
1045 ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr
->cgroup_path
);
1046 goto cleanup_from_error
;
1050 /* already existed but path component of pattern didn't contain '%n',
1051 * so this is not an error; but then we don't need current_entire_path
1054 free(current_entire_path
);
1055 current_entire_path
= NULL
;
1059 /* save path so far */
1061 path_so_far
= strdup(current_subpath
);
1063 goto cleanup_from_error
;
1066 if (current_component
!= current_subpath
)
1067 free(current_subpath
);
1068 if (current_component
!= p_eff
)
1069 free(current_component
);
1070 current_component
= current_subpath
= NULL
;
1074 /* called if an error occurred in the loop, so we
1075 * do some additional cleanup here
1077 saved_errno
= errno
;
1078 if (current_component
!= current_subpath
)
1079 free(current_subpath
);
1080 if (current_component
!= p_eff
)
1081 free(current_component
);
1082 free(current_entire_path
);
1083 errno
= saved_errno
;
1084 goto out_initial_error
;
1087 /* we're done, now update the paths */
1088 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
1089 if (!info_ptr
->hierarchy
)
1091 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
1092 * will take care of it
1093 * Since we do a continue in above loop, new_cgroup_paths[i] is
1094 * unset anyway, as is new_cgroup_paths_sub[i]
1096 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
1098 free(info_ptr
->cgroup_path
);
1099 info_ptr
->cgroup_path
= new_cgroup_paths
[i
];
1100 info_ptr
->cgroup_path_sub
= new_cgroup_paths_sub
[i
];
1102 /* don't use lxc_free_array since we used the array members
1103 * to store them in our result...
1105 free(new_cgroup_paths
);
1106 free(new_cgroup_paths_sub
);
1108 lxc_free_array((void **)cgroup_path_components
, free
);
1112 saved_errno
= errno
;
1114 lxc_cgroup_process_info_free_and_remove(base_info
, NULL
);
1115 lxc_free_array((void **)new_cgroup_paths
, free
);
1116 lxc_free_array((void **)new_cgroup_paths_sub
, free
);
1117 lxc_free_array((void **)cgroup_path_components
, free
);
1118 errno
= saved_errno
;
1122 static int lxc_cgroup_create_legacy(struct cgroup_process_info
*base_info
, const char *name
, pid_t pid
)
1124 struct cgroup_process_info
*info_ptr
;
1127 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1128 if (!info_ptr
->hierarchy
)
1131 if (!lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
1134 * For any path which has ns cgroup mounted, handler->pid is already
1135 * moved into a container called '%d % (handler->pid)'. Rename it to
1136 * the cgroup name and record that.
1138 char *tmp
= cgroup_rename_nsgroup((const char *)info_ptr
->designated_mount_point
->mount_point
,
1139 info_ptr
->cgroup_path
, pid
, name
);
1142 free(info_ptr
->cgroup_path
);
1143 info_ptr
->cgroup_path
= tmp
;
1144 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1150 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = tmp
;
1155 /* get the cgroup membership of a given container */
1156 static struct cgroup_process_info
*lxc_cgroup_get_container_info(const char *name
, const char *lxcpath
, struct cgroup_meta_data
*meta_data
)
1158 struct cgroup_process_info
*result
= NULL
;
1159 int saved_errno
= 0;
1161 struct cgroup_process_info
**cptr
= &result
;
1162 struct cgroup_process_info
*entry
= NULL
;
1165 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
1166 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
1170 /* use the command interface to look for the cgroup */
1171 path
= lxc_cmd_get_cgroup_path(name
, lxcpath
, h
->subsystems
[0]);
1177 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1180 entry
->meta_ref
= lxc_cgroup_get_meta(meta_data
);
1181 entry
->hierarchy
= h
;
1182 entry
->cgroup_path
= path
;
1185 /* it is not an error if we don't find anything here,
1186 * it is up to the caller to decide what to do in that
1188 entry
->designated_mount_point
= lxc_cgroup_find_mount_point(h
, entry
->cgroup_path
, true);
1191 cptr
= &entry
->next
;
1197 saved_errno
= errno
;
1199 lxc_cgroup_process_info_free(result
);
1200 lxc_cgroup_process_info_free(entry
);
1201 errno
= saved_errno
;
1205 /* move a processs to the cgroups specified by the membership */
1206 static int lxc_cgroupfs_enter(struct cgroup_process_info
*info
, pid_t pid
, bool enter_sub
)
1209 char *cgroup_tasks_fn
;
1211 struct cgroup_process_info
*info_ptr
;
1213 snprintf(pid_buf
, 32, "%lu", (unsigned long)pid
);
1214 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1215 if (!info_ptr
->hierarchy
)
1218 char *cgroup_path
= (enter_sub
&& info_ptr
->cgroup_path_sub
) ?
1219 info_ptr
->cgroup_path_sub
:
1220 info_ptr
->cgroup_path
;
1222 if (!info_ptr
->designated_mount_point
) {
1223 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, cgroup_path
, true);
1224 if (!info_ptr
->designated_mount_point
) {
1225 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid
, cgroup_path
);
1230 cgroup_tasks_fn
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, cgroup_path
, "/tasks");
1231 if (!cgroup_tasks_fn
) {
1232 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1236 r
= lxc_write_to_file(cgroup_tasks_fn
, pid_buf
, strlen(pid_buf
), false);
1237 free(cgroup_tasks_fn
);
1238 if (r
< 0 && is_crucial_hierarchy(info_ptr
->hierarchy
)) {
1239 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1247 /* free process membership information */
1248 void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
)
1250 struct cgroup_process_info
*next
;
1254 lxc_cgroup_put_meta(info
->meta_ref
);
1255 free(info
->cgroup_path
);
1256 free(info
->cgroup_path_sub
);
1257 lxc_free_array((void **)info
->created_paths
, free
);
1259 lxc_cgroup_process_info_free(next
);
1262 /* free process membership information and remove cgroups that were created */
1263 void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
, struct lxc_conf
*conf
)
1265 struct cgroup_process_info
*next
;
1271 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1273 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1275 /* ignore return value here, perhaps we created the
1276 * '/lxc' cgroup in this container but another container
1277 * is still running (for example)
1279 (void)remove_cgroup(mp
, info
->cgroup_path
, true, conf
);
1281 for (pp
= info
->created_paths
; pp
&& *pp
; pp
++);
1282 for ((void)(pp
&& --pp
); info
->created_paths
&& pp
>= info
->created_paths
; --pp
) {
1285 free(info
->created_paths
);
1286 lxc_cgroup_put_meta(info
->meta_ref
);
1287 free(info
->cgroup_path
);
1288 free(info
->cgroup_path_sub
);
1290 lxc_cgroup_process_info_free_and_remove(next
, conf
);
1293 static char *lxc_cgroup_get_hierarchy_path_data(const char *subsystem
, struct cgfs_data
*d
)
1295 struct cgroup_process_info
*info
= d
->info
;
1296 info
= find_info_for_subsystem(info
, subsystem
);
1299 prune_init_scope(info
->cgroup_path
);
1300 return info
->cgroup_path
;
1303 static char *lxc_cgroup_get_hierarchy_abs_path_data(const char *subsystem
, struct cgfs_data
*d
)
1305 struct cgroup_process_info
*info
= d
->info
;
1306 struct cgroup_mount_point
*mp
= NULL
;
1308 info
= find_info_for_subsystem(info
, subsystem
);
1311 if (info
->designated_mount_point
) {
1312 mp
= info
->designated_mount_point
;
1314 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1318 return cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1321 static char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1323 struct cgroup_meta_data
*meta
;
1324 struct cgroup_process_info
*base_info
, *info
;
1325 struct cgroup_mount_point
*mp
;
1326 char *result
= NULL
;
1328 meta
= lxc_cgroup_load_meta();
1331 base_info
= lxc_cgroup_get_container_info(name
, lxcpath
, meta
);
1334 info
= find_info_for_subsystem(base_info
, subsystem
);
1337 if (info
->designated_mount_point
) {
1338 mp
= info
->designated_mount_point
;
1340 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1344 result
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1347 lxc_cgroup_process_info_free(base_info
);
1349 lxc_cgroup_put_meta(meta
);
/*
 * Write @value to the cgroup control file @filename of the container
 * described by @d.
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose absolute path is looked up.  errno is preset to ENOENT
 * before the lookup, and the errno left by do_cgroup_set() is preserved
 * across the cleanup.
 *
 * Returns the result of do_cgroup_set(), or -1 if no path was found.
 */
static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfs_data *d)
{
	char *controller, *dot, *dir;
	int rc, keep_errno;

	controller = alloca(strlen(filename) + 1);
	strcpy(controller, filename);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	errno = ENOENT;
	dir = lxc_cgroup_get_hierarchy_abs_path_data(controller, d);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	keep_errno = errno;
	free(dir);
	errno = keep_errno;
	return rc;
}
/*
 * Write @value to the cgroup control file @filename of the container
 * identified by @name and @lxcpath.
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose absolute path is looked up.
 *
 * Returns the result of do_cgroup_set(), or -1 if no path was found.  The
 * errno left by do_cgroup_set() is preserved across the cleanup so callers
 * can inspect the reason for a failed write.
 */
static int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	char *subsystem, *dot, *path;
	int ret, saved_errno;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (!path)
		return -1;

	ret = do_cgroup_set(path, filename, value);
	/* free() is not guaranteed to leave errno untouched */
	saved_errno = errno;
	free(path);
	errno = saved_errno;
	return ret;
}
/*
 * Read the cgroup control file @filename of the container identified by
 * @name and @lxcpath into @value (at most @len bytes, as handled by
 * do_cgroup_get()).
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose absolute path is looked up.
 *
 * Returns the result of do_cgroup_get(), or -1 if no path was found.  The
 * errno left by do_cgroup_get() is preserved across the cleanup.
 */
static int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	char *subsystem, *dot, *path;
	int ret, saved_errno;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (!path)
		return -1;

	ret = do_cgroup_get(path, filename, value, len);
	/* free() is not guaranteed to leave errno untouched */
	saved_errno = errno;
	free(path);
	errno = saved_errno;
	return ret;
}
1410 static bool cgroupfs_mount_cgroup(void *hdata
, const char *root
, int type
)
1412 size_t bufsz
= strlen(root
) + sizeof("/sys/fs/cgroup");
1414 char **parts
= NULL
;
1415 char *dirname
= NULL
;
1416 char *abs_path
= NULL
;
1417 char *abs_path2
= NULL
;
1418 struct cgfs_data
*cgfs_d
;
1419 struct cgroup_process_info
*info
, *base_info
;
1420 int r
, saved_errno
= 0;
1422 if (cgns_supported())
1428 base_info
= cgfs_d
->info
;
1430 /* If we get passed the _NOSPEC types, we default to _MIXED, since we don't
1431 * have access to the lxc_conf object at this point. It really should be up
1432 * to the caller to fix this, but this doesn't really hurt.
1434 if (type
== LXC_AUTO_CGROUP_FULL_NOSPEC
)
1435 type
= LXC_AUTO_CGROUP_FULL_MIXED
;
1436 else if (type
== LXC_AUTO_CGROUP_NOSPEC
)
1437 type
= LXC_AUTO_CGROUP_MIXED
;
1439 if (type
< LXC_AUTO_CGROUP_RO
|| type
> LXC_AUTO_CGROUP_FULL_MIXED
) {
1440 ERROR("could not mount cgroups into container: invalid type specified internally");
1445 path
= calloc(1, bufsz
);
1448 snprintf(path
, bufsz
, "%s/sys/fs/cgroup", root
);
1449 r
= safe_mount("cgroup_root", path
, "tmpfs",
1450 MS_NOSUID
|MS_NODEV
|MS_NOEXEC
|MS_RELATIME
,
1451 "size=10240k,mode=755",
1454 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1458 /* now mount all the hierarchies we care about */
1459 for (info
= base_info
; info
; info
= info
->next
) {
1460 size_t subsystem_count
, i
;
1461 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1463 if (!info
->hierarchy
)
1466 if (!mountpoint_is_accessible(mp
))
1467 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1470 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1474 subsystem_count
= lxc_array_len((void **)info
->hierarchy
->subsystems
);
1475 parts
= calloc(subsystem_count
+ 1, sizeof(char *));
1479 for (i
= 0; i
< subsystem_count
; i
++) {
1480 if (!strncmp(info
->hierarchy
->subsystems
[i
], "name=", 5))
1481 parts
[i
] = info
->hierarchy
->subsystems
[i
] + 5;
1483 parts
[i
] = info
->hierarchy
->subsystems
[i
];
1485 dirname
= lxc_string_join(",", (const char **)parts
, false);
1489 /* create subsystem directory */
1490 abs_path
= lxc_append_paths(path
, dirname
);
1493 r
= mkdir_p(abs_path
, 0755);
1494 if (r
< 0 && errno
!= EEXIST
) {
1495 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname
);
1499 abs_path2
= lxc_append_paths(abs_path
, info
->cgroup_path
);
1503 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_RW
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1504 /* bind-mount the cgroup entire filesystem there */
1505 if (strcmp(mp
->mount_prefix
, "/") != 0) {
1506 /* FIXME: maybe we should just try to remount the entire hierarchy
1507 * with a regular mount command? may that works? */
1508 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname
);
1511 r
= mount(mp
->mount_point
, abs_path
, "none", MS_BIND
, 0);
1513 SYSERROR("error bind-mounting %s to %s", mp
->mount_point
, abs_path
);
1516 /* main cgroup path should be read-only */
1517 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1518 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1520 SYSERROR("error re-mounting %s readonly", abs_path
);
1524 /* own cgroup should be read-write */
1525 if (type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1526 r
= mount(abs_path2
, abs_path2
, NULL
, MS_BIND
, NULL
);
1528 SYSERROR("error bind-mounting %s onto itself", abs_path2
);
1531 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
, NULL
);
1533 SYSERROR("error re-mounting %s readwrite", abs_path2
);
1538 /* create path for container's cgroup */
1539 r
= mkdir_p(abs_path2
, 0755);
1540 if (r
< 0 && errno
!= EEXIST
) {
1541 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname
, info
->cgroup_path
);
1545 /* for read-only and mixed cases, we have to bind-mount the tmpfs directory
1546 * that points to the hierarchy itself (i.e. /sys/fs/cgroup/cpu etc.) onto
1547 * itself and then bind-mount it read-only, since we keep the tmpfs itself
1548 * read-write (see comment below)
1550 if (type
== LXC_AUTO_CGROUP_MIXED
|| type
== LXC_AUTO_CGROUP_RO
) {
1551 r
= mount(abs_path
, abs_path
, NULL
, MS_BIND
, NULL
);
1553 SYSERROR("error bind-mounting %s onto itself", abs_path
);
1556 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1558 SYSERROR("error re-mounting %s readonly", abs_path
);
1566 /* bind-mount container's cgroup to that directory */
1567 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1570 r
= mount(abs_path
, abs_path2
, "none", MS_BIND
, 0);
1571 if (r
< 0 && is_crucial_hierarchy(info
->hierarchy
)) {
1572 SYSERROR("error bind-mounting %s to %s", abs_path
, abs_path2
);
1575 if (type
== LXC_AUTO_CGROUP_RO
) {
1576 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1578 SYSERROR("error re-mounting %s readonly", abs_path2
);
1589 /* add symlinks for every single subsystem */
1590 if (subsystem_count
> 1) {
1591 for (i
= 0; i
< subsystem_count
; i
++) {
1592 abs_path
= lxc_append_paths(path
, parts
[i
]);
1595 r
= symlink(dirname
, abs_path
);
1597 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts
[i
], dirname
);
1608 /* We used to remount the entire tmpfs readonly if any :ro or
1609 * :mixed mode was specified. However, Ubuntu's mountall has the
1610 * unfortunate behavior to block bootup if /sys/fs/cgroup is
1611 * mounted read-only and cannot be remounted read-write.
1612 * (mountall reads /lib/init/fstab and tries to (re-)mount all of
1613 * these if they are not already mounted with the right options;
1614 * it contains an entry for /sys/fs/cgroup. In case it can't do
1615 * that, it prompts for the user to either manually fix it or
1616 * boot anyway. But without user input, booting of the container
1619 * Instead of remounting the entire tmpfs readonly, we only
1620 * remount the paths readonly that are part of the cgroup
1629 saved_errno
= errno
;
1635 errno
= saved_errno
;
1639 static int cgfs_nrtasks(void *hdata
)
1641 struct cgfs_data
*d
= hdata
;
1642 struct cgroup_process_info
*info
;
1643 struct cgroup_mount_point
*mp
= NULL
;
1644 char *abs_path
= NULL
;
1658 if (info
->designated_mount_point
) {
1659 mp
= info
->designated_mount_point
;
1661 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, false);
1666 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1670 ret
= cgroup_recursive_task_count(abs_path
);
1675 static struct cgroup_process_info
*
1676 lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
,
1677 struct cgroup_meta_data
*meta
)
1679 struct cgroup_process_info
*result
= NULL
;
1680 FILE *proc_pid_cgroup
= NULL
;
1683 int saved_errno
= 0;
1684 struct cgroup_process_info
**cptr
= &result
;
1685 struct cgroup_process_info
*entry
= NULL
;
1687 proc_pid_cgroup
= fopen_cloexec(proc_pid_cgroup_str
, "r");
1688 if (!proc_pid_cgroup
)
1691 while (getline(&line
, &sz
, proc_pid_cgroup
) != -1) {
1692 /* file format: hierarchy:subsystems:group */
1696 int hierarchy_number
;
1697 struct cgroup_hierarchy
*h
= NULL
;
1702 if (line
[strlen(line
) - 1] == '\n')
1703 line
[strlen(line
) - 1] = '\0';
1705 colon1
= strchr(line
, ':');
1709 colon2
= strchr(colon1
, ':');
1716 /* With cgroupv2 /proc/self/cgroup can contain entries of the
1718 * These entries need to be skipped.
1720 if (!strcmp(colon1
, ""))
1723 hierarchy_number
= strtoul(line
, &endptr
, 10);
1724 if (!endptr
|| *endptr
)
1727 if (hierarchy_number
> meta
->maximum_hierarchy
) {
1728 /* we encountered a hierarchy we didn't have before,
1729 * so probably somebody remounted some stuff in the
1736 h
= meta
->hierarchies
[hierarchy_number
];
1738 /* we encountered a hierarchy that was thought to be
1739 * dead before, so probably somebody remounted some
1740 * stuff in the mean time...
1746 /* we are told that we should ignore this hierarchy */
1750 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1754 entry
->meta_ref
= lxc_cgroup_get_meta(meta
);
1755 entry
->hierarchy
= h
;
1756 entry
->cgroup_path
= strdup(colon2
);
1757 if (!entry
->cgroup_path
)
1759 prune_init_scope(entry
->cgroup_path
);
1762 cptr
= &entry
->next
;
1766 fclose(proc_pid_cgroup
);
1771 saved_errno
= errno
;
1772 if (proc_pid_cgroup
)
1773 fclose(proc_pid_cgroup
);
1774 lxc_cgroup_process_info_free(result
);
1775 lxc_cgroup_process_info_free(entry
);
1777 errno
= saved_errno
;
/*
 * Turn a comma-separated mount option string into a NULL-terminated array
 * of subsystem names.
 *
 * A token that neither starts with "name=" nor appears in @kernel_list is
 * stored with a "name=" prefix (a named hierarchy such as 'systemd' is
 * mounted as 'name=systemd'); every other token is copied verbatim.
 *
 * Returns the newly allocated array (NULL if @mount_options has no tokens),
 * or NULL on allocation failure with errno preserved.
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	char *copy, *tok, *state = NULL;
	char **list = NULL;
	size_t capacity = 0;
	size_t used = 0;
	int keep_errno;

	copy = alloca(strlen(mount_options)+1);
	strcpy(copy, mount_options);

	tok = strtok_r(copy, ",", &state);
	while (tok) {
		if (lxc_grow_array((void ***)&list, &capacity, used + 1, 12) < 0)
			goto fail;
		/* keep the array terminated behind the slot being filled */
		list[used + 1] = NULL;

		if (strncmp(tok, "name=", 5) && !lxc_string_in_array(tok, (const char **)kernel_list)) {
			// this is eg 'systemd' but the mount will be 'name=systemd'
			list[used] = malloc(strlen(tok) + 6);
			if (list[used])
				sprintf(list[used], "name=%s", tok);
		} else {
			list[used] = strdup(tok);
		}

		if (!list[used])
			goto fail;
		used++;

		tok = strtok_r(NULL, ",", &state);
	}

	return list;

fail:
	keep_errno = errno;
	lxc_free_array((void**)list, free);
	errno = keep_errno;
	return NULL;
}
1823 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
)
1827 free(mp
->mount_point
);
1828 free(mp
->mount_prefix
);
1832 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
)
1836 if (h
->subsystems
) {
1837 lxc_free_array((void **)h
->subsystems
, free
);
1838 h
->subsystems
= NULL
;
1840 if (h
->all_mount_points
) {
1841 free(h
->all_mount_points
);
1842 h
->all_mount_points
= NULL
;
/*
 * Check whether @name is acceptable as a single cgroup path component.
 *
 * Only printable ASCII characters in the range 33..126 are accepted: SPACE
 * (32) is rejected because it would break legacy lxc-ls, and '/' is rejected
 * as well.  The special directory names "." and ".." are not valid either.
 */
static bool is_valid_cgroup(const char *name)
{
	size_t idx;

	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	for (idx = 0; name[idx] != '\0'; idx++) {
		unsigned char ch = (unsigned char)name[idx];

		if (ch <= 32 || ch >= 127 || ch == '/')
			return false;
	}

	return true;
}
1862 static int create_or_remove_cgroup(bool do_remove
,
1863 struct cgroup_mount_point
*mp
, const char *path
, int recurse
,
1864 struct lxc_conf
*conf
)
1866 int r
, saved_errno
= 0;
1867 char *buf
= cgroup_to_absolute_path(mp
, path
, NULL
);
1871 /* create or remove directory */
1873 if (!dir_exists(buf
))
1876 if (conf
&& !lxc_list_empty(&conf
->id_map
))
1877 r
= userns_exec_1(conf
, rmdir_wrapper
, buf
,
1880 r
= cgroup_rmdir(buf
);
1884 r
= mkdir_p(buf
, 0777);
1885 saved_errno
= errno
;
1887 errno
= saved_errno
;
/*
 * Create the directory of the cgroup @path below mount point @mp.
 * Thin wrapper around create_or_remove_cgroup() in "create" mode, without
 * recursion and without a container configuration.
 */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;

	return create_or_remove_cgroup(do_remove, mp, path, false, NULL);
}
/*
 * Remove the directory of the cgroup @path below mount point @mp.
 * Thin wrapper around create_or_remove_cgroup() in "remove" mode; @recurse
 * and @conf are passed through unchanged.
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse, struct lxc_conf *conf)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path, recurse, conf);
}
1902 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
,
1903 const char *path
, const char *suffix
)
1905 /* first we have to make sure we subtract the mount point's prefix */
1906 char *prefix
= mp
->mount_prefix
;
1910 /* we want to make sure only absolute paths to cgroups are passed to us */
1911 if (path
[0] != '/') {
1916 if (prefix
&& !strcmp(prefix
, "/"))
1919 /* prefix doesn't match */
1920 if (prefix
&& strncmp(prefix
, path
, strlen(prefix
)) != 0) {
1924 /* if prefix is /foo and path is /foobar */
1925 if (prefix
&& path
[strlen(prefix
)] != '/' && path
[strlen(prefix
)] != '\0') {
1930 /* remove prefix from path */
1931 path
+= prefix
? strlen(prefix
) : 0;
1933 len
= strlen(mp
->mount_point
) + strlen(path
) + (suffix
? strlen(suffix
) : 0);
1934 buf
= calloc(len
+ 1, 1);
1937 rv
= snprintf(buf
, len
+ 1, "%s%s%s", mp
->mount_point
, path
, suffix
? suffix
: "");
1947 static struct cgroup_process_info
*
1948 find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
)
1950 struct cgroup_process_info
*info_ptr
;
1951 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1952 struct cgroup_hierarchy
*h
= info_ptr
->hierarchy
;
1955 if (lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
/*
 * Read the file @sub_filename inside the directory @cgroup_path into @value
 * (@len is passed to lxc_read_from_file() as the buffer size).
 *
 * Returns the result of lxc_read_from_file(), or -1 if the file name could
 * not be built.  errno from the read is preserved.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
		char *value, size_t len)
{
	const char *parts[3] = { cgroup_path, sub_filename, NULL };
	char *file;
	int rc, keep_errno;

	file = lxc_string_join("/", parts, false);
	if (!file)
		return -1;

	rc = lxc_read_from_file(file, value, len);
	keep_errno = errno;
	free(file);
	errno = keep_errno;

	return rc;
}
/*
 * Write the string @value to the file @sub_filename inside the directory
 * @cgroup_path.
 *
 * Returns the result of lxc_write_to_file(), or -1 if the file name could
 * not be built.  errno from the write is preserved.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
		const char *value)
{
	const char *parts[3] = { cgroup_path, sub_filename, NULL };
	char *file;
	int rc, keep_errno;

	file = lxc_string_join("/", parts, false);
	if (!file)
		return -1;

	rc = lxc_write_to_file(file, value, strlen(value), false);
	keep_errno = errno;
	free(file);
	errno = keep_errno;

	return rc;
}
2006 static int do_setup_cgroup_limits(struct cgfs_data
*d
,
2007 struct lxc_list
*cgroup_settings
, bool do_devices
)
2009 struct lxc_list
*iterator
, *sorted_cgroup_settings
, *next
;
2010 struct lxc_cgroup
*cg
;
2013 if (lxc_list_empty(cgroup_settings
))
2016 sorted_cgroup_settings
= sort_cgroup_settings(cgroup_settings
);
2017 if (!sorted_cgroup_settings
) {
2021 lxc_list_for_each(iterator
, sorted_cgroup_settings
) {
2022 cg
= iterator
->elem
;
2024 if (do_devices
== !strncmp("devices", cg
->subsystem
, 7)) {
2025 if (strcmp(cg
->subsystem
, "devices.deny") == 0 &&
2026 cgroup_devices_has_allow_or_deny(d
, cg
->value
, false))
2028 if (strcmp(cg
->subsystem
, "devices.allow") == 0 &&
2029 cgroup_devices_has_allow_or_deny(d
, cg
->value
, true))
2031 if (lxc_cgroup_set_data(cg
->subsystem
, cg
->value
, d
)) {
2032 if (do_devices
&& (errno
== EACCES
|| errno
== EPERM
)) {
2033 WARN("Error setting %s to %s for %s",
2034 cg
->subsystem
, cg
->value
, d
->name
);
2037 SYSERROR("Error setting %s to %s for %s",
2038 cg
->subsystem
, cg
->value
, d
->name
);
2043 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
2047 INFO("cgroup has been setup");
2049 lxc_list_for_each_safe(iterator
, sorted_cgroup_settings
, next
) {
2050 lxc_list_del(iterator
);
2053 free(sorted_cgroup_settings
);
/*
 * Check the container's devices.list to see whether a devices.allow or
 * devices.deny setting is already in effect.
 *
 * @v:         value of the setting.
 * @for_allow: true for a devices.allow entry, false for devices.deny.
 *
 * For deny, only the values "a" and "a *:* rwm" are considered; the result
 * is true unless devices.list contains "a *:* rwm".  For allow, the result
 * is true if devices.list contains "a *:* rwm" or a line equal to @v.
 * Returns false if devices.list cannot be located or opened.
 */
static bool cgroup_devices_has_allow_or_deny(struct cgfs_data *d,
					     char *v, bool for_allow)
{
	char *dir;
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	dir = lxc_cgroup_get_hierarchy_abs_path_data("devices", d);
	if (!dir)
		return false;
	parts[0] = dir;
	path = lxc_string_join("/", parts, false);
	/* the directory string is only needed to build path; release it on
	 * every outcome instead of only when the join fails
	 */
	free(dir);
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);
		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			ret = for_allow;
			break;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			break;
		}
	}

	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
/*
 * Count the lines of every "tasks" file in @cgroup_path and all directories
 * below it.
 *
 * Returns the total, 0 if @cgroup_path cannot be opened as a directory, or
 * -1 if a sub-path cannot be built or stat()ed.  Negative results from
 * sub-directories or from lxc_count_file_lines() are left out of the sum.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *dir;
	struct dirent *ent;
	int total = 0;

	dir = opendir(cgroup_path);
	if (!dir)
		return 0;

	while ((ent = readdir(dir)) != NULL) {
		const char *parts[3] = { cgroup_path, ent->d_name, NULL };
		char *child;
		struct stat st;
		int found = -1;

		if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
			continue;

		child = lxc_string_join("/", parts, false);
		if (!child) {
			closedir(dir);
			return -1;
		}

		if (stat(child, &st) < 0) {
			closedir(dir);
			free(child);
			return -1;
		}

		if (S_ISDIR(st.st_mode))
			found = cgroup_recursive_task_count(child);
		else if (strcmp(ent->d_name, "tasks") == 0)
			found = lxc_count_file_lines(child);

		if (found >= 0)
			total += found;

		free(child);
	}
	closedir(dir);

	return total;
}
2160 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
,
2163 int r
, saved_errno
= 0;
2166 mp
->need_cpuset_init
= false;
2168 /* If this is the memory cgroup, we want to enforce hierarchy.
2169 * But don't fail if for some reason we can't.
2171 if (lxc_string_in_array("memory", (const char **)mp
->hierarchy
->subsystems
)) {
2172 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/memory.use_hierarchy");
2174 r
= lxc_read_from_file(cc_path
, buf
, 1);
2175 if (r
< 1 || buf
[0] != '1') {
2176 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2178 SYSERROR("failed to set memory.use_hierarchy to 1; continuing");
2184 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2185 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2186 * and cpuset.cpus and then
2188 if (lxc_string_in_array("cpuset", (const char **)mp
->hierarchy
->subsystems
)) {
2189 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/cgroup.clone_children");
2194 /* cgroup.clone_children is not available when running under
2195 * older kernel versions; in this case, we'll initialize
2196 * cpuset.cpus and cpuset.mems later, after the new cgroup
2199 if (stat(cc_path
, &sb
) != 0 && errno
== ENOENT
) {
2200 mp
->need_cpuset_init
= true;
2204 r
= lxc_read_from_file(cc_path
, buf
, 1);
2205 if (r
== 1 && buf
[0] == '1') {
2209 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2210 saved_errno
= errno
;
2212 errno
= saved_errno
;
2213 return r
< 0 ? -1 : 0;
/*
 * Read the file @fn into @buf and make sure the result is NUL-terminated.
 *
 * Returns the value of lxc_read_from_file().  If that equals @bufsize the
 * last byte of @buf is overwritten with the terminator, so callers can
 * detect truncation by comparing the result against @bufsize.  Returns a
 * negative value on read failure, and -1 if @bufsize is 0.
 */
static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
{
	int nread = lxc_read_from_file(fn, buf, bufsize);

	if (nread < 0) {
		SYSERROR("failed to read %s", fn);
		return nread;
	}

	if (nread != bufsize) {
		buf[nread] = '\0';
		return nread;
	}

	if (bufsize == 0) {
		/* Callers don't do this, but regression/sanity check */
		ERROR("%s: was not expecting 0 bufsize", __func__);
		return -1;
	}

	/* obviously this wasn't empty */
	buf[bufsize-1] = '\0';
	return nread;
}
/*
 * Initialise one cpuset file of a cgroup from its parent cgroup.
 *
 * @mp:   mount point of the cpuset hierarchy.
 * @path: cgroup path of the child.
 * @name: file name including the leading '/', e.g. "/cpuset.cpus".
 *
 * If the child's file already holds a value it is left alone.  Otherwise the
 * value of the same file in the parent cgroup (@path with its last component
 * removed) is copied into it.
 *
 * Returns true if the child's file holds a value afterwards, false on any
 * error, including a parent value that does not fit the local buffer.
 */
static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
				const char *path, const char *name)
{
	char contents[1024];
	char *child_file;
	char *parent_file = NULL;
	char *parent_cgroup, *slash;
	bool done = false;
	int n;

	child_file = cgroup_to_absolute_path(mp, path, name);
	if (!child_file)
		return false;

	/* don't overwrite a non-empty value in the file */
	n = cgroup_read_from_file(child_file, contents, sizeof(contents));
	if (n < 0)
		goto cleanup;
	if (contents[0] != '\0' && contents[0] != '\n') {
		done = true;
		goto cleanup;
	}

	/* path to the same name in the parent cgroup */
	parent_file = strdup(path);
	if (!parent_file)
		goto cleanup;

	slash = strrchr(parent_file, '/');
	if (!slash)
		goto cleanup;
	if (slash == parent_file)
		slash++; /* keep the '/' at the start */
	*slash = '\0';

	/* parent_file currently holds the parent cgroup path; swap it for
	 * the absolute file name built from it
	 */
	parent_cgroup = parent_file;
	parent_file = cgroup_to_absolute_path(mp, parent_cgroup, name);
	free(parent_cgroup);
	if (!parent_file)
		goto cleanup;

	/* copy from parent to child cgroup */
	n = cgroup_read_from_file(parent_file, contents, sizeof(contents));
	if (n < 0)
		goto cleanup;
	if (n == sizeof(contents)) {
		/* If anyone actually sees this error, we can address it */
		ERROR("parent cpuset value too long");
		goto cleanup;
	}

	done = (lxc_write_to_file(child_file, contents, strlen(contents), false) >= 0);
	if (!done)
		SYSERROR("failed writing %s", child_file);

cleanup:
	free(parent_file);
	free(child_file);
	return done;
}
2296 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
,
2299 /* the files we have to handle here are only in cpuset hierarchies */
2300 if (!lxc_string_in_array("cpuset",
2301 (const char **)mp
->hierarchy
->subsystems
))
2304 if (!mp
->need_cpuset_init
)
2307 return (do_init_cpuset_file(mp
, path
, "/cpuset.cpus") &&
2308 do_init_cpuset_file(mp
, path
, "/cpuset.mems") );
2311 static void print_cgfs_init_debuginfo(struct cgfs_data
*d
)
2315 if (!getenv("LXC_DEBUG_CGFS"))
2318 DEBUG("Cgroup information:");
2319 DEBUG(" container name: %s", d
->name
);
2320 if (!d
->meta
|| !d
->meta
->hierarchies
) {
2321 DEBUG(" No hierarchies found.");
2324 DEBUG(" Controllers:");
2325 for (i
= 0; i
<= d
->meta
->maximum_hierarchy
; i
++) {
2327 struct cgroup_hierarchy
*h
= d
->meta
->hierarchies
[i
];
2329 DEBUG(" Empty hierarchy number %d.", i
);
2332 for (p
= h
->subsystems
; p
&& *p
; p
++) {
2333 DEBUG(" %2d: %s", i
, *p
);
2338 struct cgroup_ops
*cgfs_ops_init(void)
2343 static void *cgfs_init(const char *name
)
2345 struct cgfs_data
*d
;
2347 d
= malloc(sizeof(*d
));
2351 memset(d
, 0, sizeof(*d
));
2352 d
->name
= strdup(name
);
2356 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
2358 d
->meta
= lxc_cgroup_load_meta();
2360 ERROR("cgroupfs failed to detect cgroup metadata");
2364 print_cgfs_init_debuginfo(d
);
2375 static void cgfs_destroy(void *hdata
, struct lxc_conf
*conf
)
2377 struct cgfs_data
*d
= hdata
;
2382 lxc_cgroup_process_info_free_and_remove(d
->info
, conf
);
2383 lxc_cgroup_put_meta(d
->meta
);
2387 static inline bool cgfs_create(void *hdata
)
2389 struct cgfs_data
*d
= hdata
;
2390 struct cgroup_process_info
*i
;
2391 struct cgroup_meta_data
*md
;
2396 i
= lxc_cgroupfs_create(d
->name
, d
->cgroup_pattern
, md
, NULL
);
2403 static inline bool cgfs_enter(void *hdata
, pid_t pid
)
2405 struct cgfs_data
*d
= hdata
;
2406 struct cgroup_process_info
*i
;
2412 ret
= lxc_cgroupfs_enter(i
, pid
, false);
2417 static inline bool cgfs_create_legacy(void *hdata
, pid_t pid
)
2419 struct cgfs_data
*d
= hdata
;
2420 struct cgroup_process_info
*i
;
2425 if (lxc_cgroup_create_legacy(i
, d
->name
, pid
) < 0) {
2426 ERROR("failed to create legacy ns cgroups for '%s'", d
->name
);
/*
 * Return the container's cgroup path in the hierarchy carrying @subsystem,
 * or NULL if @hdata is NULL or no such hierarchy is recorded.  The string is
 * owned by the handler data.
 */
static const char *cgfs_get_cgroup(void *hdata, const char *subsystem)
{
	struct cgfs_data *data = hdata;

	return data ? lxc_cgroup_get_hierarchy_path_data(subsystem, data) : NULL;
}
2441 static bool cgfs_escape(void *hdata
)
2443 struct cgroup_meta_data
*md
;
2447 md
= lxc_cgroup_load_meta();
2451 for (i
= 0; i
<= md
->maximum_hierarchy
; i
++) {
2452 struct cgroup_hierarchy
*h
= md
->hierarchies
[i
];
2453 struct cgroup_mount_point
*mp
;
2459 WARN("not escaping hierarchy %d", i
);
2463 mp
= lxc_cgroup_find_mount_point(h
, "/", true);
2467 tasks
= cgroup_to_absolute_path(mp
, "/", "tasks");
2471 f
= fopen(tasks
, "a");
2476 written
= fprintf(f
, "%d\n", getpid());
2479 SYSERROR("writing tasks failed\n");
2486 lxc_cgroup_put_meta(md
);
/*
 * Number of hierarchies known to this backend.
 * Not implemented for cgfs: always reports -1.
 */
static int cgfs_num_hierarchies(void)
{
	const int not_implemented = -1;

	return not_implemented;
}
/*
 * Look up the controllers of hierarchy number @i.
 * Not implemented for cgfs: always reports failure and leaves @out alone.
 */
static bool cgfs_get_hierarchies(int i, char ***out)
{
	const bool not_implemented = false;

	return not_implemented;
}
/*
 * Thaw the container by writing "THAWED" to freezer.state of its freezer
 * cgroup.
 *
 * Returns true if the write succeeded; false if @hdata is NULL, the absolute
 * path of the freezer cgroup cannot be determined, or the write fails.
 */
static bool cgfs_unfreeze(void *hdata)
{
	struct cgfs_data *data = hdata;
	char *rel, *abs;
	bool thawed;

	if (!data)
		return false;

	rel = lxc_cgroup_get_hierarchy_path_data("freezer", data);
	abs = lxc_cgroup_find_abs_path("freezer", rel, true, NULL);
	if (!abs)
		return false;

	thawed = (do_cgroup_set(abs, "freezer.state", "THAWED") == 0);
	free(abs);
	return thawed;
}
/*
 * Apply the configured cgroup limits of @cgroup_conf to the container.
 *
 * @with_devices selects whether the "devices" settings or all other settings
 * are applied (see do_setup_cgroup_limits()).  Returns true on success,
 * false on failure or if @hdata is NULL.
 */
static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
				  bool with_devices)
{
	struct cgfs_data *data = hdata;
	int rc;

	if (!data)
		return false;

	rc = do_setup_cgroup_limits(data, cgroup_conf, with_devices);
	return rc == 0;
}
/*
 * Move the attached process @pid into the cgroups of the running container
 * identified by @name and @lxcpath.
 *
 * The container's membership is looked up from freshly loaded metadata and
 * released again before returning.  Returns true on success; every failure
 * is logged with the same message and yields false.
 */
static bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
{
	struct cgroup_meta_data *meta;
	struct cgroup_process_info *members;
	int rc;

	meta = lxc_cgroup_load_meta();
	if (!meta)
		goto fail;

	members = lxc_cgroup_get_container_info(name, lxcpath, meta);
	lxc_cgroup_put_meta(meta);
	if (!members)
		goto fail;

	rc = lxc_cgroupfs_enter(members, pid, false);
	lxc_cgroup_process_info_free(members);
	if (rc < 0)
		goto fail;

	return true;

fail:
	ERROR("could not move attached process %d to cgroup of container", pid);
	return false;
}
/*
 * Argument block handed to chown_cgroup_wrapper() through userns_exec_1().
 */
struct chown_data {
	/* absolute path of the cgroup directory to chown */
	const char *cgroup_path;
	/* effective uid of the caller, filled from geteuid() and translated
	 * with get_ns_uid() inside the namespace
	 * NOTE(review): field type reconstructed from its uses - confirm
	 */
	uid_t origuid;
};
2565 * TODO - someone should refactor this to unshare once passing all the paths
2566 * to be chowned in one go
2568 static int chown_cgroup_wrapper(void *data
)
2570 struct chown_data
*arg
= data
;
2574 if (setresgid(0,0,0) < 0)
2575 SYSERROR("Failed to setgid to 0");
2576 if (setresuid(0,0,0) < 0)
2577 SYSERROR("Failed to setuid to 0");
2578 if (setgroups(0, NULL
) < 0)
2579 SYSERROR("Failed to clear groups");
2580 destuid
= get_ns_uid(arg
->origuid
);
2582 if (chown(arg
->cgroup_path
, destuid
, 0) < 0)
2583 SYSERROR("Failed chowning %s to %d", arg
->cgroup_path
, (int)destuid
);
2585 fpath
= lxc_append_paths(arg
->cgroup_path
, "tasks");
2588 if (chown(fpath
, destuid
, 0) < 0)
2589 SYSERROR("Error chowning %s\n", fpath
);
2592 fpath
= lxc_append_paths(arg
->cgroup_path
, "cgroup.procs");
2595 if (chown(fpath
, destuid
, 0) < 0)
2596 SYSERROR("Error chowning %s", fpath
);
2602 static bool do_cgfs_chown(char *cgroup_path
, struct lxc_conf
*conf
)
2604 struct chown_data data
;
2607 if (!dir_exists(cgroup_path
))
2610 if (lxc_list_empty(&conf
->id_map
))
2611 /* If there's no mapping then we don't need to chown */
2614 data
.cgroup_path
= cgroup_path
;
2615 data
.origuid
= geteuid();
2617 /* Unpriv users can't chown it themselves, so chown from
2618 * a child namespace mapping both our own and the target uid
2620 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
,
2621 "chown_cgroup_wrapper") < 0) {
2622 ERROR("Error requesting cgroup chown in new namespace");
2627 * Now chmod 775 the directory else the container cannot create cgroups.
2628 * This can't be done in the child namespace because it only group-owns
2631 if (chmod(cgroup_path
, 0775) < 0) {
2632 SYSERROR("Error chmoding %s\n", cgroup_path
);
2635 fpath
= lxc_append_paths(cgroup_path
, "tasks");
2638 if (chmod(fpath
, 0664) < 0)
2639 SYSERROR("Error chmoding %s\n", fpath
);
2641 fpath
= lxc_append_paths(cgroup_path
, "cgroup.procs");
2644 if (chmod(fpath
, 0664) < 0)
2645 SYSERROR("Error chmoding %s\n", fpath
);
2651 static bool cgfs_chown(void *hdata
, struct lxc_conf
*conf
)
2653 struct cgfs_data
*d
= hdata
;
2654 struct cgroup_process_info
*info_ptr
;
2661 for (info_ptr
= d
->info
; info_ptr
; info_ptr
= info_ptr
->next
) {
2662 if (!info_ptr
->hierarchy
)
2665 if (!info_ptr
->designated_mount_point
) {
2666 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, info_ptr
->cgroup_path
, true);
2667 if (!info_ptr
->designated_mount_point
) {
2668 SYSERROR("Could not chown cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", info_ptr
->cgroup_path
);
2673 cgpath
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
, NULL
);
2675 SYSERROR("Could not chown cgroup %s: internal error", info_ptr
->cgroup_path
);
2678 r
= do_cgfs_chown(cgpath
, conf
);
2679 if (!r
&& is_crucial_hierarchy(info_ptr
->hierarchy
)) {
2680 ERROR("Failed chowning %s\n", cgpath
);
2690 static struct cgroup_ops cgfs_ops
= {
2692 .destroy
= cgfs_destroy
,
2693 .create
= cgfs_create
,
2694 .enter
= cgfs_enter
,
2695 .create_legacy
= cgfs_create_legacy
,
2696 .get_cgroup
= cgfs_get_cgroup
,
2697 .escape
= cgfs_escape
,
2698 .num_hierarchies
= cgfs_num_hierarchies
,
2699 .get_hierarchies
= cgfs_get_hierarchies
,
2700 .get
= lxc_cgroupfs_get
,
2701 .set
= lxc_cgroupfs_set
,
2702 .unfreeze
= cgfs_unfreeze
,
2703 .setup_limits
= cgroupfs_setup_limits
,
2705 .attach
= lxc_cgroupfs_attach
,
2706 .chown
= cgfs_chown
,
2707 .mount_cgroup
= cgroupfs_mount_cgroup
,
2708 .nrtasks
= cgfs_nrtasks
,