2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/inotify.h>
37 #include <sys/mount.h>
38 #include <netinet/in.h>
54 #include <../include/lxcmntent.h>
59 lxc_log_define(lxc_cgroup
, lxc
);
61 static struct cgroup_process_info
*lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
, struct cgroup_meta_data
*meta
);
62 static char **subsystems_from_mount_options(const char *mount_options
, char **kernel_list
);
63 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
);
64 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
);
65 static bool is_valid_cgroup(const char *name
);
66 static int create_cgroup(struct cgroup_mount_point
*mp
, const char *path
);
67 static int remove_cgroup(struct cgroup_mount_point
*mp
, const char *path
, bool recurse
);
68 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
, const char *path
, const char *suffix
);
69 static struct cgroup_process_info
*find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
);
70 static int do_cgroup_get(const char *cgroup_path
, const char *sub_filename
, char *value
, size_t len
);
71 static int do_cgroup_set(const char *cgroup_path
, const char *sub_filename
, const char *value
);
72 static bool cgroup_devices_has_allow_or_deny(struct lxc_handler
*h
, char *v
, bool for_allow
);
73 static int do_setup_cgroup_limits(struct lxc_handler
*h
, struct lxc_list
*cgroup_settings
, bool do_devices
);
74 static int cgroup_recursive_task_count(const char *cgroup_path
);
75 static int count_lines(const char *fn
);
76 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
, char *cgroup_path
);
77 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
, const char *path
);
79 static struct cgroup_ops cgfs_ops
;
80 struct cgroup_ops
*active_cg_ops
= &cgfs_ops
;
81 static void init_cg_ops(void);
84 /* this needs to be mutexed for api use */
85 extern bool cgmanager_initialized
;
86 extern bool use_cgmanager
;
87 extern bool lxc_init_cgmanager(void);
89 static bool cgmanager_initialized
= false;
90 static bool use_cgmanager
= false;
91 static bool lxc_init_cgmanager(void) { return false; }
94 static int cgroup_rmdir(char *dirname
)
96 struct dirent dirent
, *direntp
;
100 char pathname
[MAXPATHLEN
];
102 dir
= opendir(dirname
);
104 ERROR("%s: failed to open %s", __func__
, dirname
);
108 while (!readdir_r(dir
, &dirent
, &direntp
)) {
115 if (!strcmp(direntp
->d_name
, ".") ||
116 !strcmp(direntp
->d_name
, ".."))
119 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
120 if (rc
< 0 || rc
>= MAXPATHLEN
) {
121 ERROR("pathname too long");
124 saved_errno
= -ENOMEM
;
127 ret
= lstat(pathname
, &mystat
);
129 SYSERROR("%s: failed to stat %s", __func__
, pathname
);
135 if (S_ISDIR(mystat
.st_mode
)) {
136 if (cgroup_rmdir(pathname
) < 0) {
144 if (rmdir(dirname
) < 0) {
145 SYSERROR("%s: failed to delete %s", __func__
, dirname
);
153 SYSERROR("%s: failed to close directory %s", __func__
, dirname
);
160 return failed
? -1 : 0;
163 struct cgroup_meta_data
*lxc_cgroup_load_meta()
165 const char *cgroup_use
= NULL
;
166 char **cgroup_use_list
= NULL
;
167 struct cgroup_meta_data
*md
= NULL
;
171 cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
172 if (!cgroup_use
&& errno
!= 0)
175 cgroup_use_list
= lxc_string_split_and_trim(cgroup_use
, ',');
176 if (!cgroup_use_list
)
180 md
= lxc_cgroup_load_meta2((const char **)cgroup_use_list
);
182 lxc_free_array((void **)cgroup_use_list
, free
);
187 /* Step 1: determine all kernel subsystems */
188 bool find_cgroup_subsystems(char ***kernel_subsystems
)
194 size_t kernel_subsystems_count
= 0;
195 size_t kernel_subsystems_capacity
= 0;
198 proc_cgroups
= fopen_cloexec("/proc/cgroups", "r");
202 while (getline(&line
, &sz
, proc_cgroups
) != -1) {
205 int hierarchy_number
;
212 tab1
= strchr(line
, '\t');
216 tab2
= strchr(tab1
, '\t');
222 hierarchy_number
= strtoul(tab1
, &tab2
, 10);
225 (void)hierarchy_number
;
227 r
= lxc_grow_array((void ***)kernel_subsystems
, &kernel_subsystems_capacity
, kernel_subsystems_count
+ 1, 12);
230 (*kernel_subsystems
)[kernel_subsystems_count
] = strdup(line
);
231 if (!(*kernel_subsystems
)[kernel_subsystems_count
])
233 kernel_subsystems_count
++;
238 fclose(proc_cgroups
);
243 /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
244 * since mount points don't specify hierarchy number and
245 * /proc/cgroups does not contain named hierarchies
247 static bool find_cgroup_hierarchies(struct cgroup_meta_data
*meta_data
,
248 bool all_kernel_subsystems
, bool all_named_subsystems
,
249 const char **subsystem_whitelist
)
251 FILE *proc_self_cgroup
;
256 size_t hierarchy_capacity
= 0;
258 proc_self_cgroup
= fopen_cloexec("/proc/self/cgroup", "r");
259 /* if for some reason (because of setns() and pid namespace for example),
260 * /proc/self is not valid, we try /proc/1/cgroup... */
261 if (!proc_self_cgroup
)
262 proc_self_cgroup
= fopen_cloexec("/proc/1/cgroup", "r");
263 if (!proc_self_cgroup
)
266 while (getline(&line
, &sz
, proc_self_cgroup
) != -1) {
267 /* file format: hierarchy:subsystems:group,
268 * we only extract hierarchy and subsystems
272 int hierarchy_number
;
273 struct cgroup_hierarchy
*h
= NULL
;
279 colon1
= strchr(line
, ':');
283 colon2
= strchr(colon1
, ':');
289 hierarchy_number
= strtoul(line
, &colon2
, 10);
290 if (!colon2
|| *colon2
)
293 if (hierarchy_number
> meta_data
->maximum_hierarchy
) {
294 /* lxc_grow_array will never shrink, so even if we find a lower
295 * hierarchy number here, the array will never be smaller
297 r
= lxc_grow_array((void ***)&meta_data
->hierarchies
, &hierarchy_capacity
, hierarchy_number
+ 1, 12);
301 meta_data
->maximum_hierarchy
= hierarchy_number
;
304 /* this shouldn't happen, we had this already */
305 if (meta_data
->hierarchies
[hierarchy_number
])
308 h
= calloc(1, sizeof(struct cgroup_hierarchy
));
312 meta_data
->hierarchies
[hierarchy_number
] = h
;
314 h
->index
= hierarchy_number
;
315 h
->subsystems
= lxc_string_split_and_trim(colon1
, ',');
318 /* see if this hierarchy should be considered */
319 if (!all_kernel_subsystems
|| !all_named_subsystems
) {
320 for (p
= h
->subsystems
; *p
; p
++) {
321 if (!strncmp(*p
, "name=", 5)) {
322 if (all_named_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
327 if (all_kernel_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
334 /* we want all hierarchies anyway */
341 fclose(proc_self_cgroup
);
346 /* Step 3: determine all mount points of each hierarchy */
347 static bool find_hierarchy_mountpts( struct cgroup_meta_data
*meta_data
, char **kernel_subsystems
)
350 FILE *proc_self_mountinfo
;
353 char **tokens
= NULL
;
354 size_t mount_point_count
= 0;
355 size_t mount_point_capacity
= 0;
356 size_t token_capacity
= 0;
359 proc_self_mountinfo
= fopen_cloexec("/proc/self/mountinfo", "r");
360 /* if for some reason (because of setns() and pid namespace for example),
361 * /proc/self is not valid, we try /proc/1/mountinfo... */
362 if (!proc_self_mountinfo
)
363 proc_self_mountinfo
= fopen_cloexec("/proc/1/mountinfo", "r");
364 if (!proc_self_mountinfo
)
367 while (getline(&line
, &sz
, proc_self_mountinfo
) != -1) {
368 char *token
, *line_tok
, *saveptr
= NULL
;
370 struct cgroup_mount_point
*mount_point
;
371 struct cgroup_hierarchy
*h
;
374 if (line
[0] && line
[strlen(line
) - 1] == '\n')
375 line
[strlen(line
) - 1] = '\0';
377 for (i
= 0, line_tok
= line
; (token
= strtok_r(line_tok
, " ", &saveptr
)); line_tok
= NULL
) {
378 r
= lxc_grow_array((void ***)&tokens
, &token_capacity
, i
+ 1, 64);
384 /* layout of /proc/self/mountinfo:
387 * 2: device major:minor
390 * 5: per-mount options
391 * [optional X]: additional data
395 * X+10: per-superblock options
397 for (j
= 6; j
< i
&& tokens
[j
]; j
++)
398 if (!strcmp(tokens
[j
], "-"))
401 /* could not find separator */
402 if (j
>= i
|| !tokens
[j
])
404 /* there should be exactly three fields after
410 /* not a cgroup filesystem */
411 if (strcmp(tokens
[j
+ 1], "cgroup") != 0)
414 subsystems
= subsystems_from_mount_options(tokens
[j
+ 3], kernel_subsystems
);
419 for (k
= 1; k
<= meta_data
->maximum_hierarchy
; k
++) {
420 if (meta_data
->hierarchies
[k
] &&
421 meta_data
->hierarchies
[k
]->subsystems
[0] &&
422 lxc_string_in_array(meta_data
->hierarchies
[k
]->subsystems
[0], (const char **)subsystems
)) {
423 /* TODO: we could also check if the lists really match completely,
424 * just to have an additional sanity check */
425 h
= meta_data
->hierarchies
[k
];
429 lxc_free_array((void **)subsystems
, free
);
431 r
= lxc_grow_array((void ***)&meta_data
->mount_points
, &mount_point_capacity
, mount_point_count
+ 1, 12);
435 /* create mount point object */
436 mount_point
= calloc(1, sizeof(*mount_point
));
440 meta_data
->mount_points
[mount_point_count
++] = mount_point
;
442 mount_point
->hierarchy
= h
;
443 mount_point
->mount_point
= strdup(tokens
[4]);
444 mount_point
->mount_prefix
= strdup(tokens
[3]);
445 if (!mount_point
->mount_point
|| !mount_point
->mount_prefix
)
447 mount_point
->read_only
= !lxc_string_in_list("rw", tokens
[5], ',');
449 if (!strcmp(mount_point
->mount_prefix
, "/")) {
450 if (mount_point
->read_only
) {
451 if (!h
->ro_absolute_mount_point
)
452 h
->ro_absolute_mount_point
= mount_point
;
454 if (!h
->rw_absolute_mount_point
)
455 h
->rw_absolute_mount_point
= mount_point
;
459 k
= lxc_array_len((void **)h
->all_mount_points
);
460 r
= lxc_grow_array((void ***)&h
->all_mount_points
, &h
->all_mount_point_capacity
, k
+ 1, 4);
463 h
->all_mount_points
[k
] = mount_point
;
468 fclose(proc_self_mountinfo
);
474 struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
)
476 bool all_kernel_subsystems
= true;
477 bool all_named_subsystems
= false;
478 struct cgroup_meta_data
*meta_data
= NULL
;
479 char **kernel_subsystems
= NULL
;
482 /* if the subsystem whitelist is not specified, include all
483 * hierarchies that contain kernel subsystems by default but
484 * no hierarchies that only contain named subsystems
486 * if it is specified, the specifier @all will select all
487 * hierarchies, @kernel will select all hierarchies with
488 * kernel subsystems and @named will select all named
491 all_kernel_subsystems
= subsystem_whitelist
?
492 (lxc_string_in_array("@kernel", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
494 all_named_subsystems
= subsystem_whitelist
?
495 (lxc_string_in_array("@named", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
498 meta_data
= calloc(1, sizeof(struct cgroup_meta_data
));
503 if (!find_cgroup_subsystems(&kernel_subsystems
))
506 if (!find_cgroup_hierarchies(meta_data
, all_kernel_subsystems
,
507 all_named_subsystems
, subsystem_whitelist
))
510 if (!find_hierarchy_mountpts(meta_data
, kernel_subsystems
))
513 /* oops, we couldn't find anything */
514 if (!meta_data
->hierarchies
|| !meta_data
->mount_points
) {
519 lxc_free_array((void **)kernel_subsystems
, free
);
524 lxc_free_array((void **)kernel_subsystems
, free
);
525 lxc_cgroup_put_meta(meta_data
);
530 struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
)
536 struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
)
541 if (--meta_data
->ref
> 0)
543 lxc_free_array((void **)meta_data
->mount_points
, (lxc_free_fn
)lxc_cgroup_mount_point_free
);
544 if (meta_data
->hierarchies
) {
545 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++)
546 lxc_cgroup_hierarchy_free(meta_data
->hierarchies
[i
]);
548 free(meta_data
->hierarchies
);
553 struct cgroup_hierarchy
*lxc_cgroup_find_hierarchy(struct cgroup_meta_data
*meta_data
, const char *subsystem
)
556 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
557 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
558 if (h
&& lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
564 struct cgroup_mount_point
*lxc_cgroup_find_mount_point(struct cgroup_hierarchy
*hierarchy
, const char *group
, bool should_be_writable
)
566 struct cgroup_mount_point
**mps
;
567 struct cgroup_mount_point
*current_result
= NULL
;
568 ssize_t quality
= -1;
571 if (hierarchy
->rw_absolute_mount_point
)
572 return hierarchy
->rw_absolute_mount_point
;
573 if (!should_be_writable
&& hierarchy
->ro_absolute_mount_point
)
574 return hierarchy
->ro_absolute_mount_point
;
576 for (mps
= hierarchy
->all_mount_points
; mps
&& *mps
; mps
++) {
577 struct cgroup_mount_point
*mp
= *mps
;
578 size_t prefix_len
= mp
->mount_prefix
? strlen(mp
->mount_prefix
) : 0;
580 if (prefix_len
== 1 && mp
->mount_prefix
[0] == '/')
583 if (should_be_writable
&& mp
->read_only
)
587 (strncmp(group
, mp
->mount_prefix
, prefix_len
) == 0 &&
588 (group
[prefix_len
] == '\0' || group
[prefix_len
] == '/'))) {
589 /* search for the best quality match, i.e. the match with the
590 * shortest prefix where this group is still contained
592 if (quality
== -1 || prefix_len
< quality
) {
594 quality
= prefix_len
;
601 return current_result
;
604 char *lxc_cgroup_find_abs_path(const char *subsystem
, const char *group
, bool should_be_writable
, const char *suffix
)
606 struct cgroup_meta_data
*meta_data
;
607 struct cgroup_hierarchy
*h
;
608 struct cgroup_mount_point
*mp
;
612 meta_data
= lxc_cgroup_load_meta();
616 h
= lxc_cgroup_find_hierarchy(meta_data
, subsystem
);
620 mp
= lxc_cgroup_find_mount_point(h
, group
, should_be_writable
);
624 result
= cgroup_to_absolute_path(mp
, group
, suffix
);
628 lxc_cgroup_put_meta(meta_data
);
633 lxc_cgroup_put_meta(meta_data
);
638 struct cgroup_process_info
*lxc_cgroup_process_info_get(pid_t pid
, struct cgroup_meta_data
*meta
)
641 snprintf(pid_buf
, 32, "/proc/%lu/cgroup", (unsigned long)pid
);
642 return lxc_cgroup_process_info_getx(pid_buf
, meta
);
645 struct cgroup_process_info
*lxc_cgroup_process_info_get_init(struct cgroup_meta_data
*meta
)
647 return lxc_cgroup_process_info_get(1, meta
);
650 struct cgroup_process_info
*lxc_cgroup_process_info_get_self(struct cgroup_meta_data
*meta
)
652 struct cgroup_process_info
*i
;
653 i
= lxc_cgroup_process_info_getx("/proc/self/cgroup", meta
);
655 i
= lxc_cgroup_process_info_get(getpid(), meta
);
660 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
661 * is already in a new cgroup named after the pid. 'mnt' is passed in as
662 * the full current cgroup. Say that is /sys/fs/cgroup/lxc/2975 and the container
663 * name is c1. We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1,
664 * and return the string /sys/fs/cgroup/lxc/c1.
666 static char *cgroup_rename_nsgroup(const char *mountpath
, const char *oldname
, pid_t pid
, const char *name
)
668 char *dir
, *fulloldpath
;
669 char *newname
, *fullnewpath
;
670 int len
, newlen
, ret
;
673 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
676 * fulloldpath = /cgroup/ab/2375
677 * fullnewpath = /cgroup/ab/c1
680 dir
= alloca(strlen(oldname
) + 1);
681 strcpy(dir
, oldname
);
683 len
= strlen(oldname
) + strlen(mountpath
) + 22;
684 fulloldpath
= alloca(len
);
685 ret
= snprintf(fulloldpath
, len
, "%s/%s/%ld", mountpath
, oldname
, (unsigned long)pid
);
686 if (ret
< 0 || ret
>= len
)
689 len
= strlen(dir
) + strlen(name
) + 2;
690 newname
= malloc(len
);
692 SYSERROR("Out of memory");
695 ret
= snprintf(newname
, len
, "%s/%s", dir
, name
);
696 if (ret
< 0 || ret
>= len
) {
701 newlen
= strlen(mountpath
) + len
+ 2;
702 fullnewpath
= alloca(newlen
);
703 ret
= snprintf(fullnewpath
, newlen
, "%s/%s", mountpath
, newname
);
704 if (ret
< 0 || ret
>= newlen
) {
709 if (access(fullnewpath
, F_OK
) == 0) {
710 if (rmdir(fullnewpath
) != 0) {
711 SYSERROR("container cgroup %s already exists.", fullnewpath
);
716 if (rename(fulloldpath
, fullnewpath
)) {
717 SYSERROR("failed to rename cgroup %s->%s", fulloldpath
, fullnewpath
);
722 DEBUG("'%s' renamed to '%s'", oldname
, newname
);
727 /* create a new cgroup */
728 struct cgroup_process_info
*lxc_cgroupfs_create(const char *name
, const char *path_pattern
, struct cgroup_meta_data
*meta_data
, const char *sub_pattern
)
730 char **cgroup_path_components
= NULL
;
732 char *path_so_far
= NULL
;
733 char **new_cgroup_paths
= NULL
;
734 char **new_cgroup_paths_sub
= NULL
;
735 struct cgroup_mount_point
*mp
;
736 struct cgroup_hierarchy
*h
;
737 struct cgroup_process_info
*base_info
= NULL
;
738 struct cgroup_process_info
*info_ptr
;
742 bool had_sub_pattern
= false;
745 if (!is_valid_cgroup(name
)) {
746 ERROR("Invalid cgroup name: '%s'", name
);
751 if (!strstr(path_pattern
, "%n")) {
752 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern
);
757 /* we will modify the result of this operation directly,
758 * so we don't have to copy the data structure
760 base_info
= (path_pattern
[0] == '/') ?
761 lxc_cgroup_process_info_get_init(meta_data
) :
762 lxc_cgroup_process_info_get_self(meta_data
);
766 new_cgroup_paths
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
767 if (!new_cgroup_paths
)
768 goto out_initial_error
;
770 new_cgroup_paths_sub
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
771 if (!new_cgroup_paths_sub
)
772 goto out_initial_error
;
774 /* find mount points we can use */
775 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
776 h
= info_ptr
->hierarchy
;
777 mp
= lxc_cgroup_find_mount_point(h
, info_ptr
->cgroup_path
, true);
779 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h
->index
);
780 goto out_initial_error
;
782 info_ptr
->designated_mount_point
= mp
;
784 if (lxc_string_in_array("ns", (const char **)h
->subsystems
))
786 if (handle_cgroup_settings(mp
, info_ptr
->cgroup_path
) < 0) {
787 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
788 goto out_initial_error
;
792 /* normalize the path */
793 cgroup_path_components
= lxc_normalize_path(path_pattern
);
794 if (!cgroup_path_components
)
795 goto out_initial_error
;
797 /* go through the path components to see if we can create them */
798 for (p
= cgroup_path_components
; *p
|| (sub_pattern
&& !had_sub_pattern
); p
++) {
799 /* we only want to create the same component with -1, -2, etc.
800 * if the component contains the container name itself, otherwise
801 * it's not an error if it already exists
803 char *p_eff
= *p
? *p
: (char *)sub_pattern
;
804 bool contains_name
= strstr(p_eff
, "%n");
805 char *current_component
= NULL
;
806 char *current_subpath
= NULL
;
807 char *current_entire_path
= NULL
;
812 /* if we are processing the subpattern, we want to make sure
813 * loop is ended the next time around
816 had_sub_pattern
= true;
820 goto find_name_on_this_level
;
822 cleanup_name_on_this_level
:
823 /* This is reached if we found a name clash.
824 * In that case, remove the cgroup from all previous hierarchies
826 for (j
= 0, info_ptr
= base_info
; j
< i
&& info_ptr
; info_ptr
= info_ptr
->next
, j
++) {
827 r
= remove_cgroup(info_ptr
->designated_mount_point
, info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1], false);
829 WARN("could not clean up cgroup we created when trying to create container");
830 free(info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1]);
831 info_ptr
->created_paths
[--info_ptr
->created_paths_count
] = NULL
;
833 if (current_component
!= current_subpath
)
834 free(current_subpath
);
835 if (current_component
!= p_eff
)
836 free(current_component
);
837 current_component
= current_subpath
= NULL
;
838 /* try again with another suffix */
841 find_name_on_this_level
:
842 /* determine name of the path component we should create */
843 if (contains_name
&& suffix
> 0) {
844 char *buf
= calloc(strlen(name
) + 32, 1);
846 goto out_initial_error
;
847 snprintf(buf
, strlen(name
) + 32, "%s-%u", name
, suffix
);
848 current_component
= lxc_string_replace("%n", buf
, p_eff
);
851 current_component
= contains_name
? lxc_string_replace("%n", name
, p_eff
) : p_eff
;
853 parts
[0] = path_so_far
;
854 parts
[1] = current_component
;
856 current_subpath
= path_so_far
? lxc_string_join("/", (const char **)parts
, false) : current_component
;
858 /* Now go through each hierarchy and try to create the
859 * corresponding cgroup
861 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
864 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
866 current_entire_path
= NULL
;
868 parts2
[0] = !strcmp(info_ptr
->cgroup_path
, "/") ? "" : info_ptr
->cgroup_path
;
869 parts2
[1] = current_subpath
;
871 current_entire_path
= lxc_string_join("/", (const char **)parts2
, false);
874 /* we are processing the subpath, so only update that one */
875 free(new_cgroup_paths_sub
[i
]);
876 new_cgroup_paths_sub
[i
] = strdup(current_entire_path
);
877 if (!new_cgroup_paths_sub
[i
])
878 goto cleanup_from_error
;
880 /* remember which path was used on this controller */
881 free(new_cgroup_paths
[i
]);
882 new_cgroup_paths
[i
] = strdup(current_entire_path
);
883 if (!new_cgroup_paths
[i
])
884 goto cleanup_from_error
;
887 r
= create_cgroup(info_ptr
->designated_mount_point
, current_entire_path
);
888 if (r
< 0 && errno
== EEXIST
&& contains_name
) {
889 /* name clash => try new name with new suffix */
890 free(current_entire_path
);
891 current_entire_path
= NULL
;
892 goto cleanup_name_on_this_level
;
893 } else if (r
< 0 && errno
!= EEXIST
) {
894 SYSERROR("Could not create cgroup %s", current_entire_path
);
895 goto cleanup_from_error
;
897 /* successfully created */
898 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
900 goto cleanup_from_error
;
901 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, current_entire_path
)) {
902 ERROR("Failed to initialize cpuset in new '%s'.", current_entire_path
);
903 goto cleanup_from_error
;
905 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = current_entire_path
;
907 /* if we didn't create the cgroup, then we have to make sure that
908 * further cgroups will be created properly
910 if (handle_cgroup_settings(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
) < 0) {
911 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
912 goto cleanup_from_error
;
914 if (!init_cpuset_if_needed(info_ptr
->designated_mount_point
, info_ptr
->cgroup_path
)) {
915 ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr
->cgroup_path
);
916 goto cleanup_from_error
;
919 /* already existed but path component of pattern didn't contain '%n',
920 * so this is not an error; but then we don't need current_entire_path
923 free(current_entire_path
);
924 current_entire_path
= NULL
;
928 /* save path so far */
930 path_so_far
= strdup(current_subpath
);
932 goto cleanup_from_error
;
935 if (current_component
!= current_subpath
)
936 free(current_subpath
);
937 if (current_component
!= p_eff
)
938 free(current_component
);
939 current_component
= current_subpath
= NULL
;
943 /* called if an error occurred in the loop, so we
944 * do some additional cleanup here
947 if (current_component
!= current_subpath
)
948 free(current_subpath
);
949 if (current_component
!= p_eff
)
950 free(current_component
);
951 free(current_entire_path
);
953 goto out_initial_error
;
956 /* we're done, now update the paths */
957 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
958 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
959 * will take care of it
960 * Since we do a continue in above loop, new_cgroup_paths[i] is
961 * unset anyway, as is new_cgroup_paths_sub[i]
963 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
965 free(info_ptr
->cgroup_path
);
966 info_ptr
->cgroup_path
= new_cgroup_paths
[i
];
967 info_ptr
->cgroup_path_sub
= new_cgroup_paths_sub
[i
];
969 /* don't use lxc_free_array since we used the array members
970 * to store them in our result...
972 free(new_cgroup_paths
);
973 free(new_cgroup_paths_sub
);
975 lxc_free_array((void **)cgroup_path_components
, free
);
981 lxc_cgroup_process_info_free_and_remove(base_info
);
982 lxc_free_array((void **)new_cgroup_paths
, free
);
983 lxc_free_array((void **)new_cgroup_paths_sub
, free
);
984 lxc_free_array((void **)cgroup_path_components
, free
);
989 int lxc_cgroup_create_legacy(struct cgroup_process_info
*base_info
, const char *name
, pid_t pid
)
991 struct cgroup_process_info
*info_ptr
;
994 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
995 if (!lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
998 * For any path which has ns cgroup mounted, handler->pid is already
999 * moved into a container called '%d % (handler->pid)'. Rename it to
1000 * the cgroup name and record that.
1002 char *tmp
= cgroup_rename_nsgroup((const char *)info_ptr
->designated_mount_point
->mount_point
,
1003 info_ptr
->cgroup_path
, pid
, name
);
1006 free(info_ptr
->cgroup_path
);
1007 info_ptr
->cgroup_path
= tmp
;
1008 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1014 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = tmp
;
1019 /* get the cgroup membership of a given container */
1020 struct cgroup_process_info
*lxc_cgroup_get_container_info(const char *name
, const char *lxcpath
, struct cgroup_meta_data
*meta_data
)
1022 struct cgroup_process_info
*result
= NULL
;
1023 int saved_errno
= 0;
1025 struct cgroup_process_info
**cptr
= &result
;
1026 struct cgroup_process_info
*entry
= NULL
;
1029 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
1030 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
1034 /* use the command interface to look for the cgroup */
1035 path
= lxc_cmd_get_cgroup_path(name
, lxcpath
, h
->subsystems
[0]);
1039 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1042 entry
->meta_ref
= lxc_cgroup_get_meta(meta_data
);
1043 entry
->hierarchy
= h
;
1044 entry
->cgroup_path
= path
;
1047 /* it is not an error if we don't find anything here,
1048 * it is up to the caller to decide what to do in that
1050 entry
->designated_mount_point
= lxc_cgroup_find_mount_point(h
, entry
->cgroup_path
, true);
1053 cptr
= &entry
->next
;
1059 saved_errno
= errno
;
1061 lxc_cgroup_process_info_free(result
);
1062 lxc_cgroup_process_info_free(entry
);
1063 errno
= saved_errno
;
1067 /* move a process to the cgroups specified by the membership */
1068 int lxc_cgroupfs_enter(struct cgroup_process_info
*info
, pid_t pid
, bool enter_sub
)
1071 char *cgroup_tasks_fn
;
1073 struct cgroup_process_info
*info_ptr
;
1075 snprintf(pid_buf
, 32, "%lu", (unsigned long)pid
);
1076 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1077 char *cgroup_path
= (enter_sub
&& info_ptr
->cgroup_path_sub
) ?
1078 info_ptr
->cgroup_path_sub
:
1079 info_ptr
->cgroup_path
;
1081 if (!info_ptr
->designated_mount_point
) {
1082 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, cgroup_path
, true);
1083 if (!info_ptr
->designated_mount_point
) {
1084 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid
, cgroup_path
);
1089 cgroup_tasks_fn
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, cgroup_path
, "/tasks");
1090 if (!cgroup_tasks_fn
) {
1091 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1095 r
= lxc_write_to_file(cgroup_tasks_fn
, pid_buf
, strlen(pid_buf
), false);
1096 free(cgroup_tasks_fn
);
1098 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1106 /* free process membership information */
1107 void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
)
1109 struct cgroup_process_info
*next
;
1113 lxc_cgroup_put_meta(info
->meta_ref
);
1114 free(info
->cgroup_path
);
1115 free(info
->cgroup_path_sub
);
1116 lxc_free_array((void **)info
->created_paths
, free
);
1118 lxc_cgroup_process_info_free(next
);
1121 /* free process membership information and remove cgroups that were created */
1122 void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
)
1124 struct cgroup_process_info
*next
;
1130 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1132 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1134 /* ignore return value here, perhaps we created the
1135 * '/lxc' cgroup in this container but another container
1136 * is still running (for example)
1138 (void)remove_cgroup(mp
, info
->cgroup_path
, true);
1140 for (pp
= info
->created_paths
; pp
&& *pp
; pp
++);
1141 for ((void)(pp
&& --pp
); info
->created_paths
&& pp
>= info
->created_paths
; --pp
) {
1144 free(info
->created_paths
);
1145 lxc_cgroup_put_meta(info
->meta_ref
);
1146 free(info
->cgroup_path
);
1147 free(info
->cgroup_path_sub
);
1149 lxc_cgroup_process_info_free_and_remove(next
);
1152 static char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem
, struct lxc_handler
*handler
)
1154 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
1155 struct cgroup_process_info
*info
= d
->info
;
1156 info
= find_info_for_subsystem(info
, subsystem
);
1159 return info
->cgroup_path
;
1162 char *lxc_cgroup_get_hierarchy_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1164 return lxc_cmd_get_cgroup_path(name
, lxcpath
, subsystem
);
1167 char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem
, struct lxc_handler
*handler
)
1169 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
1170 struct cgroup_process_info
*info
= d
->info
;
1171 struct cgroup_mount_point
*mp
= NULL
;
1173 info
= find_info_for_subsystem(info
, subsystem
);
1176 if (info
->designated_mount_point
) {
1177 mp
= info
->designated_mount_point
;
1179 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1183 return cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1186 char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1188 struct cgroup_meta_data
*meta
;
1189 struct cgroup_process_info
*base_info
, *info
;
1190 struct cgroup_mount_point
*mp
;
1191 char *result
= NULL
;
1193 meta
= lxc_cgroup_load_meta();
1196 base_info
= lxc_cgroup_get_container_info(name
, lxcpath
, meta
);
1199 info
= find_info_for_subsystem(base_info
, subsystem
);
1202 if (info
->designated_mount_point
) {
1203 mp
= info
->designated_mount_point
;
1205 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1209 result
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1212 lxc_cgroup_process_info_free(base_info
);
1214 lxc_cgroup_put_meta(meta
);
/*
 * lxc_cgroup_set_handler: write @value to the cgroup file @filename of the
 * container owned by @handler.
 *
 * @filename : cgroup file name such as "freezer.state"; everything before
 *             the first '.' is taken as the subsystem name
 * @value    : string to write
 * @handler  : handler of the container
 *
 * Returns the result of do_cgroup_set(), or -1 when the cgroup directory
 * cannot be determined.
 */
int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy BSD index() */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
	if (path) {
		ret = do_cgroup_set(path, filename, value);
		free(path);
	}
	return ret;
}
/*
 * lxc_cgroup_get_handler: read the cgroup file @filename of the container
 * owned by @handler into @value.
 *
 * @filename : cgroup file name such as "freezer.state"; everything before
 *             the first '.' is taken as the subsystem name
 * @value    : destination buffer
 * @len      : size passed through to do_cgroup_get()
 * @handler  : handler of the container
 *
 * Returns the result of do_cgroup_get(), or -1 when the cgroup directory
 * cannot be determined.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy BSD index() */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
	if (path) {
		ret = do_cgroup_get(path, filename, value, len);
		free(path);
	}
	return ret;
}
/*
 * lxc_cgroupfs_set: write @value to the cgroup file @filename of the
 * container @name running in @lxcpath (cgroupfs backend "set" operation).
 *
 * The subsystem is the part of @filename before the first '.'.
 *
 * Returns the result of do_cgroup_set(), or -1 when the cgroup directory
 * cannot be determined.
 */
int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy BSD index() */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		ret = do_cgroup_set(path, filename, value);
		free(path);
	}
	return ret;
}
/*
 * lxc_cgroupfs_get: read the cgroup file @filename of the container @name
 * running in @lxcpath into @value (cgroupfs backend "get" operation).
 *
 * The subsystem is the part of @filename before the first '.'; @len is
 * passed through to do_cgroup_get().
 *
 * Returns the result of do_cgroup_get(), or -1 when the cgroup directory
 * cannot be determined.
 */
int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy BSD index() */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		ret = do_cgroup_get(path, filename, value, len);
		free(path);
	}
	return ret;
}
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @filename  : the file of interest (e.g. "freezer.state") or
 *              the subsystem name (e.g. "freezer") in which case
 *              the directory where the cgroup may be modified
 *              is returned
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines cgpath from the
 * lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *filename, const char *name,
			  const char *lxcpath)
{
	char *subsystem, *dot, *group, *path;
	char *longer_file = NULL;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy BSD index() */
	dot = strchr(subsystem, '.');
	if (dot) {
		*dot = '\0';
		/* "/<filename>" is appended to the cgroup directory below */
		longer_file = alloca(strlen(filename) + 2);
		longer_file[0] = '/';
		strcpy(longer_file + 1, filename);
	}

	group = lxc_cgroup_get_hierarchy_path(subsystem, name, lxcpath);
	if (!group)
		return NULL;

	path = lxc_cgroup_find_abs_path(subsystem, group, true, dot ? longer_file : NULL);
	free(group);
	return path;
}
/*
 * lxc_setup_mount_cgroup: populate "<root>/sys/fs/cgroup" for a container.
 *
 * @root        : container root directory; the target path is built as
 *                "%s/sys/fs/cgroup"
 * @cgroup_info : cgroup state of the handler; its ->data is read as a
 *                struct cgfs_data and the info list is taken from it
 * @type        : LXC_AUTO_CGROUP_* mode; values below LXC_AUTO_CGROUP_RO
 *                or above LXC_AUTO_CGROUP_FULL_MIXED are reported as an
 *                internal error
 *
 * A tmpfs is mounted on the target.  For every entry of the info list a
 * directory named after the comma-joined subsystems of its hierarchy is
 * created (a leading "name=" is stripped from each subsystem).  For the
 * FULL_RO/FULL_RW/FULL_MIXED types the host mount point is bind-mounted
 * onto that directory, remounted read-only for FULL_RO and FULL_MIXED,
 * and for FULL_MIXED the container's own cgroup is bound onto itself and
 * remounted read-write.  The code after that creates the container's
 * cgroup directory and bind-mounts the host cgroup onto it, remounting it
 * read-only for LXC_AUTO_CGROUP_RO (presumably the else branch — confirm).
 * Hierarchies with more than one subsystem get a symlink per subsystem.
 * Finally the tmpfs is remounted read-only unless the type is
 * LXC_AUTO_CGROUP_RW or LXC_AUTO_CGROUP_FULL_RW; that last mount() result
 * is not checked.
 *
 * NOTE(review): the "cgmanager" backend is special-cased at the top (see
 * the todo).  Return statements and cleanup labels are not visible in
 * this listing; errno is saved and restored around the cleanup.
 */
1330 int lxc_setup_mount_cgroup(const char *root
, struct lxc_cgroup_info
*cgroup_info
, int type
)
1332 size_t bufsz
= strlen(root
) + sizeof("/sys/fs/cgroup");
1334 char **parts
= NULL
;
1335 char *dirname
= NULL
;
1336 char *abs_path
= NULL
;
1337 char *abs_path2
= NULL
;
1338 struct cgfs_data
*cgfs_d
;
1339 struct cgroup_process_info
*info
, *base_info
;
1340 int r
, saved_errno
= 0;
1344 if (strcmp(active_cg_ops
->name
, "cgmanager") == 0) {
1345 // todo - offer to bind-mount /sys/fs/cgroup/cgmanager/
1349 cgfs_d
= cgroup_info
->data
;
1350 base_info
= cgfs_d
->info
;
1352 if (type
< LXC_AUTO_CGROUP_RO
|| type
> LXC_AUTO_CGROUP_FULL_MIXED
) {
1353 ERROR("could not mount cgroups into container: invalid type specified internally");
1358 path
= calloc(1, bufsz
);
1361 snprintf(path
, bufsz
, "%s/sys/fs/cgroup", root
);
1362 r
= mount("cgroup_root", path
, "tmpfs", MS_NOSUID
|MS_NODEV
|MS_NOEXEC
|MS_RELATIME
, "size=10240k,mode=755");
1364 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1368 /* now mount all the hierarchies we care about */
1369 for (info
= base_info
; info
; info
= info
->next
) {
1370 size_t subsystem_count
, i
;
1371 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1373 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1375 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1379 subsystem_count
= lxc_array_len((void **)info
->hierarchy
->subsystems
);
1380 parts
= calloc(subsystem_count
+ 1, sizeof(char *));
1384 for (i
= 0; i
< subsystem_count
; i
++) {
1385 if (!strncmp(info
->hierarchy
->subsystems
[i
], "name=", 5))
1386 parts
[i
] = info
->hierarchy
->subsystems
[i
] + 5;
1388 parts
[i
] = info
->hierarchy
->subsystems
[i
];
1390 dirname
= lxc_string_join(",", (const char **)parts
, false);
1394 /* create subsystem directory */
1395 abs_path
= lxc_append_paths(path
, dirname
);
1398 r
= mkdir_p(abs_path
, 0755);
1399 if (r
< 0 && errno
!= EEXIST
) {
1400 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname
);
1404 abs_path2
= lxc_append_paths(abs_path
, info
->cgroup_path
);
1408 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_RW
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1409 /* bind-mount the cgroup entire filesystem there */
1410 if (strcmp(mp
->mount_prefix
, "/") != 0) {
1411 /* FIXME: maybe we should just try to remount the entire hierarchy
1412 * with a regular mount command? may that works? */
1413 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname
);
1416 r
= mount(mp
->mount_point
, abs_path
, "none", MS_BIND
, 0);
1418 SYSERROR("error bind-mounting %s to %s", mp
->mount_point
, abs_path
);
1421 /* main cgroup path should be read-only */
1422 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1423 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1425 SYSERROR("error re-mounting %s readonly", abs_path
);
1429 /* own cgroup should be read-write */
1430 if (type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1431 r
= mount(abs_path2
, abs_path2
, NULL
, MS_BIND
, NULL
);
1433 SYSERROR("error bind-mounting %s onto itself", abs_path2
);
1436 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
, NULL
);
1438 SYSERROR("error re-mounting %s readwrite", abs_path2
);
1443 /* create path for container's cgroup */
1444 r
= mkdir_p(abs_path2
, 0755);
1445 if (r
< 0 && errno
!= EEXIST
) {
1446 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname
, info
->cgroup_path
);
1453 /* bind-mount container's cgroup to that directory */
1454 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1457 r
= mount(abs_path
, abs_path2
, "none", MS_BIND
, 0);
1459 SYSERROR("error bind-mounting %s to %s", abs_path
, abs_path2
);
1462 if (type
== LXC_AUTO_CGROUP_RO
) {
1463 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1465 SYSERROR("error re-mounting %s readonly", abs_path2
);
1476 /* add symlinks for every single subsystem */
1477 if (subsystem_count
> 1) {
1478 for (i
= 0; i
< subsystem_count
; i
++) {
1479 abs_path
= lxc_append_paths(path
, parts
[i
]);
1482 r
= symlink(dirname
, abs_path
);
1484 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts
[i
], dirname
);
1495 /* try to remount the tmpfs readonly, since the container shouldn't
1496 * change anything (this will also make sure that trying to create
1497 * new cgroups outside the allowed area fails with an error instead
1498 * of simply causing this to create directories in the tmpfs itself)
1500 if (type
!= LXC_AUTO_CGROUP_RW
&& type
!= LXC_AUTO_CGROUP_FULL_RW
)
1501 mount(NULL
, path
, NULL
, MS_REMOUNT
|MS_RDONLY
, NULL
);
1508 saved_errno
= errno
;
1514 errno
= saved_errno
;
1518 int lxc_cgroup_nrtasks_handler(struct lxc_handler
*handler
)
1520 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
1521 struct cgroup_process_info
*info
= d
->info
;
1522 struct cgroup_mount_point
*mp
= NULL
;
1523 char *abs_path
= NULL
;
1531 if (info
->designated_mount_point
) {
1532 mp
= info
->designated_mount_point
;
1534 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, false);
1539 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1543 ret
= cgroup_recursive_task_count(abs_path
);
/*
 * lxc_cgroup_process_info_getx: parse a /proc/<pid>/cgroup style file into
 * a linked list of struct cgroup_process_info.
 *
 * @proc_pid_cgroup_str : path of the file to read
 * @meta                : cgroup metadata used to map hierarchy numbers to
 *                        hierarchies; every created entry takes a reference
 *                        on it via lxc_cgroup_get_meta()
 *
 * Each line has the form "hierarchy:subsystems:group".  A trailing newline
 * is stripped, the line is split at the first two ':' characters and the
 * hierarchy number is parsed with strtoul().  The number is checked against
 * meta->maximum_hierarchy and used as index into meta->hierarchies.  For
 * accepted lines an entry is allocated whose cgroup_path is a copy of the
 * group field, and it is linked to the end of the result list.
 *
 * On the error path the file is closed, the partial list and the pending
 * entry are freed, and errno is preserved across the cleanup.
 *
 * NOTE(review): the skip/goto statements after the individual checks are
 * not visible in this listing — confirm against the full file.
 */
1548 static struct cgroup_process_info
*
1549 lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
,
1550 struct cgroup_meta_data
*meta
)
1552 struct cgroup_process_info
*result
= NULL
;
1553 FILE *proc_pid_cgroup
= NULL
;
1556 int saved_errno
= 0;
1557 struct cgroup_process_info
**cptr
= &result
;
1558 struct cgroup_process_info
*entry
= NULL
;
1560 proc_pid_cgroup
= fopen_cloexec(proc_pid_cgroup_str
, "r");
1561 if (!proc_pid_cgroup
)
1564 while (getline(&line
, &sz
, proc_pid_cgroup
) != -1) {
1565 /* file format: hierarchy:subsystems:group */
1569 int hierarchy_number
;
1570 struct cgroup_hierarchy
*h
= NULL
;
1575 if (line
[strlen(line
) - 1] == '\n')
1576 line
[strlen(line
) - 1] = '\0';
1578 colon1
= strchr(line
, ':');
1582 colon2
= strchr(colon1
, ':');
1588 hierarchy_number
= strtoul(line
, &endptr
, 10);
1589 if (!endptr
|| *endptr
)
1592 if (hierarchy_number
> meta
->maximum_hierarchy
) {
1593 /* we encountered a hierarchy we didn't have before,
1594 * so probably somebody remounted some stuff in the
1601 h
= meta
->hierarchies
[hierarchy_number
];
1603 /* we encountered a hierarchy that was thought to be
1604 * dead before, so probably somebody remounted some
1605 * stuff in the mean time...
1611 /* we are told that we should ignore this hierarchy */
1615 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1619 entry
->meta_ref
= lxc_cgroup_get_meta(meta
);
1620 entry
->hierarchy
= h
;
1621 entry
->cgroup_path
= strdup(colon2
);
1622 if (!entry
->cgroup_path
)
1626 cptr
= &entry
->next
;
1630 fclose(proc_pid_cgroup
);
1635 saved_errno
= errno
;
1636 if (proc_pid_cgroup
)
1637 fclose(proc_pid_cgroup
);
1638 lxc_cgroup_process_info_free(result
);
1639 lxc_cgroup_process_info_free(entry
);
1641 errno
= saved_errno
;
/*
 * subsystems_from_mount_options: extract the cgroup subsystems named in a
 * comma-separated mount option string.
 *
 * @mount_options : option string of a cgroup mount
 * @kernel_list   : NULL-terminated list of subsystems known to the kernel
 *
 * An option counts as a subsystem when it appears in @kernel_list or
 * starts with "name=" (named hierarchy).
 *
 * Returns a NULL-terminated array of strdup()ed names, or NULL when no
 * subsystem was found or an allocation failed (errno is preserved across
 * the cleanup in the failure case).
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	char **list = NULL;
	size_t capacity = 0, count = 0;
	char *cursor = NULL;
	char *copy, *opt;
	int err;

	/* strtok_r() writes into its input, so tokenize a scratch copy */
	copy = alloca(strlen(mount_options) + 1);
	strcpy(copy, mount_options);

	opt = strtok_r(copy, ",", &cursor);
	while (opt) {
		if (strncmp(opt, "name=", 5) == 0 ||
		    lxc_string_in_array(opt, (const char **)kernel_list)) {
			/* room for the new element plus the NULL terminator */
			if (lxc_grow_array((void ***)&list, &capacity, count + 1, 12) < 0)
				goto fail;
			list[count + 1] = NULL;
			list[count] = strdup(opt);
			if (!list[count])
				goto fail;
			count++;
		}
		opt = strtok_r(NULL, ",", &cursor);
	}
	return list;

fail:
	err = errno;
	lxc_free_array((void **)list, free);
	errno = err;
	return NULL;
}
1683 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
)
1687 free(mp
->mount_point
);
1688 free(mp
->mount_prefix
);
1692 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
)
1696 lxc_free_array((void **)h
->subsystems
, free
);
1697 free(h
->all_mount_points
);
/*
 * is_valid_cgroup: check whether @name may be used as a single cgroup
 * path component.
 *
 * A name is rejected when it is NULL, equals "." or "..", or contains a
 * '/', a space, a control character or any byte outside printable ASCII.
 * An empty string is accepted, as before.
 */
static bool is_valid_cgroup(const char *name)
{
	const unsigned char *c;

	if (!name)
		return false;

	/* bytes are examined as unsigned so that values >= 128 are rejected
	 * independently of the signedness of plain char
	 */
	for (c = (const unsigned char *)name; *c; c++) {
		/* Use the ASCII printable characters range(32 - 127)
		 * is reasonable, we kick out 32(SPACE) because it'll
		 * break legacy lxc-ls
		 */
		if (*c <= 32 || *c >= 127 || *c == '/')
			return false;
	}

	return strcmp(name, ".") != 0 && strcmp(name, "..") != 0;
}
/*
 * create_or_remove_cgroup: create or remove the cgroup directory @path
 * below the mount point @mp.
 *
 * @do_remove : remove the directory when true, create it (mode 0777)
 *              otherwise
 * @mp        : mount point the cgroup path is relative to
 * @path      : absolute cgroup path
 * @recurse   : on removal, use cgroup_rmdir() instead of a plain rmdir()
 *
 * Returns the result of the directory operation, or -1 when the absolute
 * path cannot be built.  errno of the directory operation is preserved.
 */
static int create_or_remove_cgroup(bool do_remove,
		struct cgroup_mount_point *mp, const char *path, int recurse)
{
	int rc, err;
	char *target;

	target = cgroup_to_absolute_path(mp, path, NULL);
	if (!target)
		return -1;

	if (!do_remove)
		rc = mkdir(target, 0777);
	else if (recurse)
		rc = cgroup_rmdir(target);
	else
		rc = rmdir(target);

	/* free() may clobber errno; keep the one from the operation above */
	err = errno;
	free(target);
	errno = err;
	return rc;
}
/* create_cgroup: create the cgroup directory @path below mount point @mp. */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;
	const bool recurse = false;

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
/*
 * remove_cgroup: remove the cgroup directory @path below mount point @mp;
 * @recurse is forwarded to create_or_remove_cgroup().
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
1748 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
,
1749 const char *path
, const char *suffix
)
1751 /* first we have to make sure we subtract the mount point's prefix */
1752 char *prefix
= mp
->mount_prefix
;
1756 /* we want to make sure only absolute paths to cgroups are passed to us */
1757 if (path
[0] != '/') {
1762 if (prefix
&& !strcmp(prefix
, "/"))
1765 /* prefix doesn't match */
1766 if (prefix
&& strncmp(prefix
, path
, strlen(prefix
)) != 0) {
1770 /* if prefix is /foo and path is /foobar */
1771 if (prefix
&& path
[strlen(prefix
)] != '/' && path
[strlen(prefix
)] != '\0') {
1776 /* remove prefix from path */
1777 path
+= prefix
? strlen(prefix
) : 0;
1779 len
= strlen(mp
->mount_point
) + strlen(path
) + (suffix
? strlen(suffix
) : 0);
1780 buf
= calloc(len
+ 1, 1);
1783 rv
= snprintf(buf
, len
+ 1, "%s%s%s", mp
->mount_point
, path
, suffix
? suffix
: "");
1793 static struct cgroup_process_info
*
1794 find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
)
1796 struct cgroup_process_info
*info_ptr
;
1797 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1798 struct cgroup_hierarchy
*h
= info_ptr
->hierarchy
;
1799 if (lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
/*
 * do_cgroup_get: read the file @sub_filename inside the cgroup directory
 * @cgroup_path into @value (at most @len bytes, as handled by
 * lxc_read_from_file()).
 *
 * Returns the result of lxc_read_from_file(), or -1 when the file name
 * cannot be assembled.  errno of the read is preserved.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *parts[3] = { cgroup_path, sub_filename, NULL };
	char *full_name;
	int nread, err;

	full_name = lxc_string_join("/", parts, false);
	if (!full_name)
		return -1;

	nread = lxc_read_from_file(full_name, value, len);

	/* free() may clobber errno; keep the one from the read */
	err = errno;
	free(full_name);
	errno = err;
	return nread;
}
/*
 * do_cgroup_set: write the string @value to the file @sub_filename inside
 * the cgroup directory @cgroup_path.
 *
 * Returns the result of lxc_write_to_file(), or -1 when the file name
 * cannot be assembled.  errno of the write is preserved.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *parts[3] = { cgroup_path, sub_filename, NULL };
	char *full_name;
	int rc, err;

	full_name = lxc_string_join("/", parts, false);
	if (!full_name)
		return -1;

	rc = lxc_write_to_file(full_name, value, strlen(value), false);

	/* free() may clobber errno; keep the one from the write */
	err = errno;
	free(full_name);
	errno = err;
	return rc;
}
1850 static int do_setup_cgroup_limits(struct lxc_handler
*h
,
1851 struct lxc_list
*cgroup_settings
, bool do_devices
)
1853 struct lxc_list
*iterator
;
1854 struct lxc_cgroup
*cg
;
1857 if (lxc_list_empty(cgroup_settings
))
1860 lxc_list_for_each(iterator
, cgroup_settings
) {
1861 cg
= iterator
->elem
;
1863 if (do_devices
== !strncmp("devices", cg
->subsystem
, 7)) {
1864 if (strcmp(cg
->subsystem
, "devices.deny") == 0 &&
1865 cgroup_devices_has_allow_or_deny(h
, cg
->value
, false))
1867 if (strcmp(cg
->subsystem
, "devices.allow") == 0 &&
1868 cgroup_devices_has_allow_or_deny(h
, cg
->value
, true))
1870 if (lxc_cgroup_set_handler(cg
->subsystem
, cg
->value
, h
)) {
1871 ERROR("Error setting %s to %s for %s\n",
1872 cg
->subsystem
, cg
->value
, h
->name
);
1877 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1881 INFO("cgroup has been setup");
/*
 * cgroup_devices_has_allow_or_deny: inspect the devices list file of the
 * container's devices cgroup before applying a devices rule.
 *
 * @h         : handler of the container
 * @v         : rule value from the configuration
 * @for_allow : true when checking a devices.allow rule, false for
 *              devices.deny
 *
 * For a deny rule only the values "a" and "a *:* rwm" are examined (see
 * the FIXME below).  The file is read line by line with the trailing
 * newline removed; the scan reacts to a line equal to "a *:* rwm" and,
 * for allow rules, to a line equal to @v.  The result starts out as
 * !for_allow.
 *
 * NOTE(review): parts[0] is an allocated path, and the only free() of it
 * visible here sits right after lxc_string_join() — confirm that it is
 * also released on the paths that go on to open the file.
 */
1886 static bool cgroup_devices_has_allow_or_deny(struct lxc_handler
*h
,
1887 char *v
, bool for_allow
)
1893 bool ret
= !for_allow
;
1894 const char *parts
[3] = {
1900 // XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
1901 // not sure they ever do, but they *could*
1902 // right now, I'm assuming they do NOT
1903 if (!for_allow
&& strcmp(v
, "a") != 0 && strcmp(v
, "a *:* rwm") != 0)
1906 parts
[0] = (const char *)lxc_cgroup_get_hierarchy_abs_path_handler("devices", h
);
1909 path
= lxc_string_join("/", parts
, false);
1911 free((void *)parts
[0]);
1915 devices_list
= fopen_cloexec(path
, "r");
1916 if (!devices_list
) {
1921 while (getline(&line
, &sz
, devices_list
) != -1) {
1922 size_t len
= strlen(line
);
1923 if (len
> 0 && line
[len
-1] == '\n')
1925 if (strcmp(line
, "a *:* rwm") == 0) {
1928 } else if (for_allow
&& strcmp(line
, v
) == 0) {
1935 fclose(devices_list
);
/*
 * cgroup_recursive_task_count: walk the cgroup directory @cgroup_path and
 * its subdirectories, counting lines of the "tasks" files.
 *
 * The dirent buffer for readdir_r() is sized from pathconf(_PC_NAME_MAX)
 * as described in readdir_r(3).  Entries "." and ".." are skipped; every
 * other entry is joined onto @cgroup_path and stat()ed.  Directories are
 * descended into recursively, and a file named "tasks" is passed to
 * count_lines().
 *
 * NOTE(review): how the per-entry results are accumulated and what is
 * returned on error is not visible in this listing.
 */
1941 static int cgroup_recursive_task_count(const char *cgroup_path
)
1944 struct dirent
*dent_buf
;
1945 struct dirent
*dent
;
1949 /* see man readdir_r(3) */
1950 name_max
= pathconf(cgroup_path
, _PC_NAME_MAX
);
1953 dent_buf
= malloc(offsetof(struct dirent
, d_name
) + name_max
+ 1);
1957 d
= opendir(cgroup_path
);
1963 while (readdir_r(d
, dent_buf
, &dent
) == 0 && dent
) {
1964 const char *parts
[3] = {
1972 if (!strcmp(dent
->d_name
, ".") || !strcmp(dent
->d_name
, ".."))
1974 sub_path
= lxc_string_join("/", parts
, false);
1980 r
= stat(sub_path
, &st
);
1987 if (S_ISDIR(st
.st_mode
)) {
1988 r
= cgroup_recursive_task_count(sub_path
);
1991 } else if (!strcmp(dent
->d_name
, "tasks")) {
1992 r
= count_lines(sub_path
);
/*
 * count_lines: count the lines of the file @fn.
 *
 * Returns the number of lines read with getline(), or -1 when the file
 * cannot be opened.
 */
static int count_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int lines;
	FILE *fp;

	fp = fopen_cloexec(fn, "r");
	if (!fp)
		return -1;

	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
		;

	free(buf);
	fclose(fp);
	return lines;
}
/*
 * handle_cgroup_settings: prepare a cgroup below mount point @mp before
 * child cgroups are created in it.
 *
 * mp->need_cpuset_init is reset to false first.
 *
 * memory hierarchies: memory.use_hierarchy is read and, unless it already
 * holds '1', "1" is written to it.  A failure is only logged.
 *
 * cpuset hierarchies: when cgroup.clone_children does not exist (stat()
 * fails with ENOENT, i.e. an older kernel), mp->need_cpuset_init is set to
 * true so that cpuset.cpus and cpuset.mems get initialized later.
 * Otherwise the file is read and "1" is written to it; errno of that
 * write is preserved and the function returns -1 if it failed, 0
 * otherwise.
 *
 * NOTE(review): the second parameter (the cgroup path) and the early
 * returns are not visible in this listing.
 */
2023 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
,
2026 int r
, saved_errno
= 0;
2029 mp
->need_cpuset_init
= false;
2031 /* If this is the memory cgroup, we want to enforce hierarchy.
2032 * But don't fail if for some reason we can't.
2034 if (lxc_string_in_array("memory", (const char **)mp
->hierarchy
->subsystems
)) {
2035 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/memory.use_hierarchy");
2037 r
= lxc_read_from_file(cc_path
, buf
, 1);
2038 if (r
< 1 || buf
[0] != '1') {
2039 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2041 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
2047 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2048 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2049 * and cpuset.cpus and then
2051 if (lxc_string_in_array("cpuset", (const char **)mp
->hierarchy
->subsystems
)) {
2052 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/cgroup.clone_children");
2057 /* cgroup.clone_children is not available when running under
2058 * older kernel versions; in this case, we'll initialize
2059 * cpuset.cpus and cpuset.mems later, after the new cgroup
2062 if (stat(cc_path
, &sb
) != 0 && errno
== ENOENT
) {
2063 mp
->need_cpuset_init
= true;
2067 r
= lxc_read_from_file(cc_path
, buf
, 1);
2068 if (r
== 1 && buf
[0] == '1') {
2072 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2073 saved_errno
= errno
;
2075 errno
= saved_errno
;
2076 return r
< 0 ? -1 : 0;
/*
 * cgroup_read_from_file: read @fn into @buf and NUL-terminate the result.
 *
 * When the read fills the whole buffer the last byte is overwritten with
 * the terminator, so callers can detect truncation by comparing the
 * return value with @bufsize.
 *
 * Returns the number of bytes read, the negative result of
 * lxc_read_from_file() on a read error, or -1 for a zero @bufsize.
 */
static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
{
	int nread = lxc_read_from_file(fn, buf, bufsize);

	if (nread < 0) {
		SYSERROR("failed to read %s", fn);
		return nread;
	}

	if (nread != bufsize) {
		buf[nread] = '\0';
		return nread;
	}

	if (bufsize == 0) {
		/* Callers don't do this, but regression/sanity check */
		ERROR("%s: was not expecting 0 bufsize", __func__);
		return -1;
	}

	/* obviously this wasn't empty */
	buf[bufsize-1] = '\0';
	return nread;
}
/*
 * do_init_cpuset_file: initialize one cpuset file of a new cgroup from the
 * same file in its parent cgroup.
 *
 * @mp   : mount point of the cpuset hierarchy; the function only acts
 *         when mp->need_cpuset_init is set
 * @path : cgroup path of the child
 * @name : file name with a leading '/', e.g. "/cpuset.cpus"
 *
 * The child's file is read first and a value that is neither empty nor a
 * bare newline is left alone.  The parent path is derived from a copy of
 * @path using the last '/' (the leading '/' is kept when the parent is
 * the root).  The parent's value is then read and written to the child's
 * file; a parent value that fills the whole buffer is reported as too
 * long.
 *
 * NOTE(review): return values and cleanup are not visible in this
 * listing.
 */
2102 static bool do_init_cpuset_file(struct cgroup_mount_point
*mp
,
2103 const char *path
, const char *name
)
2106 char *childfile
, *parentfile
= NULL
, *tmp
;
2110 if (!mp
->need_cpuset_init
)
2113 childfile
= cgroup_to_absolute_path(mp
, path
, name
);
2117 /* don't overwrite a non-empty value in the file */
2118 ret
= cgroup_read_from_file(childfile
, value
, sizeof(value
));
2121 if (value
[0] != '\0' && value
[0] != '\n') {
2126 /* path to the same name in the parent cgroup */
2127 parentfile
= strdup(path
);
2131 tmp
= strrchr(parentfile
, '/');
2134 if (tmp
== parentfile
)
2135 tmp
++; /* keep the '/' at the start */
2138 parentfile
= cgroup_to_absolute_path(mp
, tmp
, name
);
2143 /* copy from parent to child cgroup */
2144 ret
= cgroup_read_from_file(parentfile
, value
, sizeof(value
));
2147 if (ret
== sizeof(value
)) {
2148 /* If anyone actually sees this error, we can address it */
2149 ERROR("parent cpuset value too long");
2152 ok
= (lxc_write_to_file(childfile
, value
, strlen(value
), false) >= 0);
2156 SYSERROR("failed writing %s", childfile
);
2163 static bool init_cpuset_if_needed(struct cgroup_mount_point
*mp
,
2166 /* the files we have to handle here are only in cpuset hierarchies */
2167 if (!lxc_string_in_array("cpuset",
2168 (const char **)mp
->hierarchy
->subsystems
))
2171 return (do_init_cpuset_file(mp
, path
, "/cpuset.cpus") &&
2172 do_init_cpuset_file(mp
, path
, "/cpuset.mems") );
2175 extern void lxc_monitor_send_state(const char *name
, lxc_state_t state
,
2176 const char *lxcpath
);
/*
 * do_unfreeze: freeze or thaw the container @name running in @lxcpath.
 *
 * @freeze : non-zero writes "FROZEN" to freezer.state, zero writes "THAWED"
 *
 * After the write, freezer.state is read back, a trailing newline is
 * stripped, and once the value starts with the requested state the
 * monitor is notified through lxc_monitor_send_state().
 *
 * NOTE(review): the "Failed to freeze" message is also used when thawing.
 * v[strlen(v)-1] indexes before the buffer if the value read back is an
 * empty string — confirm that lxc_cgroup_get() never yields one.  The
 * retry/return structure is not visible in this listing.
 */
2177 int do_unfreeze(int freeze
, const char *name
, const char *lxcpath
)
2180 const char *state
= freeze
? "FROZEN" : "THAWED";
2182 if (lxc_cgroup_set("freezer.state", state
, name
, lxcpath
) < 0) {
2183 ERROR("Failed to freeze %s:%s", lxcpath
, name
);
2187 if (lxc_cgroup_get("freezer.state", v
, 100, name
, lxcpath
) < 0) {
2188 ERROR("Failed to get new freezer state for %s:%s", lxcpath
, name
);
2191 if (v
[strlen(v
)-1] == '\n')
2192 v
[strlen(v
)-1] = '\0';
2193 if (strncmp(v
, state
, strlen(state
)) == 0) {
2195 lxc_monitor_send_state(name
, freeze
? FROZEN
: THAWED
, lxcpath
);
/*
 * freeze_unfreeze: public entry point for freezing (@freeze non-zero) or
 * thawing the container @name in @lxcpath.  Forwards to do_unfreeze(),
 * which takes the freeze flag as its first argument.
 */
int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
{
	int rc = do_unfreeze(freeze, name, lxcpath);

	return rc;
}
2207 lxc_state_t
freezer_state(const char *name
, const char *lxcpath
)
2210 if (lxc_cgroup_get("freezer.state", v
, 100, name
, lxcpath
) < 0)
2213 if (v
[strlen(v
)-1] == '\n')
2214 v
[strlen(v
)-1] = '\0';
2215 return lxc_str2state(v
);
2218 static void cgfs_destroy(struct lxc_handler
*handler
)
2220 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2224 lxc_cgroup_process_info_free_and_remove(d
->info
);
2226 lxc_cgroup_put_meta(d
->meta
);
2228 handler
->cgroup_info
->data
= NULL
;
2231 static inline bool cgfs_init(struct lxc_handler
*handler
)
2233 struct cgfs_data
*d
= malloc(sizeof(*d
));
2237 d
->meta
= lxc_cgroup_load_meta();
2240 ERROR("cgroupfs failed to detect cgroup metadata");
2244 handler
->cgroup_info
->data
= d
;
2248 static inline bool cgfs_create(struct lxc_handler
*handler
)
2250 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2251 struct cgroup_process_info
*i
;
2252 struct cgroup_meta_data
*md
= d
->meta
;
2253 i
= lxc_cgroupfs_create(handler
->name
, handler
->cgroup_info
->cgroup_pattern
, md
, NULL
);
2260 static inline bool cgfs_enter(struct lxc_handler
*handler
)
2262 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2263 struct cgroup_process_info
*i
= d
->info
;
2266 ret
= lxc_cgroupfs_enter(i
, handler
->pid
, false);
2271 static inline bool cgfs_create_legacy(struct lxc_handler
*handler
)
2273 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2274 struct cgroup_process_info
*i
= d
->info
;
2275 if (lxc_cgroup_create_legacy(i
, handler
->name
, handler
->pid
) < 0) {
2276 ERROR("failed to create legacy ns cgroups for '%s'", handler
->name
);
/*
 * cgfs_get_cgroup: cgroupfs backend "get_cgroup" operation; forwards to
 * lxc_cgroup_get_hierarchy_path_handler(), which takes the subsystem
 * first and the handler second.
 */
static char *cgfs_get_cgroup(struct lxc_handler *handler, const char *subsystem)
{
	char *cgroup_path;

	cgroup_path = lxc_cgroup_get_hierarchy_path_handler(subsystem, handler);
	return cgroup_path;
}
/*
 * cgfs_unfreeze_fromhandler: cgroupfs backend "unfreeze_fromhandler"
 * operation.
 *
 * Resolves the absolute path of the container's freezer cgroup and writes
 * "THAWED" to its freezer.state file.
 *
 * Returns the result of do_cgroup_set(), or -1 when the absolute path
 * cannot be determined.
 */
static int cgfs_unfreeze_fromhandler(struct lxc_handler *handler)
{
	char *rel, *abs_dir;
	int rc;

	rel = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
	abs_dir = lxc_cgroup_find_abs_path("freezer", rel, true, NULL);
	if (abs_dir == NULL)
		return -1;

	rc = do_cgroup_set(abs_dir, "freezer.state", "THAWED");
	free(abs_dir);
	return rc;
}
2302 bool cgroupfs_setup_limits(struct lxc_handler
*h
, bool with_devices
)
2304 return do_setup_cgroup_limits(h
, &h
->conf
->cgroup
, with_devices
) == 0;
/*
 * cgfs_ops: operations table of the cgroupfs backend; init_cg_ops()
 * installs it as active_cg_ops when the cgroup manager is unavailable.
 *
 * NOTE(review): no .init or .name initializer is visible in this listing,
 * although cgroup_init() calls ->init and lxc_setup_mount_cgroup() reads
 * ->name — lines appear to be missing here; confirm against the full
 * file.  No .chown handler is listed either; cgroup_chown() checks for
 * that case.
 */
2307 static struct cgroup_ops cgfs_ops
= {
2308 .destroy
= cgfs_destroy
,
2310 .create
= cgfs_create
,
2311 .enter
= cgfs_enter
,
2312 .create_legacy
= cgfs_create_legacy
,
2313 .get_cgroup
= cgfs_get_cgroup
,
2314 .get
= lxc_cgroupfs_get
,
2315 .set
= lxc_cgroupfs_set
,
2316 .unfreeze_fromhandler
= cgfs_unfreeze_fromhandler
,
2317 .setup_limits
= cgroupfs_setup_limits
,
/*
 * init_cg_ops: select the cgroup backend.
 *
 * When lxc_init_cgmanager() fails, an error is logged and the cgroupfs
 * operations table (cgfs_ops) becomes the active backend.
 *
 * NOTE(review): the statement guarded by cgmanager_initialized is not
 * visible in this listing — presumably an early return; confirm.
 */
2321 static void init_cg_ops(void)
2325 if (cgmanager_initialized
)
2327 if (!lxc_init_cgmanager()) {
2328 ERROR("Could not contact cgroup manager, falling back to cgroupfs");
2329 active_cg_ops
= &cgfs_ops
;
/*
 * These are the backend-independent cgroup handlers for containers.
 */
/*
 * cgroup_destroy: backend-independent teardown; calls the active
 * backend's destroy operation for @handler.
 *
 * NOTE(review): the statement guarded by the cgroup_info check and the
 * line before the destroy call are not visible in this listing.
 */
2338 /* Free all cgroup info held by the handler */
2339 void cgroup_destroy(struct lxc_handler
*handler
)
2341 if (!handler
->cgroup_info
)
2344 active_cg_ops
->destroy(handler
);
/*
 * cgroup_init: allocate and zero a struct lxc_cgroup_info for @handler,
 * choose its cgroup pattern and run the active backend's init operation.
 *
 * The pattern comes from the global configuration value
 * "lxc.cgroup.pattern" and falls back to "%n" when none is set.
 *
 * Returns the result of the backend's init operation.
 *
 * NOTE(review): the root check described by the comment below and the
 * statement after the allocation check are not visible in this listing.
 */
2348 * Allocate a lxc_cgroup_info for the active cgroup
2349 * backend, and assign it to the handler
2351 bool cgroup_init(struct lxc_handler
*handler
)
2354 handler
->cgroup_info
= malloc(sizeof(struct lxc_cgroup_info
));
2355 if (!handler
->cgroup_info
)
2357 memset(handler
->cgroup_info
, 0, sizeof(struct lxc_cgroup_info
));
2358 /* if we are running as root, use system cgroup pattern, otherwise
2359 * just create a cgroup under the current one. But also fall back to
2360 * that if for some reason reading the configuration fails and no
2361 * default value is available
2364 handler
->cgroup_info
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
2365 if (!handler
->cgroup_info
->cgroup_pattern
)
2366 handler
->cgroup_info
->cgroup_pattern
= "%n";
2368 return active_cg_ops
->init(handler
);
2371 /* Create the container cgroups for all requested controllers */
2372 bool cgroup_create(struct lxc_handler
*handler
)
2374 return active_cg_ops
->create(handler
);
2378 * Enter the container init into its new cgroups for all
2379 * requested controllers */
2380 bool cgroup_enter(struct lxc_handler
*handler
)
2382 return active_cg_ops
->enter(handler
);
2385 bool cgroup_create_legacy(struct lxc_handler
*handler
)
2387 if (active_cg_ops
->create_legacy
)
2388 return active_cg_ops
->create_legacy(handler
);
2392 char *cgroup_get_cgroup(struct lxc_handler
*handler
, const char *subsystem
)
2394 return active_cg_ops
->get_cgroup(handler
, subsystem
);
/*
 * lxc_cgroup_set: write @value to the cgroup file @filename of the
 * container @name in @lxcpath through the active backend's set operation
 * and return its result.
 *
 * NOTE(review): one line before the return is not visible in this
 * listing.
 */
2397 int lxc_cgroup_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
2400 return active_cg_ops
->set(filename
, value
, name
, lxcpath
);
/*
 * lxc_cgroup_get: read the cgroup file @filename of the container @name
 * in @lxcpath into @value (size @len) through the active backend's get
 * operation and return its result.
 *
 * NOTE(review): one line before the return is not visible in this
 * listing.
 */
2403 int lxc_cgroup_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
2406 return active_cg_ops
->get(filename
, value
, len
, name
, lxcpath
);
2409 int lxc_unfreeze_fromhandler(struct lxc_handler
*handler
)
2411 return active_cg_ops
->unfreeze_fromhandler(handler
);
2414 bool cgroup_setup_limits(struct lxc_handler
*handler
, bool with_devices
)
2416 return active_cg_ops
->setup_limits(handler
, with_devices
);
2419 bool cgroup_chown(struct lxc_handler
*handler
)
2421 if (active_cg_ops
->chown
)
2422 return active_cg_ops
->chown(handler
);