/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2008
 *
 * Daniel Lezcano <daniel.lezcano at free.fr>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
33 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/inotify.h>
37 #include <sys/mount.h>
38 #include <netinet/in.h>
54 #include <../include/lxcmntent.h>
59 lxc_log_define(lxc_cgroup
, lxc
);
61 static struct cgroup_process_info
*lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
, struct cgroup_meta_data
*meta
);
62 static char **subsystems_from_mount_options(const char *mount_options
, char **kernel_list
);
63 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
);
64 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
);
65 static bool is_valid_cgroup(const char *name
);
66 static int create_cgroup(struct cgroup_mount_point
*mp
, const char *path
);
67 static int remove_cgroup(struct cgroup_mount_point
*mp
, const char *path
, bool recurse
);
68 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
, const char *path
, const char *suffix
);
69 static struct cgroup_process_info
*find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
);
70 static int do_cgroup_get(const char *cgroup_path
, const char *sub_filename
, char *value
, size_t len
);
71 static int do_cgroup_set(const char *cgroup_path
, const char *sub_filename
, const char *value
);
72 static bool cgroup_devices_has_allow_or_deny(struct lxc_handler
*h
, char *v
, bool for_allow
);
73 static int do_setup_cgroup_limits(struct lxc_handler
*h
, struct lxc_list
*cgroup_settings
, bool do_devices
);
74 static int cgroup_recursive_task_count(const char *cgroup_path
);
75 static int count_lines(const char *fn
);
76 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
, char *cgroup_path
);
78 static struct cgroup_ops cgfs_ops
;
79 struct cgroup_ops
*active_cg_ops
= &cgfs_ops
;
80 static void init_cg_ops(void);
83 /* this needs to be mutexed for api use */
84 extern bool cgmanager_initialized
;
85 extern bool use_cgmanager
;
86 extern bool lxc_init_cgmanager(void);
88 static bool cgmanager_initialized
= false;
89 static bool use_cgmanager
= false;
90 static bool lxc_init_cgmanager(void) { return false; }
93 static int cgroup_rmdir(char *dirname
)
95 struct dirent dirent
, *direntp
;
99 char pathname
[MAXPATHLEN
];
101 dir
= opendir(dirname
);
103 ERROR("%s: failed to open %s", __func__
, dirname
);
107 while (!readdir_r(dir
, &dirent
, &direntp
)) {
114 if (!strcmp(direntp
->d_name
, ".") ||
115 !strcmp(direntp
->d_name
, ".."))
118 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
119 if (rc
< 0 || rc
>= MAXPATHLEN
) {
120 ERROR("pathname too long");
123 saved_errno
= -ENOMEM
;
126 ret
= lstat(pathname
, &mystat
);
128 SYSERROR("%s: failed to stat %s", __func__
, pathname
);
134 if (S_ISDIR(mystat
.st_mode
)) {
135 if (cgroup_rmdir(pathname
) < 0) {
143 if (rmdir(dirname
) < 0) {
144 SYSERROR("%s: failed to delete %s", __func__
, dirname
);
152 SYSERROR("%s: failed to close directory %s", __func__
, dirname
);
159 return failed
? -1 : 0;
162 struct cgroup_meta_data
*lxc_cgroup_load_meta()
164 const char *cgroup_use
= NULL
;
165 char **cgroup_use_list
= NULL
;
166 struct cgroup_meta_data
*md
= NULL
;
170 cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
171 if (!cgroup_use
&& errno
!= 0)
174 cgroup_use_list
= lxc_string_split_and_trim(cgroup_use
, ',');
175 if (!cgroup_use_list
)
179 md
= lxc_cgroup_load_meta2((const char **)cgroup_use_list
);
181 lxc_free_array((void **)cgroup_use_list
, free
);
186 /* Step 1: determine all kernel subsystems */
187 bool find_cgroup_subsystems(char ***kernel_subsystems
)
193 size_t kernel_subsystems_count
= 0;
194 size_t kernel_subsystems_capacity
= 0;
197 proc_cgroups
= fopen_cloexec("/proc/cgroups", "r");
201 while (getline(&line
, &sz
, proc_cgroups
) != -1) {
204 int hierarchy_number
;
211 tab1
= strchr(line
, '\t');
215 tab2
= strchr(tab1
, '\t');
221 hierarchy_number
= strtoul(tab1
, &tab2
, 10);
224 (void)hierarchy_number
;
226 r
= lxc_grow_array((void ***)kernel_subsystems
, &kernel_subsystems_capacity
, kernel_subsystems_count
+ 1, 12);
229 (*kernel_subsystems
)[kernel_subsystems_count
] = strdup(line
);
230 if (!(*kernel_subsystems
)[kernel_subsystems_count
])
232 kernel_subsystems_count
++;
237 fclose(proc_cgroups
);
242 /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
243 * since mount points don't specify hierarchy number and
244 * /proc/cgroups does not contain named hierarchies
246 static bool find_cgroup_hierarchies(struct cgroup_meta_data
*meta_data
,
247 bool all_kernel_subsystems
, bool all_named_subsystems
,
248 const char **subsystem_whitelist
)
250 FILE *proc_self_cgroup
;
255 size_t hierarchy_capacity
= 0;
257 proc_self_cgroup
= fopen_cloexec("/proc/self/cgroup", "r");
258 /* if for some reason (because of setns() and pid namespace for example),
259 * /proc/self is not valid, we try /proc/1/cgroup... */
260 if (!proc_self_cgroup
)
261 proc_self_cgroup
= fopen_cloexec("/proc/1/cgroup", "r");
262 if (!proc_self_cgroup
)
265 while (getline(&line
, &sz
, proc_self_cgroup
) != -1) {
266 /* file format: hierarchy:subsystems:group,
267 * we only extract hierarchy and subsystems
271 int hierarchy_number
;
272 struct cgroup_hierarchy
*h
= NULL
;
278 colon1
= strchr(line
, ':');
282 colon2
= strchr(colon1
, ':');
288 hierarchy_number
= strtoul(line
, &colon2
, 10);
289 if (!colon2
|| *colon2
)
292 if (hierarchy_number
> meta_data
->maximum_hierarchy
) {
293 /* lxc_grow_array will never shrink, so even if we find a lower
294 * hierarchy number here, the array will never be smaller
296 r
= lxc_grow_array((void ***)&meta_data
->hierarchies
, &hierarchy_capacity
, hierarchy_number
+ 1, 12);
300 meta_data
->maximum_hierarchy
= hierarchy_number
;
303 /* this shouldn't happen, we had this already */
304 if (meta_data
->hierarchies
[hierarchy_number
])
307 h
= calloc(1, sizeof(struct cgroup_hierarchy
));
311 meta_data
->hierarchies
[hierarchy_number
] = h
;
313 h
->index
= hierarchy_number
;
314 h
->subsystems
= lxc_string_split_and_trim(colon1
, ',');
317 /* see if this hierarchy should be considered */
318 if (!all_kernel_subsystems
|| !all_named_subsystems
) {
319 for (p
= h
->subsystems
; *p
; p
++) {
320 if (!strncmp(*p
, "name=", 5)) {
321 if (all_named_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
326 if (all_kernel_subsystems
|| (subsystem_whitelist
&& lxc_string_in_array(*p
, subsystem_whitelist
))) {
/* we want all hierarchies anyway */
340 fclose(proc_self_cgroup
);
345 /* Step 3: determine all mount points of each hierarchy */
346 static bool find_hierarchy_mountpts( struct cgroup_meta_data
*meta_data
, char **kernel_subsystems
)
349 FILE *proc_self_mountinfo
;
352 char **tokens
= NULL
;
353 size_t mount_point_count
= 0;
354 size_t mount_point_capacity
= 0;
355 size_t token_capacity
= 0;
358 proc_self_mountinfo
= fopen_cloexec("/proc/self/mountinfo", "r");
/* if for some reason (because of setns() and pid namespace for example),
 * /proc/self is not valid, we try /proc/1/mountinfo... */
361 if (!proc_self_mountinfo
)
362 proc_self_mountinfo
= fopen_cloexec("/proc/1/mountinfo", "r");
363 if (!proc_self_mountinfo
)
366 while (getline(&line
, &sz
, proc_self_mountinfo
) != -1) {
367 char *token
, *line_tok
, *saveptr
= NULL
;
369 struct cgroup_mount_point
*mount_point
;
370 struct cgroup_hierarchy
*h
;
373 if (line
[0] && line
[strlen(line
) - 1] == '\n')
374 line
[strlen(line
) - 1] = '\0';
376 for (i
= 0, line_tok
= line
; (token
= strtok_r(line_tok
, " ", &saveptr
)); line_tok
= NULL
) {
377 r
= lxc_grow_array((void ***)&tokens
, &token_capacity
, i
+ 1, 64);
383 /* layout of /proc/self/mountinfo:
386 * 2: device major:minor
389 * 5: per-mount options
390 * [optional X]: additional data
394 * X+10: per-superblock options
396 for (j
= 6; j
< i
&& tokens
[j
]; j
++)
397 if (!strcmp(tokens
[j
], "-"))
400 /* could not find separator */
401 if (j
>= i
|| !tokens
[j
])
403 /* there should be exactly three fields after
409 /* not a cgroup filesystem */
410 if (strcmp(tokens
[j
+ 1], "cgroup") != 0)
413 subsystems
= subsystems_from_mount_options(tokens
[j
+ 3], kernel_subsystems
);
418 for (k
= 1; k
<= meta_data
->maximum_hierarchy
; k
++) {
419 if (meta_data
->hierarchies
[k
] &&
420 meta_data
->hierarchies
[k
]->subsystems
[0] &&
421 lxc_string_in_array(meta_data
->hierarchies
[k
]->subsystems
[0], (const char **)subsystems
)) {
422 /* TODO: we could also check if the lists really match completely,
423 * just to have an additional sanity check */
424 h
= meta_data
->hierarchies
[k
];
428 lxc_free_array((void **)subsystems
, free
);
430 r
= lxc_grow_array((void ***)&meta_data
->mount_points
, &mount_point_capacity
, mount_point_count
+ 1, 12);
434 /* create mount point object */
435 mount_point
= calloc(1, sizeof(*mount_point
));
439 meta_data
->mount_points
[mount_point_count
++] = mount_point
;
441 mount_point
->hierarchy
= h
;
442 mount_point
->mount_point
= strdup(tokens
[4]);
443 mount_point
->mount_prefix
= strdup(tokens
[3]);
444 if (!mount_point
->mount_point
|| !mount_point
->mount_prefix
)
446 mount_point
->read_only
= !lxc_string_in_list("rw", tokens
[5], ',');
448 if (!strcmp(mount_point
->mount_prefix
, "/")) {
449 if (mount_point
->read_only
) {
450 if (!h
->ro_absolute_mount_point
)
451 h
->ro_absolute_mount_point
= mount_point
;
453 if (!h
->rw_absolute_mount_point
)
454 h
->rw_absolute_mount_point
= mount_point
;
458 k
= lxc_array_len((void **)h
->all_mount_points
);
459 r
= lxc_grow_array((void ***)&h
->all_mount_points
, &h
->all_mount_point_capacity
, k
+ 1, 4);
462 h
->all_mount_points
[k
] = mount_point
;
467 fclose(proc_self_mountinfo
);
473 struct cgroup_meta_data
*lxc_cgroup_load_meta2(const char **subsystem_whitelist
)
475 bool all_kernel_subsystems
= true;
476 bool all_named_subsystems
= false;
477 struct cgroup_meta_data
*meta_data
= NULL
;
478 char **kernel_subsystems
= NULL
;
481 /* if the subsystem whitelist is not specified, include all
482 * hierarchies that contain kernel subsystems by default but
483 * no hierarchies that only contain named subsystems
485 * if it is specified, the specifier @all will select all
486 * hierarchies, @kernel will select all hierarchies with
487 * kernel subsystems and @named will select all named
490 all_kernel_subsystems
= subsystem_whitelist
?
491 (lxc_string_in_array("@kernel", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
493 all_named_subsystems
= subsystem_whitelist
?
494 (lxc_string_in_array("@named", subsystem_whitelist
) || lxc_string_in_array("@all", subsystem_whitelist
)) :
497 meta_data
= calloc(1, sizeof(struct cgroup_meta_data
));
502 if (!find_cgroup_subsystems(&kernel_subsystems
))
505 if (!find_cgroup_hierarchies(meta_data
, all_kernel_subsystems
,
506 all_named_subsystems
, subsystem_whitelist
))
509 if (!find_hierarchy_mountpts(meta_data
, kernel_subsystems
))
512 /* oops, we couldn't find anything */
513 if (!meta_data
->hierarchies
|| !meta_data
->mount_points
) {
518 lxc_free_array((void **)kernel_subsystems
, free
);
523 lxc_free_array((void **)kernel_subsystems
, free
);
524 lxc_cgroup_put_meta(meta_data
);
529 struct cgroup_meta_data
*lxc_cgroup_get_meta(struct cgroup_meta_data
*meta_data
)
535 struct cgroup_meta_data
*lxc_cgroup_put_meta(struct cgroup_meta_data
*meta_data
)
540 if (--meta_data
->ref
> 0)
542 lxc_free_array((void **)meta_data
->mount_points
, (lxc_free_fn
)lxc_cgroup_mount_point_free
);
543 if (meta_data
->hierarchies
) {
544 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++)
545 lxc_cgroup_hierarchy_free(meta_data
->hierarchies
[i
]);
547 free(meta_data
->hierarchies
);
552 struct cgroup_hierarchy
*lxc_cgroup_find_hierarchy(struct cgroup_meta_data
*meta_data
, const char *subsystem
)
555 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
556 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
557 if (h
&& lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
563 struct cgroup_mount_point
*lxc_cgroup_find_mount_point(struct cgroup_hierarchy
*hierarchy
, const char *group
, bool should_be_writable
)
565 struct cgroup_mount_point
**mps
;
566 struct cgroup_mount_point
*current_result
= NULL
;
567 ssize_t quality
= -1;
570 if (hierarchy
->rw_absolute_mount_point
)
571 return hierarchy
->rw_absolute_mount_point
;
572 if (!should_be_writable
&& hierarchy
->ro_absolute_mount_point
)
573 return hierarchy
->ro_absolute_mount_point
;
575 for (mps
= hierarchy
->all_mount_points
; mps
&& *mps
; mps
++) {
576 struct cgroup_mount_point
*mp
= *mps
;
577 size_t prefix_len
= mp
->mount_prefix
? strlen(mp
->mount_prefix
) : 0;
579 if (prefix_len
== 1 && mp
->mount_prefix
[0] == '/')
582 if (should_be_writable
&& mp
->read_only
)
586 (strncmp(group
, mp
->mount_prefix
, prefix_len
) == 0 &&
587 (group
[prefix_len
] == '\0' || group
[prefix_len
] == '/'))) {
588 /* search for the best quality match, i.e. the match with the
589 * shortest prefix where this group is still contained
591 if (quality
== -1 || prefix_len
< quality
) {
593 quality
= prefix_len
;
600 return current_result
;
603 char *lxc_cgroup_find_abs_path(const char *subsystem
, const char *group
, bool should_be_writable
, const char *suffix
)
605 struct cgroup_meta_data
*meta_data
;
606 struct cgroup_hierarchy
*h
;
607 struct cgroup_mount_point
*mp
;
611 meta_data
= lxc_cgroup_load_meta();
615 h
= lxc_cgroup_find_hierarchy(meta_data
, subsystem
);
619 mp
= lxc_cgroup_find_mount_point(h
, group
, should_be_writable
);
623 result
= cgroup_to_absolute_path(mp
, group
, suffix
);
627 lxc_cgroup_put_meta(meta_data
);
632 lxc_cgroup_put_meta(meta_data
);
637 struct cgroup_process_info
*lxc_cgroup_process_info_get(pid_t pid
, struct cgroup_meta_data
*meta
)
640 snprintf(pid_buf
, 32, "/proc/%lu/cgroup", (unsigned long)pid
);
641 return lxc_cgroup_process_info_getx(pid_buf
, meta
);
644 struct cgroup_process_info
*lxc_cgroup_process_info_get_init(struct cgroup_meta_data
*meta
)
646 return lxc_cgroup_process_info_get(1, meta
);
649 struct cgroup_process_info
*lxc_cgroup_process_info_get_self(struct cgroup_meta_data
*meta
)
651 struct cgroup_process_info
*i
;
652 i
= lxc_cgroup_process_info_getx("/proc/self/cgroup", meta
);
654 i
= lxc_cgroup_process_info_get(getpid(), meta
);
/*
 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
 * is already in a new cgroup named after the pid. 'mnt' is passed in as
 * the full current cgroup. Say that is /sys/fs/cgroup/lxc/2975 and the container
 * name is c1. We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1,
 * and return the string /sys/fs/cgroup/lxc/c1.
 */
665 static char *cgroup_rename_nsgroup(const char *mountpath
, const char *oldname
, pid_t pid
, const char *name
)
667 char *dir
, *fulloldpath
;
668 char *newname
, *fullnewpath
;
669 int len
, newlen
, ret
;
672 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
675 * fulloldpath = /cgroup/ab/2375
676 * fullnewpath = /cgroup/ab/c1
679 dir
= alloca(strlen(oldname
) + 1);
680 strcpy(dir
, oldname
);
682 len
= strlen(oldname
) + strlen(mountpath
) + 22;
683 fulloldpath
= alloca(len
);
684 ret
= snprintf(fulloldpath
, len
, "%s/%s/%ld", mountpath
, oldname
, (unsigned long)pid
);
685 if (ret
< 0 || ret
>= len
)
688 len
= strlen(dir
) + strlen(name
) + 2;
689 newname
= malloc(len
);
691 SYSERROR("Out of memory");
694 ret
= snprintf(newname
, len
, "%s/%s", dir
, name
);
695 if (ret
< 0 || ret
>= len
) {
700 newlen
= strlen(mountpath
) + len
+ 2;
701 fullnewpath
= alloca(newlen
);
702 ret
= snprintf(fullnewpath
, newlen
, "%s/%s", mountpath
, newname
);
703 if (ret
< 0 || ret
>= newlen
) {
708 if (access(fullnewpath
, F_OK
) == 0) {
709 if (rmdir(fullnewpath
) != 0) {
710 SYSERROR("container cgroup %s already exists.", fullnewpath
);
715 if (rename(fulloldpath
, fullnewpath
)) {
716 SYSERROR("failed to rename cgroup %s->%s", fulloldpath
, fullnewpath
);
721 DEBUG("'%s' renamed to '%s'", oldname
, newname
);
726 /* create a new cgroup */
727 struct cgroup_process_info
*lxc_cgroupfs_create(const char *name
, const char *path_pattern
, struct cgroup_meta_data
*meta_data
, const char *sub_pattern
)
729 char **cgroup_path_components
= NULL
;
731 char *path_so_far
= NULL
;
732 char **new_cgroup_paths
= NULL
;
733 char **new_cgroup_paths_sub
= NULL
;
734 struct cgroup_mount_point
*mp
;
735 struct cgroup_hierarchy
*h
;
736 struct cgroup_process_info
*base_info
= NULL
;
737 struct cgroup_process_info
*info_ptr
;
741 bool had_sub_pattern
= false;
744 if (!is_valid_cgroup(name
)) {
745 ERROR("Invalid cgroup name: '%s'", name
);
750 if (!strstr(path_pattern
, "%n")) {
751 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern
);
756 /* we will modify the result of this operation directly,
757 * so we don't have to copy the data structure
759 base_info
= (path_pattern
[0] == '/') ?
760 lxc_cgroup_process_info_get_init(meta_data
) :
761 lxc_cgroup_process_info_get_self(meta_data
);
765 new_cgroup_paths
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
766 if (!new_cgroup_paths
)
767 goto out_initial_error
;
769 new_cgroup_paths_sub
= calloc(meta_data
->maximum_hierarchy
+ 1, sizeof(char *));
770 if (!new_cgroup_paths_sub
)
771 goto out_initial_error
;
773 /* find mount points we can use */
774 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
775 h
= info_ptr
->hierarchy
;
776 mp
= lxc_cgroup_find_mount_point(h
, info_ptr
->cgroup_path
, true);
778 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h
->index
);
779 goto out_initial_error
;
781 info_ptr
->designated_mount_point
= mp
;
783 if (lxc_string_in_array("ns", (const char **)h
->subsystems
))
785 if (handle_cgroup_settings(mp
, info_ptr
->cgroup_path
) < 0) {
786 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
787 goto out_initial_error
;
791 /* normalize the path */
792 cgroup_path_components
= lxc_normalize_path(path_pattern
);
793 if (!cgroup_path_components
)
794 goto out_initial_error
;
796 /* go through the path components to see if we can create them */
797 for (p
= cgroup_path_components
; *p
|| (sub_pattern
&& !had_sub_pattern
); p
++) {
798 /* we only want to create the same component with -1, -2, etc.
799 * if the component contains the container name itself, otherwise
800 * it's not an error if it already exists
802 char *p_eff
= *p
? *p
: (char *)sub_pattern
;
803 bool contains_name
= strstr(p_eff
, "%n");
804 char *current_component
= NULL
;
805 char *current_subpath
= NULL
;
806 char *current_entire_path
= NULL
;
811 /* if we are processing the subpattern, we want to make sure
812 * loop is ended the next time around
815 had_sub_pattern
= true;
819 goto find_name_on_this_level
;
821 cleanup_name_on_this_level
:
822 /* This is reached if we found a name clash.
823 * In that case, remove the cgroup from all previous hierarchies
825 for (j
= 0, info_ptr
= base_info
; j
< i
&& info_ptr
; info_ptr
= info_ptr
->next
, j
++) {
826 r
= remove_cgroup(info_ptr
->designated_mount_point
, info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1], false);
828 WARN("could not clean up cgroup we created when trying to create container");
829 free(info_ptr
->created_paths
[info_ptr
->created_paths_count
- 1]);
830 info_ptr
->created_paths
[--info_ptr
->created_paths_count
] = NULL
;
832 if (current_component
!= current_subpath
)
833 free(current_subpath
);
834 if (current_component
!= p_eff
)
835 free(current_component
);
836 current_component
= current_subpath
= NULL
;
837 /* try again with another suffix */
840 find_name_on_this_level
:
841 /* determine name of the path component we should create */
842 if (contains_name
&& suffix
> 0) {
843 char *buf
= calloc(strlen(name
) + 32, 1);
845 goto out_initial_error
;
846 snprintf(buf
, strlen(name
) + 32, "%s-%u", name
, suffix
);
847 current_component
= lxc_string_replace("%n", buf
, p_eff
);
850 current_component
= contains_name
? lxc_string_replace("%n", name
, p_eff
) : p_eff
;
852 parts
[0] = path_so_far
;
853 parts
[1] = current_component
;
855 current_subpath
= path_so_far
? lxc_string_join("/", (const char **)parts
, false) : current_component
;
857 /* Now go through each hierarchy and try to create the
858 * corresponding cgroup
860 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
863 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
865 current_entire_path
= NULL
;
867 parts2
[0] = !strcmp(info_ptr
->cgroup_path
, "/") ? "" : info_ptr
->cgroup_path
;
868 parts2
[1] = current_subpath
;
870 current_entire_path
= lxc_string_join("/", (const char **)parts2
, false);
873 /* we are processing the subpath, so only update that one */
874 free(new_cgroup_paths_sub
[i
]);
875 new_cgroup_paths_sub
[i
] = strdup(current_entire_path
);
876 if (!new_cgroup_paths_sub
[i
])
877 goto cleanup_from_error
;
879 /* remember which path was used on this controller */
880 free(new_cgroup_paths
[i
]);
881 new_cgroup_paths
[i
] = strdup(current_entire_path
);
882 if (!new_cgroup_paths
[i
])
883 goto cleanup_from_error
;
886 r
= create_cgroup(info_ptr
->designated_mount_point
, current_entire_path
);
887 if (r
< 0 && errno
== EEXIST
&& contains_name
) {
888 /* name clash => try new name with new suffix */
889 free(current_entire_path
);
890 current_entire_path
= NULL
;
891 goto cleanup_name_on_this_level
;
892 } else if (r
< 0 && errno
!= EEXIST
) {
893 SYSERROR("Could not create cgroup %s", current_entire_path
);
894 goto cleanup_from_error
;
896 /* successfully created */
897 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
899 goto cleanup_from_error
;
900 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = current_entire_path
;
902 /* if we didn't create the cgroup, then we have to make sure that
903 * further cgroups will be created properly
905 if (handle_cgroup_settings(mp
, info_ptr
->cgroup_path
) < 0) {
906 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
907 goto cleanup_from_error
;
910 /* already existed but path component of pattern didn't contain '%n',
911 * so this is not an error; but then we don't need current_entire_path
914 free(current_entire_path
);
915 current_entire_path
= NULL
;
919 /* save path so far */
921 path_so_far
= strdup(current_subpath
);
923 goto cleanup_from_error
;
926 if (current_component
!= current_subpath
)
927 free(current_subpath
);
928 if (current_component
!= p_eff
)
929 free(current_component
);
930 current_component
= current_subpath
= NULL
;
/* called if an error occurred in the loop, so we
 * do some additional cleanup here
 */
938 if (current_component
!= current_subpath
)
939 free(current_subpath
);
940 if (current_component
!= p_eff
)
941 free(current_component
);
942 free(current_entire_path
);
944 goto out_initial_error
;
947 /* we're done, now update the paths */
948 for (i
= 0, info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
, i
++) {
949 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
950 * will take care of it
951 * Since we do a continue in above loop, new_cgroup_paths[i] is
952 * unset anyway, as is new_cgroup_paths_sub[i]
954 if (lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
956 free(info_ptr
->cgroup_path
);
957 info_ptr
->cgroup_path
= new_cgroup_paths
[i
];
958 info_ptr
->cgroup_path_sub
= new_cgroup_paths_sub
[i
];
960 /* don't use lxc_free_array since we used the array members
961 * to store them in our result...
963 free(new_cgroup_paths
);
964 free(new_cgroup_paths_sub
);
966 lxc_free_array((void **)cgroup_path_components
, free
);
972 lxc_cgroup_process_info_free_and_remove(base_info
);
973 lxc_free_array((void **)new_cgroup_paths
, free
);
974 lxc_free_array((void **)new_cgroup_paths_sub
, free
);
975 lxc_free_array((void **)cgroup_path_components
, free
);
980 int lxc_cgroup_create_legacy(struct cgroup_process_info
*base_info
, const char *name
, pid_t pid
)
982 struct cgroup_process_info
*info_ptr
;
985 for (info_ptr
= base_info
; info_ptr
; info_ptr
= info_ptr
->next
) {
986 if (!lxc_string_in_array("ns", (const char **)info_ptr
->hierarchy
->subsystems
))
/*
 * For any path which has ns cgroup mounted, handler->pid is already
 * moved into a cgroup called '%d % (handler->pid)'. Rename it to
 * the cgroup name and record that.
 */
993 char *tmp
= cgroup_rename_nsgroup((const char *)info_ptr
->designated_mount_point
->mount_point
,
994 info_ptr
->cgroup_path
, pid
, name
);
997 free(info_ptr
->cgroup_path
);
998 info_ptr
->cgroup_path
= tmp
;
999 r
= lxc_grow_array((void ***)&info_ptr
->created_paths
, &info_ptr
->created_paths_capacity
, info_ptr
->created_paths_count
+ 1, 8);
1005 info_ptr
->created_paths
[info_ptr
->created_paths_count
++] = tmp
;
1010 /* get the cgroup membership of a given container */
1011 struct cgroup_process_info
*lxc_cgroup_get_container_info(const char *name
, const char *lxcpath
, struct cgroup_meta_data
*meta_data
)
1013 struct cgroup_process_info
*result
= NULL
;
1014 int saved_errno
= 0;
1016 struct cgroup_process_info
**cptr
= &result
;
1017 struct cgroup_process_info
*entry
= NULL
;
1020 for (i
= 0; i
<= meta_data
->maximum_hierarchy
; i
++) {
1021 struct cgroup_hierarchy
*h
= meta_data
->hierarchies
[i
];
1025 /* use the command interface to look for the cgroup */
1026 path
= lxc_cmd_get_cgroup_path(name
, lxcpath
, h
->subsystems
[0]);
1030 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1033 entry
->meta_ref
= lxc_cgroup_get_meta(meta_data
);
1034 entry
->hierarchy
= h
;
1035 entry
->cgroup_path
= path
;
1038 /* it is not an error if we don't find anything here,
1039 * it is up to the caller to decide what to do in that
1041 entry
->designated_mount_point
= lxc_cgroup_find_mount_point(h
, entry
->cgroup_path
, true);
1044 cptr
= &entry
->next
;
1050 saved_errno
= errno
;
1052 lxc_cgroup_process_info_free(result
);
1053 lxc_cgroup_process_info_free(entry
);
1054 errno
= saved_errno
;
/* move a process to the cgroups specified by the membership */
1059 int lxc_cgroupfs_enter(struct cgroup_process_info
*info
, pid_t pid
, bool enter_sub
)
1062 char *cgroup_tasks_fn
;
1064 struct cgroup_process_info
*info_ptr
;
1066 snprintf(pid_buf
, 32, "%lu", (unsigned long)pid
);
1067 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1068 char *cgroup_path
= (enter_sub
&& info_ptr
->cgroup_path_sub
) ?
1069 info_ptr
->cgroup_path_sub
:
1070 info_ptr
->cgroup_path
;
1072 if (!info_ptr
->designated_mount_point
) {
1073 info_ptr
->designated_mount_point
= lxc_cgroup_find_mount_point(info_ptr
->hierarchy
, cgroup_path
, true);
1074 if (!info_ptr
->designated_mount_point
) {
1075 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid
, cgroup_path
);
1080 cgroup_tasks_fn
= cgroup_to_absolute_path(info_ptr
->designated_mount_point
, cgroup_path
, "/tasks");
1081 if (!cgroup_tasks_fn
) {
1082 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1086 r
= lxc_write_to_file(cgroup_tasks_fn
, pid_buf
, strlen(pid_buf
), false);
1087 free(cgroup_tasks_fn
);
1089 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid
, cgroup_path
);
1097 /* free process membership information */
1098 void lxc_cgroup_process_info_free(struct cgroup_process_info
*info
)
1100 struct cgroup_process_info
*next
;
1104 lxc_cgroup_put_meta(info
->meta_ref
);
1105 free(info
->cgroup_path
);
1106 free(info
->cgroup_path_sub
);
1107 lxc_free_array((void **)info
->created_paths
, free
);
1109 lxc_cgroup_process_info_free(next
);
1112 /* free process membership information and remove cgroups that were created */
1113 void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info
*info
)
1115 struct cgroup_process_info
*next
;
1121 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1123 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1125 /* ignore return value here, perhaps we created the
1126 * '/lxc' cgroup in this container but another container
1127 * is still running (for example)
1129 (void)remove_cgroup(mp
, info
->cgroup_path
, true);
1131 for (pp
= info
->created_paths
; pp
&& *pp
; pp
++);
1132 for ((void)(pp
&& --pp
); info
->created_paths
&& pp
>= info
->created_paths
; --pp
) {
1135 free(info
->created_paths
);
1136 lxc_cgroup_put_meta(info
->meta_ref
);
1137 free(info
->cgroup_path
);
1138 free(info
->cgroup_path_sub
);
1140 lxc_cgroup_process_info_free_and_remove(next
);
1143 static char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem
, struct lxc_handler
*handler
)
1145 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
1146 struct cgroup_process_info
*info
= d
->info
;
1147 info
= find_info_for_subsystem(info
, subsystem
);
1150 return info
->cgroup_path
;
1153 char *lxc_cgroup_get_hierarchy_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1155 return lxc_cmd_get_cgroup_path(name
, lxcpath
, subsystem
);
1158 char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem
, struct lxc_handler
*handler
)
1160 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
1161 struct cgroup_process_info
*info
= d
->info
;
1162 struct cgroup_mount_point
*mp
= NULL
;
1164 info
= find_info_for_subsystem(info
, subsystem
);
1167 if (info
->designated_mount_point
) {
1168 mp
= info
->designated_mount_point
;
1170 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1174 return cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1177 char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem
, const char *name
, const char *lxcpath
)
1179 struct cgroup_meta_data
*meta
;
1180 struct cgroup_process_info
*base_info
, *info
;
1181 struct cgroup_mount_point
*mp
;
1182 char *result
= NULL
;
1184 meta
= lxc_cgroup_load_meta();
1187 base_info
= lxc_cgroup_get_container_info(name
, lxcpath
, meta
);
1190 info
= find_info_for_subsystem(base_info
, subsystem
);
1193 if (info
->designated_mount_point
) {
1194 mp
= info
->designated_mount_point
;
1196 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1200 result
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1203 lxc_cgroup_process_info_free(base_info
);
1205 lxc_cgroup_put_meta(meta
);
1209 int lxc_cgroup_set_handler(const char *filename
, const char *value
, struct lxc_handler
*handler
)
1211 char *subsystem
= NULL
, *p
, *path
;
1214 subsystem
= alloca(strlen(filename
) + 1);
1215 strcpy(subsystem
, filename
);
1216 if ((p
= index(subsystem
, '.')) != NULL
)
1219 path
= lxc_cgroup_get_hierarchy_abs_path_handler(subsystem
, handler
);
1221 ret
= do_cgroup_set(path
, filename
, value
);
/*
 * Read the cgroup file @filename (e.g. "freezer.state") of the container
 * described by @handler into @value (at most @len bytes).
 *
 * The subsystem name is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * cannot be resolved.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	size_t flen = strlen(filename);
	char *controller = alloca(flen + 1);
	char *dot, *dir;
	int rc;

	/* scratch copy so the name can be cut at the first dot */
	memcpy(controller, filename, flen + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
/*
 * cgroupfs backend "set": write @value to the cgroup file @filename of
 * container @name running in @lxcpath.
 *
 * The subsystem name is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * cannot be resolved.
 */
int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t flen = strlen(filename);
	char *controller = alloca(flen + 1);
	char *dot, *dir;
	int rc;

	/* scratch copy so the name can be cut at the first dot */
	memcpy(controller, filename, flen + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
/*
 * cgroupfs backend "get": read the cgroup file @filename of container
 * @name running in @lxcpath into @value (at most @len bytes).
 *
 * The subsystem name is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * cannot be resolved.
 */
int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t flen = strlen(filename);
	char *controller = alloca(flen + 1);
	char *dot, *dir;
	int rc;

	/* scratch copy so the name can be cut at the first dot */
	memcpy(controller, filename, flen + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
1282 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
1283 * file for a running container.
1285 * @filename : the file of interest (e.g. "freezer.state") or
1286 * the subsystem name (e.g. "freezer") in which case
1287 * the directory where the cgroup may be modified
1289 * @name : name of container to connect to
1290 * @lxcpath : the lxcpath in which the container is running
1292 * This is the exported function, which determines cgpath from the
1293 * lxc-start of the @name container running in @lxcpath.
1295 * Returns path on success, NULL on error. The caller must free()
1296 * the returned path.
1298 char *lxc_cgroup_path_get(const char *filename
, const char *name
,
1299 const char *lxcpath
)
1301 char *subsystem
= NULL
, *longer_file
= NULL
, *p
, *group
, *path
;
1303 subsystem
= alloca(strlen(filename
) + 1);
1304 strcpy(subsystem
, filename
);
1305 if ((p
= index(subsystem
, '.')) != NULL
) {
1307 longer_file
= alloca(strlen(filename
) + 2);
1308 longer_file
[0] = '/';
1309 strcpy(longer_file
+ 1, filename
);
1312 group
= lxc_cgroup_get_hierarchy_path(subsystem
, name
, lxcpath
);
1316 path
= lxc_cgroup_find_abs_path(subsystem
, group
, true, p
? longer_file
: NULL
);
1321 int lxc_setup_mount_cgroup(const char *root
, struct lxc_cgroup_info
*cgroup_info
, int type
)
1323 size_t bufsz
= strlen(root
) + sizeof("/sys/fs/cgroup");
1325 char **parts
= NULL
;
1326 char *dirname
= NULL
;
1327 char *abs_path
= NULL
;
1328 char *abs_path2
= NULL
;
1329 struct cgfs_data
*cgfs_d
;
1330 struct cgroup_process_info
*info
, *base_info
;
1331 int r
, saved_errno
= 0;
1335 if (strcmp(active_cg_ops
->name
, "cgmanager") == 0) {
1336 // todo - offer to bind-mount /sys/fs/cgroup/cgmanager/
1340 cgfs_d
= cgroup_info
->data
;
1341 base_info
= cgfs_d
->info
;
1343 if (type
< LXC_AUTO_CGROUP_RO
|| type
> LXC_AUTO_CGROUP_FULL_MIXED
) {
1344 ERROR("could not mount cgroups into container: invalid type specified internally");
1349 path
= calloc(1, bufsz
);
1352 snprintf(path
, bufsz
, "%s/sys/fs/cgroup", root
);
1353 r
= mount("cgroup_root", path
, "tmpfs", MS_NOSUID
|MS_NODEV
|MS_NOEXEC
|MS_RELATIME
, "size=10240k,mode=755");
1355 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1359 /* now mount all the hierarchies we care about */
1360 for (info
= base_info
; info
; info
= info
->next
) {
1361 size_t subsystem_count
, i
;
1362 struct cgroup_mount_point
*mp
= info
->designated_mount_point
;
1364 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, true);
1366 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1370 subsystem_count
= lxc_array_len((void **)info
->hierarchy
->subsystems
);
1371 parts
= calloc(subsystem_count
+ 1, sizeof(char *));
1375 for (i
= 0; i
< subsystem_count
; i
++) {
1376 if (!strncmp(info
->hierarchy
->subsystems
[i
], "name=", 5))
1377 parts
[i
] = info
->hierarchy
->subsystems
[i
] + 5;
1379 parts
[i
] = info
->hierarchy
->subsystems
[i
];
1381 dirname
= lxc_string_join(",", (const char **)parts
, false);
1385 /* create subsystem directory */
1386 abs_path
= lxc_append_paths(path
, dirname
);
1389 r
= mkdir_p(abs_path
, 0755);
1390 if (r
< 0 && errno
!= EEXIST
) {
1391 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname
);
1395 abs_path2
= lxc_append_paths(abs_path
, info
->cgroup_path
);
1399 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_RW
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1400 /* bind-mount the cgroup entire filesystem there */
1401 if (strcmp(mp
->mount_prefix
, "/") != 0) {
1402 /* FIXME: maybe we should just try to remount the entire hierarchy
1403 * with a regular mount command? may that works? */
1404 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname
);
1407 r
= mount(mp
->mount_point
, abs_path
, "none", MS_BIND
, 0);
1409 SYSERROR("error bind-mounting %s to %s", mp
->mount_point
, abs_path
);
1412 /* main cgroup path should be read-only */
1413 if (type
== LXC_AUTO_CGROUP_FULL_RO
|| type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1414 r
= mount(NULL
, abs_path
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1416 SYSERROR("error re-mounting %s readonly", abs_path
);
1420 /* own cgroup should be read-write */
1421 if (type
== LXC_AUTO_CGROUP_FULL_MIXED
) {
1422 r
= mount(abs_path2
, abs_path2
, NULL
, MS_BIND
, NULL
);
1424 SYSERROR("error bind-mounting %s onto itself", abs_path2
);
1427 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
, NULL
);
1429 SYSERROR("error re-mounting %s readwrite", abs_path2
);
1434 /* create path for container's cgroup */
1435 r
= mkdir_p(abs_path2
, 0755);
1436 if (r
< 0 && errno
!= EEXIST
) {
1437 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname
, info
->cgroup_path
);
1444 /* bind-mount container's cgroup to that directory */
1445 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1448 r
= mount(abs_path
, abs_path2
, "none", MS_BIND
, 0);
1450 SYSERROR("error bind-mounting %s to %s", abs_path
, abs_path2
);
1453 if (type
== LXC_AUTO_CGROUP_RO
) {
1454 r
= mount(NULL
, abs_path2
, NULL
, MS_REMOUNT
|MS_BIND
|MS_RDONLY
, NULL
);
1456 SYSERROR("error re-mounting %s readonly", abs_path2
);
1467 /* add symlinks for every single subsystem */
1468 if (subsystem_count
> 1) {
1469 for (i
= 0; i
< subsystem_count
; i
++) {
1470 abs_path
= lxc_append_paths(path
, parts
[i
]);
1473 r
= symlink(dirname
, abs_path
);
1475 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts
[i
], dirname
);
1486 /* try to remount the tmpfs readonly, since the container shouldn't
1487 * change anything (this will also make sure that trying to create
1488 * new cgroups outside the allowed area fails with an error instead
1489 * of simply causing this to create directories in the tmpfs itself)
1491 if (type
!= LXC_AUTO_CGROUP_RW
&& type
!= LXC_AUTO_CGROUP_FULL_RW
)
1492 mount(NULL
, path
, NULL
, MS_REMOUNT
|MS_RDONLY
, NULL
);
1499 saved_errno
= errno
;
1505 errno
= saved_errno
;
1509 int lxc_cgroup_nrtasks_handler(struct lxc_handler
*handler
)
1511 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
1512 struct cgroup_process_info
*info
= d
->info
;
1513 struct cgroup_mount_point
*mp
= NULL
;
1514 char *abs_path
= NULL
;
1522 if (info
->designated_mount_point
) {
1523 mp
= info
->designated_mount_point
;
1525 mp
= lxc_cgroup_find_mount_point(info
->hierarchy
, info
->cgroup_path
, false);
1530 abs_path
= cgroup_to_absolute_path(mp
, info
->cgroup_path
, NULL
);
1534 ret
= cgroup_recursive_task_count(abs_path
);
1539 static struct cgroup_process_info
*
1540 lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str
,
1541 struct cgroup_meta_data
*meta
)
1543 struct cgroup_process_info
*result
= NULL
;
1544 FILE *proc_pid_cgroup
= NULL
;
1547 int saved_errno
= 0;
1548 struct cgroup_process_info
**cptr
= &result
;
1549 struct cgroup_process_info
*entry
= NULL
;
1551 proc_pid_cgroup
= fopen_cloexec(proc_pid_cgroup_str
, "r");
1552 if (!proc_pid_cgroup
)
1555 while (getline(&line
, &sz
, proc_pid_cgroup
) != -1) {
1556 /* file format: hierarchy:subsystems:group */
1560 int hierarchy_number
;
1561 struct cgroup_hierarchy
*h
= NULL
;
1566 if (line
[strlen(line
) - 1] == '\n')
1567 line
[strlen(line
) - 1] = '\0';
1569 colon1
= strchr(line
, ':');
1573 colon2
= strchr(colon1
, ':');
1579 hierarchy_number
= strtoul(line
, &endptr
, 10);
1580 if (!endptr
|| *endptr
)
1583 if (hierarchy_number
> meta
->maximum_hierarchy
) {
1584 /* we encountered a hierarchy we didn't have before,
1585 * so probably somebody remounted some stuff in the
1592 h
= meta
->hierarchies
[hierarchy_number
];
1594 /* we encountered a hierarchy that was thought to be
1595 * dead before, so probably somebody remounted some
1596 * stuff in the mean time...
1602 /* we are told that we should ignore this hierarchy */
1606 entry
= calloc(1, sizeof(struct cgroup_process_info
));
1610 entry
->meta_ref
= lxc_cgroup_get_meta(meta
);
1611 entry
->hierarchy
= h
;
1612 entry
->cgroup_path
= strdup(colon2
);
1613 if (!entry
->cgroup_path
)
1617 cptr
= &entry
->next
;
1621 fclose(proc_pid_cgroup
);
1626 saved_errno
= errno
;
1627 if (proc_pid_cgroup
)
1628 fclose(proc_pid_cgroup
);
1629 lxc_cgroup_process_info_free(result
);
1630 lxc_cgroup_process_info_free(entry
);
1632 errno
= saved_errno
;
/*
 * Extract the cgroup subsystem names from a comma separated mount option
 * string.
 *
 * An option counts as a subsystem when it appears in @kernel_list or
 * starts with "name=" (named hierarchy). The result is a NULL-terminated
 * array of strdup()ed strings, or NULL when nothing matched or on
 * allocation failure (errno preserved).
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	char **list = NULL;
	size_t capacity = 0;
	size_t count = 0;
	char *cursor = NULL;
	char *opt;
	char *copy;
	int saved_errno;

	/* strtok_r() writes into its input, so tokenize a private copy */
	copy = alloca(strlen(mount_options) + 1);
	strcpy(copy, mount_options);

	opt = strtok_r(copy, ",", &cursor);
	while (opt) {
		/* we have a subsystem if it's either in the list of
		 * subsystems provided by the kernel OR if it starts
		 * with name= for named hierarchies
		 */
		if (!strncmp(opt, "name=", 5) || lxc_string_in_array(opt, (const char **)kernel_list)) {
			if (lxc_grow_array((void ***)&list, &capacity, count + 1, 12) < 0)
				goto fail;
			/* keep the array terminated before filling the slot */
			list[count + 1] = NULL;
			list[count] = strdup(opt);
			if (!list[count])
				goto fail;
			count++;
		}
		opt = strtok_r(NULL, ",", &cursor);
	}

	return list;

fail:
	saved_errno = errno;
	lxc_free_array((void **)list, free);
	errno = saved_errno;
	return NULL;
}
1674 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point
*mp
)
1678 free(mp
->mount_point
);
1679 free(mp
->mount_prefix
);
1683 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy
*h
)
1687 lxc_free_array((void **)h
->subsystems
, free
);
1688 free(h
->all_mount_points
);
/*
 * Check whether @name is acceptable as a single cgroup path component.
 *
 * Rejects "." and "..", and any name containing '/', a byte <= 32 or a
 * byte >= 127. An empty name is accepted.
 */
static bool is_valid_cgroup(const char *name)
{
	size_t i;

	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	for (i = 0; name[i] != '\0'; i++) {
		char c = name[i];

		/* Use the ASCII printable characters range(32 - 127)
		 * is reasonable, we kick out 32(SPACE) because it'll
		 * break legacy lxc-ls
		 */
		if (c <= 32 || c >= 127 || c == '/')
			return false;
	}

	return true;
}
/*
 * Create or remove the directory backing cgroup @path on mount point @mp.
 *
 * @do_remove : false -> mkdir(0777); true -> remove
 * @recurse   : when removing, use cgroup_rmdir() instead of plain rmdir()
 *
 * Returns the result of the directory operation (errno preserved), or -1
 * if the absolute path cannot be built.
 */
static int create_or_remove_cgroup(bool do_remove,
		struct cgroup_mount_point *mp, const char *path, int recurse)
{
	int rc, saved_errno = 0;
	char *abs = cgroup_to_absolute_path(mp, path, NULL);

	if (!abs)
		return -1;

	/* create or remove directory */
	if (!do_remove)
		rc = mkdir(abs, 0777);
	else if (recurse)
		rc = cgroup_rmdir(abs);
	else
		rc = rmdir(abs);

	/* free() must not disturb the errno reported to the caller */
	saved_errno = errno;
	free(abs);
	errno = saved_errno;
	return rc;
}
/* Create the directory for cgroup @path on @mp; see create_or_remove_cgroup(). */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool remove_it = false;

	return create_or_remove_cgroup(remove_it, mp, path, false);
}
/*
 * Remove the directory for cgroup @path on @mp, descending into it when
 * @recurse is set; see create_or_remove_cgroup().
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse)
{
	const bool remove_it = true;

	return create_or_remove_cgroup(remove_it, mp, path, recurse);
}
1739 static char *cgroup_to_absolute_path(struct cgroup_mount_point
*mp
,
1740 const char *path
, const char *suffix
)
1742 /* first we have to make sure we subtract the mount point's prefix */
1743 char *prefix
= mp
->mount_prefix
;
1747 /* we want to make sure only absolute paths to cgroups are passed to us */
1748 if (path
[0] != '/') {
1753 if (prefix
&& !strcmp(prefix
, "/"))
1756 /* prefix doesn't match */
1757 if (prefix
&& strncmp(prefix
, path
, strlen(prefix
)) != 0) {
1761 /* if prefix is /foo and path is /foobar */
1762 if (prefix
&& path
[strlen(prefix
)] != '/' && path
[strlen(prefix
)] != '\0') {
1767 /* remove prefix from path */
1768 path
+= prefix
? strlen(prefix
) : 0;
1770 len
= strlen(mp
->mount_point
) + strlen(path
) + (suffix
? strlen(suffix
) : 0);
1771 buf
= calloc(len
+ 1, 1);
1774 rv
= snprintf(buf
, len
+ 1, "%s%s%s", mp
->mount_point
, path
, suffix
? suffix
: "");
1784 static struct cgroup_process_info
*
1785 find_info_for_subsystem(struct cgroup_process_info
*info
, const char *subsystem
)
1787 struct cgroup_process_info
*info_ptr
;
1788 for (info_ptr
= info
; info_ptr
; info_ptr
= info_ptr
->next
) {
1789 struct cgroup_hierarchy
*h
= info_ptr
->hierarchy
;
1790 if (lxc_string_in_array(subsystem
, (const char **)h
->subsystems
))
/*
 * Read the file <cgroup_path>/<sub_filename> into @value (at most @len
 * bytes) via lxc_read_from_file().
 *
 * Returns that call's result with its errno preserved, or -1 if the file
 * name cannot be built.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *file;
	int rc, saved_errno;

	file = lxc_string_join("/", components, false);
	if (!file)
		return -1;

	rc = lxc_read_from_file(file, value, len);

	/* free() must not disturb the errno of the read */
	saved_errno = errno;
	free(file);
	errno = saved_errno;
	return rc;
}
/*
 * Write the string @value (without its terminating NUL) to the file
 * <cgroup_path>/<sub_filename> via lxc_write_to_file().
 *
 * Returns that call's result with its errno preserved, or -1 if the file
 * name cannot be built.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *file;
	int rc, saved_errno;

	file = lxc_string_join("/", components, false);
	if (!file)
		return -1;

	rc = lxc_write_to_file(file, value, strlen(value), false);

	/* free() must not disturb the errno of the write */
	saved_errno = errno;
	free(file);
	errno = saved_errno;
	return rc;
}
1841 static int do_setup_cgroup_limits(struct lxc_handler
*h
,
1842 struct lxc_list
*cgroup_settings
, bool do_devices
)
1844 struct lxc_list
*iterator
;
1845 struct lxc_cgroup
*cg
;
1848 if (lxc_list_empty(cgroup_settings
))
1851 lxc_list_for_each(iterator
, cgroup_settings
) {
1852 cg
= iterator
->elem
;
1854 if (do_devices
== !strncmp("devices", cg
->subsystem
, 7)) {
1855 if (strcmp(cg
->subsystem
, "devices.deny") == 0 &&
1856 cgroup_devices_has_allow_or_deny(h
, cg
->value
, false))
1858 if (strcmp(cg
->subsystem
, "devices.allow") == 0 &&
1859 cgroup_devices_has_allow_or_deny(h
, cg
->value
, true))
1861 if (lxc_cgroup_set_handler(cg
->subsystem
, cg
->value
, h
)) {
1862 ERROR("Error setting %s to %s for %s\n",
1863 cg
->subsystem
, cg
->value
, h
->name
);
1868 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1872 INFO("cgroup has been setup");
/*
 * Inspect the container's devices.list to decide whether writing @v to
 * devices.allow (@for_allow true) or devices.deny (@for_allow false)
 * can be skipped.
 *
 * Deny: only "a" and "a *:* rwm" are considered; the result is true
 * unless devices.list still contains "a *:* rwm".
 * Allow: the result is true when devices.list contains "a *:* rwm" or a
 * line equal to @v.
 *
 * Returns false whenever devices.list cannot be located or opened.
 */
static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h,
		char *v, bool for_allow)
{
	char *cgroup_dir;
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
	if (!cgroup_dir)
		return false;
	parts[0] = cgroup_dir;
	path = lxc_string_join("/", parts, false);
	/* the directory string is only needed for the join; release it on
	 * every path, not just when the join fails */
	free(cgroup_dir);
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);

		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			ret = for_allow;
			break;
		}
		if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			break;
		}
	}

	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
/*
 * Count the lines of every "tasks" file in @cgroup_path and in all
 * directories below it.
 *
 * Uses readdir() rather than the deprecated readdir_r(): each recursion
 * level iterates its own DIR stream, so no caller-sized dirent buffer is
 * needed.
 *
 * Returns the total count, 0 if @cgroup_path cannot be opened, or -1 if
 * an entry cannot be examined. Failures inside a subdirectory or a tasks
 * file are ignored and add nothing to the count.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d;
	struct dirent *dent;
	int n = 0, r;

	d = opendir(cgroup_path);
	if (!d)
		return 0;

	while ((dent = readdir(d)) != NULL) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;

		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			closedir(d);
			return -1;
		}

		r = stat(sub_path, &st);
		if (r < 0) {
			closedir(d);
			free(sub_path);
			return -1;
		}

		if (S_ISDIR(st.st_mode)) {
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			r = count_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}
	closedir(d);

	return n;
}
/*
 * Count the lines of file @fn as delivered by getline().
 * Returns the count, or -1 if the file cannot be opened.
 */
static int count_lines(const char *fn)
{
	FILE *fp = fopen_cloexec(fn, "r");
	char *buf = NULL;
	size_t cap = 0;
	int lines;

	if (!fp)
		return -1;

	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
		;

	free(buf);
	fclose(fp);
	return lines;
}
2014 static int handle_cgroup_settings(struct cgroup_mount_point
*mp
,
2017 int r
, saved_errno
= 0;
2020 /* If this is the memory cgroup, we want to enforce hierarchy.
2021 * But don't fail if for some reason we can't.
2023 if (lxc_string_in_array("memory", (const char **)mp
->hierarchy
->subsystems
)) {
2024 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/memory.use_hierarchy");
2026 r
= lxc_read_from_file(cc_path
, buf
, 1);
2027 if (r
< 1 || buf
[0] != '1') {
2028 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2030 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
2036 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2037 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2038 * and cpuset.cpus and then
2040 if (lxc_string_in_array("cpuset", (const char **)mp
->hierarchy
->subsystems
)) {
2041 char *cc_path
= cgroup_to_absolute_path(mp
, cgroup_path
, "/cgroup.clone_children");
2044 r
= lxc_read_from_file(cc_path
, buf
, 1);
2045 if (r
== 1 && buf
[0] == '1') {
2049 r
= lxc_write_to_file(cc_path
, "1", 1, false);
2050 saved_errno
= errno
;
2052 errno
= saved_errno
;
2053 return r
< 0 ? -1 : 0;
2058 extern void lxc_monitor_send_state(const char *name
, lxc_state_t state
,
2059 const char *lxcpath
);
2060 int do_unfreeze(int freeze
, const char *name
, const char *lxcpath
)
2063 const char *state
= freeze
? "FROZEN" : "THAWED";
2065 if (lxc_cgroup_set("freezer.state", state
, name
, lxcpath
) < 0) {
2066 ERROR("Failed to freeze %s:%s", lxcpath
, name
);
2070 if (lxc_cgroup_get("freezer.state", v
, 100, name
, lxcpath
) < 0) {
2071 ERROR("Failed to get new freezer state for %s:%s", lxcpath
, name
);
2074 if (v
[strlen(v
)-1] == '\n')
2075 v
[strlen(v
)-1] = '\0';
2076 if (strncmp(v
, state
, strlen(state
)) == 0) {
2078 lxc_monitor_send_state(name
, freeze
? FROZEN
: THAWED
, lxcpath
);
/*
 * Freeze (@freeze != 0) or thaw container @name in @lxcpath.
 * Same as do_unfreeze(), with the first two arguments swapped.
 */
int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
{
	int rc = do_unfreeze(freeze, name, lxcpath);

	return rc;
}
2090 lxc_state_t
freezer_state(const char *name
, const char *lxcpath
)
2093 if (lxc_cgroup_get("freezer.state", v
, 100, name
, lxcpath
) < 0)
2096 if (v
[strlen(v
)-1] == '\n')
2097 v
[strlen(v
)-1] = '\0';
2098 return lxc_str2state(v
);
2101 static void cgfs_destroy(struct lxc_handler
*handler
)
2103 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2107 lxc_cgroup_process_info_free_and_remove(d
->info
);
2109 lxc_cgroup_put_meta(d
->meta
);
2111 handler
->cgroup_info
->data
= NULL
;
2114 static inline bool cgfs_init(struct lxc_handler
*handler
)
2116 struct cgfs_data
*d
= malloc(sizeof(*d
));
2120 d
->meta
= lxc_cgroup_load_meta();
2123 ERROR("cgroupfs failed to detect cgroup metadata");
2127 handler
->cgroup_info
->data
= d
;
2131 static inline bool cgfs_create(struct lxc_handler
*handler
)
2133 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2134 struct cgroup_process_info
*i
;
2135 struct cgroup_meta_data
*md
= d
->meta
;
2136 i
= lxc_cgroupfs_create(handler
->name
, handler
->cgroup_info
->cgroup_pattern
, md
, NULL
);
2143 static inline bool cgfs_enter(struct lxc_handler
*handler
)
2145 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2146 struct cgroup_process_info
*i
= d
->info
;
2149 ret
= lxc_cgroupfs_enter(i
, handler
->pid
, false);
2154 static inline bool cgfs_create_legacy(struct lxc_handler
*handler
)
2156 struct cgfs_data
*d
= handler
->cgroup_info
->data
;
2157 struct cgroup_process_info
*i
= d
->info
;
2158 if (lxc_cgroup_create_legacy(i
, handler
->name
, handler
->pid
) < 0) {
2159 ERROR("failed to create legacy ns cgroups for '%s'", handler
->name
);
/*
 * cgroupfs backend "get_cgroup": cgroup path of the container for
 * @subsystem, as reported by lxc_cgroup_get_hierarchy_path_handler().
 */
static char *cgfs_get_cgroup(struct lxc_handler *handler, const char *subsystem)
{
	char *cgpath = lxc_cgroup_get_hierarchy_path_handler(subsystem, handler);

	return cgpath;
}
/*
 * cgroupfs backend "unfreeze_fromhandler": write "THAWED" to the
 * freezer.state file of the container described by @handler.
 *
 * Returns the result of do_cgroup_set(), or -1 when the freezer cgroup
 * path cannot be determined.
 */
static int cgfs_unfreeze_fromhandler(struct lxc_handler *handler)
{
	char *cgabspath, *cgrelpath;
	int ret;

	cgrelpath = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
	/* no freezer cgroup recorded for this container: nothing to resolve */
	if (!cgrelpath)
		return -1;

	cgabspath = lxc_cgroup_find_abs_path("freezer", cgrelpath, true, NULL);
	if (!cgabspath)
		return -1;

	ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
	free(cgabspath);
	return ret;
}
2185 bool cgroupfs_setup_limits(struct lxc_handler
*h
, bool with_devices
)
2187 return do_setup_cgroup_limits(h
, &h
->conf
->cgroup
, with_devices
) == 0;
2190 static struct cgroup_ops cgfs_ops
= {
2191 .destroy
= cgfs_destroy
,
2193 .create
= cgfs_create
,
2194 .enter
= cgfs_enter
,
2195 .create_legacy
= cgfs_create_legacy
,
2196 .get_cgroup
= cgfs_get_cgroup
,
2197 .get
= lxc_cgroupfs_get
,
2198 .set
= lxc_cgroupfs_set
,
2199 .unfreeze_fromhandler
= cgfs_unfreeze_fromhandler
,
2200 .setup_limits
= cgroupfs_setup_limits
,
/*
 * Select the active cgroup backend: when lxc_init_cgmanager() reports
 * failure, an error is logged and active_cg_ops is pointed at the
 * cgroupfs operations table (cgfs_ops).
 * NOTE(review): the guards ahead of that call are only partly visible
 * here; presumably the function returns early once cgmanager_initialized
 * is set - confirm.
 */
2204 static void init_cg_ops(void)
2208 if (cgmanager_initialized
)
2210 if (!lxc_init_cgmanager()) {
2211 ERROR("Could not contact cgroup manager, falling back to cgroupfs");
2212 active_cg_ops
= &cgfs_ops
;
2217 * These are the backend-independent cgroup handlers for container
/*
 * Backend-independent entry point: hands @handler to the active
 * backend's destroy operation. Checks handler->cgroup_info first.
 * NOTE(review): presumably returns without doing anything when
 * cgroup_info is NULL - confirm.
 */
2221 /* Free all cgroup info held by the handler */
2222 void cgroup_destroy(struct lxc_handler
*handler
)
2224 if (!handler
->cgroup_info
)
2227 active_cg_ops
->destroy(handler
);
2231 * Allocate a lxc_cgroup_info for the active cgroup
2232 * backend, and assign it to the handler
2234 bool cgroup_init(struct lxc_handler
*handler
)
2237 handler
->cgroup_info
= malloc(sizeof(struct lxc_cgroup_info
));
2238 if (!handler
->cgroup_info
)
2240 memset(handler
->cgroup_info
, 0, sizeof(struct lxc_cgroup_info
));
2241 /* if we are running as root, use system cgroup pattern, otherwise
2242 * just create a cgroup under the current one. But also fall back to
2243 * that if for some reason reading the configuration fails and no
2244 * default value is available
2247 handler
->cgroup_info
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
2248 if (!handler
->cgroup_info
->cgroup_pattern
)
2249 handler
->cgroup_info
->cgroup_pattern
= "%n";
2251 return active_cg_ops
->init(handler
);
2254 /* Create the container cgroups for all requested controllers */
2255 bool cgroup_create(struct lxc_handler
*handler
)
2257 return active_cg_ops
->create(handler
);
2261 * Enter the container init into its new cgroups for all
2262 * requested controllers */
2263 bool cgroup_enter(struct lxc_handler
*handler
)
2265 return active_cg_ops
->enter(handler
);
2268 bool cgroup_create_legacy(struct lxc_handler
*handler
)
2270 if (active_cg_ops
->create_legacy
)
2271 return active_cg_ops
->create_legacy(handler
);
2275 char *cgroup_get_cgroup(struct lxc_handler
*handler
, const char *subsystem
)
2277 return active_cg_ops
->get_cgroup(handler
, subsystem
);
2280 int lxc_cgroup_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
2283 return active_cg_ops
->set(filename
, value
, name
, lxcpath
);
2286 int lxc_cgroup_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
2289 return active_cg_ops
->get(filename
, value
, len
, name
, lxcpath
);
2292 int lxc_unfreeze_fromhandler(struct lxc_handler
*handler
)
2294 return active_cg_ops
->unfreeze_fromhandler(handler
);
2297 bool cgroup_setup_limits(struct lxc_handler
*handler
, bool with_devices
)
2299 return active_cg_ops
->setup_limits(handler
, with_devices
);
2302 bool cgroup_chown(struct lxc_handler
*handler
)
2304 if (active_cg_ops
->chown
)
2305 return active_cg_ops
->chown(handler
);