]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroup.c
Trailing whitespace
[mirror_lxc.git] / src / lxc / cgroup.c
CommitLineData
576f946d 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
576f946d 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
576f946d 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
27#include <errno.h>
576f946d 28#include <unistd.h>
29#include <string.h>
341a9bd8 30#include <dirent.h>
576f946d 31#include <fcntl.h>
b98f7d6e 32#include <ctype.h>
576f946d 33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
aae1f3c4 37#include <sys/mount.h>
576f946d 38#include <netinet/in.h>
39#include <net/if.h>
40
e2bcd7db 41#include "error.h"
881450bb 42#include "config.h"
ae5c8b8e 43#include "commands.h"
b98f7d6e
SH
44#include "list.h"
45#include "conf.h"
33ad9f1a 46#include "utils.h"
740d1928 47#include "bdev.h"
f2363e38
ÇO
48#include "log.h"
49#include "cgroup.h"
50#include "start.h"
484ed030 51#include "state.h"
36eb9bde 52
edaf8b1b
SG
53#if IS_BIONIC
54#include <../include/lxcmntent.h>
55#else
56#include <mntent.h>
57#endif
58
36eb9bde 59lxc_log_define(lxc_cgroup, lxc);
576f946d 60
33ad9f1a
CS
61static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
62static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
63static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
64static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
65static bool is_valid_cgroup(const char *name);
603c64c2 66static int create_or_remove_cgroup(bool remove, struct cgroup_mount_point *mp, const char *path, int recurse);
33ad9f1a 67static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
603c64c2 68static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
33ad9f1a
CS
69static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
70static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
71static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
72static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
73static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
74static int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
75static int cgroup_recursive_task_count(const char *cgroup_path);
76static int count_lines(const char *fn);
1ea59ad2 77static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
33ad9f1a 78
603c64c2
SH
/*
 * Recursively remove the directory tree rooted at @dirname.
 *
 * Every sub-directory found via lstat() is removed depth-first by a
 * recursive call; entries that are not directories are left alone and
 * only the directories themselves are rmdir()ed.
 *
 * Removal is best-effort: an error on one entry does not stop the walk.
 * The first error encountered is remembered and stored in errno before
 * returning.
 *
 * Returns 0 if everything succeeded, -1 if at least one step failed.
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	int saved_errno = 0;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	/* readdir() instead of the deprecated readdir_r(); the stream is
	 * private to this call, so there is no sharing concern. As before,
	 * a read error simply ends the walk.
	 */
	while ((direntp = readdir(dir))) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			/* errno values are positive; this ends up in errno below */
			if (!saved_errno)
				saved_errno = ENAMETOOLONG;
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			/* remember errno before logging can overwrite it */
			if (!saved_errno)
				saved_errno = errno;
			SYSERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			continue;
		}

		if (S_ISDIR(mystat.st_mode)) {
			if (cgroup_rmdir(pathname) < 0) {
				if (!saved_errno)
					saved_errno = errno;
				failed = 1;
			}
		}
	}

	if (rmdir(dirname) < 0) {
		if (!saved_errno)
			saved_errno = errno;
		SYSERROR("%s: failed to delete %s", __func__, dirname);
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		if (!saved_errno)
			saved_errno = errno;
		SYSERROR("%s: failed to close directory %s", __func__, dirname);
		failed = 1;
	}

	errno = saved_errno;
	return failed ? -1 : 0;
}
147
33ad9f1a
CS
/*
 * Load the cgroup meta data, restricted to the subsystems listed in the
 * global "lxc.cgroup.use" configuration value (a comma separated list).
 * If that value is not set, no whitelist is passed on.
 *
 * Returns the result of lxc_cgroup_load_meta2(), or NULL on failure.
 * errno as left by lxc_cgroup_load_meta2() is preserved across cleanup.
 */
struct cgroup_meta_data *lxc_cgroup_load_meta()
{
	struct cgroup_meta_data *result;
	char **whitelist = NULL;
	const char *use_value;
	int err;

	/* errno is cleared first so that a NULL value with errno still 0
	 * ("not configured") can be told apart from a lookup failure
	 */
	errno = 0;
	use_value = lxc_global_config_value("lxc.cgroup.use");
	if (use_value) {
		whitelist = lxc_string_split_and_trim(use_value, ',');
		if (!whitelist)
			return NULL;
	} else if (errno != 0) {
		return NULL;
	}

	result = lxc_cgroup_load_meta2((const char **)whitelist);

	/* freeing the list must not disturb the errno callers will see */
	err = errno;
	lxc_free_array((void **)whitelist, free);
	errno = err;

	return result;
}
fd37327f 171
b653309a
SH
/* Step 1: determine all kernel subsystems
 *
 * Reads /proc/cgroups and appends a strdup()ed copy of the first
 * (tab separated) field of every usable line to *kernel_subsystems.
 * Lines starting with '#', empty lines, lines with fewer than two tabs
 * and lines whose second field is not a plain decimal number are skipped.
 *
 * Returns true if the whole file was processed, false if the file could
 * not be opened or an allocation failed. On failure, entries that were
 * already appended stay in *kernel_subsystems for the caller to free.
 */
static bool find_cgroup_subsystems(char ***kernel_subsystems)
{
	FILE *fp;
	char *buf = NULL;
	size_t buflen = 0;
	size_t count = 0;
	size_t capacity = 0;
	bool ok = false;

	fp = fopen_cloexec("/proc/cgroups", "r");
	if (!fp)
		return false;

	while (getline(&buf, &buflen, fp) != -1) {
		char *number_field;
		char *number_end;
		char *parse_end = NULL;

		/* header/comment line or empty line */
		if (buf[0] == '#' || buf[0] == '\0')
			continue;

		/* terminate the subsystem name at the first tab */
		number_field = strchr(buf, '\t');
		if (!number_field)
			continue;
		*number_field++ = '\0';

		/* terminate the hierarchy number at the second tab */
		number_end = strchr(number_field, '\t');
		if (!number_end)
			continue;
		*number_end = '\0';

		/* the number itself is not needed, it only has to parse cleanly */
		(void)strtoul(number_field, &parse_end, 10);
		if (!parse_end || *parse_end)
			continue;

		if (lxc_grow_array((void ***)kernel_subsystems, &capacity, count + 1, 12) < 0)
			goto out;

		(*kernel_subsystems)[count] = strdup(buf);
		if (!(*kernel_subsystems)[count])
			goto out;
		count++;
	}
	ok = true;

out:
	fclose(fp);
	free(buf);
	return ok;
}
227
228/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
229 * since mount points don't specify hierarchy number and
230 * /proc/cgroups does not contain named hierarchies
231 */
232static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
233 bool all_kernel_subsystems, bool all_named_subsystems,
234 const char **subsystem_whitelist)
235{
236 FILE *proc_self_cgroup;
237 char *line = NULL;
238 size_t sz = 0;
239 int r;
240 bool bret = false;
241 size_t hierarchy_capacity = 0;
ef6e34ee 242
33ad9f1a
CS
243 proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
244 /* if for some reason (because of setns() and pid namespace for example),
245 * /proc/self is not valid, we try /proc/1/cgroup... */
246 if (!proc_self_cgroup)
247 proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
248 if (!proc_self_cgroup)
b653309a 249 return false;
33ad9f1a
CS
250
251 while (getline(&line, &sz, proc_self_cgroup) != -1) {
252 /* file format: hierarchy:subsystems:group,
253 * we only extract hierarchy and subsystems
254 * here */
255 char *colon1;
256 char *colon2;
257 int hierarchy_number;
258 struct cgroup_hierarchy *h = NULL;
259 char **p;
260
261 if (!line[0])
262 continue;
ad08bbb7 263
33ad9f1a
CS
264 colon1 = strchr(line, ':');
265 if (!colon1)
8900b9eb 266 continue;
33ad9f1a
CS
267 *colon1++ = '\0';
268 colon2 = strchr(colon1, ':');
269 if (!colon2)
270 continue;
271 *colon2 = '\0';
ad08bbb7 272
33ad9f1a
CS
273 colon2 = NULL;
274 hierarchy_number = strtoul(line, &colon2, 10);
275 if (!colon2 || *colon2)
276 continue;
576f946d 277
33ad9f1a
CS
278 if (hierarchy_number > meta_data->maximum_hierarchy) {
279 /* lxc_grow_array will never shrink, so even if we find a lower
280 * hierarchy number here, the array will never be smaller
281 */
282 r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
283 if (r < 0)
b653309a 284 goto out;
5193cc3d 285
33ad9f1a
CS
286 meta_data->maximum_hierarchy = hierarchy_number;
287 }
fd37327f 288
33ad9f1a
CS
289 /* this shouldn't happen, we had this already */
290 if (meta_data->hierarchies[hierarchy_number])
b653309a 291 goto out;
33ad9f1a
CS
292
293 h = calloc(1, sizeof(struct cgroup_hierarchy));
294 if (!h)
b653309a 295 goto out;
33ad9f1a
CS
296
297 meta_data->hierarchies[hierarchy_number] = h;
298
299 h->index = hierarchy_number;
300 h->subsystems = lxc_string_split_and_trim(colon1, ',');
301 if (!h->subsystems)
b653309a 302 goto out;
33ad9f1a
CS
303 /* see if this hierarchy should be considered */
304 if (!all_kernel_subsystems || !all_named_subsystems) {
305 for (p = h->subsystems; *p; p++) {
306 if (!strncmp(*p, "name=", 5)) {
307 if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
308 h->used = true;
309 break;
310 }
311 } else {
312 if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
313 h->used = true;
314 break;
315 }
316 }
317 }
318 } else {
319 /* we want all hierarchy anyway */
320 h->used = true;
ae5c8b8e 321 }
ae5c8b8e 322 }
b653309a 323 bret = true;
0b9c21ab 324
b653309a 325out:
33ad9f1a 326 fclose(proc_self_cgroup);
0ccf7c2a 327 free(line);
b653309a
SH
328 return bret;
329}
330
331/* Step 3: determine all mount points of each hierarchy */
332static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
333{
334 bool bret = false;
335 FILE *proc_self_mountinfo;
336 char *line = NULL;
337 size_t sz = 0;
338 char **tokens = NULL;
339 size_t mount_point_count = 0;
340 size_t mount_point_capacity = 0;
341 size_t token_capacity = 0;
342 int r;
343
33ad9f1a
CS
344 proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
345 /* if for some reason (because of setns() and pid namespace for example),
346 * /proc/self is not valid, we try /proc/1/cgroup... */
347 if (!proc_self_mountinfo)
348 proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
349 if (!proc_self_mountinfo)
b653309a 350 return false;
33ad9f1a
CS
351
352 while (getline(&line, &sz, proc_self_mountinfo) != -1) {
178938fe 353 char *token, *line_tok, *saveptr = NULL;
33ad9f1a
CS
354 size_t i, j, k;
355 struct cgroup_mount_point *mount_point;
356 struct cgroup_hierarchy *h;
357 char **subsystems;
358
359 if (line[0] && line[strlen(line) - 1] == '\n')
360 line[strlen(line) - 1] = '\0';
361
178938fe 362 for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
33ad9f1a
CS
363 r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
364 if (r < 0)
b653309a 365 goto out;
33ad9f1a
CS
366 tokens[i++] = token;
367 }
b98f7d6e 368
33ad9f1a
CS
369 /* layout of /proc/self/mountinfo:
370 * 0: id
371 * 1: parent id
372 * 2: device major:minor
373 * 3: mount prefix
8900b9eb 374 * 4: mount point
33ad9f1a
CS
375 * 5: per-mount options
376 * [optional X]: additional data
377 * X+7: "-"
378 * X+8: type
379 * X+9: source
380 * X+10: per-superblock options
381 */
382 for (j = 6; j < i && tokens[j]; j++)
383 if (!strcmp(tokens[j], "-"))
384 break;
fd4f5a56 385
33ad9f1a
CS
386 /* could not find separator */
387 if (j >= i || !tokens[j])
388 continue;
389 /* there should be exactly three fields after
390 * the separator
391 */
392 if (i != j + 4)
393 continue;
fd4f5a56 394
33ad9f1a
CS
395 /* not a cgroup filesystem */
396 if (strcmp(tokens[j + 1], "cgroup") != 0)
397 continue;
b98f7d6e 398
33ad9f1a
CS
399 subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
400 if (!subsystems)
b653309a 401 goto out;
33ad9f1a
CS
402
403 h = NULL;
404 for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
405 if (meta_data->hierarchies[k] &&
406 meta_data->hierarchies[k]->subsystems[0] &&
407 lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
408 /* TODO: we could also check if the lists really match completely,
409 * just to have an additional sanity check */
410 h = meta_data->hierarchies[k];
b98f7d6e 411 break;
33ad9f1a 412 }
b98f7d6e 413 }
33ad9f1a
CS
414 lxc_free_array((void **)subsystems, free);
415
416 r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
417 if (r < 0)
b653309a 418 goto out;
33ad9f1a
CS
419
420 /* create mount point object */
421 mount_point = calloc(1, sizeof(*mount_point));
422 if (!mount_point)
b653309a 423 goto out;
33ad9f1a
CS
424
425 meta_data->mount_points[mount_point_count++] = mount_point;
426
427 mount_point->hierarchy = h;
428 mount_point->mount_point = strdup(tokens[4]);
429 mount_point->mount_prefix = strdup(tokens[3]);
430 if (!mount_point->mount_point || !mount_point->mount_prefix)
b653309a 431 goto out;
33ad9f1a
CS
432 mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
433
434 if (!strcmp(mount_point->mount_prefix, "/")) {
435 if (mount_point->read_only) {
436 if (!h->ro_absolute_mount_point)
437 h->ro_absolute_mount_point = mount_point;
438 } else {
439 if (!h->rw_absolute_mount_point)
440 h->rw_absolute_mount_point = mount_point;
441 }
b98f7d6e 442 }
ae5c8b8e 443
33ad9f1a
CS
444 k = lxc_array_len((void **)h->all_mount_points);
445 r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
446 if (r < 0)
b653309a 447 goto out;
33ad9f1a 448 h->all_mount_points[k] = mount_point;
fd4f5a56 449 }
b653309a
SH
450 bret = true;
451
452out:
b653309a 453 fclose(proc_self_mountinfo);
b653309a 454 free(tokens);
2cdafc54 455 free(line);
b653309a
SH
456 return bret;
457}
458
459struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
460{
461 bool all_kernel_subsystems = true;
462 bool all_named_subsystems = false;
463 struct cgroup_meta_data *meta_data = NULL;
464 char **kernel_subsystems = NULL;
465 int saved_errno = 0;
466
467 /* if the subsystem whitelist is not specified, include all
468 * hierarchies that contain kernel subsystems by default but
469 * no hierarchies that only contain named subsystems
470 *
471 * if it is specified, the specifier @all will select all
472 * hierarchies, @kernel will select all hierarchies with
473 * kernel subsystems and @named will select all named
474 * hierarchies
475 */
476 all_kernel_subsystems = subsystem_whitelist ?
477 (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
478 true;
479 all_named_subsystems = subsystem_whitelist ?
480 (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
481 false;
482
483 meta_data = calloc(1, sizeof(struct cgroup_meta_data));
484 if (!meta_data)
485 return NULL;
486 meta_data->ref = 1;
487
488 if (!find_cgroup_subsystems(&kernel_subsystems))
489 goto out_error;
490
491 if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
492 all_named_subsystems, subsystem_whitelist))
493 goto out_error;
494
495 if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
496 goto out_error;
fd4f5a56 497
33ad9f1a
CS
498 /* oops, we couldn't find anything */
499 if (!meta_data->hierarchies || !meta_data->mount_points) {
500 errno = EINVAL;
501 goto out_error;
ae5c8b8e 502 }
fd4f5a56 503
3a0abb3a 504 lxc_free_array((void **)kernel_subsystems, free);
33ad9f1a
CS
505 return meta_data;
506
507out_error:
508 saved_errno = errno;
33ad9f1a
CS
509 lxc_free_array((void **)kernel_subsystems, free);
510 lxc_cgroup_put_meta(meta_data);
511 errno = saved_errno;
512 return NULL;
fd4f5a56
DL
513}
514
33ad9f1a 515struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
e14f67a7 516{
33ad9f1a
CS
517 meta_data->ref++;
518 return meta_data;
519}
e14f67a7 520
33ad9f1a
CS
521struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
522{
523 size_t i;
524 if (!meta_data)
525 return NULL;
526 if (--meta_data->ref > 0)
527 return meta_data;
528 lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
529 if (meta_data->hierarchies) {
530 for (i = 0; i <= meta_data->maximum_hierarchy; i++)
531 lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
e14f67a7 532 }
33ad9f1a 533 free(meta_data->hierarchies);
178938fe 534 free(meta_data);
33ad9f1a 535 return NULL;
e14f67a7
U
536}
537
33ad9f1a 538struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
e14f67a7 539{
33ad9f1a
CS
540 size_t i;
541 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
542 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
543 if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
544 return h;
e14f67a7 545 }
e14f67a7
U
546 return NULL;
547}
548
33ad9f1a 549struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
b98f7d6e 550{
33ad9f1a
CS
551 struct cgroup_mount_point **mps;
552 struct cgroup_mount_point *current_result = NULL;
553 ssize_t quality = -1;
b98f7d6e 554
33ad9f1a
CS
555 /* trivial case */
556 if (hierarchy->rw_absolute_mount_point)
557 return hierarchy->rw_absolute_mount_point;
558 if (!should_be_writable && hierarchy->ro_absolute_mount_point)
559 return hierarchy->ro_absolute_mount_point;
b98f7d6e 560
33ad9f1a
CS
561 for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
562 struct cgroup_mount_point *mp = *mps;
563 size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
b98f7d6e 564
33ad9f1a
CS
565 if (prefix_len == 1 && mp->mount_prefix[0] == '/')
566 prefix_len = 0;
b98f7d6e 567
33ad9f1a
CS
568 if (should_be_writable && mp->read_only)
569 continue;
570
571 if (!prefix_len ||
572 (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
573 (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
574 /* search for the best quality match, i.e. the match with the
575 * shortest prefix where this group is still contained
576 */
577 if (quality == -1 || prefix_len < quality) {
578 current_result = mp;
579 quality = prefix_len;
580 }
b98f7d6e
SH
581 }
582 }
583
33ad9f1a
CS
584 if (!current_result)
585 errno = ENOENT;
586 return current_result;
b98f7d6e
SH
587}
588
/*
 * Resolve the absolute path for cgroup @group in the hierarchy that
 * carries @subsystem, with @suffix handed on to
 * cgroup_to_absolute_path().
 *
 * The meta data is loaded for the duration of this call only. Returns
 * the string produced by cgroup_to_absolute_path(), or NULL if loading
 * the meta data, the hierarchy lookup, the mount point lookup or the
 * path construction failed. On failure after the meta data was loaded,
 * errno is preserved across the cleanup.
 */
char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{
	struct cgroup_meta_data *md;
	struct cgroup_hierarchy *hier;
	struct cgroup_mount_point *mount;
	char *abs_path = NULL;
	int err;

	md = lxc_cgroup_load_meta();
	if (!md)
		return NULL;

	hier = lxc_cgroup_find_hierarchy(md, subsystem);
	if (hier) {
		mount = lxc_cgroup_find_mount_point(hier, group, should_be_writable);
		if (mount)
			abs_path = cgroup_to_absolute_path(mount, group, suffix);
	}

	if (abs_path) {
		lxc_cgroup_put_meta(md);
		return abs_path;
	}

	/* keep the errno of the step that failed */
	err = errno;
	lxc_cgroup_put_meta(md);
	errno = err;
	return NULL;
}
622
/*
 * Get the cgroup information of process @pid by handing the path
 * "/proc/<pid>/cgroup" to lxc_cgroup_process_info_getx().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
{
	char proc_path[32];

	snprintf(proc_path, sizeof(proc_path), "/proc/%lu/cgroup", (unsigned long)pid);

	return lxc_cgroup_process_info_getx(proc_path, meta);
}
629
/*
 * Get the cgroup information of the process with pid 1.
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	const pid_t init_pid = 1;

	return lxc_cgroup_process_info_get(init_pid, meta);
}
b98f7d6e 634
33ad9f1a
CS
/*
 * Get the cgroup information of the calling process. /proc/self/cgroup
 * is tried first; if that yields nothing, the lookup is repeated through
 * the numeric pid returned by getpid().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
{
	struct cgroup_process_info *info;

	info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
	if (info)
		return info;

	return lxc_cgroup_process_info_get(getpid(), meta);
}
ae5c8b8e 643
692ba18f
SH
/*
 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
 * is already in a new cgroup named after the pid. 'mountpath' is the mount
 * point of the hierarchy and 'oldname' the cgroup the task was in below that
 * mount point. Say the pid cgroup is /sys/fs/cgroup/lxc/2975 and the container
 * name is c1. We want to rename the cgroup directory to /sys/fs/cgroup/lxc/c1.
 *
 * Returns the new cgroup path relative to the mount point ("oldname/name")
 * as a malloc()ed string the caller has to free, or NULL on failure.
 * An already existing directory of the new name is removed with rmdir()
 * first; if that fails the rename is not attempted.
 */
static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
{
	char *fulloldpath;
	char *newname, *fullnewpath;
	int len, newlen, ret;

	/*
	 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
	 * name is c1,
	 * fulloldpath = /cgroup/ab/2375
	 * fullnewpath = /cgroup/ab/c1
	 * newname = /ab/c1
	 */
	len = strlen(oldname) + strlen(mountpath) + 22;
	fulloldpath = alloca(len);
	/* the argument is cast to unsigned long, so the conversion must be %lu */
	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
	if (ret < 0 || ret >= len)
		return NULL;

	/* oldname + '/' + name + NUL */
	len = strlen(oldname) + strlen(name) + 2;
	newname = malloc(len);
	if (!newname) {
		SYSERROR("Out of memory");
		return NULL;
	}
	ret = snprintf(newname, len, "%s/%s", oldname, name);
	if (ret < 0 || ret >= len) {
		free(newname);
		return NULL;
	}

	newlen = strlen(mountpath) + len + 2;
	fullnewpath = alloca(newlen);
	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
	if (ret < 0 || ret >= newlen) {
		free(newname);
		return NULL;
	}

	/* a leftover directory of that name has to go before the rename */
	if (access(fullnewpath, F_OK) == 0) {
		if (rmdir(fullnewpath) != 0) {
			SYSERROR("container cgroup %s already exists.", fullnewpath);
			free(newname);
			return NULL;
		}
	}
	if (rename(fulloldpath, fullnewpath)) {
		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
		free(newname);
		return NULL;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return newname;
}
711
33ad9f1a 712/* create a new cgroup */
47d8fb3b 713extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
33ad9f1a 714{
001b026e 715 char **cgroup_path_components = NULL;
33ad9f1a
CS
716 char **p = NULL;
717 char *path_so_far = NULL;
718 char **new_cgroup_paths = NULL;
719 char **new_cgroup_paths_sub = NULL;
720 struct cgroup_mount_point *mp;
721 struct cgroup_hierarchy *h;
722 struct cgroup_process_info *base_info = NULL;
723 struct cgroup_process_info *info_ptr;
724 int saved_errno;
725 int r;
726 unsigned suffix = 0;
727 bool had_sub_pattern = false;
728 size_t i;
ae5c8b8e 729
33ad9f1a
CS
730 if (!is_valid_cgroup(name)) {
731 ERROR("Invalid cgroup name: '%s'", name);
732 errno = EINVAL;
733 return NULL;
ae5c8b8e
SH
734 }
735
33ad9f1a
CS
736 if (!strstr(path_pattern, "%n")) {
737 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
738 errno = EINVAL;
739 return NULL;
740 }
fd37327f 741
33ad9f1a
CS
742 /* we will modify the result of this operation directly,
743 * so we don't have to copy the data structure
744 */
745 base_info = (path_pattern[0] == '/') ?
746 lxc_cgroup_process_info_get_init(meta_data) :
747 lxc_cgroup_process_info_get_self(meta_data);
748 if (!base_info)
749 return NULL;
c8f7c563 750
33ad9f1a
CS
751 new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
752 if (!new_cgroup_paths)
753 goto out_initial_error;
754
755 new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
756 if (!new_cgroup_paths_sub)
757 goto out_initial_error;
758
759 /* find mount points we can use */
760 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
761 h = info_ptr->hierarchy;
762 mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
763 if (!mp) {
764 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
765 goto out_initial_error;
766 }
767 info_ptr->designated_mount_point = mp;
460a1cf0 768
692ba18f
SH
769 if (lxc_string_in_array("ns", (const char **)h->subsystems))
770 continue;
1ea59ad2 771 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
33ad9f1a
CS
772 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
773 goto out_initial_error;
774 }
775 }
b98f7d6e 776
33ad9f1a
CS
777 /* normalize the path */
778 cgroup_path_components = lxc_normalize_path(path_pattern);
779 if (!cgroup_path_components)
780 goto out_initial_error;
781
782 /* go through the path components to see if we can create them */
783 for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
784 /* we only want to create the same component with -1, -2, etc.
785 * if the component contains the container name itself, otherwise
786 * it's not an error if it already exists
787 */
788 char *p_eff = *p ? *p : (char *)sub_pattern;
789 bool contains_name = strstr(p_eff, "%n");
790 char *current_component = NULL;
791 char *current_subpath = NULL;
792 char *current_entire_path = NULL;
793 char *parts[3];
794 size_t j = 0;
795 i = 0;
796
797 /* if we are processing the subpattern, we want to make sure
798 * loop is ended the next time around
799 */
800 if (!*p) {
801 had_sub_pattern = true;
802 p--;
803 }
b98f7d6e 804
33ad9f1a
CS
805 goto find_name_on_this_level;
806
807 cleanup_name_on_this_level:
808 /* This is reached if we found a name clash.
809 * In that case, remove the cgroup from all previous hierarchies
810 */
811 for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
603c64c2 812 r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
33ad9f1a
CS
813 if (r < 0)
814 WARN("could not clean up cgroup we created when trying to create container");
815 free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
816 info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
817 }
818 if (current_component != current_subpath)
819 free(current_subpath);
820 if (current_component != p_eff)
821 free(current_component);
822 current_component = current_subpath = NULL;
823 /* try again with another suffix */
824 ++suffix;
825
826 find_name_on_this_level:
827 /* determine name of the path component we should create */
828 if (contains_name && suffix > 0) {
829 char *buf = calloc(strlen(name) + 32, 1);
830 if (!buf)
831 goto out_initial_error;
832 snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
833 current_component = lxc_string_replace("%n", buf, p_eff);
834 free(buf);
835 } else {
836 current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
837 }
838 parts[0] = path_so_far;
839 parts[1] = current_component;
840 parts[2] = NULL;
841 current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
842
843 /* Now go through each hierarchy and try to create the
844 * corresponding cgroup
845 */
846 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
847 char *parts2[3];
692ba18f
SH
848
849 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
850 continue;
33ad9f1a
CS
851 current_entire_path = NULL;
852
853 parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
854 parts2[1] = current_subpath;
855 parts2[2] = NULL;
856 current_entire_path = lxc_string_join("/", (const char **)parts2, false);
857
858 if (!*p) {
859 /* we are processing the subpath, so only update that one */
860 free(new_cgroup_paths_sub[i]);
861 new_cgroup_paths_sub[i] = strdup(current_entire_path);
862 if (!new_cgroup_paths_sub[i])
863 goto cleanup_from_error;
864 } else {
865 /* remember which path was used on this controller */
866 free(new_cgroup_paths[i]);
867 new_cgroup_paths[i] = strdup(current_entire_path);
868 if (!new_cgroup_paths[i])
869 goto cleanup_from_error;
870 }
fd4f5a56 871
33ad9f1a
CS
872 r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
873 if (r < 0 && errno == EEXIST && contains_name) {
874 /* name clash => try new name with new suffix */
875 free(current_entire_path);
876 current_entire_path = NULL;
877 goto cleanup_name_on_this_level;
878 } else if (r < 0 && errno != EEXIST) {
879 SYSERROR("Could not create cgroup %s", current_entire_path);
880 goto cleanup_from_error;
881 } else if (r == 0) {
882 /* successfully created */
883 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
884 if (r < 0)
885 goto cleanup_from_error;
886 info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
887 } else {
888 /* if we didn't create the cgroup, then we have to make sure that
889 * further cgroups will be created properly
890 */
1ea59ad2 891 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
33ad9f1a
CS
892 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
893 goto cleanup_from_error;
894 }
895
896 /* already existed but path component of pattern didn't contain '%n',
897 * so this is not an error; but then we don't need current_entire_path
898 * anymore...
899 */
900 free(current_entire_path);
901 current_entire_path = NULL;
902 }
903 }
fd4f5a56 904
33ad9f1a
CS
905 /* save path so far */
906 free(path_so_far);
907 path_so_far = strdup(current_subpath);
908 if (!path_so_far)
909 goto cleanup_from_error;
910
911 /* cleanup */
912 if (current_component != current_subpath)
913 free(current_subpath);
914 if (current_component != p_eff)
915 free(current_component);
916 current_component = current_subpath = NULL;
917 continue;
918
919 cleanup_from_error:
920 /* called if an error occured in the loop, so we
921 * do some additional cleanup here
922 */
923 saved_errno = errno;
924 if (current_component != current_subpath)
925 free(current_subpath);
926 if (current_component != p_eff)
927 free(current_component);
928 free(current_entire_path);
929 errno = saved_errno;
930 goto out_initial_error;
fd4f5a56
DL
931 }
932
33ad9f1a
CS
933 /* we're done, now update the paths */
934 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
47d8fb3b
CS
935 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
936 * will take care of it
937 * Since we do a continue in above loop, new_cgroup_paths[i] is
938 * unset anyway, as is new_cgroup_paths_sub[i]
692ba18f 939 */
47d8fb3b
CS
940 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
941 continue;
942 free(info_ptr->cgroup_path);
943 info_ptr->cgroup_path = new_cgroup_paths[i];
944 info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
fd4f5a56 945 }
33ad9f1a
CS
946 /* don't use lxc_free_array since we used the array members
947 * to store them in our result...
948 */
949 free(new_cgroup_paths);
950 free(new_cgroup_paths_sub);
951 free(path_so_far);
952 lxc_free_array((void **)cgroup_path_components, free);
953 return base_info;
954
955out_initial_error:
956 saved_errno = errno;
957 free(path_so_far);
958 lxc_cgroup_process_info_free_and_remove(base_info);
959 lxc_free_array((void **)new_cgroup_paths, free);
960 lxc_free_array((void **)new_cgroup_paths_sub, free);
961 lxc_free_array((void **)cgroup_path_components, free);
962 errno = saved_errno;
963 return NULL;
c8f7c563
CS
964}
965
47d8fb3b
CS
966int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
967{
968 struct cgroup_process_info *info_ptr;
969 int r;
970
971 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
972 if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
973 continue;
974 /*
975 * For any path which has ns cgroup mounted, handler->pid is already
976 * moved into a container called '%d % (handler->pid)'. Rename it to
977 * the cgroup name and record that.
978 */
979 char *tmp = cgroup_rename_nsgroup((const char *)info_ptr->designated_mount_point->mount_point,
980 info_ptr->cgroup_path, pid, name);
981 if (!tmp)
982 return -1;
983 free(info_ptr->cgroup_path);
984 info_ptr->cgroup_path = tmp;
985 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
986 if (r < 0)
987 return -1;
988 tmp = strdup(tmp);
989 if (!tmp)
990 return -1;
991 info_ptr->created_paths[info_ptr->created_paths_count++] = tmp;
992 }
993 return 0;
994}
995
33ad9f1a
CS
996/* get the cgroup membership of a given container */
997struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
c8f7c563 998{
33ad9f1a
CS
999 struct cgroup_process_info *result = NULL;
1000 int saved_errno = 0;
1001 size_t i;
1002 struct cgroup_process_info **cptr = &result;
1003 struct cgroup_process_info *entry = NULL;
1004 char *path = NULL;
1005
1006 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
1007 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
1008 if (!h || !h->used)
1009 continue;
c8f7c563 1010
33ad9f1a
CS
1011 /* use the command interface to look for the cgroup */
1012 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
1013 if (!path)
1014 goto out_error;
1015
1016 entry = calloc(1, sizeof(struct cgroup_process_info));
1017 if (!entry)
1018 goto out_error;
1019 entry->meta_ref = lxc_cgroup_get_meta(meta_data);
1020 entry->hierarchy = h;
1021 entry->cgroup_path = path;
1022 path = NULL;
1023
1024 /* it is not an error if we don't find anything here,
1025 * it is up to the caller to decide what to do in that
1026 * case */
1027 entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
1028
1029 *cptr = entry;
1030 cptr = &entry->next;
1031 entry = NULL;
c8f7c563
CS
1032 }
1033
33ad9f1a
CS
1034 return result;
1035out_error:
1036 saved_errno = errno;
1037 free(path);
1038 lxc_cgroup_process_info_free(result);
1039 lxc_cgroup_process_info_free(entry);
1040 errno = saved_errno;
1041 return NULL;
fd4f5a56
DL
1042}
1043
33ad9f1a
CS
1044/* move a processs to the cgroups specified by the membership */
1045int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
4f17323e 1046{
33ad9f1a
CS
1047 char pid_buf[32];
1048 char *cgroup_tasks_fn;
1049 int r;
1050 struct cgroup_process_info *info_ptr;
1051
1052 snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
1053 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1054 char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
1055 info_ptr->cgroup_path_sub :
1056 info_ptr->cgroup_path;
1057
1058 if (!info_ptr->designated_mount_point) {
1059 info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
1060 if (!info_ptr->designated_mount_point) {
1061 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
1062 return -1;
1063 }
1064 }
4f17323e 1065
33ad9f1a
CS
1066 cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
1067 if (!cgroup_tasks_fn) {
1068 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1069 return -1;
1070 }
4f17323e 1071
33ad9f1a 1072 r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
5903da82 1073 free(cgroup_tasks_fn);
33ad9f1a
CS
1074 if (r < 0) {
1075 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1076 return -1;
1077 }
4f17323e
CS
1078 }
1079
33ad9f1a 1080 return 0;
4f17323e
CS
1081}
1082
33ad9f1a
CS
1083/* free process membership information */
1084void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
fc7de561 1085{
33ad9f1a
CS
1086 struct cgroup_process_info *next;
1087 if (!info)
b98f7d6e 1088 return;
33ad9f1a
CS
1089 next = info->next;
1090 lxc_cgroup_put_meta(info->meta_ref);
1091 free(info->cgroup_path);
1092 free(info->cgroup_path_sub);
1093 lxc_free_array((void **)info->created_paths, free);
1094 free(info);
1095 lxc_cgroup_process_info_free(next);
fc7de561
SH
1096}
1097
33ad9f1a
CS
1098/* free process membership information and remove cgroups that were created */
1099void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
b98f7d6e 1100{
33ad9f1a
CS
1101 struct cgroup_process_info *next;
1102 char **pp;
1103 if (!info)
1104 return;
1105 next = info->next;
603c64c2 1106 {
33ad9f1a
CS
1107 struct cgroup_mount_point *mp = info->designated_mount_point;
1108 if (!mp)
1109 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1110 if (mp)
1111 /* ignore return value here, perhaps we created the
1112 * '/lxc' cgroup in this container but another container
1113 * is still running (for example)
1114 */
603c64c2
SH
1115 (void)remove_cgroup(mp, info->cgroup_path, true);
1116 }
1117 for (pp = info->created_paths; pp && *pp; pp++);
1118 for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
33ad9f1a 1119 free(*pp);
b98f7d6e 1120 }
33ad9f1a
CS
1121 free(info->created_paths);
1122 lxc_cgroup_put_meta(info->meta_ref);
1123 free(info->cgroup_path);
1124 free(info->cgroup_path_sub);
1125 free(info);
9431aa65 1126 lxc_cgroup_process_info_free_and_remove(next);
33ad9f1a 1127}
b98f7d6e 1128
33ad9f1a
CS
1129char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
1130{
1131 struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
1132 if (!info)
1133 return NULL;
1134 return info->cgroup_path;
b98f7d6e
SH
1135}
1136
/*
 * Ask the command interface of container 'name' (running in 'lxcpath')
 * for its cgroup path in the given subsystem. The result of
 * lxc_cmd_get_cgroup_path() is passed through unchanged.
 */
char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
{
	char *cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);

	return cgpath;
}
1141
33ad9f1a 1142char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
b98f7d6e 1143{
33ad9f1a
CS
1144 struct cgroup_mount_point *mp = NULL;
1145 struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
1146 if (!info)
1147 return NULL;
1148 if (info->designated_mount_point) {
8900b9eb 1149 mp = info->designated_mount_point;
33ad9f1a
CS
1150 } else {
1151 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1152 if (!mp)
1153 return NULL;
b98f7d6e 1154 }
33ad9f1a 1155 return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
b98f7d6e 1156}
55c76589 1157
33ad9f1a 1158char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
9a93d992 1159{
33ad9f1a
CS
1160 struct cgroup_meta_data *meta;
1161 struct cgroup_process_info *base_info, *info;
1162 struct cgroup_mount_point *mp;
1163 char *result = NULL;
33ad9f1a
CS
1164
1165 meta = lxc_cgroup_load_meta();
1166 if (!meta)
9a93d992 1167 return NULL;
33ad9f1a
CS
1168 base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
1169 if (!base_info)
178938fe 1170 goto out1;
33ad9f1a
CS
1171 info = find_info_for_subsystem(base_info, subsystem);
1172 if (!info)
178938fe 1173 goto out2;
33ad9f1a 1174 if (info->designated_mount_point) {
8900b9eb 1175 mp = info->designated_mount_point;
33ad9f1a
CS
1176 } else {
1177 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1178 if (!mp)
178938fe 1179 goto out3;
33ad9f1a
CS
1180 }
1181 result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
178938fe 1182out3:
178938fe 1183out2:
33ad9f1a 1184 lxc_cgroup_process_info_free(base_info);
178938fe 1185out1:
33ad9f1a 1186 lxc_cgroup_put_meta(meta);
33ad9f1a
CS
1187 return result;
1188}
9a93d992 1189
33ad9f1a
CS
/*
 * Write 'value' to the cgroup file 'filename' (e.g. "freezer.state") of
 * the handler's container.
 *
 * The subsystem is the part of filename in front of the first '.', or the
 * whole name if there is no '.'. Returns the result of do_cgroup_set(),
 * or -1 if the cgroup directory for the subsystem cannot be determined.
 */
int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{
	size_t name_len = strlen(filename);
	char *subsys = alloca(name_len + 1);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(subsys, filename, name_len + 1);
	dot = strchr(subsys, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(subsys, handler);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
9a93d992 1207
33ad9f1a
CS
/*
 * Read the cgroup file 'filename' (e.g. "freezer.state") of the handler's
 * container into 'value' (at most 'len' bytes, as handled by
 * do_cgroup_get()).
 *
 * The subsystem is the part of filename in front of the first '.', or the
 * whole name if there is no '.'. Returns the result of do_cgroup_get(),
 * or -1 if the cgroup directory for the subsystem cannot be determined.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	size_t name_len = strlen(filename);
	char *subsys = alloca(name_len + 1);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(subsys, filename, name_len + 1);
	dot = strchr(subsys, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(subsys, handler);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
1225
/*
 * Write 'value' to the cgroup file 'filename' (e.g. "freezer.state") of
 * the container 'name' running in 'lxcpath'.
 *
 * The subsystem is the part of filename in front of the first '.', or the
 * whole name if there is no '.'. Returns the result of do_cgroup_set(),
 * or -1 if the cgroup directory for the subsystem cannot be determined.
 */
int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *subsys = alloca(name_len + 1);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(subsys, filename, name_len + 1);
	dot = strchr(subsys, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(subsys, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
1243
/*
 * Read the cgroup file 'filename' (e.g. "freezer.state") of the container
 * 'name' running in 'lxcpath' into 'value' (at most 'len' bytes, as
 * handled by do_cgroup_get()).
 *
 * The subsystem is the part of filename in front of the first '.', or the
 * whole name if there is no '.'. Returns the result of do_cgroup_get(),
 * or -1 if the cgroup directory for the subsystem cannot be determined.
 */
int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *subsys = alloca(name_len + 1);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(subsys, filename, name_len + 1);
	dot = strchr(subsys, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(subsys, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
1261
33ad9f1a
CS
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @filename  : the file of interest (e.g. "freezer.state") or
 *              the subsystem name (e.g. "freezer") in which case
 *              the directory where the cgroup may be modified
 *              will be returned
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines cgpath from the
 * lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *filename, const char *name,
			  const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *subsys = alloca(name_len + 1);
	char *file_suffix = NULL;
	char *dot, *cgroup, *result;

	memcpy(subsys, filename, name_len + 1);
	dot = strchr(subsys, '.');
	if (dot) {
		/* "subsys.file": keep the subsystem name and prepare
		 * "/subsys.file" as the suffix to append to the cgroup
		 * directory
		 */
		*dot = '\0';
		file_suffix = alloca(name_len + 2);
		file_suffix[0] = '/';
		memcpy(file_suffix + 1, filename, name_len + 1);
	}

	cgroup = lxc_cgroup_get_hierarchy_path(subsys, name, lxcpath);
	if (!cgroup)
		return NULL;

	/* file_suffix stays NULL when only a subsystem name was given */
	result = lxc_cgroup_find_abs_path(subsys, cgroup, true, file_suffix);
	free(cgroup);
	return result;
}
1301
33ad9f1a
CS
/*
 * Run do_setup_cgroup() with do_devices == false; see do_setup_cgroup()
 * for which settings that selects. Its return value is passed through.
 */
int lxc_setup_cgroup_without_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{
	const bool devices_pass = false;

	return do_setup_cgroup(h, cgroup_settings, devices_pass);
}
b98f7d6e 1306
/*
 * Run do_setup_cgroup() with do_devices == true; see do_setup_cgroup()
 * for which settings that selects. Its return value is passed through.
 */
int lxc_setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{
	const bool devices_pass = true;

	return do_setup_cgroup(h, cgroup_settings, devices_pass);
}
fd37327f 1311
7997d7da 1312int lxc_setup_mount_cgroup(const char *root, struct cgroup_process_info *base_info, int type)
aae1f3c4
CS
1313{
1314 size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
1315 char *path = NULL;
1316 char **parts = NULL;
1317 char *dirname = NULL;
1318 char *abs_path = NULL;
1319 char *abs_path2 = NULL;
1320 struct cgroup_process_info *info;
1321 int r, saved_errno = 0;
1322
7997d7da
CS
1323 if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
1324 ERROR("could not mount cgroups into container: invalid type specified internally");
1325 errno = EINVAL;
1326 return -1;
1327 }
1328
aae1f3c4
CS
1329 path = calloc(1, bufsz);
1330 if (!path)
1331 return -1;
1332 snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
1333 r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
1334 if (r < 0) {
1335 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1336 return -1;
1337 }
1338
1339 /* now mount all the hierarchies we care about */
1340 for (info = base_info; info; info = info->next) {
1341 size_t subsystem_count, i;
1342 struct cgroup_mount_point *mp = info->designated_mount_point;
1343 if (!mp)
1344 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1345 if (!mp) {
1346 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1347 goto out_error;
1348 }
1349
1350 subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
1351 parts = calloc(subsystem_count + 1, sizeof(char *));
1352 if (!parts)
1353 goto out_error;
1354
1355 for (i = 0; i < subsystem_count; i++) {
1356 if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
1357 parts[i] = info->hierarchy->subsystems[i] + 5;
1358 else
1359 parts[i] = info->hierarchy->subsystems[i];
1360 }
1361 dirname = lxc_string_join(",", (const char **)parts, false);
1362 if (!dirname)
1363 goto out_error;
1364
1365 /* create subsystem directory */
1366 abs_path = lxc_append_paths(path, dirname);
1367 if (!abs_path)
1368 goto out_error;
1369 r = mkdir_p(abs_path, 0755);
1370 if (r < 0 && errno != EEXIST) {
1371 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
1372 goto out_error;
1373 }
1374
aae1f3c4
CS
1375 abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
1376 if (!abs_path2)
1377 goto out_error;
aae1f3c4 1378
7997d7da
CS
1379 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1380 /* bind-mount the cgroup entire filesystem there */
1381 if (strcmp(mp->mount_prefix, "/") != 0) {
1382 /* FIXME: maybe we should just try to remount the entire hierarchy
1383 * with a regular mount command? may that works? */
1384 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
1385 goto out_error;
1386 }
1387 r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
1388 if (r < 0) {
1389 SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
1390 goto out_error;
1391 }
1392 /* main cgroup path should be read-only */
1393 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1394 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1395 if (r < 0) {
1396 SYSERROR("error re-mounting %s readonly", abs_path);
1397 goto out_error;
1398 }
1399 }
1400 /* own cgroup should be read-write */
1401 if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
1402 r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
1403 if (r < 0) {
1404 SYSERROR("error bind-mounting %s onto itself", abs_path2);
1405 goto out_error;
1406 }
1407 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
1408 if (r < 0) {
1409 SYSERROR("error re-mounting %s readwrite", abs_path2);
1410 goto out_error;
1411 }
1412 }
1413 } else {
1414 /* create path for container's cgroup */
1415 r = mkdir_p(abs_path2, 0755);
1416 if (r < 0 && errno != EEXIST) {
1417 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
1418 goto out_error;
1419 }
aae1f3c4 1420
7997d7da
CS
1421 free(abs_path);
1422 abs_path = NULL;
1423
1424 /* bind-mount container's cgroup to that directory */
1425 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1426 if (!abs_path)
1427 goto out_error;
1428 r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
1429 if (r < 0) {
1430 SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
1431 goto out_error;
1432 }
1433 if (type == LXC_AUTO_CGROUP_RO) {
1434 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1435 if (r < 0) {
1436 SYSERROR("error re-mounting %s readonly", abs_path2);
1437 goto out_error;
1438 }
1439 }
aae1f3c4
CS
1440 }
1441
1442 free(abs_path);
1443 free(abs_path2);
1444 abs_path = NULL;
1445 abs_path2 = NULL;
1446
1447 /* add symlinks for every single subsystem */
1448 if (subsystem_count > 1) {
1449 for (i = 0; i < subsystem_count; i++) {
1450 abs_path = lxc_append_paths(path, parts[i]);
1451 if (!abs_path)
1452 goto out_error;
1453 r = symlink(dirname, abs_path);
1454 if (r < 0)
1455 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
1456 free(abs_path);
1457 abs_path = NULL;
1458 }
1459 }
1460 free(dirname);
1461 free(parts);
1462 dirname = NULL;
1463 parts = NULL;
1464 }
1465
1466 /* try to remount the tmpfs readonly, since the container shouldn't
1467 * change anything (this will also make sure that trying to create
1468 * new cgroups outside the allowed area fails with an error instead
1469 * of simply causing this to create directories in the tmpfs itself)
1470 */
7997d7da
CS
1471 if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
1472 mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
aae1f3c4
CS
1473
1474 free(path);
1475
1476 return 0;
1477
1478out_error:
1479 saved_errno = errno;
1480 free(path);
1481 free(dirname);
1482 free(parts);
1483 free(abs_path);
1484 free(abs_path2);
1485 errno = saved_errno;
1486 return -1;
1487}
1488
33ad9f1a
CS
1489int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
1490{
1491 struct cgroup_process_info *info = handler->cgroup;
1492 struct cgroup_mount_point *mp = NULL;
1493 char *abs_path = NULL;
1494 int ret;
460a1cf0 1495
33ad9f1a
CS
1496 if (!info) {
1497 errno = ENOENT;
1498 return -1;
b98f7d6e 1499 }
c8f7c563 1500
33ad9f1a 1501 if (info->designated_mount_point) {
8900b9eb 1502 mp = info->designated_mount_point;
33ad9f1a
CS
1503 } else {
1504 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
1505 if (!mp)
1506 return -1;
c8f7c563
CS
1507 }
1508
33ad9f1a
CS
1509 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1510 if (!abs_path)
1511 return -1;
1512
1513 ret = cgroup_recursive_task_count(abs_path);
1514 free(abs_path);
1515 return ret;
c8f7c563
CS
1516}
1517
33ad9f1a 1518struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta)
d08ba6ec 1519{
33ad9f1a
CS
1520 struct cgroup_process_info *result = NULL;
1521 FILE *proc_pid_cgroup = NULL;
1522 char *line = NULL;
1523 size_t sz = 0;
1524 int saved_errno = 0;
1525 struct cgroup_process_info **cptr = &result;
1526 struct cgroup_process_info *entry = NULL;
1527
1528 proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
1529 if (!proc_pid_cgroup)
b98f7d6e 1530 return NULL;
1ac470c0 1531
33ad9f1a
CS
1532 while (getline(&line, &sz, proc_pid_cgroup) != -1) {
1533 /* file format: hierarchy:subsystems:group */
1534 char *colon1;
1535 char *colon2;
1536 char *endptr;
1537 int hierarchy_number;
1538 struct cgroup_hierarchy *h = NULL;
fd4f5a56 1539
33ad9f1a 1540 if (!line[0])
ae5c8b8e 1541 continue;
b98f7d6e 1542
33ad9f1a
CS
1543 if (line[strlen(line) - 1] == '\n')
1544 line[strlen(line) - 1] = '\0';
1545
1546 colon1 = strchr(line, ':');
1547 if (!colon1)
8900b9eb 1548 continue;
33ad9f1a
CS
1549 *colon1++ = '\0';
1550 colon2 = strchr(colon1, ':');
1551 if (!colon2)
ae5c8b8e 1552 continue;
33ad9f1a 1553 *colon2++ = '\0';
e4659536 1554
33ad9f1a
CS
1555 endptr = NULL;
1556 hierarchy_number = strtoul(line, &endptr, 10);
1557 if (!endptr || *endptr)
9a93d992 1558 continue;
9a93d992 1559
33ad9f1a
CS
1560 if (hierarchy_number > meta->maximum_hierarchy) {
1561 /* we encountered a hierarchy we didn't have before,
1562 * so probably somebody remounted some stuff in the
1563 * mean time...
1564 */
1565 errno = EAGAIN;
1566 goto out_error;
b98f7d6e 1567 }
33ad9f1a
CS
1568
1569 h = meta->hierarchies[hierarchy_number];
1570 if (!h) {
1571 /* we encountered a hierarchy that was thought to be
1572 * dead before, so probably somebody remounted some
1573 * stuff in the mean time...
1574 */
1575 errno = EAGAIN;
1576 goto out_error;
b98f7d6e 1577 }
33ad9f1a
CS
1578
1579 /* we are told that we should ignore this hierarchy */
1580 if (!h->used)
b98f7d6e 1581 continue;
5193cc3d 1582
33ad9f1a
CS
1583 entry = calloc(1, sizeof(struct cgroup_process_info));
1584 if (!entry)
1585 goto out_error;
fd4f5a56 1586
33ad9f1a
CS
1587 entry->meta_ref = lxc_cgroup_get_meta(meta);
1588 entry->hierarchy = h;
1589 entry->cgroup_path = strdup(colon2);
1590 if (!entry->cgroup_path)
1591 goto out_error;
d08ba6ec 1592
33ad9f1a
CS
1593 *cptr = entry;
1594 cptr = &entry->next;
1595 entry = NULL;
b98f7d6e 1596 }
b98f7d6e 1597
33ad9f1a
CS
1598 fclose(proc_pid_cgroup);
1599 free(line);
1600 return result;
1601
1602out_error:
1603 saved_errno = errno;
1604 if (proc_pid_cgroup)
1605 fclose(proc_pid_cgroup);
1606 lxc_cgroup_process_info_free(result);
1607 lxc_cgroup_process_info_free(entry);
1608 free(line);
1609 errno = saved_errno;
ae5c8b8e 1610 return NULL;
36b86299
DL
1611}
1612
/*
 * Extract the cgroup subsystems named in a comma-separated mount option
 * string.
 *
 * A token counts as a subsystem if it starts with "name=" (named
 * hierarchy) or if it appears in kernel_list. The matching tokens are
 * returned as a NULL-terminated array of freshly allocated strings.
 *
 * Returns NULL if no token matched, or on allocation failure (errno
 * preserved from the failing call).
 */
char **subsystems_from_mount_options(const char *mount_options, char **kernel_list)
{
	size_t opts_len = strlen(mount_options);
	char *scratch = alloca(opts_len + 1);
	char *tok, *state = NULL;
	char **found = NULL;
	size_t found_capacity = 0;
	size_t found_count = 0;
	int saved_errno;

	/* strtok_r modifies its input, so tokenize a stack copy */
	memcpy(scratch, mount_options, opts_len + 1);

	tok = strtok_r(scratch, ",", &state);
	while (tok) {
		/* we have a subsystem if it's either in the list of
		 * subsystems provided by the kernel OR if it starts
		 * with name= for named hierarchies
		 */
		if (strncmp(tok, "name=", 5) == 0 ||
		    lxc_string_in_array(tok, (const char **)kernel_list)) {
			if (lxc_grow_array((void ***)&found, &found_capacity, found_count + 1, 12) < 0)
				goto out_free;
			/* keep the array terminated before filling the slot */
			found[found_count + 1] = NULL;
			found[found_count] = strdup(tok);
			if (!found[found_count])
				goto out_free;
			found_count++;
		}
		tok = strtok_r(NULL, ",", &state);
	}

	return found;

out_free:
	saved_errno = errno;
	lxc_free_array((void **)found, free);
	errno = saved_errno;
	return NULL;
}
1649
33ad9f1a 1650void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
b98f7d6e 1651{
33ad9f1a
CS
1652 if (!mp)
1653 return;
1654 free(mp->mount_point);
1655 free(mp->mount_prefix);
1656 free(mp);
bcbd102c
SH
1657}
1658
33ad9f1a 1659void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
341a9bd8 1660{
33ad9f1a
CS
1661 if (!h)
1662 return;
1663 lxc_free_array((void **)h->subsystems, free);
8bfcb981 1664 free(h->all_mount_points);
33ad9f1a
CS
1665 free(h);
1666}
341a9bd8 1667
33ad9f1a
CS
/*
 * Check whether 'name' is acceptable as a single cgroup path component.
 *
 * Rejected are the names "." and "..", and any name containing '/',
 * the character 127 or a character that compares below 32 as a plain
 * char. Everything else, including the empty string, is accepted.
 */
bool is_valid_cgroup(const char *name)
{
	size_t i;

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return false;

	for (i = 0; name[i] != '\0'; i++) {
		char c = name[i];

		if (c == '/' || c == 127 || c < 32)
			return false;
	}

	return true;
}
341a9bd8 1677
/*
 * Create or remove the directory of cgroup 'path' below mount point 'mp'.
 *
 * With do_remove set, the directory is removed with cgroup_rmdir() when
 * 'recurse' is non-zero and with plain rmdir() otherwise; without
 * do_remove it is created with mode 0777 ('recurse' is ignored).
 *
 * Returns the result of the directory operation with its errno preserved,
 * or -1 if the absolute path could not be built.
 */
int create_or_remove_cgroup(bool do_remove, struct cgroup_mount_point *mp, const char *path, int recurse)
{
	char *abs = cgroup_to_absolute_path(mp, path, NULL);
	int rc, saved_errno;

	if (!abs)
		return -1;

	/* create or remove directory */
	if (!do_remove)
		rc = mkdir(abs, 0777);
	else if (recurse)
		rc = cgroup_rmdir(abs);
	else
		rc = rmdir(abs);

	/* free() must not disturb the errno the caller will look at */
	saved_errno = errno;
	free(abs);
	errno = saved_errno;
	return rc;
}
bcbd102c 1698
/*
 * Create the directory of cgroup 'path' below mount point 'mp'.
 * Shorthand for create_or_remove_cgroup() in create mode.
 */
int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;

	return create_or_remove_cgroup(do_remove, mp, path, false);
}
1703
/*
 * Remove the directory of cgroup 'path' below mount point 'mp';
 * 'recurse' is passed on to create_or_remove_cgroup() in remove mode.
 */
int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
576f946d 1708
33ad9f1a
CS
1709char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix)
1710{
1711 /* first we have to make sure we subtract the mount point's prefix */
1712 char *prefix = mp->mount_prefix;
1713 char *buf;
1714 ssize_t len, rv;
1715
1716 /* we want to make sure only absolute paths to cgroups are passed to us */
1717 if (path[0] != '/') {
1718 errno = EINVAL;
1719 return NULL;
1720 }
b98f7d6e 1721
33ad9f1a
CS
1722 if (prefix && !strcmp(prefix, "/"))
1723 prefix = NULL;
b98f7d6e 1724
33ad9f1a
CS
1725 /* prefix doesn't match */
1726 if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
1727 errno = EINVAL;
1728 return NULL;
1729 }
1730 /* if prefix is /foo and path is /foobar */
1731 if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
1732 errno = EINVAL;
1733 return NULL;
1734 }
b98f7d6e 1735
33ad9f1a
CS
1736 /* remove prefix from path */
1737 path += prefix ? strlen(prefix) : 0;
b98f7d6e 1738
33ad9f1a
CS
1739 len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
1740 buf = calloc(len + 1, 1);
50266dc6
DE
1741 if (!buf)
1742 return NULL;
33ad9f1a 1743 rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
8900b9eb 1744 if (rv > len) {
33ad9f1a
CS
1745 free(buf);
1746 errno = ENOMEM;
8900b9eb 1747 return NULL;
8b92dc3a 1748 }
576f946d 1749
33ad9f1a 1750 return buf;
e0f888d9 1751}
283678ed 1752
33ad9f1a 1753struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
283678ed 1754{
33ad9f1a
CS
1755 struct cgroup_process_info *info_ptr;
1756 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1757 struct cgroup_hierarchy *h = info_ptr->hierarchy;
1758 if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
1759 return info_ptr;
b98f7d6e 1760 }
33ad9f1a
CS
1761 errno = ENOENT;
1762 return NULL;
1763}
283678ed 1764
33ad9f1a
CS
/*
 * Read the file <cgroup_path>/<sub_filename> into 'value' via
 * lxc_read_from_file() (passing 'len' along).
 *
 * Returns the result of lxc_read_from_file() with its errno preserved,
 * or -1 if the file name could not be built.
 */
int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len)
{
	const char *components[3];
	char *full_name;
	int rc, saved_errno;

	components[0] = cgroup_path;
	components[1] = sub_filename;
	components[2] = NULL;

	full_name = lxc_string_join("/", components, false);
	if (!full_name)
		return -1;

	rc = lxc_read_from_file(full_name, value, len);

	/* free() must not disturb the errno the caller will look at */
	saved_errno = errno;
	free(full_name);
	errno = saved_errno;
	return rc;
}
b113383b 1785
/*
 * Write the string 'value' to the file <cgroup_path>/<sub_filename> via
 * lxc_write_to_file().
 *
 * Returns the result of lxc_write_to_file() with its errno preserved,
 * or -1 if the file name could not be built.
 */
int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value)
{
	const char *components[3];
	char *full_name;
	int rc, saved_errno;

	components[0] = cgroup_path;
	components[1] = sub_filename;
	components[2] = NULL;

	full_name = lxc_string_join("/", components, false);
	if (!full_name)
		return -1;

	rc = lxc_write_to_file(full_name, value, strlen(value), false);

	/* free() must not disturb the errno the caller will look at */
	saved_errno = errno;
	free(full_name);
	errno = saved_errno;
	return rc;
}
1806
33ad9f1a 1807int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices)
b98f7d6e
SH
1808{
1809 struct lxc_list *iterator;
1810 struct lxc_cgroup *cg;
1811 int ret = -1;
1812
33ad9f1a 1813 if (lxc_list_empty(cgroup_settings))
b98f7d6e
SH
1814 return 0;
1815
33ad9f1a 1816 lxc_list_for_each(iterator, cgroup_settings) {
b98f7d6e
SH
1817 cg = iterator->elem;
1818
33ad9f1a 1819 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
b98f7d6e 1820 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
33ad9f1a 1821 cgroup_devices_has_allow_or_deny(h, cg->value, false))
b98f7d6e
SH
1822 continue;
1823 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
33ad9f1a 1824 cgroup_devices_has_allow_or_deny(h, cg->value, true))
b98f7d6e 1825 continue;
33ad9f1a 1826 if (lxc_cgroup_set_handler(cg->subsystem, cg->value, h)) {
b98f7d6e
SH
1827 ERROR("Error setting %s to %s for %s\n",
1828 cg->subsystem, cg->value, h->name);
1829 goto out;
1830 }
b113383b 1831 }
b98f7d6e
SH
1832
1833 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
b113383b
SH
1834 }
1835
b98f7d6e
SH
1836 ret = 0;
1837 INFO("cgroup has been setup");
1838out:
b113383b
SH
1839 return ret;
1840}
b98f7d6e 1841
33ad9f1a
CS
/*
 * Check the container's devices.list to decide whether a devices.allow /
 * devices.deny setting is redundant.
 *
 * for_allow == false (deny): only the values "a" and "a *:* rwm" are
 * considered; any other value yields false right away. The result is then
 * true unless devices.list contains the line "a *:* rwm".
 *
 * for_allow == true (allow): the result is true if devices.list contains
 * either "a *:* rwm" or a line equal to 'v'.
 *
 * Returns false if the devices cgroup directory or devices.list cannot be
 * determined or opened.
 */
bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow)
{
	char *cgroup_dir;
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
	if (!cgroup_dir)
		return false;
	parts[0] = cgroup_dir;
	path = lxc_string_join("/", parts, false);
	/* the directory string is only needed to build 'path'; release it
	 * here so that no later exit leaks it
	 */
	free(cgroup_dir);
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);

		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			ret = for_allow;
			goto out;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			goto out;
		}
	}

out:
	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
1895
/*
 * Count the lines of every "tasks" file found in 'cgroup_path' and in all
 * directories below it.
 *
 * Sub-directories or tasks files that cannot be counted (negative result)
 * simply do not contribute to the sum.
 *
 * Returns the sum, 0 if cgroup_path cannot be opened as a directory, or
 * -1 if building a sub-path or stat()ing an entry fails.
 */
int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d;
	struct dirent *dent;
	int n = 0, r;

	d = opendir(cgroup_path);
	if (!d)
		return 0;

	/* readdir() replaces the deprecated readdir_r(): the stream 'd' is
	 * private to this call (each recursion level opens its own), so the
	 * entry it returns cannot be overwritten by anyone else
	 */
	while ((dent = readdir(d)) != NULL) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;
		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			closedir(d);
			return -1;
		}
		r = stat(sub_path, &st);
		if (r < 0) {
			closedir(d);
			free(sub_path);
			return -1;
		}
		if (S_ISDIR(st.st_mode)) {
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			r = count_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}
	closedir(d);

	return n;
}
1958
/*
 * count_lines: return how many lines getline() can read from file @fn,
 * or -1 if the file cannot be opened.
 */
int count_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int total = 0;
	FILE *fp = fopen_cloexec(fn, "r");

	if (!fp)
		return -1;

	/* every successful getline() call accounts for exactly one line */
	for (;;) {
		if (getline(&buf, &cap, fp) == -1)
			break;
		total++;
	}

	free(buf);
	fclose(fp);
	return total;
}
1977
1ea59ad2 1978int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path)
b98f7d6e 1979{
33ad9f1a 1980 int r, saved_errno = 0;
7e7243e1 1981 char buf[2];
1ea59ad2
SH
1982
1983 /* If this is the memory cgroup, we want to enforce hierarchy.
1984 * But don't fail if for some reason we can't.
1985 */
1986 if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
1987 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
1988 if (cc_path) {
7e7243e1
SH
1989 r = lxc_read_from_file(cc_path, buf, 1);
1990 if (r < 1 || buf[0] != '1') {
1991 r = lxc_write_to_file(cc_path, "1", 1, false);
1992 if (r < 0)
1993 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
1994 }
1ea59ad2
SH
1995 free(cc_path);
1996 }
1997 }
1998
33ad9f1a
CS
1999 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2000 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2001 * and cpuset.cpus and then
2002 */
2003 if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
2004 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
2005 if (!cc_path)
2006 return -1;
7e7243e1
SH
2007 r = lxc_read_from_file(cc_path, buf, 1);
2008 if (r == 1 && buf[0] == '1') {
2009 free(cc_path);
2010 return 0;
2011 }
33ad9f1a
CS
2012 r = lxc_write_to_file(cc_path, "1", 1, false);
2013 saved_errno = errno;
2014 free(cc_path);
2015 errno = saved_errno;
2016 return r < 0 ? -1 : 0;
2017 }
2018 return 0;
b98f7d6e 2019}
484ed030
SH
2020
2021extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
2022 const char *lxcpath);
2023int do_unfreeze(const char *nsgroup, int freeze, const char *name, const char *lxcpath)
2024{
2025 char freezer[MAXPATHLEN], *f;
2026 char tmpf[32];
2027 int fd, ret;
2028
2029 ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", nsgroup);
2030 if (ret >= MAXPATHLEN) {
2031 ERROR("freezer.state name too long");
2032 return -1;
2033 }
2034
2035 fd = open(freezer, O_RDWR);
2036 if (fd < 0) {
2037 SYSERROR("failed to open freezer at '%s'", nsgroup);
2038 return -1;
2039 }
2040
2041 if (freeze) {
2042 f = "FROZEN";
2043 ret = write(fd, f, strlen(f) + 1);
2044 } else {
2045 f = "THAWED";
2046 ret = write(fd, f, strlen(f) + 1);
2047
2048 /* compatibility code with old freezer interface */
2049 if (ret < 0) {
2050 f = "RUNNING";
2051 ret = write(fd, f, strlen(f) + 1) < 0;
2052 }
2053 }
2054
2055 if (ret < 0) {
2056 SYSERROR("failed to write '%s' to '%s'", f, freezer);
2057 goto out;
2058 }
2059
2060 while (1) {
2061 ret = lseek(fd, 0L, SEEK_SET);
2062 if (ret < 0) {
2063 SYSERROR("failed to lseek on file '%s'", freezer);
2064 goto out;
2065 }
2066
2067 ret = read(fd, tmpf, sizeof(tmpf));
2068 if (ret < 0) {
2069 SYSERROR("failed to read to '%s'", freezer);
2070 goto out;
2071 }
2072
2073 ret = strncmp(f, tmpf, strlen(f));
2074 if (!ret)
2075 {
2076 if (name)
2077 lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
2078 break; /* Success */
2079 }
2080
2081 sleep(1);
2082
2083 ret = lseek(fd, 0L, SEEK_SET);
2084 if (ret < 0) {
2085 SYSERROR("failed to lseek on file '%s'", freezer);
2086 goto out;
2087 }
2088
2089 ret = write(fd, f, strlen(f) + 1);
2090 if (ret < 0) {
2091 SYSERROR("failed to write '%s' to '%s'", f, freezer);
2092 goto out;
2093 }
2094 }
2095
2096out:
2097 close(fd);
2098 return ret;
2099}
2100
/*
 * freeze_unfreeze: look up the "freezer" hierarchy path of container @name
 * (under @lxcpath) and hand it to do_unfreeze().
 *
 * Returns -1 if the path lookup fails, otherwise whatever do_unfreeze()
 * returns.
 */
int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
{
	int rc = -1;
	char *freezer_dir = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);

	if (freezer_dir) {
		rc = do_unfreeze(freezer_dir, freeze, name, lxcpath);
		free(freezer_dir);
	}

	return rc;
}
2114
2115lxc_state_t freezer_state(const char *name, const char *lxcpath)
2116{
2117 char *cgabspath = NULL;
2118 char freezer[MAXPATHLEN];
2119 char status[MAXPATHLEN];
2120 FILE *file;
2121 int ret;
2122
2123 cgabspath = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);
2124 if (!cgabspath)
2125 return -1;
2126
2127 ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", cgabspath);
2128 if (ret < 0 || ret >= MAXPATHLEN)
2129 goto out;
2130
2131 file = fopen(freezer, "r");
2132 if (!file) {
2133 ret = -1;
2134 goto out;
2135 }
2136
2137 ret = fscanf(file, "%s", status);
2138 fclose(file);
2139
2140 if (ret == EOF) {
2141 SYSERROR("failed to read %s", freezer);
2142 ret = -1;
2143 goto out;
2144 }
2145
2146 ret = lxc_str2state(status);
2147
2148out:
2149 free(cgabspath);
2150 return ret;
2151}
2152