]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroup.c
lxc-ubuntu-cloud: Fix cache and lock location
[mirror_lxc.git] / src / lxc / cgroup.c
CommitLineData
576f946d 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
576f946d 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
576f946d 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
27#include <errno.h>
576f946d 28#include <unistd.h>
29#include <string.h>
341a9bd8 30#include <dirent.h>
576f946d 31#include <fcntl.h>
b98f7d6e 32#include <ctype.h>
576f946d 33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
aae1f3c4 37#include <sys/mount.h>
576f946d 38#include <netinet/in.h>
39#include <net/if.h>
40
e2bcd7db 41#include "error.h"
881450bb 42#include "config.h"
ae5c8b8e 43#include "commands.h"
b98f7d6e
SH
44#include "list.h"
45#include "conf.h"
33ad9f1a 46#include "utils.h"
740d1928 47#include "bdev.h"
f2363e38
ÇO
48#include "log.h"
49#include "cgroup.h"
50#include "start.h"
484ed030 51#include "state.h"
36eb9bde 52
edaf8b1b
SG
53#if IS_BIONIC
54#include <../include/lxcmntent.h>
55#else
56#include <mntent.h>
57#endif
58
36eb9bde 59lxc_log_define(lxc_cgroup, lxc);
576f946d 60
33ad9f1a
CS
61static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
62static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
63static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
64static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
65static bool is_valid_cgroup(const char *name);
33ad9f1a 66static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
603c64c2 67static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
33ad9f1a
CS
68static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
69static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
70static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
71static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
72static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
73static int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
74static int cgroup_recursive_task_count(const char *cgroup_path);
75static int count_lines(const char *fn);
1ea59ad2 76static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
33ad9f1a 77
603c64c2
SH
/*
 * Recursively remove the directory tree rooted at @dirname.
 *
 * Every entry of @dirname that lstat() reports as a directory is removed
 * first by recursion; afterwards @dirname itself is removed with rmdir().
 * Entries that are not directories are never unlinked by this function.
 *
 * The walk is best-effort: a failure on one entry is logged and remembered,
 * but the remaining entries are still processed.
 *
 * Returns 0 if everything succeeded, -1 otherwise.  On return errno holds
 * the first error that was recorded (0 if none).
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent dirent, *direntp;
	int saved_errno = 0;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	while (!readdir_r(dir, &dirent, &direntp)) {
		struct stat mystat;
		int rc;

		/* NULL result with a zero return value means end of directory */
		if (!direntp)
			break;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			/* errno values are positive; this ends up in errno below */
			if (!saved_errno)
				saved_errno = ENOMEM;
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			SYSERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			if (!saved_errno)
				saved_errno = errno;
			continue;
		}

		if (S_ISDIR(mystat.st_mode)) {
			if (cgroup_rmdir(pathname) < 0) {
				if (!saved_errno)
					saved_errno = errno;
				failed = 1;
			}
		}
	}

	if (rmdir(dirname) < 0) {
		SYSERROR("%s: failed to delete %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		SYSERROR("%s: failed to close directory %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	errno = saved_errno;
	return failed ? -1 : 0;
}
146
33ad9f1a
CS
/*
 * Load the cgroup meta data, honouring the global "lxc.cgroup.use" setting.
 *
 * If the setting is present it is split on ',' and handed to
 * lxc_cgroup_load_meta2() as the subsystem whitelist; otherwise a NULL
 * whitelist is passed.
 *
 * Returns the meta data object, or NULL on failure.  errno as left by
 * lxc_cgroup_load_meta2() is preserved across the cleanup.
 */
struct cgroup_meta_data *lxc_cgroup_load_meta()
{
	struct cgroup_meta_data *result;
	char **whitelist = NULL;
	const char *use_setting;
	int err;

	/* errno is cleared so that "not configured" can be told from an error */
	errno = 0;
	use_setting = lxc_global_config_value("lxc.cgroup.use");
	if (use_setting) {
		whitelist = lxc_string_split_and_trim(use_setting, ',');
		if (!whitelist)
			return NULL;
	} else if (errno != 0) {
		return NULL;
	}

	result = lxc_cgroup_load_meta2((const char **)whitelist);

	err = errno;
	lxc_free_array((void **)whitelist, free);
	errno = err;

	return result;
}
fd37327f 170
b653309a
SH
/*
 * Step 1: determine all kernel subsystems.
 *
 * Reads /proc/cgroups and appends a copy of the first tab-separated field of
 * every usable line to *kernel_subsystems.  A line is usable when it is not
 * empty, does not start with '#', has at least two tabs and its second field
 * parses completely as a decimal number.
 *
 * Returns true on success.  On failure false is returned and entries that
 * were already appended stay in *kernel_subsystems.
 */
static bool find_cgroup_subsystems(char ***kernel_subsystems)
{
	FILE *fp;
	char *buf = NULL;
	size_t buflen = 0;
	size_t count = 0;
	size_t capacity = 0;
	bool ok = false;

	fp = fopen_cloexec("/proc/cgroups", "r");
	if (!fp)
		return false;

	while (getline(&buf, &buflen, fp) != -1) {
		char *id_field;
		char *id_end;
		char *endp = NULL;
		char *copy;

		/* skip the header comment and empty lines */
		if (buf[0] == '#' || buf[0] == '\0')
			continue;

		id_field = strchr(buf, '\t');
		if (!id_field)
			continue;
		*id_field = '\0';
		id_field++;

		id_end = strchr(id_field, '\t');
		if (!id_end)
			continue;
		*id_end = '\0';

		/* the hierarchy id is only validated, its value is not used */
		(void)strtoul(id_field, &endp, 10);
		if (!endp || *endp != '\0')
			continue;

		if (lxc_grow_array((void ***)kernel_subsystems, &capacity, count + 1, 12) < 0)
			goto out;

		copy = strdup(buf);
		(*kernel_subsystems)[count] = copy;
		if (!copy)
			goto out;
		count++;
	}
	ok = true;

out:
	fclose(fp);
	free(buf);
	return ok;
}
226
227/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
228 * since mount points don't specify hierarchy number and
229 * /proc/cgroups does not contain named hierarchies
230 */
231static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
232 bool all_kernel_subsystems, bool all_named_subsystems,
233 const char **subsystem_whitelist)
234{
235 FILE *proc_self_cgroup;
236 char *line = NULL;
237 size_t sz = 0;
238 int r;
239 bool bret = false;
240 size_t hierarchy_capacity = 0;
ef6e34ee 241
33ad9f1a
CS
242 proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
243 /* if for some reason (because of setns() and pid namespace for example),
244 * /proc/self is not valid, we try /proc/1/cgroup... */
245 if (!proc_self_cgroup)
246 proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
247 if (!proc_self_cgroup)
b653309a 248 return false;
33ad9f1a
CS
249
250 while (getline(&line, &sz, proc_self_cgroup) != -1) {
251 /* file format: hierarchy:subsystems:group,
252 * we only extract hierarchy and subsystems
253 * here */
254 char *colon1;
255 char *colon2;
256 int hierarchy_number;
257 struct cgroup_hierarchy *h = NULL;
258 char **p;
259
260 if (!line[0])
261 continue;
ad08bbb7 262
33ad9f1a
CS
263 colon1 = strchr(line, ':');
264 if (!colon1)
8900b9eb 265 continue;
33ad9f1a
CS
266 *colon1++ = '\0';
267 colon2 = strchr(colon1, ':');
268 if (!colon2)
269 continue;
270 *colon2 = '\0';
ad08bbb7 271
33ad9f1a
CS
272 colon2 = NULL;
273 hierarchy_number = strtoul(line, &colon2, 10);
274 if (!colon2 || *colon2)
275 continue;
576f946d 276
33ad9f1a
CS
277 if (hierarchy_number > meta_data->maximum_hierarchy) {
278 /* lxc_grow_array will never shrink, so even if we find a lower
279 * hierarchy number here, the array will never be smaller
280 */
281 r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
282 if (r < 0)
b653309a 283 goto out;
5193cc3d 284
33ad9f1a
CS
285 meta_data->maximum_hierarchy = hierarchy_number;
286 }
fd37327f 287
33ad9f1a
CS
288 /* this shouldn't happen, we had this already */
289 if (meta_data->hierarchies[hierarchy_number])
b653309a 290 goto out;
33ad9f1a
CS
291
292 h = calloc(1, sizeof(struct cgroup_hierarchy));
293 if (!h)
b653309a 294 goto out;
33ad9f1a
CS
295
296 meta_data->hierarchies[hierarchy_number] = h;
297
298 h->index = hierarchy_number;
299 h->subsystems = lxc_string_split_and_trim(colon1, ',');
300 if (!h->subsystems)
b653309a 301 goto out;
33ad9f1a
CS
302 /* see if this hierarchy should be considered */
303 if (!all_kernel_subsystems || !all_named_subsystems) {
304 for (p = h->subsystems; *p; p++) {
305 if (!strncmp(*p, "name=", 5)) {
306 if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
307 h->used = true;
308 break;
309 }
310 } else {
311 if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
312 h->used = true;
313 break;
314 }
315 }
316 }
317 } else {
318 /* we want all hierarchy anyway */
319 h->used = true;
ae5c8b8e 320 }
ae5c8b8e 321 }
b653309a 322 bret = true;
0b9c21ab 323
b653309a 324out:
33ad9f1a 325 fclose(proc_self_cgroup);
0ccf7c2a 326 free(line);
b653309a
SH
327 return bret;
328}
329
330/* Step 3: determine all mount points of each hierarchy */
331static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
332{
333 bool bret = false;
334 FILE *proc_self_mountinfo;
335 char *line = NULL;
336 size_t sz = 0;
337 char **tokens = NULL;
338 size_t mount_point_count = 0;
339 size_t mount_point_capacity = 0;
340 size_t token_capacity = 0;
341 int r;
342
33ad9f1a
CS
343 proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
344 /* if for some reason (because of setns() and pid namespace for example),
345 * /proc/self is not valid, we try /proc/1/cgroup... */
346 if (!proc_self_mountinfo)
347 proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
348 if (!proc_self_mountinfo)
b653309a 349 return false;
33ad9f1a
CS
350
351 while (getline(&line, &sz, proc_self_mountinfo) != -1) {
178938fe 352 char *token, *line_tok, *saveptr = NULL;
33ad9f1a
CS
353 size_t i, j, k;
354 struct cgroup_mount_point *mount_point;
355 struct cgroup_hierarchy *h;
356 char **subsystems;
357
358 if (line[0] && line[strlen(line) - 1] == '\n')
359 line[strlen(line) - 1] = '\0';
360
178938fe 361 for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
33ad9f1a
CS
362 r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
363 if (r < 0)
b653309a 364 goto out;
33ad9f1a
CS
365 tokens[i++] = token;
366 }
b98f7d6e 367
33ad9f1a
CS
368 /* layout of /proc/self/mountinfo:
369 * 0: id
370 * 1: parent id
371 * 2: device major:minor
372 * 3: mount prefix
8900b9eb 373 * 4: mount point
33ad9f1a
CS
374 * 5: per-mount options
375 * [optional X]: additional data
376 * X+7: "-"
377 * X+8: type
378 * X+9: source
379 * X+10: per-superblock options
380 */
381 for (j = 6; j < i && tokens[j]; j++)
382 if (!strcmp(tokens[j], "-"))
383 break;
fd4f5a56 384
33ad9f1a
CS
385 /* could not find separator */
386 if (j >= i || !tokens[j])
387 continue;
388 /* there should be exactly three fields after
389 * the separator
390 */
391 if (i != j + 4)
392 continue;
fd4f5a56 393
33ad9f1a
CS
394 /* not a cgroup filesystem */
395 if (strcmp(tokens[j + 1], "cgroup") != 0)
396 continue;
b98f7d6e 397
33ad9f1a
CS
398 subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
399 if (!subsystems)
b653309a 400 goto out;
33ad9f1a
CS
401
402 h = NULL;
403 for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
404 if (meta_data->hierarchies[k] &&
405 meta_data->hierarchies[k]->subsystems[0] &&
406 lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
407 /* TODO: we could also check if the lists really match completely,
408 * just to have an additional sanity check */
409 h = meta_data->hierarchies[k];
b98f7d6e 410 break;
33ad9f1a 411 }
b98f7d6e 412 }
33ad9f1a
CS
413 lxc_free_array((void **)subsystems, free);
414
415 r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
416 if (r < 0)
b653309a 417 goto out;
33ad9f1a
CS
418
419 /* create mount point object */
420 mount_point = calloc(1, sizeof(*mount_point));
421 if (!mount_point)
b653309a 422 goto out;
33ad9f1a
CS
423
424 meta_data->mount_points[mount_point_count++] = mount_point;
425
426 mount_point->hierarchy = h;
427 mount_point->mount_point = strdup(tokens[4]);
428 mount_point->mount_prefix = strdup(tokens[3]);
429 if (!mount_point->mount_point || !mount_point->mount_prefix)
b653309a 430 goto out;
33ad9f1a
CS
431 mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
432
433 if (!strcmp(mount_point->mount_prefix, "/")) {
434 if (mount_point->read_only) {
435 if (!h->ro_absolute_mount_point)
436 h->ro_absolute_mount_point = mount_point;
437 } else {
438 if (!h->rw_absolute_mount_point)
439 h->rw_absolute_mount_point = mount_point;
440 }
b98f7d6e 441 }
ae5c8b8e 442
33ad9f1a
CS
443 k = lxc_array_len((void **)h->all_mount_points);
444 r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
445 if (r < 0)
b653309a 446 goto out;
33ad9f1a 447 h->all_mount_points[k] = mount_point;
fd4f5a56 448 }
b653309a
SH
449 bret = true;
450
451out:
b653309a 452 fclose(proc_self_mountinfo);
b653309a 453 free(tokens);
2cdafc54 454 free(line);
b653309a
SH
455 return bret;
456}
457
458struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
459{
460 bool all_kernel_subsystems = true;
461 bool all_named_subsystems = false;
462 struct cgroup_meta_data *meta_data = NULL;
463 char **kernel_subsystems = NULL;
464 int saved_errno = 0;
465
466 /* if the subsystem whitelist is not specified, include all
467 * hierarchies that contain kernel subsystems by default but
468 * no hierarchies that only contain named subsystems
469 *
470 * if it is specified, the specifier @all will select all
471 * hierarchies, @kernel will select all hierarchies with
472 * kernel subsystems and @named will select all named
473 * hierarchies
474 */
475 all_kernel_subsystems = subsystem_whitelist ?
476 (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
477 true;
478 all_named_subsystems = subsystem_whitelist ?
479 (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
480 false;
481
482 meta_data = calloc(1, sizeof(struct cgroup_meta_data));
483 if (!meta_data)
484 return NULL;
485 meta_data->ref = 1;
486
487 if (!find_cgroup_subsystems(&kernel_subsystems))
488 goto out_error;
489
490 if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
491 all_named_subsystems, subsystem_whitelist))
492 goto out_error;
493
494 if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
495 goto out_error;
fd4f5a56 496
33ad9f1a
CS
497 /* oops, we couldn't find anything */
498 if (!meta_data->hierarchies || !meta_data->mount_points) {
499 errno = EINVAL;
500 goto out_error;
ae5c8b8e 501 }
fd4f5a56 502
3a0abb3a 503 lxc_free_array((void **)kernel_subsystems, free);
33ad9f1a
CS
504 return meta_data;
505
506out_error:
507 saved_errno = errno;
33ad9f1a
CS
508 lxc_free_array((void **)kernel_subsystems, free);
509 lxc_cgroup_put_meta(meta_data);
510 errno = saved_errno;
511 return NULL;
fd4f5a56
DL
512}
513
33ad9f1a 514struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
e14f67a7 515{
33ad9f1a
CS
516 meta_data->ref++;
517 return meta_data;
518}
e14f67a7 519
33ad9f1a
CS
520struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
521{
522 size_t i;
523 if (!meta_data)
524 return NULL;
525 if (--meta_data->ref > 0)
526 return meta_data;
527 lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
528 if (meta_data->hierarchies) {
529 for (i = 0; i <= meta_data->maximum_hierarchy; i++)
530 lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
e14f67a7 531 }
33ad9f1a 532 free(meta_data->hierarchies);
178938fe 533 free(meta_data);
33ad9f1a 534 return NULL;
e14f67a7
U
535}
536
33ad9f1a 537struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
e14f67a7 538{
33ad9f1a
CS
539 size_t i;
540 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
541 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
542 if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
543 return h;
e14f67a7 544 }
e14f67a7
U
545 return NULL;
546}
547
33ad9f1a 548struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
b98f7d6e 549{
33ad9f1a
CS
550 struct cgroup_mount_point **mps;
551 struct cgroup_mount_point *current_result = NULL;
552 ssize_t quality = -1;
b98f7d6e 553
33ad9f1a
CS
554 /* trivial case */
555 if (hierarchy->rw_absolute_mount_point)
556 return hierarchy->rw_absolute_mount_point;
557 if (!should_be_writable && hierarchy->ro_absolute_mount_point)
558 return hierarchy->ro_absolute_mount_point;
b98f7d6e 559
33ad9f1a
CS
560 for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
561 struct cgroup_mount_point *mp = *mps;
562 size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
b98f7d6e 563
33ad9f1a
CS
564 if (prefix_len == 1 && mp->mount_prefix[0] == '/')
565 prefix_len = 0;
b98f7d6e 566
33ad9f1a
CS
567 if (should_be_writable && mp->read_only)
568 continue;
569
570 if (!prefix_len ||
571 (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
572 (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
573 /* search for the best quality match, i.e. the match with the
574 * shortest prefix where this group is still contained
575 */
576 if (quality == -1 || prefix_len < quality) {
577 current_result = mp;
578 quality = prefix_len;
579 }
b98f7d6e
SH
580 }
581 }
582
33ad9f1a
CS
583 if (!current_result)
584 errno = ENOENT;
585 return current_result;
b98f7d6e
SH
586}
587
/*
 * Resolve the absolute path of @group (plus @suffix) for @subsystem.
 *
 * Loads the cgroup meta data, looks up the hierarchy of @subsystem, selects
 * a mount point for @group (a writable one if @should_be_writable) and lets
 * cgroup_to_absolute_path() build the path.  The meta data reference is
 * dropped again before returning.
 *
 * Returns the path produced by cgroup_to_absolute_path(), or NULL on
 * failure; in the failure case errno is preserved across the cleanup.
 */
char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{
	struct cgroup_meta_data *md;
	struct cgroup_hierarchy *hier;
	struct cgroup_mount_point *mount;
	char *abs_path = NULL;
	int err;

	md = lxc_cgroup_load_meta();
	if (!md)
		return NULL;

	hier = lxc_cgroup_find_hierarchy(md, subsystem);
	if (hier) {
		mount = lxc_cgroup_find_mount_point(hier, group, should_be_writable);
		if (mount)
			abs_path = cgroup_to_absolute_path(mount, group, suffix);
	}

	if (abs_path) {
		lxc_cgroup_put_meta(md);
		return abs_path;
	}

	/* keep the errno of whichever step failed */
	err = errno;
	lxc_cgroup_put_meta(md);
	errno = err;
	return NULL;
}
621
/*
 * Read the cgroup membership of process @pid.
 *
 * Formats the path "/proc/<pid>/cgroup" and passes it, together with @meta,
 * to lxc_cgroup_process_info_getx(), whose result is returned.
 */
struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
{
	char proc_path[32];
	unsigned long pid_num = (unsigned long)pid;

	snprintf(proc_path, sizeof(proc_path), "/proc/%lu/cgroup", pid_num);

	return lxc_cgroup_process_info_getx(proc_path, meta);
}
628
/*
 * Read the cgroup membership of the process with pid 1.
 *
 * Thin wrapper around lxc_cgroup_process_info_get().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	const pid_t init_pid = 1;

	return lxc_cgroup_process_info_get(init_pid, meta);
}
b98f7d6e 633
33ad9f1a
CS
/*
 * Read the cgroup membership of the calling process.
 *
 * "/proc/self/cgroup" is tried first; if that yields nothing the lookup is
 * repeated through the numeric pid obtained from getpid().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
{
	struct cgroup_process_info *info;

	info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
	if (info)
		return info;

	return lxc_cgroup_process_info_get(getpid(), meta);
}
ae5c8b8e 642
692ba18f
SH
/*
 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
 * is already in a new cgroup named after the pid.  Rename that cgroup
 * directory so that it is named after the container instead.
 *
 * @mountpath: mount point of the hierarchy, e.g. /sys/fs/cgroup
 * @oldname:   cgroup the task was in before the pid-named cgroup was
 *             created, e.g. /lxc
 * @pid:       pid the current cgroup directory is named after, e.g. 2975
 * @name:      container name, e.g. c1
 *
 * With the example values, /sys/fs/cgroup//lxc/2975 is renamed to
 * /sys/fs/cgroup//lxc/c1.  If the target already exists an rmdir() of it is
 * attempted first.
 *
 * Returns the new cgroup name relative to the mount point ("/lxc/c1") as a
 * malloc()ed string that the caller has to free, or NULL on failure.
 */
static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
{
	char *fulloldpath;
	char *newname, *fullnewpath;
	int len, newlen, ret;

	/*
	 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
	 * name is c1,
	 * fulloldpath = /cgroup/ab/2375
	 * fullnewpath = /cgroup/ab/c1
	 * newname = /ab/c1
	 */

	/* 22 leaves room for the two '/', the decimal pid and the NUL byte */
	len = strlen(oldname) + strlen(mountpath) + 22;
	fulloldpath = alloca(len);
	/* %lu matches the unsigned long the pid is converted to */
	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
	if (ret < 0 || ret >= len)
		return NULL;

	len = strlen(oldname) + strlen(name) + 2;
	newname = malloc(len);
	if (!newname) {
		SYSERROR("Out of memory");
		return NULL;
	}
	ret = snprintf(newname, len, "%s/%s", oldname, name);
	if (ret < 0 || ret >= len) {
		free(newname);
		return NULL;
	}

	newlen = strlen(mountpath) + len + 2;
	fullnewpath = alloca(newlen);
	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
	if (ret < 0 || ret >= newlen) {
		free(newname);
		return NULL;
	}

	/* a leftover target can only be reused if it can be removed */
	if (access(fullnewpath, F_OK) == 0) {
		if (rmdir(fullnewpath) != 0) {
			SYSERROR("container cgroup %s already exists.", fullnewpath);
			free(newname);
			return NULL;
		}
	}
	if (rename(fulloldpath, fullnewpath)) {
		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
		free(newname);
		return NULL;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return newname;
}
710
33ad9f1a 711/* create a new cgroup */
47d8fb3b 712extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
33ad9f1a 713{
001b026e 714 char **cgroup_path_components = NULL;
33ad9f1a
CS
715 char **p = NULL;
716 char *path_so_far = NULL;
717 char **new_cgroup_paths = NULL;
718 char **new_cgroup_paths_sub = NULL;
719 struct cgroup_mount_point *mp;
720 struct cgroup_hierarchy *h;
721 struct cgroup_process_info *base_info = NULL;
722 struct cgroup_process_info *info_ptr;
723 int saved_errno;
724 int r;
725 unsigned suffix = 0;
726 bool had_sub_pattern = false;
727 size_t i;
ae5c8b8e 728
33ad9f1a
CS
729 if (!is_valid_cgroup(name)) {
730 ERROR("Invalid cgroup name: '%s'", name);
731 errno = EINVAL;
732 return NULL;
ae5c8b8e
SH
733 }
734
33ad9f1a
CS
735 if (!strstr(path_pattern, "%n")) {
736 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
737 errno = EINVAL;
738 return NULL;
739 }
fd37327f 740
33ad9f1a
CS
741 /* we will modify the result of this operation directly,
742 * so we don't have to copy the data structure
743 */
744 base_info = (path_pattern[0] == '/') ?
745 lxc_cgroup_process_info_get_init(meta_data) :
746 lxc_cgroup_process_info_get_self(meta_data);
747 if (!base_info)
748 return NULL;
c8f7c563 749
33ad9f1a
CS
750 new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
751 if (!new_cgroup_paths)
752 goto out_initial_error;
753
754 new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
755 if (!new_cgroup_paths_sub)
756 goto out_initial_error;
757
758 /* find mount points we can use */
759 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
760 h = info_ptr->hierarchy;
761 mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
762 if (!mp) {
763 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
764 goto out_initial_error;
765 }
766 info_ptr->designated_mount_point = mp;
460a1cf0 767
692ba18f
SH
768 if (lxc_string_in_array("ns", (const char **)h->subsystems))
769 continue;
1ea59ad2 770 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
33ad9f1a
CS
771 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
772 goto out_initial_error;
773 }
774 }
b98f7d6e 775
33ad9f1a
CS
776 /* normalize the path */
777 cgroup_path_components = lxc_normalize_path(path_pattern);
778 if (!cgroup_path_components)
779 goto out_initial_error;
780
781 /* go through the path components to see if we can create them */
782 for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
783 /* we only want to create the same component with -1, -2, etc.
784 * if the component contains the container name itself, otherwise
785 * it's not an error if it already exists
786 */
787 char *p_eff = *p ? *p : (char *)sub_pattern;
788 bool contains_name = strstr(p_eff, "%n");
789 char *current_component = NULL;
790 char *current_subpath = NULL;
791 char *current_entire_path = NULL;
792 char *parts[3];
793 size_t j = 0;
794 i = 0;
795
796 /* if we are processing the subpattern, we want to make sure
797 * loop is ended the next time around
798 */
799 if (!*p) {
800 had_sub_pattern = true;
801 p--;
802 }
b98f7d6e 803
33ad9f1a
CS
804 goto find_name_on_this_level;
805
806 cleanup_name_on_this_level:
807 /* This is reached if we found a name clash.
808 * In that case, remove the cgroup from all previous hierarchies
809 */
810 for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
603c64c2 811 r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
33ad9f1a
CS
812 if (r < 0)
813 WARN("could not clean up cgroup we created when trying to create container");
814 free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
815 info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
816 }
817 if (current_component != current_subpath)
818 free(current_subpath);
819 if (current_component != p_eff)
820 free(current_component);
821 current_component = current_subpath = NULL;
822 /* try again with another suffix */
823 ++suffix;
824
825 find_name_on_this_level:
826 /* determine name of the path component we should create */
827 if (contains_name && suffix > 0) {
828 char *buf = calloc(strlen(name) + 32, 1);
829 if (!buf)
830 goto out_initial_error;
831 snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
832 current_component = lxc_string_replace("%n", buf, p_eff);
833 free(buf);
834 } else {
835 current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
836 }
837 parts[0] = path_so_far;
838 parts[1] = current_component;
839 parts[2] = NULL;
840 current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
841
842 /* Now go through each hierarchy and try to create the
843 * corresponding cgroup
844 */
845 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
846 char *parts2[3];
692ba18f
SH
847
848 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
849 continue;
33ad9f1a
CS
850 current_entire_path = NULL;
851
852 parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
853 parts2[1] = current_subpath;
854 parts2[2] = NULL;
855 current_entire_path = lxc_string_join("/", (const char **)parts2, false);
856
857 if (!*p) {
858 /* we are processing the subpath, so only update that one */
859 free(new_cgroup_paths_sub[i]);
860 new_cgroup_paths_sub[i] = strdup(current_entire_path);
861 if (!new_cgroup_paths_sub[i])
862 goto cleanup_from_error;
863 } else {
864 /* remember which path was used on this controller */
865 free(new_cgroup_paths[i]);
866 new_cgroup_paths[i] = strdup(current_entire_path);
867 if (!new_cgroup_paths[i])
868 goto cleanup_from_error;
869 }
fd4f5a56 870
33ad9f1a
CS
871 r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
872 if (r < 0 && errno == EEXIST && contains_name) {
873 /* name clash => try new name with new suffix */
874 free(current_entire_path);
875 current_entire_path = NULL;
876 goto cleanup_name_on_this_level;
877 } else if (r < 0 && errno != EEXIST) {
878 SYSERROR("Could not create cgroup %s", current_entire_path);
879 goto cleanup_from_error;
880 } else if (r == 0) {
881 /* successfully created */
882 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
883 if (r < 0)
884 goto cleanup_from_error;
885 info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
886 } else {
887 /* if we didn't create the cgroup, then we have to make sure that
888 * further cgroups will be created properly
889 */
1ea59ad2 890 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
33ad9f1a
CS
891 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
892 goto cleanup_from_error;
893 }
894
895 /* already existed but path component of pattern didn't contain '%n',
896 * so this is not an error; but then we don't need current_entire_path
897 * anymore...
898 */
899 free(current_entire_path);
900 current_entire_path = NULL;
901 }
902 }
fd4f5a56 903
33ad9f1a
CS
904 /* save path so far */
905 free(path_so_far);
906 path_so_far = strdup(current_subpath);
907 if (!path_so_far)
908 goto cleanup_from_error;
909
910 /* cleanup */
911 if (current_component != current_subpath)
912 free(current_subpath);
913 if (current_component != p_eff)
914 free(current_component);
915 current_component = current_subpath = NULL;
916 continue;
917
918 cleanup_from_error:
919 /* called if an error occured in the loop, so we
920 * do some additional cleanup here
921 */
922 saved_errno = errno;
923 if (current_component != current_subpath)
924 free(current_subpath);
925 if (current_component != p_eff)
926 free(current_component);
927 free(current_entire_path);
928 errno = saved_errno;
929 goto out_initial_error;
fd4f5a56
DL
930 }
931
33ad9f1a
CS
932 /* we're done, now update the paths */
933 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
47d8fb3b
CS
934 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
935 * will take care of it
936 * Since we do a continue in above loop, new_cgroup_paths[i] is
937 * unset anyway, as is new_cgroup_paths_sub[i]
692ba18f 938 */
47d8fb3b
CS
939 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
940 continue;
941 free(info_ptr->cgroup_path);
942 info_ptr->cgroup_path = new_cgroup_paths[i];
943 info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
fd4f5a56 944 }
33ad9f1a
CS
945 /* don't use lxc_free_array since we used the array members
946 * to store them in our result...
947 */
948 free(new_cgroup_paths);
949 free(new_cgroup_paths_sub);
950 free(path_so_far);
951 lxc_free_array((void **)cgroup_path_components, free);
952 return base_info;
953
954out_initial_error:
955 saved_errno = errno;
956 free(path_so_far);
957 lxc_cgroup_process_info_free_and_remove(base_info);
958 lxc_free_array((void **)new_cgroup_paths, free);
959 lxc_free_array((void **)new_cgroup_paths_sub, free);
960 lxc_free_array((void **)cgroup_path_components, free);
961 errno = saved_errno;
962 return NULL;
c8f7c563
CS
963}
964
47d8fb3b
CS
965int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
966{
967 struct cgroup_process_info *info_ptr;
968 int r;
969
970 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
971 if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
972 continue;
973 /*
974 * For any path which has ns cgroup mounted, handler->pid is already
975 * moved into a container called '%d % (handler->pid)'. Rename it to
976 * the cgroup name and record that.
977 */
978 char *tmp = cgroup_rename_nsgroup((const char *)info_ptr->designated_mount_point->mount_point,
979 info_ptr->cgroup_path, pid, name);
980 if (!tmp)
981 return -1;
982 free(info_ptr->cgroup_path);
983 info_ptr->cgroup_path = tmp;
984 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
985 if (r < 0)
986 return -1;
987 tmp = strdup(tmp);
988 if (!tmp)
989 return -1;
990 info_ptr->created_paths[info_ptr->created_paths_count++] = tmp;
991 }
992 return 0;
993}
994
33ad9f1a
CS
995/* get the cgroup membership of a given container */
996struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
c8f7c563 997{
33ad9f1a
CS
998 struct cgroup_process_info *result = NULL;
999 int saved_errno = 0;
1000 size_t i;
1001 struct cgroup_process_info **cptr = &result;
1002 struct cgroup_process_info *entry = NULL;
1003 char *path = NULL;
1004
1005 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
1006 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
1007 if (!h || !h->used)
1008 continue;
c8f7c563 1009
33ad9f1a
CS
1010 /* use the command interface to look for the cgroup */
1011 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
1012 if (!path)
1013 goto out_error;
1014
1015 entry = calloc(1, sizeof(struct cgroup_process_info));
1016 if (!entry)
1017 goto out_error;
1018 entry->meta_ref = lxc_cgroup_get_meta(meta_data);
1019 entry->hierarchy = h;
1020 entry->cgroup_path = path;
1021 path = NULL;
1022
1023 /* it is not an error if we don't find anything here,
1024 * it is up to the caller to decide what to do in that
1025 * case */
1026 entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
1027
1028 *cptr = entry;
1029 cptr = &entry->next;
1030 entry = NULL;
c8f7c563
CS
1031 }
1032
33ad9f1a
CS
1033 return result;
1034out_error:
1035 saved_errno = errno;
1036 free(path);
1037 lxc_cgroup_process_info_free(result);
1038 lxc_cgroup_process_info_free(entry);
1039 errno = saved_errno;
1040 return NULL;
fd4f5a56
DL
1041}
1042
33ad9f1a
CS
1043/* move a processs to the cgroups specified by the membership */
1044int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
4f17323e 1045{
33ad9f1a
CS
1046 char pid_buf[32];
1047 char *cgroup_tasks_fn;
1048 int r;
1049 struct cgroup_process_info *info_ptr;
1050
1051 snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
1052 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1053 char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
1054 info_ptr->cgroup_path_sub :
1055 info_ptr->cgroup_path;
1056
1057 if (!info_ptr->designated_mount_point) {
1058 info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
1059 if (!info_ptr->designated_mount_point) {
1060 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
1061 return -1;
1062 }
1063 }
4f17323e 1064
33ad9f1a
CS
1065 cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
1066 if (!cgroup_tasks_fn) {
1067 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1068 return -1;
1069 }
4f17323e 1070
33ad9f1a 1071 r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
5903da82 1072 free(cgroup_tasks_fn);
33ad9f1a
CS
1073 if (r < 0) {
1074 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1075 return -1;
1076 }
4f17323e
CS
1077 }
1078
33ad9f1a 1079 return 0;
4f17323e
CS
1080}
1081
33ad9f1a
CS
1082/* free process membership information */
1083void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
fc7de561 1084{
33ad9f1a
CS
1085 struct cgroup_process_info *next;
1086 if (!info)
b98f7d6e 1087 return;
33ad9f1a
CS
1088 next = info->next;
1089 lxc_cgroup_put_meta(info->meta_ref);
1090 free(info->cgroup_path);
1091 free(info->cgroup_path_sub);
1092 lxc_free_array((void **)info->created_paths, free);
1093 free(info);
1094 lxc_cgroup_process_info_free(next);
fc7de561
SH
1095}
1096
33ad9f1a
CS
1097/* free process membership information and remove cgroups that were created */
1098void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
b98f7d6e 1099{
33ad9f1a
CS
1100 struct cgroup_process_info *next;
1101 char **pp;
1102 if (!info)
1103 return;
1104 next = info->next;
603c64c2 1105 {
33ad9f1a
CS
1106 struct cgroup_mount_point *mp = info->designated_mount_point;
1107 if (!mp)
1108 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1109 if (mp)
1110 /* ignore return value here, perhaps we created the
1111 * '/lxc' cgroup in this container but another container
1112 * is still running (for example)
1113 */
603c64c2
SH
1114 (void)remove_cgroup(mp, info->cgroup_path, true);
1115 }
1116 for (pp = info->created_paths; pp && *pp; pp++);
1117 for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
33ad9f1a 1118 free(*pp);
b98f7d6e 1119 }
33ad9f1a
CS
1120 free(info->created_paths);
1121 lxc_cgroup_put_meta(info->meta_ref);
1122 free(info->cgroup_path);
1123 free(info->cgroup_path_sub);
1124 free(info);
9431aa65 1125 lxc_cgroup_process_info_free_and_remove(next);
33ad9f1a 1126}
b98f7d6e 1127
33ad9f1a
CS
1128char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
1129{
1130 struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
1131 if (!info)
1132 return NULL;
1133 return info->cgroup_path;
b98f7d6e
SH
1134}
1135
/*
 * Ask the container @name in @lxcpath, through the command interface,
 * for its cgroup path in the hierarchy of @subsystem.
 *
 * The result of lxc_cmd_get_cgroup_path() is passed through unchanged.
 */
char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
{
	char *cgroup_path;

	cgroup_path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
	return cgroup_path;
}
1140
33ad9f1a 1141char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
b98f7d6e 1142{
33ad9f1a
CS
1143 struct cgroup_mount_point *mp = NULL;
1144 struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
1145 if (!info)
1146 return NULL;
1147 if (info->designated_mount_point) {
8900b9eb 1148 mp = info->designated_mount_point;
33ad9f1a
CS
1149 } else {
1150 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1151 if (!mp)
1152 return NULL;
b98f7d6e 1153 }
33ad9f1a 1154 return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
b98f7d6e 1155}
55c76589 1156
33ad9f1a 1157char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
9a93d992 1158{
33ad9f1a
CS
1159 struct cgroup_meta_data *meta;
1160 struct cgroup_process_info *base_info, *info;
1161 struct cgroup_mount_point *mp;
1162 char *result = NULL;
33ad9f1a
CS
1163
1164 meta = lxc_cgroup_load_meta();
1165 if (!meta)
9a93d992 1166 return NULL;
33ad9f1a
CS
1167 base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
1168 if (!base_info)
178938fe 1169 goto out1;
33ad9f1a
CS
1170 info = find_info_for_subsystem(base_info, subsystem);
1171 if (!info)
178938fe 1172 goto out2;
33ad9f1a 1173 if (info->designated_mount_point) {
8900b9eb 1174 mp = info->designated_mount_point;
33ad9f1a
CS
1175 } else {
1176 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1177 if (!mp)
178938fe 1178 goto out3;
33ad9f1a
CS
1179 }
1180 result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
178938fe 1181out3:
178938fe 1182out2:
33ad9f1a 1183 lxc_cgroup_process_info_free(base_info);
178938fe 1184out1:
33ad9f1a 1185 lxc_cgroup_put_meta(meta);
33ad9f1a
CS
1186 return result;
1187}
9a93d992 1188
33ad9f1a
CS
/*
 * Write @value to the cgroup file @filename (e.g. "freezer.state") of the
 * handler's container.
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose cgroup directory is written to.
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * for the subsystem could not be determined.
 */
int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy index(), which lives in <strings.h> */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
	if (path) {
		ret = do_cgroup_set(path, filename, value);
		free(path);
	}
	return ret;
}
9a93d992 1206
33ad9f1a
CS
/*
 * Read the cgroup file @filename (e.g. "freezer.state") of the handler's
 * container into @value, a buffer of @len bytes.
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose cgroup directory is read from.
 *
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * for the subsystem could not be determined.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy index(), which lives in <strings.h> */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
	if (path) {
		ret = do_cgroup_get(path, filename, value, len);
		free(path);
	}
	return ret;
}
1224
/*
 * Write @value to the cgroup file @filename (e.g. "freezer.state") of the
 * container @name running in @lxcpath.
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose cgroup directory is written to.
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * for the subsystem could not be determined.
 */
int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy index(), which lives in <strings.h> */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		ret = do_cgroup_set(path, filename, value);
		free(path);
	}
	return ret;
}
1242
/*
 * Read the cgroup file @filename (e.g. "freezer.state") of the container
 * @name running in @lxcpath into @value, a buffer of @len bytes.
 *
 * The subsystem is the part of @filename before the first '.'; it selects
 * the hierarchy whose cgroup directory is read from.
 *
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * for the subsystem could not be determined.
 */
int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	char *subsystem, *dot, *path;
	int ret = -1;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy index(), which lives in <strings.h> */
	dot = strchr(subsystem, '.');
	if (dot)
		*dot = '\0';

	path = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (path) {
		ret = do_cgroup_get(path, filename, value, len);
		free(path);
	}
	return ret;
}
1260
33ad9f1a
CS
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @filename  : the file of interest (e.g. "freezer.state") or
 *              the subsystem name (e.g. "freezer") in which case
 *              the directory where the cgroup may be modified
 *              will be returned
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines cgpath from the
 * lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *filename, const char *name,
			  const char *lxcpath)
{
	char *subsystem, *longer_file = NULL, *p, *group, *path;

	subsystem = alloca(strlen(filename) + 1);
	strcpy(subsystem, filename);
	/* strchr() instead of the legacy index(), which lives in <strings.h> */
	p = strchr(subsystem, '.');
	if (p) {
		*p = '\0';
		/* only when a file (not just a subsystem) was given:
		 * "/<filename>" is appended to the cgroup directory
		 */
		longer_file = alloca(strlen(filename) + 2);
		longer_file[0] = '/';
		strcpy(longer_file + 1, filename);
	}

	group = lxc_cgroup_get_hierarchy_path(subsystem, name, lxcpath);
	if (!group)
		return NULL;

	/* longer_file is still NULL when filename contained no '.' */
	path = lxc_cgroup_find_abs_path(subsystem, group, true, longer_file);
	free(group);
	return path;
}
1300
33ad9f1a
CS
/*
 * Apply the cgroup settings whose key does not start with "devices".
 * Returns whatever do_setup_cgroup() returns.
 */
int lxc_setup_cgroup_without_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{
	int rc;

	rc = do_setup_cgroup(h, cgroup_settings, false);
	return rc;
}
b98f7d6e 1305
/*
 * Apply only the cgroup settings whose key starts with "devices".
 * Returns whatever do_setup_cgroup() returns.
 */
int lxc_setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{
	int rc;

	rc = do_setup_cgroup(h, cgroup_settings, true);
	return rc;
}
fd37327f 1310
7997d7da 1311int lxc_setup_mount_cgroup(const char *root, struct cgroup_process_info *base_info, int type)
aae1f3c4
CS
1312{
1313 size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
1314 char *path = NULL;
1315 char **parts = NULL;
1316 char *dirname = NULL;
1317 char *abs_path = NULL;
1318 char *abs_path2 = NULL;
1319 struct cgroup_process_info *info;
1320 int r, saved_errno = 0;
1321
7997d7da
CS
1322 if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
1323 ERROR("could not mount cgroups into container: invalid type specified internally");
1324 errno = EINVAL;
1325 return -1;
1326 }
1327
aae1f3c4
CS
1328 path = calloc(1, bufsz);
1329 if (!path)
1330 return -1;
1331 snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
1332 r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
1333 if (r < 0) {
1334 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1335 return -1;
1336 }
1337
1338 /* now mount all the hierarchies we care about */
1339 for (info = base_info; info; info = info->next) {
1340 size_t subsystem_count, i;
1341 struct cgroup_mount_point *mp = info->designated_mount_point;
1342 if (!mp)
1343 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1344 if (!mp) {
1345 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1346 goto out_error;
1347 }
1348
1349 subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
1350 parts = calloc(subsystem_count + 1, sizeof(char *));
1351 if (!parts)
1352 goto out_error;
1353
1354 for (i = 0; i < subsystem_count; i++) {
1355 if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
1356 parts[i] = info->hierarchy->subsystems[i] + 5;
1357 else
1358 parts[i] = info->hierarchy->subsystems[i];
1359 }
1360 dirname = lxc_string_join(",", (const char **)parts, false);
1361 if (!dirname)
1362 goto out_error;
1363
1364 /* create subsystem directory */
1365 abs_path = lxc_append_paths(path, dirname);
1366 if (!abs_path)
1367 goto out_error;
1368 r = mkdir_p(abs_path, 0755);
1369 if (r < 0 && errno != EEXIST) {
1370 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
1371 goto out_error;
1372 }
1373
aae1f3c4
CS
1374 abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
1375 if (!abs_path2)
1376 goto out_error;
aae1f3c4 1377
7997d7da
CS
1378 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1379 /* bind-mount the cgroup entire filesystem there */
1380 if (strcmp(mp->mount_prefix, "/") != 0) {
1381 /* FIXME: maybe we should just try to remount the entire hierarchy
1382 * with a regular mount command? may that works? */
1383 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
1384 goto out_error;
1385 }
1386 r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
1387 if (r < 0) {
1388 SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
1389 goto out_error;
1390 }
1391 /* main cgroup path should be read-only */
1392 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1393 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1394 if (r < 0) {
1395 SYSERROR("error re-mounting %s readonly", abs_path);
1396 goto out_error;
1397 }
1398 }
1399 /* own cgroup should be read-write */
1400 if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
1401 r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
1402 if (r < 0) {
1403 SYSERROR("error bind-mounting %s onto itself", abs_path2);
1404 goto out_error;
1405 }
1406 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
1407 if (r < 0) {
1408 SYSERROR("error re-mounting %s readwrite", abs_path2);
1409 goto out_error;
1410 }
1411 }
1412 } else {
1413 /* create path for container's cgroup */
1414 r = mkdir_p(abs_path2, 0755);
1415 if (r < 0 && errno != EEXIST) {
1416 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
1417 goto out_error;
1418 }
aae1f3c4 1419
7997d7da
CS
1420 free(abs_path);
1421 abs_path = NULL;
1422
1423 /* bind-mount container's cgroup to that directory */
1424 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1425 if (!abs_path)
1426 goto out_error;
1427 r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
1428 if (r < 0) {
1429 SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
1430 goto out_error;
1431 }
1432 if (type == LXC_AUTO_CGROUP_RO) {
1433 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1434 if (r < 0) {
1435 SYSERROR("error re-mounting %s readonly", abs_path2);
1436 goto out_error;
1437 }
1438 }
aae1f3c4
CS
1439 }
1440
1441 free(abs_path);
1442 free(abs_path2);
1443 abs_path = NULL;
1444 abs_path2 = NULL;
1445
1446 /* add symlinks for every single subsystem */
1447 if (subsystem_count > 1) {
1448 for (i = 0; i < subsystem_count; i++) {
1449 abs_path = lxc_append_paths(path, parts[i]);
1450 if (!abs_path)
1451 goto out_error;
1452 r = symlink(dirname, abs_path);
1453 if (r < 0)
1454 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
1455 free(abs_path);
1456 abs_path = NULL;
1457 }
1458 }
1459 free(dirname);
1460 free(parts);
1461 dirname = NULL;
1462 parts = NULL;
1463 }
1464
1465 /* try to remount the tmpfs readonly, since the container shouldn't
1466 * change anything (this will also make sure that trying to create
1467 * new cgroups outside the allowed area fails with an error instead
1468 * of simply causing this to create directories in the tmpfs itself)
1469 */
7997d7da
CS
1470 if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
1471 mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
aae1f3c4
CS
1472
1473 free(path);
1474
1475 return 0;
1476
1477out_error:
1478 saved_errno = errno;
1479 free(path);
1480 free(dirname);
1481 free(parts);
1482 free(abs_path);
1483 free(abs_path2);
1484 errno = saved_errno;
1485 return -1;
1486}
1487
33ad9f1a
CS
1488int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
1489{
1490 struct cgroup_process_info *info = handler->cgroup;
1491 struct cgroup_mount_point *mp = NULL;
1492 char *abs_path = NULL;
1493 int ret;
460a1cf0 1494
33ad9f1a
CS
1495 if (!info) {
1496 errno = ENOENT;
1497 return -1;
b98f7d6e 1498 }
c8f7c563 1499
33ad9f1a 1500 if (info->designated_mount_point) {
8900b9eb 1501 mp = info->designated_mount_point;
33ad9f1a
CS
1502 } else {
1503 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
1504 if (!mp)
1505 return -1;
c8f7c563
CS
1506 }
1507
33ad9f1a
CS
1508 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1509 if (!abs_path)
1510 return -1;
1511
1512 ret = cgroup_recursive_task_count(abs_path);
1513 free(abs_path);
1514 return ret;
c8f7c563
CS
1515}
1516
574c4428
QH
1517static struct cgroup_process_info *
1518lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str,
1519 struct cgroup_meta_data *meta)
d08ba6ec 1520{
33ad9f1a
CS
1521 struct cgroup_process_info *result = NULL;
1522 FILE *proc_pid_cgroup = NULL;
1523 char *line = NULL;
1524 size_t sz = 0;
1525 int saved_errno = 0;
1526 struct cgroup_process_info **cptr = &result;
1527 struct cgroup_process_info *entry = NULL;
1528
1529 proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
1530 if (!proc_pid_cgroup)
b98f7d6e 1531 return NULL;
1ac470c0 1532
33ad9f1a
CS
1533 while (getline(&line, &sz, proc_pid_cgroup) != -1) {
1534 /* file format: hierarchy:subsystems:group */
1535 char *colon1;
1536 char *colon2;
1537 char *endptr;
1538 int hierarchy_number;
1539 struct cgroup_hierarchy *h = NULL;
fd4f5a56 1540
33ad9f1a 1541 if (!line[0])
ae5c8b8e 1542 continue;
b98f7d6e 1543
33ad9f1a
CS
1544 if (line[strlen(line) - 1] == '\n')
1545 line[strlen(line) - 1] = '\0';
1546
1547 colon1 = strchr(line, ':');
1548 if (!colon1)
8900b9eb 1549 continue;
33ad9f1a
CS
1550 *colon1++ = '\0';
1551 colon2 = strchr(colon1, ':');
1552 if (!colon2)
ae5c8b8e 1553 continue;
33ad9f1a 1554 *colon2++ = '\0';
e4659536 1555
33ad9f1a
CS
1556 endptr = NULL;
1557 hierarchy_number = strtoul(line, &endptr, 10);
1558 if (!endptr || *endptr)
9a93d992 1559 continue;
9a93d992 1560
33ad9f1a
CS
1561 if (hierarchy_number > meta->maximum_hierarchy) {
1562 /* we encountered a hierarchy we didn't have before,
1563 * so probably somebody remounted some stuff in the
1564 * mean time...
1565 */
1566 errno = EAGAIN;
1567 goto out_error;
b98f7d6e 1568 }
33ad9f1a
CS
1569
1570 h = meta->hierarchies[hierarchy_number];
1571 if (!h) {
1572 /* we encountered a hierarchy that was thought to be
1573 * dead before, so probably somebody remounted some
1574 * stuff in the mean time...
1575 */
1576 errno = EAGAIN;
1577 goto out_error;
b98f7d6e 1578 }
33ad9f1a
CS
1579
1580 /* we are told that we should ignore this hierarchy */
1581 if (!h->used)
b98f7d6e 1582 continue;
5193cc3d 1583
33ad9f1a
CS
1584 entry = calloc(1, sizeof(struct cgroup_process_info));
1585 if (!entry)
1586 goto out_error;
fd4f5a56 1587
33ad9f1a
CS
1588 entry->meta_ref = lxc_cgroup_get_meta(meta);
1589 entry->hierarchy = h;
1590 entry->cgroup_path = strdup(colon2);
1591 if (!entry->cgroup_path)
1592 goto out_error;
d08ba6ec 1593
33ad9f1a
CS
1594 *cptr = entry;
1595 cptr = &entry->next;
1596 entry = NULL;
b98f7d6e 1597 }
b98f7d6e 1598
33ad9f1a
CS
1599 fclose(proc_pid_cgroup);
1600 free(line);
1601 return result;
1602
1603out_error:
1604 saved_errno = errno;
1605 if (proc_pid_cgroup)
1606 fclose(proc_pid_cgroup);
1607 lxc_cgroup_process_info_free(result);
1608 lxc_cgroup_process_info_free(entry);
1609 free(line);
1610 errno = saved_errno;
ae5c8b8e 1611 return NULL;
36b86299
DL
1612}
1613
574c4428
QH
/*
 * Extract the cgroup subsystems from a comma separated mount option
 * string.
 *
 * An option counts as a subsystem if it starts with "name=" (named
 * hierarchies) or if it appears in @kernel_list.
 *
 * Returns a NULL-terminated array of newly allocated strings. NULL is
 * returned on allocation failure (errno preserved), and also when no
 * option qualifies.
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	char **subsystems = NULL;
	size_t capacity = 0;
	size_t count = 0;
	char *copy, *tok, *state = NULL;
	int saved_errno;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = alloca(strlen(mount_options) + 1);
	strcpy(copy, mount_options);

	tok = strtok_r(copy, ",", &state);
	while (tok) {
		/* we have a subsystem if it's either in the list of
		 * subsystems provided by the kernel OR if it starts
		 * with name= for named hierarchies
		 */
		bool named = strncmp(tok, "name=", 5) == 0;

		if (named || lxc_string_in_array(tok, (const char **)kernel_list)) {
			if (lxc_grow_array((void ***)&subsystems, &capacity, count + 1, 12) < 0)
				goto fail;
			/* terminate first, so the array stays well-formed
			 * for the cleanup if strdup() fails
			 */
			subsystems[count + 1] = NULL;
			subsystems[count] = strdup(tok);
			if (!subsystems[count])
				goto fail;
			count++;
		}

		tok = strtok_r(NULL, ",", &state);
	}

	return subsystems;

fail:
	saved_errno = errno;
	lxc_free_array((void**)subsystems, free);
	errno = saved_errno;
	return NULL;
}
1651
574c4428 1652static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
b98f7d6e 1653{
33ad9f1a
CS
1654 if (!mp)
1655 return;
1656 free(mp->mount_point);
1657 free(mp->mount_prefix);
1658 free(mp);
bcbd102c
SH
1659}
1660
574c4428 1661static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
341a9bd8 1662{
33ad9f1a
CS
1663 if (!h)
1664 return;
1665 lxc_free_array((void **)h->subsystems, free);
8bfcb981 1666 free(h->all_mount_points);
33ad9f1a
CS
1667 free(h);
1668}
341a9bd8 1669
/*
 * Check whether @name is acceptable as a single cgroup path component.
 *
 * Rejected are the special names "." and "..", and any name containing
 * a '/' or a character outside the printable ASCII range 33..126.
 */
static bool is_valid_cgroup(const char *name)
{
	size_t i;

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return false;

	for (i = 0; name[i] != '\0'; i++) {
		char c = name[i];

		/* Use the ASCII printable characters range(32 - 127)
		 * is reasonable, we kick out 32(SPACE) because it'll
		 * break legacy lxc-ls
		 */
		if (c <= 32 || c >= 127 || c == '/')
			return false;
	}

	return true;
}
341a9bd8 1683
574c4428
QH
/*
 * Create or remove the directory of cgroup @path below mount point @mp.
 *
 * @do_remove : false creates the directory (mode 0777), true removes it
 * @recurse   : only used when removing; non-zero removes via
 *              cgroup_rmdir(), zero via a plain rmdir()
 *
 * Returns the result of the directory operation with its errno
 * preserved, or -1 if the absolute path could not be built.
 */
static int create_or_remove_cgroup(bool do_remove,
		struct cgroup_mount_point *mp, const char *path, int recurse)
{
	char *abs_dir;
	int rc, saved_errno;

	abs_dir = cgroup_to_absolute_path(mp, path, NULL);
	if (!abs_dir)
		return -1;

	if (!do_remove)
		rc = mkdir(abs_dir, 0777);
	else if (recurse)
		rc = cgroup_rmdir(abs_dir);
	else
		rc = rmdir(abs_dir);

	/* report the errno of the directory operation */
	saved_errno = errno;
	free(abs_dir);
	errno = saved_errno;
	return rc;
}
bcbd102c 1705
/*
 * Create the directory of cgroup @path below mount point @mp.
 * Returns whatever create_or_remove_cgroup() returns.
 */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	int rc;

	rc = create_or_remove_cgroup(false, mp, path, false);
	return rc;
}
1710
574c4428
QH
/*
 * Remove the directory of cgroup @path below mount point @mp; @recurse is
 * passed on to create_or_remove_cgroup().
 * Returns whatever create_or_remove_cgroup() returns.
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse)
{
	int rc;

	rc = create_or_remove_cgroup(true, mp, path, recurse);
	return rc;
}
576f946d 1716
574c4428
QH
1717static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp,
1718 const char *path, const char *suffix)
33ad9f1a
CS
1719{
1720 /* first we have to make sure we subtract the mount point's prefix */
1721 char *prefix = mp->mount_prefix;
1722 char *buf;
1723 ssize_t len, rv;
1724
1725 /* we want to make sure only absolute paths to cgroups are passed to us */
1726 if (path[0] != '/') {
1727 errno = EINVAL;
1728 return NULL;
1729 }
b98f7d6e 1730
33ad9f1a
CS
1731 if (prefix && !strcmp(prefix, "/"))
1732 prefix = NULL;
b98f7d6e 1733
33ad9f1a
CS
1734 /* prefix doesn't match */
1735 if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
1736 errno = EINVAL;
1737 return NULL;
1738 }
1739 /* if prefix is /foo and path is /foobar */
1740 if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
1741 errno = EINVAL;
1742 return NULL;
1743 }
b98f7d6e 1744
33ad9f1a
CS
1745 /* remove prefix from path */
1746 path += prefix ? strlen(prefix) : 0;
b98f7d6e 1747
33ad9f1a
CS
1748 len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
1749 buf = calloc(len + 1, 1);
50266dc6
DE
1750 if (!buf)
1751 return NULL;
33ad9f1a 1752 rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
8900b9eb 1753 if (rv > len) {
33ad9f1a
CS
1754 free(buf);
1755 errno = ENOMEM;
8900b9eb 1756 return NULL;
8b92dc3a 1757 }
576f946d 1758
33ad9f1a 1759 return buf;
e0f888d9 1760}
283678ed 1761
574c4428
QH
1762static struct cgroup_process_info *
1763find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
283678ed 1764{
33ad9f1a
CS
1765 struct cgroup_process_info *info_ptr;
1766 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1767 struct cgroup_hierarchy *h = info_ptr->hierarchy;
1768 if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
1769 return info_ptr;
b98f7d6e 1770 }
33ad9f1a
CS
1771 errno = ENOENT;
1772 return NULL;
1773}
283678ed 1774
574c4428
QH
/*
 * Read the file @sub_filename inside the cgroup directory @cgroup_path
 * into @value, a buffer of @len bytes.
 *
 * Returns the result of lxc_read_from_file() with its errno preserved,
 * or -1 if the file name could not be built.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *full_path;
	int rc, saved_errno;

	full_path = lxc_string_join("/", components, false);
	if (!full_path)
		return -1;

	rc = lxc_read_from_file(full_path, value, len);

	/* report the errno of the read */
	saved_errno = errno;
	free(full_path);
	errno = saved_errno;
	return rc;
}
b113383b 1796
574c4428
QH
/*
 * Write the string @value to the file @sub_filename inside the cgroup
 * directory @cgroup_path.
 *
 * Returns the result of lxc_write_to_file() with its errno preserved,
 * or -1 if the file name could not be built.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *full_path;
	int rc, saved_errno;

	full_path = lxc_string_join("/", components, false);
	if (!full_path)
		return -1;

	rc = lxc_write_to_file(full_path, value, strlen(value), false);

	/* report the errno of the write */
	saved_errno = errno;
	free(full_path);
	errno = saved_errno;
	return rc;
}
1818
574c4428
QH
1819static int do_setup_cgroup(struct lxc_handler *h,
1820 struct lxc_list *cgroup_settings, bool do_devices)
b98f7d6e
SH
1821{
1822 struct lxc_list *iterator;
1823 struct lxc_cgroup *cg;
1824 int ret = -1;
1825
33ad9f1a 1826 if (lxc_list_empty(cgroup_settings))
b98f7d6e
SH
1827 return 0;
1828
33ad9f1a 1829 lxc_list_for_each(iterator, cgroup_settings) {
b98f7d6e
SH
1830 cg = iterator->elem;
1831
33ad9f1a 1832 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
b98f7d6e 1833 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
33ad9f1a 1834 cgroup_devices_has_allow_or_deny(h, cg->value, false))
b98f7d6e
SH
1835 continue;
1836 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
33ad9f1a 1837 cgroup_devices_has_allow_or_deny(h, cg->value, true))
b98f7d6e 1838 continue;
33ad9f1a 1839 if (lxc_cgroup_set_handler(cg->subsystem, cg->value, h)) {
b98f7d6e
SH
1840 ERROR("Error setting %s to %s for %s\n",
1841 cg->subsystem, cg->value, h->name);
1842 goto out;
1843 }
b113383b 1844 }
b98f7d6e
SH
1845
1846 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
b113383b
SH
1847 }
1848
b98f7d6e
SH
1849 ret = 0;
1850 INFO("cgroup has been setup");
1851out:
b113383b
SH
1852 return ret;
1853}
b98f7d6e 1854
574c4428
QH
/*
 * Check the container's current "devices.list" to decide whether a
 * devices.allow / devices.deny setting is redundant.
 *
 * @h         : handler of the container
 * @v         : value of the setting
 * @for_allow : true when checking a devices.allow value, false for a
 *              devices.deny value
 *
 * For an allow: returns true if devices.list already contains the line
 * "a *:* rwm" or a line equal to @v.
 * For a deny: only "a" and "a *:* rwm" are considered; returns true if
 * devices.list does not contain the line "a *:* rwm".
 * Returns false whenever devices.list cannot be located or opened.
 */
static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h,
					     char *v, bool for_allow)
{
	char *path;
	char *cgroup_dir;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
	if (!cgroup_dir)
		return false;
	parts[0] = cgroup_dir;
	path = lxc_string_join("/", parts, false);
	/* the directory string is only needed to build path; release it
	 * here so that it is not leaked on any of the paths below
	 */
	free(cgroup_dir);
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);
		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			ret = for_allow;
			goto out;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			goto out;
		}
	}

out:
	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
1909
/*
 * Walk the directory tree rooted at @cgroup_path and add up the number of
 * lines in every file named "tasks".
 *
 * Returns the total, 0 if @cgroup_path cannot be opened as a directory, or
 * -1 if a sub-path cannot be built or stat()ed. Negative results from
 * sub-directories or from count_lines() are ignored rather than propagated.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d;
	struct dirent *dent;
	int n = 0, r;

	d = opendir(cgroup_path);
	if (!d)
		return 0;

	/* readdir() replaces the deprecated readdir_r(): the entry is only
	 * used before the next readdir() on this same stream, and the
	 * recursive call below operates on its own DIR stream. As before,
	 * the loop stops on end-of-directory or on a read error.
	 */
	while ((dent = readdir(d)) != NULL) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;
		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			closedir(d);
			return -1;
		}
		r = stat(sub_path, &st);
		if (r < 0) {
			closedir(d);
			free(sub_path);
			return -1;
		}
		if (S_ISDIR(st.st_mode)) {
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			r = count_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}
	closedir(d);

	return n;
}
1972
/*
 * Return the number of lines getline() can read from file @fn, or -1 if the
 * file cannot be opened.
 */
static int count_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int lines;
	FILE *fp = fopen_cloexec(fn, "r");

	if (fp == NULL)
		return -1;

	/* every successful getline() call accounts for one line */
	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
		;

	free(buf);
	fclose(fp);
	return lines;
}
1991
574c4428
QH
1992static int handle_cgroup_settings(struct cgroup_mount_point *mp,
1993 char *cgroup_path)
b98f7d6e 1994{
33ad9f1a 1995 int r, saved_errno = 0;
7e7243e1 1996 char buf[2];
1ea59ad2
SH
1997
1998 /* If this is the memory cgroup, we want to enforce hierarchy.
1999 * But don't fail if for some reason we can't.
2000 */
2001 if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
2002 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
2003 if (cc_path) {
7e7243e1
SH
2004 r = lxc_read_from_file(cc_path, buf, 1);
2005 if (r < 1 || buf[0] != '1') {
2006 r = lxc_write_to_file(cc_path, "1", 1, false);
2007 if (r < 0)
2008 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
2009 }
1ea59ad2
SH
2010 free(cc_path);
2011 }
2012 }
2013
33ad9f1a
CS
2014 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2015 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2016 * and cpuset.cpus and then
2017 */
2018 if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
2019 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
2020 if (!cc_path)
2021 return -1;
7e7243e1
SH
2022 r = lxc_read_from_file(cc_path, buf, 1);
2023 if (r == 1 && buf[0] == '1') {
2024 free(cc_path);
2025 return 0;
2026 }
33ad9f1a
CS
2027 r = lxc_write_to_file(cc_path, "1", 1, false);
2028 saved_errno = errno;
2029 free(cc_path);
2030 errno = saved_errno;
2031 return r < 0 ? -1 : 0;
2032 }
2033 return 0;
b98f7d6e 2034}
484ed030
SH
2035
2036extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
2037 const char *lxcpath);
2038int do_unfreeze(const char *nsgroup, int freeze, const char *name, const char *lxcpath)
2039{
2040 char freezer[MAXPATHLEN], *f;
2041 char tmpf[32];
2042 int fd, ret;
2043
2044 ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", nsgroup);
2045 if (ret >= MAXPATHLEN) {
2046 ERROR("freezer.state name too long");
2047 return -1;
2048 }
2049
2050 fd = open(freezer, O_RDWR);
2051 if (fd < 0) {
2052 SYSERROR("failed to open freezer at '%s'", nsgroup);
2053 return -1;
2054 }
2055
2056 if (freeze) {
2057 f = "FROZEN";
2058 ret = write(fd, f, strlen(f) + 1);
2059 } else {
2060 f = "THAWED";
2061 ret = write(fd, f, strlen(f) + 1);
2062
2063 /* compatibility code with old freezer interface */
2064 if (ret < 0) {
2065 f = "RUNNING";
2066 ret = write(fd, f, strlen(f) + 1) < 0;
2067 }
2068 }
2069
2070 if (ret < 0) {
2071 SYSERROR("failed to write '%s' to '%s'", f, freezer);
2072 goto out;
2073 }
2074
2075 while (1) {
2076 ret = lseek(fd, 0L, SEEK_SET);
2077 if (ret < 0) {
2078 SYSERROR("failed to lseek on file '%s'", freezer);
2079 goto out;
2080 }
2081
2082 ret = read(fd, tmpf, sizeof(tmpf));
2083 if (ret < 0) {
2084 SYSERROR("failed to read to '%s'", freezer);
2085 goto out;
2086 }
2087
2088 ret = strncmp(f, tmpf, strlen(f));
2089 if (!ret)
2090 {
2091 if (name)
2092 lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
2093 break; /* Success */
2094 }
2095
2096 sleep(1);
2097
2098 ret = lseek(fd, 0L, SEEK_SET);
2099 if (ret < 0) {
2100 SYSERROR("failed to lseek on file '%s'", freezer);
2101 goto out;
2102 }
2103
2104 ret = write(fd, f, strlen(f) + 1);
2105 if (ret < 0) {
2106 SYSERROR("failed to write '%s' to '%s'", f, freezer);
2107 goto out;
2108 }
2109 }
2110
2111out:
2112 close(fd);
2113 return ret;
2114}
2115
/*
 * Look up the freezer cgroup directory of container @name (under @lxcpath)
 * and hand it to do_unfreeze() together with @freeze.
 *
 * Returns do_unfreeze()'s result, or -1 if the directory cannot be
 * determined.
 */
int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
{
	int rc = -1;
	char *freezer_dir = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);

	if (freezer_dir) {
		rc = do_unfreeze(freezer_dir, freeze, name, lxcpath);
		free(freezer_dir);
	}

	return rc;
}
2129
2130lxc_state_t freezer_state(const char *name, const char *lxcpath)
2131{
2132 char *cgabspath = NULL;
2133 char freezer[MAXPATHLEN];
2134 char status[MAXPATHLEN];
2135 FILE *file;
2136 int ret;
2137
2138 cgabspath = lxc_cgroup_get_hierarchy_abs_path("freezer", name, lxcpath);
2139 if (!cgabspath)
2140 return -1;
2141
2142 ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", cgabspath);
2143 if (ret < 0 || ret >= MAXPATHLEN)
2144 goto out;
2145
2146 file = fopen(freezer, "r");
2147 if (!file) {
2148 ret = -1;
2149 goto out;
2150 }
2151
2152 ret = fscanf(file, "%s", status);
2153 fclose(file);
2154
2155 if (ret == EOF) {
2156 SYSERROR("failed to read %s", freezer);
2157 ret = -1;
2158 goto out;
2159 }
2160
2161 ret = lxc_str2state(status);
2162
2163out:
2164 free(cgabspath);
2165 return ret;
2166}
2167