]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgfs.c
lxc-config can show lxc.cgroup.(use|pattern)
[mirror_lxc.git] / src / lxc / cgfs.c
CommitLineData
/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2008
 *
 * Authors:
 * Daniel Lezcano <daniel.lezcano at free.fr>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
d06245b8
NC
23#include "config.h"
24
576f946d 25#include <stdio.h>
576f946d 26#include <stdlib.h>
27#include <errno.h>
576f946d 28#include <unistd.h>
29#include <string.h>
341a9bd8 30#include <dirent.h>
576f946d 31#include <fcntl.h>
b98f7d6e 32#include <ctype.h>
576f946d 33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
aae1f3c4 37#include <sys/mount.h>
576f946d 38#include <netinet/in.h>
39#include <net/if.h>
40
e2bcd7db 41#include "error.h"
ae5c8b8e 42#include "commands.h"
b98f7d6e
SH
43#include "list.h"
44#include "conf.h"
33ad9f1a 45#include "utils.h"
740d1928 46#include "bdev.h"
f2363e38
ÇO
47#include "log.h"
48#include "cgroup.h"
49#include "start.h"
484ed030 50#include "state.h"
36eb9bde 51
edaf8b1b
SG
52#if IS_BIONIC
53#include <../include/lxcmntent.h>
54#else
55#include <mntent.h>
56#endif
57
4fb3cba5
DE
58struct cgroup_hierarchy;
59struct cgroup_meta_data;
60struct cgroup_mount_point;
61
/*
 * cgroup_meta_data: the metadata about the cgroup infrastructure on this
 *                   host
 */
struct cgroup_meta_data {
	/* simple refcount; the object is released once it drops to zero */
	ptrdiff_t ref;
	/* indexed by hierarchy number; slots without a hierarchy are NULL */
	struct cgroup_hierarchy **hierarchies;
	/* every cgroup mount point found (released via lxc_free_array) */
	struct cgroup_mount_point **mount_points;
	/* highest hierarchy number stored in 'hierarchies' */
	int maximum_hierarchy;
};
72
/*
 * cgroup_hierarchy: describes a single cgroup hierarchy
 *                   (may have multiple mount points)
 */
struct cgroup_hierarchy {
	/* hierarchy number as listed in /proc/<pid>/cgroup */
	int index;
	/* false if the hierarchy should be ignored by lxc */
	bool used;
	/* NULL-terminated list of subsystem names bound to this hierarchy */
	char **subsystems;
	/* first writable mount whose mount prefix is "/" (if any) */
	struct cgroup_mount_point *rw_absolute_mount_point;
	/* first read-only mount whose mount prefix is "/" (if any) */
	struct cgroup_mount_point *ro_absolute_mount_point;
	/* NULL-terminated list of all mount points of this hierarchy */
	struct cgroup_mount_point **all_mount_points;
	/* allocated capacity of all_mount_points */
	size_t all_mount_point_capacity;
};
86
/*
 * cgroup_mount_point: a mount point to where a hierarchy
 *                     is mounted to
 */
struct cgroup_mount_point {
	/* hierarchy this mount belongs to (not owned by the mount point) */
	struct cgroup_hierarchy *hierarchy;
	/* where the hierarchy is mounted (field 4 of a mountinfo line) */
	char *mount_point;
	/* sub-tree of the hierarchy that is mounted (field 3 of mountinfo) */
	char *mount_prefix;
	/* true if "rw" is not among the per-mount options */
	bool read_only;
	/* NOTE(review): presumably set when cpuset values still have to be
	 * initialised for this mount - confirm against init_cpuset_if_needed()
	 */
	bool need_cpuset_init;
};
98
/*
 * cgroup_process_info: describes the membership of a
 *                      process to the different cgroup
 *                      hierarchies
 *
 * Note this is the per-process info tracked by the cgfs_ops.
 * This is not used with cgmanager.
 */
struct cgroup_process_info {
	/* next entry of the singly linked list (one entry per hierarchy) */
	struct cgroup_process_info *next;
	struct cgroup_meta_data *meta_ref;
	struct cgroup_hierarchy *hierarchy;
	/* cgroup of the process inside 'hierarchy' */
	char *cgroup_path;
	char *cgroup_path_sub;
	/* paths of the cgroups created for this entry, in creation order */
	char **created_paths;
	size_t created_paths_capacity;
	size_t created_paths_count;
	/* mount point selected for operating on this hierarchy */
	struct cgroup_mount_point *designated_mount_point;
};
118
/*
 * cgfs_data: state bundling a name, a cgroup path pattern, the host cgroup
 *            metadata and the per-hierarchy process info
 */
struct cgfs_data {
	char *name;
	const char *cgroup_pattern;
	struct cgroup_meta_data *meta;
	struct cgroup_process_info *info;
};
125
126lxc_log_define(lxc_cgfs, lxc);
576f946d 127
33ad9f1a
CS
128static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
129static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
130static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
131static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
132static bool is_valid_cgroup(const char *name);
33ad9f1a 133static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
603c64c2 134static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
33ad9f1a
CS
135static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
136static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
137static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
138static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
4fb3cba5
DE
139static bool cgroup_devices_has_allow_or_deny(struct cgfs_data *d, char *v, bool for_allow);
140static int do_setup_cgroup_limits(struct cgfs_data *d, struct lxc_list *cgroup_settings, bool do_devices);
33ad9f1a
CS
141static int cgroup_recursive_task_count(const char *cgroup_path);
142static int count_lines(const char *fn);
1ea59ad2 143static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
d703c2b1 144static bool init_cpuset_if_needed(struct cgroup_mount_point *mp, const char *path);
33ad9f1a 145
4fb3cba5
DE
146static struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist);
147static struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data);
148static struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data);
149
150/* free process membership information */
151static void lxc_cgroup_process_info_free(struct cgroup_process_info *info);
152static void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info);
153
d4ef7c50 154static struct cgroup_ops cgfs_ops;
d4ef7c50 155
603c64c2
SH
/*
 * cgroup_rmdir: recursively remove a cgroup directory
 *
 * @dirname: path of the directory to remove
 *
 * Every sub-directory is removed first, then @dirname itself is rmdir()ed.
 * Entries that are not directories are never unlinked here. The walk is
 * best effort: a failure is recorded and the remaining entries are still
 * processed.
 *
 * Returns 0 on success and -1 if any step failed. On return errno holds
 * the first error that was recorded (0 if none was).
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	int saved_errno = 0;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		/* keep opendir()'s errno intact across the logging call */
		saved_errno = errno;
		ERROR("%s: failed to open %s", __func__, dirname);
		errno = saved_errno;
		return -1;
	}

	/* readdir() is safe here: this stream is private to this call */
	while ((direntp = readdir(dir))) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			/* errno values are positive */
			if (!saved_errno)
				saved_errno = ENAMETOOLONG;
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			SYSERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			if (!saved_errno)
				saved_errno = errno;
			continue;
		}

		if (!S_ISDIR(mystat.st_mode))
			continue;

		if (cgroup_rmdir(pathname) < 0) {
			if (!saved_errno)
				saved_errno = errno;
			failed = 1;
		}
	}

	if (rmdir(dirname) < 0) {
		SYSERROR("%s: failed to delete %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		SYSERROR("%s: failed to close directory %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	errno = saved_errno;
	return failed ? -1 : 0;
}
224
/*
 * lxc_cgroup_load_meta: load the cgroup metadata of this host, restricted to
 *                       the subsystems named in the global "lxc.cgroup.use"
 *                       setting
 *
 * When the setting is present its value is split at ',' and handed to
 * lxc_cgroup_load_meta2() as the subsystem whitelist; when it is absent no
 * whitelist (NULL) is passed.
 *
 * Returns the metadata object, or NULL on failure. errno as left by
 * lxc_cgroup_load_meta2() is preserved across the cleanup.
 */
static struct cgroup_meta_data *lxc_cgroup_load_meta(void)
{
	const char *cgroup_use = NULL;
	char **cgroup_use_list = NULL;
	struct cgroup_meta_data *md = NULL;
	int saved_errno;

	/* a NULL result only counts as an error if errno got set */
	errno = 0;
	cgroup_use = lxc_global_config_value("lxc.cgroup.use");
	if (!cgroup_use && errno != 0)
		return NULL;

	if (cgroup_use) {
		cgroup_use_list = lxc_string_split_and_trim(cgroup_use, ',');
		if (!cgroup_use_list)
			return NULL;
	}

	md = lxc_cgroup_load_meta2((const char **)cgroup_use_list);

	saved_errno = errno;
	lxc_free_array((void **)cgroup_use_list, free);
	errno = saved_errno;

	return md;
}
fd37327f 248
/* Step 1: determine all kernel subsystems
 *
 * Reads /proc/cgroups and appends a copy of the first tab-separated field
 * (the subsystem name) of every valid line to *kernel_subsystems. Lines
 * starting with '#', empty lines and lines whose second field is not a
 * plain decimal number are skipped.
 *
 * Returns true on success. On failure false is returned and
 * *kernel_subsystems may hold a partially filled array which the caller
 * still has to release.
 */
static bool find_cgroup_subsystems(char ***kernel_subsystems)
{
	FILE *proc_cgroups;
	bool bret = false;
	char *line = NULL;
	size_t sz = 0;
	size_t kernel_subsystems_count = 0;
	size_t kernel_subsystems_capacity = 0;
	int r;

	proc_cgroups = fopen_cloexec("/proc/cgroups", "r");
	if (!proc_cgroups)
		return false;

	while (getline(&line, &sz, proc_cgroups) != -1) {
		char *tab1;
		char *tab2;
		char *endp = NULL;

		if (line[0] == '#')
			continue;
		if (!line[0])
			continue;

		tab1 = strchr(line, '\t');
		if (!tab1)
			continue;
		*tab1++ = '\0';
		tab2 = strchr(tab1, '\t');
		if (!tab2)
			continue;
		*tab2 = '\0';

		/* the hierarchy number is only validated, its value is not
		 * needed here; an empty field is not a valid number
		 */
		(void)strtoul(tab1, &endp, 10);
		if (!endp || endp == tab1 || *endp)
			continue;

		r = lxc_grow_array((void ***)kernel_subsystems, &kernel_subsystems_capacity, kernel_subsystems_count + 1, 12);
		if (r < 0)
			goto out;
		(*kernel_subsystems)[kernel_subsystems_count] = strdup(line);
		if (!(*kernel_subsystems)[kernel_subsystems_count])
			goto out;
		kernel_subsystems_count++;
	}
	bret = true;

out:
	fclose(proc_cgroups);
	free(line);
	return bret;
}
304
305/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
306 * since mount points don't specify hierarchy number and
307 * /proc/cgroups does not contain named hierarchies
308 */
309static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
310 bool all_kernel_subsystems, bool all_named_subsystems,
311 const char **subsystem_whitelist)
312{
313 FILE *proc_self_cgroup;
314 char *line = NULL;
315 size_t sz = 0;
316 int r;
317 bool bret = false;
318 size_t hierarchy_capacity = 0;
ef6e34ee 319
33ad9f1a
CS
320 proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
321 /* if for some reason (because of setns() and pid namespace for example),
322 * /proc/self is not valid, we try /proc/1/cgroup... */
323 if (!proc_self_cgroup)
324 proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
325 if (!proc_self_cgroup)
b653309a 326 return false;
33ad9f1a
CS
327
328 while (getline(&line, &sz, proc_self_cgroup) != -1) {
329 /* file format: hierarchy:subsystems:group,
330 * we only extract hierarchy and subsystems
331 * here */
332 char *colon1;
333 char *colon2;
334 int hierarchy_number;
335 struct cgroup_hierarchy *h = NULL;
336 char **p;
337
338 if (!line[0])
339 continue;
ad08bbb7 340
33ad9f1a
CS
341 colon1 = strchr(line, ':');
342 if (!colon1)
8900b9eb 343 continue;
33ad9f1a
CS
344 *colon1++ = '\0';
345 colon2 = strchr(colon1, ':');
346 if (!colon2)
347 continue;
348 *colon2 = '\0';
ad08bbb7 349
33ad9f1a
CS
350 colon2 = NULL;
351 hierarchy_number = strtoul(line, &colon2, 10);
352 if (!colon2 || *colon2)
353 continue;
576f946d 354
33ad9f1a
CS
355 if (hierarchy_number > meta_data->maximum_hierarchy) {
356 /* lxc_grow_array will never shrink, so even if we find a lower
357 * hierarchy number here, the array will never be smaller
358 */
359 r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
360 if (r < 0)
b653309a 361 goto out;
5193cc3d 362
33ad9f1a
CS
363 meta_data->maximum_hierarchy = hierarchy_number;
364 }
fd37327f 365
33ad9f1a
CS
366 /* this shouldn't happen, we had this already */
367 if (meta_data->hierarchies[hierarchy_number])
b653309a 368 goto out;
33ad9f1a
CS
369
370 h = calloc(1, sizeof(struct cgroup_hierarchy));
371 if (!h)
b653309a 372 goto out;
33ad9f1a
CS
373
374 meta_data->hierarchies[hierarchy_number] = h;
375
376 h->index = hierarchy_number;
377 h->subsystems = lxc_string_split_and_trim(colon1, ',');
378 if (!h->subsystems)
b653309a 379 goto out;
33ad9f1a
CS
380 /* see if this hierarchy should be considered */
381 if (!all_kernel_subsystems || !all_named_subsystems) {
382 for (p = h->subsystems; *p; p++) {
383 if (!strncmp(*p, "name=", 5)) {
384 if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
385 h->used = true;
386 break;
387 }
388 } else {
389 if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
390 h->used = true;
391 break;
392 }
393 }
394 }
395 } else {
396 /* we want all hierarchy anyway */
397 h->used = true;
ae5c8b8e 398 }
ae5c8b8e 399 }
b653309a 400 bret = true;
0b9c21ab 401
b653309a 402out:
33ad9f1a 403 fclose(proc_self_cgroup);
0ccf7c2a 404 free(line);
b653309a
SH
405 return bret;
406}
407
408/* Step 3: determine all mount points of each hierarchy */
409static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
410{
411 bool bret = false;
412 FILE *proc_self_mountinfo;
413 char *line = NULL;
414 size_t sz = 0;
415 char **tokens = NULL;
416 size_t mount_point_count = 0;
417 size_t mount_point_capacity = 0;
418 size_t token_capacity = 0;
419 int r;
420
33ad9f1a
CS
421 proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
422 /* if for some reason (because of setns() and pid namespace for example),
423 * /proc/self is not valid, we try /proc/1/cgroup... */
424 if (!proc_self_mountinfo)
425 proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
426 if (!proc_self_mountinfo)
b653309a 427 return false;
33ad9f1a
CS
428
429 while (getline(&line, &sz, proc_self_mountinfo) != -1) {
178938fe 430 char *token, *line_tok, *saveptr = NULL;
33ad9f1a
CS
431 size_t i, j, k;
432 struct cgroup_mount_point *mount_point;
433 struct cgroup_hierarchy *h;
434 char **subsystems;
435
436 if (line[0] && line[strlen(line) - 1] == '\n')
437 line[strlen(line) - 1] = '\0';
438
178938fe 439 for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
33ad9f1a
CS
440 r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
441 if (r < 0)
b653309a 442 goto out;
33ad9f1a
CS
443 tokens[i++] = token;
444 }
b98f7d6e 445
33ad9f1a
CS
446 /* layout of /proc/self/mountinfo:
447 * 0: id
448 * 1: parent id
449 * 2: device major:minor
450 * 3: mount prefix
8900b9eb 451 * 4: mount point
33ad9f1a
CS
452 * 5: per-mount options
453 * [optional X]: additional data
454 * X+7: "-"
455 * X+8: type
456 * X+9: source
457 * X+10: per-superblock options
458 */
459 for (j = 6; j < i && tokens[j]; j++)
460 if (!strcmp(tokens[j], "-"))
461 break;
fd4f5a56 462
33ad9f1a
CS
463 /* could not find separator */
464 if (j >= i || !tokens[j])
465 continue;
466 /* there should be exactly three fields after
467 * the separator
468 */
469 if (i != j + 4)
470 continue;
fd4f5a56 471
33ad9f1a
CS
472 /* not a cgroup filesystem */
473 if (strcmp(tokens[j + 1], "cgroup") != 0)
474 continue;
b98f7d6e 475
33ad9f1a
CS
476 subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
477 if (!subsystems)
b653309a 478 goto out;
33ad9f1a
CS
479
480 h = NULL;
481 for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
482 if (meta_data->hierarchies[k] &&
483 meta_data->hierarchies[k]->subsystems[0] &&
484 lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
485 /* TODO: we could also check if the lists really match completely,
486 * just to have an additional sanity check */
487 h = meta_data->hierarchies[k];
b98f7d6e 488 break;
33ad9f1a 489 }
b98f7d6e 490 }
33ad9f1a
CS
491 lxc_free_array((void **)subsystems, free);
492
493 r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
494 if (r < 0)
b653309a 495 goto out;
33ad9f1a
CS
496
497 /* create mount point object */
498 mount_point = calloc(1, sizeof(*mount_point));
499 if (!mount_point)
b653309a 500 goto out;
33ad9f1a
CS
501
502 meta_data->mount_points[mount_point_count++] = mount_point;
503
504 mount_point->hierarchy = h;
505 mount_point->mount_point = strdup(tokens[4]);
506 mount_point->mount_prefix = strdup(tokens[3]);
507 if (!mount_point->mount_point || !mount_point->mount_prefix)
b653309a 508 goto out;
33ad9f1a
CS
509 mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
510
511 if (!strcmp(mount_point->mount_prefix, "/")) {
512 if (mount_point->read_only) {
513 if (!h->ro_absolute_mount_point)
514 h->ro_absolute_mount_point = mount_point;
515 } else {
516 if (!h->rw_absolute_mount_point)
517 h->rw_absolute_mount_point = mount_point;
518 }
b98f7d6e 519 }
ae5c8b8e 520
33ad9f1a
CS
521 k = lxc_array_len((void **)h->all_mount_points);
522 r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
523 if (r < 0)
b653309a 524 goto out;
33ad9f1a 525 h->all_mount_points[k] = mount_point;
fd4f5a56 526 }
b653309a
SH
527 bret = true;
528
529out:
b653309a 530 fclose(proc_self_mountinfo);
b653309a 531 free(tokens);
2cdafc54 532 free(line);
b653309a
SH
533 return bret;
534}
535
4fb3cba5 536static struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
b653309a
SH
537{
538 bool all_kernel_subsystems = true;
539 bool all_named_subsystems = false;
540 struct cgroup_meta_data *meta_data = NULL;
541 char **kernel_subsystems = NULL;
542 int saved_errno = 0;
543
544 /* if the subsystem whitelist is not specified, include all
545 * hierarchies that contain kernel subsystems by default but
546 * no hierarchies that only contain named subsystems
547 *
548 * if it is specified, the specifier @all will select all
549 * hierarchies, @kernel will select all hierarchies with
550 * kernel subsystems and @named will select all named
551 * hierarchies
552 */
553 all_kernel_subsystems = subsystem_whitelist ?
554 (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
555 true;
556 all_named_subsystems = subsystem_whitelist ?
557 (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
558 false;
559
560 meta_data = calloc(1, sizeof(struct cgroup_meta_data));
561 if (!meta_data)
562 return NULL;
563 meta_data->ref = 1;
564
565 if (!find_cgroup_subsystems(&kernel_subsystems))
566 goto out_error;
567
568 if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
569 all_named_subsystems, subsystem_whitelist))
570 goto out_error;
571
572 if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
573 goto out_error;
fd4f5a56 574
33ad9f1a
CS
575 /* oops, we couldn't find anything */
576 if (!meta_data->hierarchies || !meta_data->mount_points) {
577 errno = EINVAL;
578 goto out_error;
ae5c8b8e 579 }
fd4f5a56 580
3a0abb3a 581 lxc_free_array((void **)kernel_subsystems, free);
33ad9f1a
CS
582 return meta_data;
583
584out_error:
585 saved_errno = errno;
33ad9f1a
CS
586 lxc_free_array((void **)kernel_subsystems, free);
587 lxc_cgroup_put_meta(meta_data);
588 errno = saved_errno;
589 return NULL;
fd4f5a56
DL
590}
591
4fb3cba5 592static struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
e14f67a7 593{
33ad9f1a
CS
594 meta_data->ref++;
595 return meta_data;
596}
e14f67a7 597
4fb3cba5 598static struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
33ad9f1a
CS
599{
600 size_t i;
601 if (!meta_data)
602 return NULL;
603 if (--meta_data->ref > 0)
604 return meta_data;
605 lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
606 if (meta_data->hierarchies) {
607 for (i = 0; i <= meta_data->maximum_hierarchy; i++)
608 lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
e14f67a7 609 }
33ad9f1a 610 free(meta_data->hierarchies);
178938fe 611 free(meta_data);
33ad9f1a 612 return NULL;
e14f67a7
U
613}
614
4fb3cba5 615static struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
e14f67a7 616{
33ad9f1a
CS
617 size_t i;
618 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
619 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
620 if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
621 return h;
e14f67a7 622 }
e14f67a7
U
623 return NULL;
624}
625
4fb3cba5 626static struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
b98f7d6e 627{
33ad9f1a
CS
628 struct cgroup_mount_point **mps;
629 struct cgroup_mount_point *current_result = NULL;
630 ssize_t quality = -1;
b98f7d6e 631
33ad9f1a
CS
632 /* trivial case */
633 if (hierarchy->rw_absolute_mount_point)
634 return hierarchy->rw_absolute_mount_point;
635 if (!should_be_writable && hierarchy->ro_absolute_mount_point)
636 return hierarchy->ro_absolute_mount_point;
b98f7d6e 637
33ad9f1a
CS
638 for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
639 struct cgroup_mount_point *mp = *mps;
640 size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
b98f7d6e 641
33ad9f1a
CS
642 if (prefix_len == 1 && mp->mount_prefix[0] == '/')
643 prefix_len = 0;
b98f7d6e 644
33ad9f1a
CS
645 if (should_be_writable && mp->read_only)
646 continue;
647
648 if (!prefix_len ||
649 (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
650 (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
651 /* search for the best quality match, i.e. the match with the
652 * shortest prefix where this group is still contained
653 */
654 if (quality == -1 || prefix_len < quality) {
655 current_result = mp;
656 quality = prefix_len;
657 }
b98f7d6e
SH
658 }
659 }
660
33ad9f1a
CS
661 if (!current_result)
662 errno = ENOENT;
663 return current_result;
b98f7d6e
SH
664}
665
/*
 * lxc_cgroup_find_abs_path: translate a cgroup of a subsystem into a path
 *                           in the file system
 *
 * @subsystem:          subsystem whose hierarchy is to be used
 * @group:              cgroup inside that hierarchy
 * @should_be_writable: only consider writable mount points
 * @suffix:             passed through to cgroup_to_absolute_path()
 *
 * The cgroup metadata is loaded for the duration of the call only.
 *
 * Returns the string produced by cgroup_to_absolute_path(), or NULL on
 * failure with errno preserved across the cleanup (ENOENT if the subsystem
 * belongs to no known hierarchy).
 */
static char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{
	struct cgroup_meta_data *meta_data;
	struct cgroup_hierarchy *h;
	struct cgroup_mount_point *mp;
	char *result;
	int saved_errno;

	meta_data = lxc_cgroup_load_meta();
	if (!meta_data)
		return NULL;

	h = lxc_cgroup_find_hierarchy(meta_data, subsystem);
	if (!h) {
		/* the lookup itself does not set errno */
		errno = ENOENT;
		goto out_error;
	}

	mp = lxc_cgroup_find_mount_point(h, group, should_be_writable);
	if (!mp)
		goto out_error;

	result = cgroup_to_absolute_path(mp, group, suffix);
	if (!result)
		goto out_error;

	lxc_cgroup_put_meta(meta_data);
	return result;

out_error:
	saved_errno = errno;
	lxc_cgroup_put_meta(meta_data);
	errno = saved_errno;
	return NULL;
}
699
/*
 * lxc_cgroup_process_info_get: read the cgroup membership of process @pid
 *
 * Builds the path "/proc/<pid>/cgroup" and hands it to
 * lxc_cgroup_process_info_getx().
 *
 * Returns what lxc_cgroup_process_info_getx() returns, or NULL with errno
 * set to ENAMETOOLONG if the path does not fit into the local buffer.
 */
static struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
{
	char pid_buf[32];
	int ret;

	ret = snprintf(pid_buf, sizeof(pid_buf), "/proc/%lu/cgroup", (unsigned long)pid);
	if (ret < 0 || (size_t)ret >= sizeof(pid_buf)) {
		/* never pass a truncated path on */
		errno = ENAMETOOLONG;
		return NULL;
	}

	return lxc_cgroup_process_info_getx(pid_buf, meta);
}
706
/*
 * lxc_cgroup_process_info_get_init: read the cgroup membership of the
 *                                   process with pid 1
 */
static struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	return lxc_cgroup_process_info_get(1, meta);
}
b98f7d6e 711
/*
 * lxc_cgroup_process_info_get_self: read the cgroup membership of the
 *                                   calling process
 *
 * /proc/self/cgroup is tried first; if that yields nothing the lookup is
 * repeated through /proc/<pid>/cgroup using getpid().
 */
static struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
{
	struct cgroup_process_info *info;

	info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
	if (info)
		return info;

	return lxc_cgroup_process_info_get(getpid(), meta);
}
ae5c8b8e 720
692ba18f
SH
/*
 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
 * is already in a new cgroup named after the pid.
 *
 * @mountpath: where the hierarchy is mounted
 * @oldname:   cgroup that contains the directory named after the pid
 * @pid:       pid the directory is currently named after
 * @name:      container name the directory shall be renamed to
 *
 * Say the hierarchy is mounted at /sys/fs/cgroup, the task is in cgroup
 * lxc/2975 and the container name is c1. Then the directory
 * /sys/fs/cgroup/lxc/2975 is renamed to /sys/fs/cgroup/lxc/c1. An already
 * existing target directory is removed first; if that fails the rename is
 * not attempted.
 *
 * Returns the new cgroup name "<oldname>/<name>" in freshly allocated
 * memory that the caller has to free, or NULL on failure.
 */
static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
{
	char *fulloldpath;
	char *newname, *fullnewpath;
	int len, newlen, ret;

	/*
	 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
	 * name is c1,
	 * fulloldpath = /cgroup/ab/2375
	 * fullnewpath = /cgroup/ab/c1
	 * newname = /ab/c1
	 */

	/* 22 bytes cover the two '/', the digits of the pid and the NUL */
	len = strlen(oldname) + strlen(mountpath) + 22;
	fulloldpath = alloca(len);
	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
	if (ret < 0 || ret >= len)
		return NULL;

	len = strlen(oldname) + strlen(name) + 2;
	newname = malloc(len);
	if (!newname) {
		SYSERROR("Out of memory");
		return NULL;
	}
	ret = snprintf(newname, len, "%s/%s", oldname, name);
	if (ret < 0 || ret >= len) {
		free(newname);
		return NULL;
	}

	newlen = strlen(mountpath) + len + 2;
	fullnewpath = alloca(newlen);
	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
	if (ret < 0 || ret >= newlen) {
		free(newname);
		return NULL;
	}

	/* make room for the rename if the target is already there */
	if (access(fullnewpath, F_OK) == 0) {
		if (rmdir(fullnewpath) != 0) {
			SYSERROR("container cgroup %s already exists.", fullnewpath);
			free(newname);
			return NULL;
		}
	}
	if (rename(fulloldpath, fullnewpath)) {
		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
		free(newname);
		return NULL;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return newname;
}
788
33ad9f1a 789/* create a new cgroup */
4fb3cba5 790static struct cgroup_process_info *lxc_cgroupfs_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
33ad9f1a 791{
001b026e 792 char **cgroup_path_components = NULL;
33ad9f1a
CS
793 char **p = NULL;
794 char *path_so_far = NULL;
795 char **new_cgroup_paths = NULL;
796 char **new_cgroup_paths_sub = NULL;
797 struct cgroup_mount_point *mp;
798 struct cgroup_hierarchy *h;
799 struct cgroup_process_info *base_info = NULL;
800 struct cgroup_process_info *info_ptr;
801 int saved_errno;
802 int r;
803 unsigned suffix = 0;
804 bool had_sub_pattern = false;
805 size_t i;
ae5c8b8e 806
33ad9f1a
CS
807 if (!is_valid_cgroup(name)) {
808 ERROR("Invalid cgroup name: '%s'", name);
809 errno = EINVAL;
810 return NULL;
ae5c8b8e
SH
811 }
812
33ad9f1a
CS
813 if (!strstr(path_pattern, "%n")) {
814 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
815 errno = EINVAL;
816 return NULL;
817 }
fd37327f 818
33ad9f1a
CS
819 /* we will modify the result of this operation directly,
820 * so we don't have to copy the data structure
821 */
822 base_info = (path_pattern[0] == '/') ?
823 lxc_cgroup_process_info_get_init(meta_data) :
824 lxc_cgroup_process_info_get_self(meta_data);
825 if (!base_info)
826 return NULL;
c8f7c563 827
33ad9f1a
CS
828 new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
829 if (!new_cgroup_paths)
830 goto out_initial_error;
831
832 new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
833 if (!new_cgroup_paths_sub)
834 goto out_initial_error;
835
836 /* find mount points we can use */
837 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
838 h = info_ptr->hierarchy;
839 mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
840 if (!mp) {
841 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
842 goto out_initial_error;
843 }
844 info_ptr->designated_mount_point = mp;
460a1cf0 845
692ba18f
SH
846 if (lxc_string_in_array("ns", (const char **)h->subsystems))
847 continue;
2edb53c7
SH
848 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
849 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
33ad9f1a 850 goto out_initial_error;
2edb53c7 851 }
33ad9f1a 852 }
b98f7d6e 853
33ad9f1a
CS
854 /* normalize the path */
855 cgroup_path_components = lxc_normalize_path(path_pattern);
856 if (!cgroup_path_components)
857 goto out_initial_error;
858
859 /* go through the path components to see if we can create them */
860 for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
861 /* we only want to create the same component with -1, -2, etc.
862 * if the component contains the container name itself, otherwise
863 * it's not an error if it already exists
864 */
865 char *p_eff = *p ? *p : (char *)sub_pattern;
866 bool contains_name = strstr(p_eff, "%n");
867 char *current_component = NULL;
868 char *current_subpath = NULL;
869 char *current_entire_path = NULL;
870 char *parts[3];
871 size_t j = 0;
872 i = 0;
873
874 /* if we are processing the subpattern, we want to make sure
875 * loop is ended the next time around
876 */
877 if (!*p) {
878 had_sub_pattern = true;
879 p--;
880 }
b98f7d6e 881
33ad9f1a 882 goto find_name_on_this_level;
4fb3cba5 883
33ad9f1a
CS
884 cleanup_name_on_this_level:
885 /* This is reached if we found a name clash.
886 * In that case, remove the cgroup from all previous hierarchies
887 */
888 for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
603c64c2 889 r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
33ad9f1a
CS
890 if (r < 0)
891 WARN("could not clean up cgroup we created when trying to create container");
892 free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
893 info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
894 }
895 if (current_component != current_subpath)
896 free(current_subpath);
897 if (current_component != p_eff)
898 free(current_component);
899 current_component = current_subpath = NULL;
900 /* try again with another suffix */
901 ++suffix;
4fb3cba5 902
33ad9f1a
CS
903 find_name_on_this_level:
904 /* determine name of the path component we should create */
905 if (contains_name && suffix > 0) {
906 char *buf = calloc(strlen(name) + 32, 1);
907 if (!buf)
908 goto out_initial_error;
909 snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
910 current_component = lxc_string_replace("%n", buf, p_eff);
911 free(buf);
912 } else {
913 current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
914 }
915 parts[0] = path_so_far;
916 parts[1] = current_component;
917 parts[2] = NULL;
918 current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
919
920 /* Now go through each hierarchy and try to create the
921 * corresponding cgroup
922 */
923 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
924 char *parts2[3];
692ba18f
SH
925
926 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
927 continue;
33ad9f1a
CS
928 current_entire_path = NULL;
929
930 parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
931 parts2[1] = current_subpath;
932 parts2[2] = NULL;
933 current_entire_path = lxc_string_join("/", (const char **)parts2, false);
934
935 if (!*p) {
936 /* we are processing the subpath, so only update that one */
937 free(new_cgroup_paths_sub[i]);
938 new_cgroup_paths_sub[i] = strdup(current_entire_path);
939 if (!new_cgroup_paths_sub[i])
940 goto cleanup_from_error;
941 } else {
942 /* remember which path was used on this controller */
943 free(new_cgroup_paths[i]);
944 new_cgroup_paths[i] = strdup(current_entire_path);
945 if (!new_cgroup_paths[i])
946 goto cleanup_from_error;
947 }
fd4f5a56 948
33ad9f1a
CS
949 r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
950 if (r < 0 && errno == EEXIST && contains_name) {
951 /* name clash => try new name with new suffix */
952 free(current_entire_path);
953 current_entire_path = NULL;
954 goto cleanup_name_on_this_level;
955 } else if (r < 0 && errno != EEXIST) {
b38b62a6 956 SYSERROR("Could not create cgroup '%s' in '%s'.", current_entire_path, info_ptr->designated_mount_point->mount_point);
33ad9f1a
CS
957 goto cleanup_from_error;
958 } else if (r == 0) {
959 /* successfully created */
960 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
961 if (r < 0)
962 goto cleanup_from_error;
d703c2b1 963 if (!init_cpuset_if_needed(info_ptr->designated_mount_point, current_entire_path)) {
b38b62a6 964 ERROR("Failed to initialize cpuset for '%s' in '%s'.", current_entire_path, info_ptr->designated_mount_point->mount_point);
d703c2b1
RV
965 goto cleanup_from_error;
966 }
33ad9f1a
CS
967 info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
968 } else {
969 /* if we didn't create the cgroup, then we have to make sure that
970 * further cgroups will be created properly
971 */
d703c2b1 972 if (handle_cgroup_settings(info_ptr->designated_mount_point, info_ptr->cgroup_path) < 0) {
f6ac3b9e 973 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
33ad9f1a 974 goto cleanup_from_error;
f6ac3b9e 975 }
d703c2b1
RV
976 if (!init_cpuset_if_needed(info_ptr->designated_mount_point, info_ptr->cgroup_path)) {
977 ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr->cgroup_path);
978 goto cleanup_from_error;
979 }
33ad9f1a
CS
980
981 /* already existed but path component of pattern didn't contain '%n',
982 * so this is not an error; but then we don't need current_entire_path
983 * anymore...
984 */
985 free(current_entire_path);
986 current_entire_path = NULL;
987 }
988 }
fd4f5a56 989
33ad9f1a
CS
990 /* save path so far */
991 free(path_so_far);
992 path_so_far = strdup(current_subpath);
993 if (!path_so_far)
994 goto cleanup_from_error;
995
996 /* cleanup */
997 if (current_component != current_subpath)
998 free(current_subpath);
999 if (current_component != p_eff)
1000 free(current_component);
1001 current_component = current_subpath = NULL;
1002 continue;
4fb3cba5 1003
33ad9f1a
CS
1004 cleanup_from_error:
1005 /* called if an error occured in the loop, so we
1006 * do some additional cleanup here
1007 */
1008 saved_errno = errno;
1009 if (current_component != current_subpath)
1010 free(current_subpath);
1011 if (current_component != p_eff)
1012 free(current_component);
1013 free(current_entire_path);
1014 errno = saved_errno;
1015 goto out_initial_error;
fd4f5a56
DL
1016 }
1017
33ad9f1a
CS
1018 /* we're done, now update the paths */
1019 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
47d8fb3b
CS
1020 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
1021 * will take care of it
1022 * Since we do a continue in above loop, new_cgroup_paths[i] is
1023 * unset anyway, as is new_cgroup_paths_sub[i]
692ba18f 1024 */
47d8fb3b
CS
1025 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
1026 continue;
1027 free(info_ptr->cgroup_path);
1028 info_ptr->cgroup_path = new_cgroup_paths[i];
1029 info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
fd4f5a56 1030 }
33ad9f1a
CS
1031 /* don't use lxc_free_array since we used the array members
1032 * to store them in our result...
1033 */
1034 free(new_cgroup_paths);
1035 free(new_cgroup_paths_sub);
1036 free(path_so_far);
1037 lxc_free_array((void **)cgroup_path_components, free);
1038 return base_info;
1039
1040out_initial_error:
1041 saved_errno = errno;
1042 free(path_so_far);
1043 lxc_cgroup_process_info_free_and_remove(base_info);
1044 lxc_free_array((void **)new_cgroup_paths, free);
1045 lxc_free_array((void **)new_cgroup_paths_sub, free);
1046 lxc_free_array((void **)cgroup_path_components, free);
1047 errno = saved_errno;
1048 return NULL;
c8f7c563
CS
1049}
1050
4fb3cba5 1051static int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
47d8fb3b
CS
1052{
1053 struct cgroup_process_info *info_ptr;
1054 int r;
1055
1056 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
1057 if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
1058 continue;
1059 /*
1060 * For any path which has ns cgroup mounted, handler->pid is already
1061 * moved into a container called '%d % (handler->pid)'. Rename it to
1062 * the cgroup name and record that.
1063 */
1064 char *tmp = cgroup_rename_nsgroup((const char *)info_ptr->designated_mount_point->mount_point,
1065 info_ptr->cgroup_path, pid, name);
1066 if (!tmp)
1067 return -1;
1068 free(info_ptr->cgroup_path);
1069 info_ptr->cgroup_path = tmp;
1070 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
1071 if (r < 0)
1072 return -1;
1073 tmp = strdup(tmp);
1074 if (!tmp)
1075 return -1;
1076 info_ptr->created_paths[info_ptr->created_paths_count++] = tmp;
1077 }
1078 return 0;
1079}
1080
33ad9f1a 1081/* get the cgroup membership of a given container */
4fb3cba5 1082static struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
c8f7c563 1083{
33ad9f1a
CS
1084 struct cgroup_process_info *result = NULL;
1085 int saved_errno = 0;
1086 size_t i;
1087 struct cgroup_process_info **cptr = &result;
1088 struct cgroup_process_info *entry = NULL;
1089 char *path = NULL;
1090
1091 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
1092 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
1093 if (!h || !h->used)
1094 continue;
c8f7c563 1095
33ad9f1a
CS
1096 /* use the command interface to look for the cgroup */
1097 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
c661b0a8
DE
1098 if (!path) {
1099 h->used = false;
1100 WARN("Not attaching to cgroup %s unknown to %s %s", h->subsystems[0], lxcpath, name);
1101 continue;
1102 }
33ad9f1a
CS
1103
1104 entry = calloc(1, sizeof(struct cgroup_process_info));
1105 if (!entry)
1106 goto out_error;
1107 entry->meta_ref = lxc_cgroup_get_meta(meta_data);
1108 entry->hierarchy = h;
1109 entry->cgroup_path = path;
1110 path = NULL;
1111
1112 /* it is not an error if we don't find anything here,
1113 * it is up to the caller to decide what to do in that
1114 * case */
1115 entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
1116
1117 *cptr = entry;
1118 cptr = &entry->next;
1119 entry = NULL;
c8f7c563
CS
1120 }
1121
33ad9f1a
CS
1122 return result;
1123out_error:
1124 saved_errno = errno;
1125 free(path);
1126 lxc_cgroup_process_info_free(result);
1127 lxc_cgroup_process_info_free(entry);
1128 errno = saved_errno;
1129 return NULL;
fd4f5a56
DL
1130}
1131
33ad9f1a 1132/* move a processs to the cgroups specified by the membership */
4fb3cba5 1133static int lxc_cgroupfs_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
4f17323e 1134{
33ad9f1a
CS
1135 char pid_buf[32];
1136 char *cgroup_tasks_fn;
1137 int r;
1138 struct cgroup_process_info *info_ptr;
1139
1140 snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
1141 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1142 char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
1143 info_ptr->cgroup_path_sub :
1144 info_ptr->cgroup_path;
1145
1146 if (!info_ptr->designated_mount_point) {
1147 info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
1148 if (!info_ptr->designated_mount_point) {
1149 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
1150 return -1;
1151 }
1152 }
4f17323e 1153
33ad9f1a
CS
1154 cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
1155 if (!cgroup_tasks_fn) {
1156 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1157 return -1;
1158 }
4f17323e 1159
33ad9f1a 1160 r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
5903da82 1161 free(cgroup_tasks_fn);
33ad9f1a
CS
1162 if (r < 0) {
1163 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1164 return -1;
1165 }
4f17323e
CS
1166 }
1167
33ad9f1a 1168 return 0;
4f17323e
CS
1169}
1170
33ad9f1a
CS
1171/* free process membership information */
1172void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
fc7de561 1173{
33ad9f1a
CS
1174 struct cgroup_process_info *next;
1175 if (!info)
b98f7d6e 1176 return;
33ad9f1a
CS
1177 next = info->next;
1178 lxc_cgroup_put_meta(info->meta_ref);
1179 free(info->cgroup_path);
1180 free(info->cgroup_path_sub);
1181 lxc_free_array((void **)info->created_paths, free);
1182 free(info);
1183 lxc_cgroup_process_info_free(next);
fc7de561
SH
1184}
1185
33ad9f1a
CS
1186/* free process membership information and remove cgroups that were created */
1187void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
b98f7d6e 1188{
33ad9f1a
CS
1189 struct cgroup_process_info *next;
1190 char **pp;
1191 if (!info)
1192 return;
1193 next = info->next;
603c64c2 1194 {
33ad9f1a
CS
1195 struct cgroup_mount_point *mp = info->designated_mount_point;
1196 if (!mp)
1197 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1198 if (mp)
1199 /* ignore return value here, perhaps we created the
1200 * '/lxc' cgroup in this container but another container
1201 * is still running (for example)
1202 */
603c64c2
SH
1203 (void)remove_cgroup(mp, info->cgroup_path, true);
1204 }
1205 for (pp = info->created_paths; pp && *pp; pp++);
1206 for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
33ad9f1a 1207 free(*pp);
b98f7d6e 1208 }
33ad9f1a
CS
1209 free(info->created_paths);
1210 lxc_cgroup_put_meta(info->meta_ref);
1211 free(info->cgroup_path);
1212 free(info->cgroup_path_sub);
1213 free(info);
9431aa65 1214 lxc_cgroup_process_info_free_and_remove(next);
33ad9f1a 1215}
b98f7d6e 1216
4fb3cba5 1217static char *lxc_cgroup_get_hierarchy_path_data(const char *subsystem, struct cgfs_data *d)
33ad9f1a 1218{
d4ef7c50
SH
1219 struct cgroup_process_info *info = d->info;
1220 info = find_info_for_subsystem(info, subsystem);
33ad9f1a
CS
1221 if (!info)
1222 return NULL;
1223 return info->cgroup_path;
b98f7d6e
SH
1224}
1225
4fb3cba5 1226static char *lxc_cgroup_get_hierarchy_abs_path_data(const char *subsystem, struct cgfs_data *d)
b98f7d6e 1227{
d4ef7c50 1228 struct cgroup_process_info *info = d->info;
33ad9f1a 1229 struct cgroup_mount_point *mp = NULL;
d4ef7c50
SH
1230
1231 info = find_info_for_subsystem(info, subsystem);
33ad9f1a
CS
1232 if (!info)
1233 return NULL;
1234 if (info->designated_mount_point) {
8900b9eb 1235 mp = info->designated_mount_point;
33ad9f1a
CS
1236 } else {
1237 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1238 if (!mp)
1239 return NULL;
b98f7d6e 1240 }
33ad9f1a 1241 return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
b98f7d6e 1242}
55c76589 1243
4fb3cba5 1244static char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
9a93d992 1245{
33ad9f1a
CS
1246 struct cgroup_meta_data *meta;
1247 struct cgroup_process_info *base_info, *info;
1248 struct cgroup_mount_point *mp;
1249 char *result = NULL;
33ad9f1a
CS
1250
1251 meta = lxc_cgroup_load_meta();
1252 if (!meta)
9a93d992 1253 return NULL;
33ad9f1a
CS
1254 base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
1255 if (!base_info)
178938fe 1256 goto out1;
33ad9f1a
CS
1257 info = find_info_for_subsystem(base_info, subsystem);
1258 if (!info)
178938fe 1259 goto out2;
33ad9f1a 1260 if (info->designated_mount_point) {
8900b9eb 1261 mp = info->designated_mount_point;
33ad9f1a
CS
1262 } else {
1263 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1264 if (!mp)
178938fe 1265 goto out3;
33ad9f1a
CS
1266 }
1267 result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
178938fe 1268out3:
178938fe 1269out2:
33ad9f1a 1270 lxc_cgroup_process_info_free(base_info);
178938fe 1271out1:
33ad9f1a 1272 lxc_cgroup_put_meta(meta);
33ad9f1a
CS
1273 return result;
1274}
9a93d992 1275
/*
 * Write 'value' to the cgroup file 'filename' (e.g. "<subsystem>.<key>")
 * inside the cgroup recorded in 'd'. The subsystem is the part of
 * 'filename' before the first '.'.
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * could not be determined.
 */
static int lxc_cgroup_set_data(const char *filename, const char *value, struct cgfs_data *d)
{
	size_t name_len = strlen(filename) + 1;
	char *controller = alloca(name_len);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the name can be cut at the first dot */
	memcpy(controller, filename, name_len);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_data(controller, d);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
9a93d992 1293
/*
 * Write 'value' to the cgroup file 'filename' of the container identified
 * by 'name' and 'lxcpath'. The subsystem is the part of 'filename' before
 * the first '.'.
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * could not be determined.
 */
static int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename) + 1;
	char *controller = alloca(name_len);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the name can be cut at the first dot */
	memcpy(controller, filename, name_len);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
1311
/*
 * Read the cgroup file 'filename' of the container identified by 'name'
 * and 'lxcpath' into 'value' (at most 'len' bytes are requested). The
 * subsystem is the part of 'filename' before the first '.'.
 *
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * could not be determined.
 */
static int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename) + 1;
	char *controller = alloca(name_len);
	char *dot, *dir;
	int rc;

	/* work on a stack copy so the name can be cut at the first dot */
	memcpy(controller, filename, name_len);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
1329
4fb3cba5 1330static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
aae1f3c4
CS
1331{
1332 size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
1333 char *path = NULL;
1334 char **parts = NULL;
1335 char *dirname = NULL;
1336 char *abs_path = NULL;
1337 char *abs_path2 = NULL;
d4ef7c50
SH
1338 struct cgfs_data *cgfs_d;
1339 struct cgroup_process_info *info, *base_info;
aae1f3c4
CS
1340 int r, saved_errno = 0;
1341
4fb3cba5
DE
1342 cgfs_d = hdata;
1343 if (!cgfs_d)
1344 return false;
d4ef7c50
SH
1345 base_info = cgfs_d->info;
1346
0769b82a
CS
1347 /* If we get passed the _NOSPEC types, we default to _MIXED, since we don't
1348 * have access to the lxc_conf object at this point. It really should be up
1349 * to the caller to fix this, but this doesn't really hurt.
1350 */
1351 if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
1352 type = LXC_AUTO_CGROUP_FULL_MIXED;
1353 else if (type == LXC_AUTO_CGROUP_NOSPEC)
1354 type = LXC_AUTO_CGROUP_MIXED;
1355
7997d7da
CS
1356 if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
1357 ERROR("could not mount cgroups into container: invalid type specified internally");
1358 errno = EINVAL;
c476bdce 1359 return false;
7997d7da
CS
1360 }
1361
aae1f3c4
CS
1362 path = calloc(1, bufsz);
1363 if (!path)
c476bdce 1364 return false;
aae1f3c4
CS
1365 snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
1366 r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
1367 if (r < 0) {
1368 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
c476bdce 1369 return false;
aae1f3c4
CS
1370 }
1371
1372 /* now mount all the hierarchies we care about */
1373 for (info = base_info; info; info = info->next) {
1374 size_t subsystem_count, i;
1375 struct cgroup_mount_point *mp = info->designated_mount_point;
1376 if (!mp)
1377 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1378 if (!mp) {
1379 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1380 goto out_error;
1381 }
1382
1383 subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
1384 parts = calloc(subsystem_count + 1, sizeof(char *));
1385 if (!parts)
1386 goto out_error;
1387
1388 for (i = 0; i < subsystem_count; i++) {
1389 if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
1390 parts[i] = info->hierarchy->subsystems[i] + 5;
1391 else
1392 parts[i] = info->hierarchy->subsystems[i];
1393 }
1394 dirname = lxc_string_join(",", (const char **)parts, false);
1395 if (!dirname)
1396 goto out_error;
1397
1398 /* create subsystem directory */
1399 abs_path = lxc_append_paths(path, dirname);
1400 if (!abs_path)
1401 goto out_error;
1402 r = mkdir_p(abs_path, 0755);
1403 if (r < 0 && errno != EEXIST) {
1404 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
1405 goto out_error;
1406 }
1407
aae1f3c4
CS
1408 abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
1409 if (!abs_path2)
1410 goto out_error;
aae1f3c4 1411
7997d7da
CS
1412 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1413 /* bind-mount the cgroup entire filesystem there */
1414 if (strcmp(mp->mount_prefix, "/") != 0) {
1415 /* FIXME: maybe we should just try to remount the entire hierarchy
1416 * with a regular mount command? may that works? */
1417 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
1418 goto out_error;
1419 }
1420 r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
1421 if (r < 0) {
1422 SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
1423 goto out_error;
1424 }
f8f3c3c0
SG
1425 /* main cgroup path should be read-only */
1426 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1427 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1428 if (r < 0) {
1429 SYSERROR("error re-mounting %s readonly", abs_path);
1430 goto out_error;
1431 }
1432 }
7997d7da
CS
1433 /* own cgroup should be read-write */
1434 if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
1435 r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
1436 if (r < 0) {
1437 SYSERROR("error bind-mounting %s onto itself", abs_path2);
1438 goto out_error;
1439 }
1440 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
1441 if (r < 0) {
1442 SYSERROR("error re-mounting %s readwrite", abs_path2);
1443 goto out_error;
1444 }
1445 }
1446 } else {
1447 /* create path for container's cgroup */
1448 r = mkdir_p(abs_path2, 0755);
1449 if (r < 0 && errno != EEXIST) {
1450 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
1451 goto out_error;
1452 }
aae1f3c4 1453
b46f0553
CS
1454 /* for read-only and mixed cases, we have to bind-mount the tmpfs directory
1455 * that points to the hierarchy itself (i.e. /sys/fs/cgroup/cpu etc.) onto
1456 * itself and then bind-mount it read-only, since we keep the tmpfs itself
1457 * read-write (see comment below)
1458 */
1459 if (type == LXC_AUTO_CGROUP_MIXED || type == LXC_AUTO_CGROUP_RO) {
1460 r = mount(abs_path, abs_path, NULL, MS_BIND, NULL);
1461 if (r < 0) {
1462 SYSERROR("error bind-mounting %s onto itself", abs_path);
1463 goto out_error;
1464 }
1465 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1466 if (r < 0) {
1467 SYSERROR("error re-mounting %s readonly", abs_path);
1468 goto out_error;
1469 }
1470 }
1471
7997d7da
CS
1472 free(abs_path);
1473 abs_path = NULL;
1474
1475 /* bind-mount container's cgroup to that directory */
1476 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1477 if (!abs_path)
1478 goto out_error;
1479 r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
1480 if (r < 0) {
1481 SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
1482 goto out_error;
1483 }
1484 if (type == LXC_AUTO_CGROUP_RO) {
1485 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1486 if (r < 0) {
1487 SYSERROR("error re-mounting %s readonly", abs_path2);
1488 goto out_error;
1489 }
1490 }
aae1f3c4
CS
1491 }
1492
1493 free(abs_path);
1494 free(abs_path2);
1495 abs_path = NULL;
1496 abs_path2 = NULL;
1497
1498 /* add symlinks for every single subsystem */
1499 if (subsystem_count > 1) {
1500 for (i = 0; i < subsystem_count; i++) {
1501 abs_path = lxc_append_paths(path, parts[i]);
1502 if (!abs_path)
1503 goto out_error;
1504 r = symlink(dirname, abs_path);
1505 if (r < 0)
1506 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
1507 free(abs_path);
1508 abs_path = NULL;
1509 }
1510 }
1511 free(dirname);
1512 free(parts);
1513 dirname = NULL;
1514 parts = NULL;
1515 }
1516
b46f0553
CS
1517 /* We used to remount the entire tmpfs readonly if any :ro or
1518 * :mixed mode was specified. However, Ubuntu's mountall has the
1519 * unfortunate behavior to block bootup if /sys/fs/cgroup is
1520 * mounted read-only and cannot be remounted read-write.
1521 * (mountall reads /lib/init/fstab and tries to (re-)mount all of
1522 * these if they are not already mounted with the right options;
1523 * it contains an entry for /sys/fs/cgroup. In case it can't do
1524 * that, it prompts for the user to either manually fix it or
1525 * boot anyway. But without user input, booting of the container
1526 * hangs.)
1527 *
1528 * Instead of remounting the entire tmpfs readonly, we only
1529 * remount the paths readonly that are part of the cgroup
1530 * hierarchy.
f8f3c3c0 1531 */
f8f3c3c0 1532
aae1f3c4
CS
1533 free(path);
1534
c476bdce 1535 return true;
aae1f3c4
CS
1536
1537out_error:
1538 saved_errno = errno;
1539 free(path);
1540 free(dirname);
1541 free(parts);
1542 free(abs_path);
1543 free(abs_path2);
1544 errno = saved_errno;
c476bdce 1545 return false;
aae1f3c4
CS
1546}
1547
4fb3cba5 1548static int cgfs_nrtasks(void *hdata)
33ad9f1a 1549{
4fb3cba5
DE
1550 struct cgfs_data *d = hdata;
1551 struct cgroup_process_info *info;
33ad9f1a
CS
1552 struct cgroup_mount_point *mp = NULL;
1553 char *abs_path = NULL;
1554 int ret;
460a1cf0 1555
4fb3cba5
DE
1556 if (!d) {
1557 errno = ENOENT;
1558 return -1;
1559 }
1560
1561 info = d->info;
33ad9f1a
CS
1562 if (!info) {
1563 errno = ENOENT;
1564 return -1;
b98f7d6e 1565 }
c8f7c563 1566
33ad9f1a 1567 if (info->designated_mount_point) {
8900b9eb 1568 mp = info->designated_mount_point;
33ad9f1a
CS
1569 } else {
1570 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
1571 if (!mp)
1572 return -1;
c8f7c563
CS
1573 }
1574
33ad9f1a
CS
1575 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1576 if (!abs_path)
1577 return -1;
1578
1579 ret = cgroup_recursive_task_count(abs_path);
1580 free(abs_path);
1581 return ret;
c8f7c563
CS
1582}
1583
574c4428
QH
1584static struct cgroup_process_info *
1585lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str,
1586 struct cgroup_meta_data *meta)
d08ba6ec 1587{
33ad9f1a
CS
1588 struct cgroup_process_info *result = NULL;
1589 FILE *proc_pid_cgroup = NULL;
1590 char *line = NULL;
1591 size_t sz = 0;
1592 int saved_errno = 0;
1593 struct cgroup_process_info **cptr = &result;
1594 struct cgroup_process_info *entry = NULL;
1595
1596 proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
1597 if (!proc_pid_cgroup)
b98f7d6e 1598 return NULL;
1ac470c0 1599
33ad9f1a
CS
1600 while (getline(&line, &sz, proc_pid_cgroup) != -1) {
1601 /* file format: hierarchy:subsystems:group */
1602 char *colon1;
1603 char *colon2;
1604 char *endptr;
1605 int hierarchy_number;
1606 struct cgroup_hierarchy *h = NULL;
fd4f5a56 1607
33ad9f1a 1608 if (!line[0])
ae5c8b8e 1609 continue;
b98f7d6e 1610
33ad9f1a
CS
1611 if (line[strlen(line) - 1] == '\n')
1612 line[strlen(line) - 1] = '\0';
1613
1614 colon1 = strchr(line, ':');
1615 if (!colon1)
8900b9eb 1616 continue;
33ad9f1a
CS
1617 *colon1++ = '\0';
1618 colon2 = strchr(colon1, ':');
1619 if (!colon2)
ae5c8b8e 1620 continue;
33ad9f1a 1621 *colon2++ = '\0';
e4659536 1622
33ad9f1a
CS
1623 endptr = NULL;
1624 hierarchy_number = strtoul(line, &endptr, 10);
1625 if (!endptr || *endptr)
9a93d992 1626 continue;
9a93d992 1627
33ad9f1a
CS
1628 if (hierarchy_number > meta->maximum_hierarchy) {
1629 /* we encountered a hierarchy we didn't have before,
1630 * so probably somebody remounted some stuff in the
1631 * mean time...
1632 */
1633 errno = EAGAIN;
1634 goto out_error;
b98f7d6e 1635 }
33ad9f1a
CS
1636
1637 h = meta->hierarchies[hierarchy_number];
1638 if (!h) {
1639 /* we encountered a hierarchy that was thought to be
1640 * dead before, so probably somebody remounted some
1641 * stuff in the mean time...
1642 */
1643 errno = EAGAIN;
1644 goto out_error;
b98f7d6e 1645 }
33ad9f1a
CS
1646
1647 /* we are told that we should ignore this hierarchy */
1648 if (!h->used)
b98f7d6e 1649 continue;
5193cc3d 1650
33ad9f1a
CS
1651 entry = calloc(1, sizeof(struct cgroup_process_info));
1652 if (!entry)
1653 goto out_error;
fd4f5a56 1654
33ad9f1a
CS
1655 entry->meta_ref = lxc_cgroup_get_meta(meta);
1656 entry->hierarchy = h;
1657 entry->cgroup_path = strdup(colon2);
1658 if (!entry->cgroup_path)
1659 goto out_error;
d08ba6ec 1660
33ad9f1a
CS
1661 *cptr = entry;
1662 cptr = &entry->next;
1663 entry = NULL;
b98f7d6e 1664 }
b98f7d6e 1665
33ad9f1a
CS
1666 fclose(proc_pid_cgroup);
1667 free(line);
1668 return result;
1669
1670out_error:
1671 saved_errno = errno;
1672 if (proc_pid_cgroup)
1673 fclose(proc_pid_cgroup);
1674 lxc_cgroup_process_info_free(result);
1675 lxc_cgroup_process_info_free(entry);
1676 free(line);
1677 errno = saved_errno;
ae5c8b8e 1678 return NULL;
36b86299
DL
1679}
1680
574c4428
QH
/*
 * Extract the cgroup subsystems from a comma-separated mount option string.
 *
 * A token counts as a subsystem if it starts with "name=" (named
 * hierarchies) or is contained in 'kernel_list'. Returns a NULL-terminated
 * array of duplicated strings; NULL is returned both when no token
 * qualified and when an allocation failed (errno preserved in that case).
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	size_t opts_len = strlen(mount_options) + 1;
	char *cursor = alloca(opts_len);
	char *tok, *state = NULL;
	char **found = NULL;
	size_t found_cap = 0;
	size_t found_len = 0;
	int saved_errno;

	/* strtok_r modifies its input, so tokenize a stack copy */
	memcpy(cursor, mount_options, opts_len);

	while ((tok = strtok_r(cursor, ",", &state)) != NULL) {
		/* every call after the first continues from 'state' */
		cursor = NULL;

		/* we have a subsystem if it's either in the list of
		 * subsystems provided by the kernel OR if it starts
		 * with name= for named hierarchies
		 */
		if (strncmp(tok, "name=", 5) != 0 &&
		    !lxc_string_in_array(tok, (const char **)kernel_list))
			continue;

		if (lxc_grow_array((void ***)&found, &found_cap, found_len + 1, 12) < 0)
			goto fail;

		/* terminate first so the array stays well-formed if strdup fails */
		found[found_len + 1] = NULL;
		found[found_len] = strdup(tok);
		if (!found[found_len])
			goto fail;
		found_len++;
	}

	return found;

fail:
	saved_errno = errno;
	lxc_free_array((void **)found, free);
	errno = saved_errno;
	return NULL;
}
1718
574c4428 1719static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
b98f7d6e 1720{
33ad9f1a
CS
1721 if (!mp)
1722 return;
1723 free(mp->mount_point);
1724 free(mp->mount_prefix);
1725 free(mp);
bcbd102c
SH
1726}
1727
574c4428 1728static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
341a9bd8 1729{
33ad9f1a
CS
1730 if (!h)
1731 return;
1732 lxc_free_array((void **)h->subsystems, free);
8bfcb981 1733 free(h->all_mount_points);
33ad9f1a
CS
1734 free(h);
1735}
341a9bd8 1736
/*
 * Check whether 'name' is acceptable as a single cgroup path component.
 *
 * Every byte must be a printable ASCII character other than space and '/'
 * (i.e. in the range 33 - 126): space is kicked out because it would break
 * legacy lxc-ls. The names "." and ".." are rejected as well. Note that
 * the empty string passes all of these checks.
 */
static bool is_valid_cgroup(const char *name)
{
	size_t pos;

	for (pos = 0; name[pos] != '\0'; pos++) {
		/* compare as unsigned so bytes >= 128 are rejected no matter
		 * whether plain char is signed
		 */
		unsigned char c = (unsigned char)name[pos];

		if (c <= 32 || c >= 127 || c == '/')
			return false;
	}

	if (strcmp(name, ".") == 0)
		return false;
	if (strcmp(name, "..") == 0)
		return false;
	return true;
}
341a9bd8 1750
574c4428
QH
/*
 * Create or remove the directory of cgroup 'path' below mount point 'mp'.
 *
 * do_remove: false creates the directory with mode 0777; true removes it.
 * recurse:   only used when removing; non-zero selects cgroup_rmdir()
 *            instead of a plain rmdir().
 *
 * Returns the result of the directory operation, or -1 if the absolute
 * path cannot be built. errno of the directory operation is preserved.
 */
static int create_or_remove_cgroup(bool do_remove,
		struct cgroup_mount_point *mp, const char *path, int recurse)
{
	int rc, err;
	char *dir = cgroup_to_absolute_path(mp, path, NULL);

	if (!dir)
		return -1;

	/* create or remove directory */
	if (!do_remove)
		rc = mkdir(dir, 0777);
	else if (recurse)
		rc = cgroup_rmdir(dir);
	else
		rc = rmdir(dir);

	/* free() must not clobber the errno of the operation above */
	err = errno;
	free(dir);
	errno = err;
	return rc;
}
bcbd102c 1772
/*
 * Create the directory of cgroup 'path' below mount point 'mp'.
 * Returns whatever create_or_remove_cgroup() returns.
 */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;
	const int recurse = false;	/* not used when creating */

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
1777
574c4428
QH
/*
 * Remove the directory of cgroup 'path' below mount point 'mp'; 'recurse'
 * is handed through to create_or_remove_cgroup(). Returns whatever that
 * function returns.
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
576f946d 1783
574c4428
QH
1784static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp,
1785 const char *path, const char *suffix)
33ad9f1a
CS
1786{
1787 /* first we have to make sure we subtract the mount point's prefix */
1788 char *prefix = mp->mount_prefix;
1789 char *buf;
1790 ssize_t len, rv;
1791
1792 /* we want to make sure only absolute paths to cgroups are passed to us */
1793 if (path[0] != '/') {
1794 errno = EINVAL;
1795 return NULL;
1796 }
b98f7d6e 1797
33ad9f1a
CS
1798 if (prefix && !strcmp(prefix, "/"))
1799 prefix = NULL;
b98f7d6e 1800
33ad9f1a
CS
1801 /* prefix doesn't match */
1802 if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
1803 errno = EINVAL;
1804 return NULL;
1805 }
1806 /* if prefix is /foo and path is /foobar */
1807 if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
1808 errno = EINVAL;
1809 return NULL;
1810 }
b98f7d6e 1811
33ad9f1a
CS
1812 /* remove prefix from path */
1813 path += prefix ? strlen(prefix) : 0;
b98f7d6e 1814
33ad9f1a
CS
1815 len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
1816 buf = calloc(len + 1, 1);
50266dc6
DE
1817 if (!buf)
1818 return NULL;
33ad9f1a 1819 rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
8900b9eb 1820 if (rv > len) {
33ad9f1a
CS
1821 free(buf);
1822 errno = ENOMEM;
8900b9eb 1823 return NULL;
8b92dc3a 1824 }
576f946d 1825
33ad9f1a 1826 return buf;
e0f888d9 1827}
283678ed 1828
574c4428
QH
1829static struct cgroup_process_info *
1830find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
283678ed 1831{
33ad9f1a
CS
1832 struct cgroup_process_info *info_ptr;
1833 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1834 struct cgroup_hierarchy *h = info_ptr->hierarchy;
1835 if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
1836 return info_ptr;
b98f7d6e 1837 }
33ad9f1a
CS
1838 errno = ENOENT;
1839 return NULL;
1840}
283678ed 1841
574c4428
QH
/*
 * Read the file 'sub_filename' inside the directory 'cgroup_path' into
 * 'value', passing 'len' on to lxc_read_from_file().
 *
 * Returns the result of lxc_read_from_file() with its errno preserved, or
 * -1 if the file name cannot be built.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *pieces[3];
	char *file;
	int rc, err;

	pieces[0] = cgroup_path;
	pieces[1] = sub_filename;
	pieces[2] = NULL;

	file = lxc_string_join("/", pieces, false);
	if (!file)
		return -1;

	rc = lxc_read_from_file(file, value, len);

	/* free() must not clobber the errno of the read */
	err = errno;
	free(file);
	errno = err;
	return rc;
}
b113383b 1863
574c4428
QH
/*
 * Write the string @value (without its terminating NUL) to the file
 * "<cgroup_path>/<sub_filename>".
 *
 * Returns -1 if the file name could not be built, otherwise the return
 * value of lxc_write_to_file(). The errno left by the write is preserved
 * across the release of the temporary file name.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *file_path;
	int rc, write_errno;

	file_path = lxc_string_join("/", components, false);
	if (!file_path)
		return -1;

	rc = lxc_write_to_file(file_path, value, strlen(value), false);

	/* free() may touch errno; report the one from the write itself */
	write_errno = errno;
	free(file_path);
	errno = write_errno;

	return rc;
}
1885
4fb3cba5 1886static int do_setup_cgroup_limits(struct cgfs_data *d,
574c4428 1887 struct lxc_list *cgroup_settings, bool do_devices)
b98f7d6e
SH
1888{
1889 struct lxc_list *iterator;
1890 struct lxc_cgroup *cg;
1891 int ret = -1;
1892
33ad9f1a 1893 if (lxc_list_empty(cgroup_settings))
b98f7d6e
SH
1894 return 0;
1895
33ad9f1a 1896 lxc_list_for_each(iterator, cgroup_settings) {
b98f7d6e
SH
1897 cg = iterator->elem;
1898
33ad9f1a 1899 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
b98f7d6e 1900 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
4fb3cba5 1901 cgroup_devices_has_allow_or_deny(d, cg->value, false))
b98f7d6e
SH
1902 continue;
1903 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
4fb3cba5 1904 cgroup_devices_has_allow_or_deny(d, cg->value, true))
b98f7d6e 1905 continue;
4fb3cba5 1906 if (lxc_cgroup_set_data(cg->subsystem, cg->value, d)) {
959aee9c 1907 ERROR("Error setting %s to %s for %s",
4fb3cba5 1908 cg->subsystem, cg->value, d->name);
b98f7d6e
SH
1909 goto out;
1910 }
b113383b 1911 }
b98f7d6e
SH
1912
1913 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
b113383b
SH
1914 }
1915
b98f7d6e
SH
1916 ret = 0;
1917 INFO("cgroup has been setup");
1918out:
b113383b
SH
1919 return ret;
1920}
b98f7d6e 1921
/*
 * Inspect the container's "devices.list" file to decide whether writing
 * the rule @v to devices.allow (@for_allow == true) or devices.deny
 * (@for_allow == false) can be skipped.
 *
 * for_allow == true:  returns true when devices.list contains the line
 *                     "a *:* rwm" or a line equal to @v.
 * for_allow == false: only the values "a" and "a *:* rwm" are examined
 *                     (anything else returns false); returns true unless
 *                     devices.list contains the line "a *:* rwm".
 *
 * Returns false whenever the devices hierarchy path cannot be resolved
 * or devices.list cannot be opened.
 */
static bool cgroup_devices_has_allow_or_deny(struct cgfs_data *d,
					     char *v, bool for_allow)
{
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	parts[0] = (const char *)lxc_cgroup_get_hierarchy_abs_path_data("devices", d);
	if (!parts[0])
		return false;

	path = lxc_string_join("/", parts, false);
	/*
	 * The hierarchy path is owned by us and only needed to build the
	 * joined file name; release it on every path, not just on failure,
	 * so it is not leaked.
	 */
	free((void *)parts[0]);
	parts[0] = NULL;
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);

		/* compare without the trailing newline */
		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = '\0';

		if (strcmp(line, "a *:* rwm") == 0) {
			ret = for_allow;
			break;
		}
		if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			break;
		}
	}

	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
1976
/*
 * Count the lines of every file named "tasks" found in @cgroup_path and,
 * recursively, in all of its subdirectories.
 *
 * Returns the accumulated count, 0 when @cgroup_path cannot be opened as
 * a directory, and -1 when a path name cannot be built or an entry cannot
 * be stat()ed. Negative results from a nested directory or from
 * count_lines() are ignored and do not abort the walk.
 *
 * readdir() is used instead of the deprecated readdir_r(): it needs no
 * caller-sized entry buffer, and only this function touches the stream.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d;
	struct dirent *dent;
	int n = 0, r;

	d = opendir(cgroup_path);
	if (!d)
		return 0;

	while ((dent = readdir(d)) != NULL) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;

		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			closedir(d);
			return -1;
		}

		r = stat(sub_path, &st);
		if (r < 0) {
			closedir(d);
			free(sub_path);
			return -1;
		}

		if (S_ISDIR(st.st_mode)) {
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			r = count_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}
	closedir(d);

	return n;
}
2039
/*
 * Count how many lines getline() can read from the file @fn.
 *
 * Returns the number of lines, or -1 if the file cannot be opened.
 */
static int count_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int lines = 0;
	FILE *fp;

	fp = fopen_cloexec(fn, "r");
	if (!fp)
		return -1;

	for (;;) {
		if (getline(&buf, &cap, fp) == -1)
			break;
		lines++;
	}

	free(buf);
	fclose(fp);
	return lines;
}
2058
574c4428
QH
2059static int handle_cgroup_settings(struct cgroup_mount_point *mp,
2060 char *cgroup_path)
b98f7d6e 2061{
33ad9f1a 2062 int r, saved_errno = 0;
7e7243e1 2063 char buf[2];
1ea59ad2 2064
934b1673
SH
2065 mp->need_cpuset_init = false;
2066
1ea59ad2
SH
2067 /* If this is the memory cgroup, we want to enforce hierarchy.
2068 * But don't fail if for some reason we can't.
2069 */
2edb53c7
SH
2070 if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
2071 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
2072 if (cc_path) {
2073 r = lxc_read_from_file(cc_path, buf, 1);
2074 if (r < 1 || buf[0] != '1') {
2075 r = lxc_write_to_file(cc_path, "1", 1, false);
2076 if (r < 0)
a8916143 2077 SYSERROR("failed to set memory.use_hierarchy to 1; continuing");
2edb53c7 2078 }
1ea59ad2
SH
2079 free(cc_path);
2080 }
2edb53c7 2081 }
1ea59ad2 2082
33ad9f1a
CS
2083 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2084 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2085 * and cpuset.cpus and then
2086 */
2edb53c7
SH
2087 if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
2088 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
d703c2b1
RV
2089 struct stat sb;
2090
33ad9f1a 2091 if (!cc_path)
2edb53c7 2092 return -1;
d703c2b1
RV
2093 /* cgroup.clone_children is not available when running under
2094 * older kernel versions; in this case, we'll initialize
2095 * cpuset.cpus and cpuset.mems later, after the new cgroup
2096 * was created
2097 */
2098 if (stat(cc_path, &sb) != 0 && errno == ENOENT) {
934b1673 2099 mp->need_cpuset_init = true;
d703c2b1
RV
2100 free(cc_path);
2101 return 0;
2102 }
7e7243e1
SH
2103 r = lxc_read_from_file(cc_path, buf, 1);
2104 if (r == 1 && buf[0] == '1') {
2105 free(cc_path);
2edb53c7 2106 return 0;
7e7243e1 2107 }
33ad9f1a 2108 r = lxc_write_to_file(cc_path, "1", 1, false);
2edb53c7
SH
2109 saved_errno = errno;
2110 free(cc_path);
2111 errno = saved_errno;
2112 return r < 0 ? -1 : 0;
33ad9f1a
CS
2113 }
2114 return 0;
b98f7d6e 2115}
484ed030 2116
/*
 * Read @fn into @buf via lxc_read_from_file() and NUL-terminate the
 * result.
 *
 * If the read filled the whole buffer, the last byte is overwritten with
 * the terminator (callers can detect this by ret == bufsize).
 *
 * Returns the value of lxc_read_from_file() (negative on read failure),
 * or -1 when called with a zero-sized buffer that the read "filled".
 */
static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
{
	int nread;

	nread = lxc_read_from_file(fn, buf, bufsize);
	if (nread < 0) {
		SYSERROR("failed to read %s", fn);
		return nread;
	}

	/* common case: room left for the terminator */
	if ((size_t)nread != bufsize) {
		buf[nread] = '\0';
		return nread;
	}

	if (bufsize == 0) {
		/* Callers don't do this, but regression/sanity check */
		ERROR("%s: was not expecting 0 bufsize", __func__);
		return -1;
	}

	/* obviously this wasn't empty */
	buf[bufsize - 1] = '\0';
	return nread;
}
2137
/*
 * Initialize the cpuset file @name (e.g. "/cpuset.cpus") of the cgroup
 * @path below mount point @mp by copying the value of the same file from
 * the parent cgroup.
 *
 * A child file that already holds a non-empty value is left untouched and
 * counts as success.
 *
 * Returns true on success, false on any failure (path construction,
 * reading either file, an over-long parent value, or the final write).
 */
static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
				const char *path, const char *name)
{
	char value[1024];
	char *childfile;
	char *parentfile = NULL;
	char *parentdir, *slash;
	bool ok = false;
	int ret;

	childfile = cgroup_to_absolute_path(mp, path, name);
	if (!childfile)
		return false;

	/* don't overwrite a non-empty value in the file */
	ret = cgroup_read_from_file(childfile, value, sizeof(value));
	if (ret < 0)
		goto out;
	if (value[0] != '\0' && value[0] != '\n') {
		ok = true;
		goto out;
	}

	/* derive the parent cgroup by cutting off the last path component */
	parentdir = strdup(path);
	if (!parentdir)
		goto out;

	slash = strrchr(parentdir, '/');
	if (!slash) {
		free(parentdir);
		goto out;
	}
	if (slash == parentdir)
		slash++; /* keep the '/' at the start */
	*slash = '\0';

	/* path to the same name in the parent cgroup */
	parentfile = cgroup_to_absolute_path(mp, parentdir, name);
	free(parentdir);
	if (!parentfile)
		goto out;

	/* copy from parent to child cgroup */
	ret = cgroup_read_from_file(parentfile, value, sizeof(value));
	if (ret < 0)
		goto out;
	if ((size_t)ret == sizeof(value)) {
		/* If anyone actually sees this error, we can address it */
		ERROR("parent cpuset value too long");
		goto out;
	}

	if (lxc_write_to_file(childfile, value, strlen(value), false) >= 0)
		ok = true;
	else
		SYSERROR("failed writing %s", childfile);

out:
	free(parentfile);
	free(childfile);
	return ok;
}
2195
2196static bool init_cpuset_if_needed(struct cgroup_mount_point *mp,
2197 const char *path)
2198{
2199 /* the files we have to handle here are only in cpuset hierarchies */
2200 if (!lxc_string_in_array("cpuset",
2201 (const char **)mp->hierarchy->subsystems))
2202 return true;
2203
b1dad6f6
RV
2204 if (!mp->need_cpuset_init)
2205 return true;
2206
d703c2b1
RV
2207 return (do_init_cpuset_file(mp, path, "/cpuset.cpus") &&
2208 do_init_cpuset_file(mp, path, "/cpuset.mems") );
2209}
2210
4fb3cba5 2211struct cgroup_ops *cgfs_ops_init(void)
484ed030 2212{
4fb3cba5 2213 return &cgfs_ops;
d4ef7c50 2214}
484ed030 2215
4fb3cba5 2216static void *cgfs_init(const char *name)
d4ef7c50 2217{
4fb3cba5 2218 struct cgfs_data *d;
484ed030 2219
4fb3cba5
DE
2220 d = malloc(sizeof(*d));
2221 if (!d)
2222 return NULL;
484ed030 2223
4fb3cba5
DE
2224 memset(d, 0, sizeof(*d));
2225 d->name = strdup(name);
2226 if (!d->name)
2227 goto err1;
2228
2229 /* if we are running as root, use system cgroup pattern, otherwise
2230 * just create a cgroup under the current one. But also fall back to
2231 * that if for some reason reading the configuration fails and no
2232 * default value is available
2233 */
2234 if (geteuid() == 0)
2235 d->cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
2236 if (!d->cgroup_pattern)
2237 d->cgroup_pattern = "%n";
2238
2239 d->meta = lxc_cgroup_load_meta();
2240 if (!d->meta) {
2241 ERROR("cgroupfs failed to detect cgroup metadata");
2242 goto err2;
2243 }
2244 return d;
2245
2246err2:
2247 free(d->name);
2248err1:
2249 free(d);
2250 return NULL;
d4ef7c50 2251}
484ed030 2252
4fb3cba5 2253static void cgfs_destroy(void *hdata)
d4ef7c50 2254{
4fb3cba5
DE
2255 struct cgfs_data *d = hdata;
2256
d4ef7c50
SH
2257 if (!d)
2258 return;
4fb3cba5
DE
2259 if (d->name)
2260 free(d->name);
d4ef7c50
SH
2261 if (d->info)
2262 lxc_cgroup_process_info_free_and_remove(d->info);
2263 if (d->meta)
2264 lxc_cgroup_put_meta(d->meta);
2265 free(d);
d4ef7c50 2266}
484ed030 2267
4fb3cba5 2268static inline bool cgfs_create(void *hdata)
d4ef7c50 2269{
4fb3cba5
DE
2270 struct cgfs_data *d = hdata;
2271 struct cgroup_process_info *i;
2272 struct cgroup_meta_data *md;
484ed030 2273
4fb3cba5 2274 if (!d)
d4ef7c50 2275 return false;
4fb3cba5
DE
2276 md = d->meta;
2277 i = lxc_cgroupfs_create(d->name, d->cgroup_pattern, md, NULL);
d4ef7c50
SH
2278 if (!i)
2279 return false;
2280 d->info = i;
2281 return true;
2282}
484ed030 2283
4fb3cba5 2284static inline bool cgfs_enter(void *hdata, pid_t pid)
d4ef7c50 2285{
4fb3cba5
DE
2286 struct cgfs_data *d = hdata;
2287 struct cgroup_process_info *i;
d4ef7c50 2288 int ret;
4fb3cba5
DE
2289
2290 if (!d)
2291 return false;
2292 i = d->info;
2293 ret = lxc_cgroupfs_enter(i, pid, false);
484ed030 2294
d4ef7c50
SH
2295 return ret == 0;
2296}
2297
4fb3cba5 2298static inline bool cgfs_create_legacy(void *hdata, pid_t pid)
d4ef7c50 2299{
4fb3cba5
DE
2300 struct cgfs_data *d = hdata;
2301 struct cgroup_process_info *i;
2302
2303 if (!d)
2304 return false;
2305 i = d->info;
2306 if (lxc_cgroup_create_legacy(i, d->name, pid) < 0) {
2307 ERROR("failed to create legacy ns cgroups for '%s'", d->name);
d4ef7c50 2308 return false;
484ed030 2309 }
d4ef7c50
SH
2310 return true;
2311}
484ed030 2312
/*
 * Look up the container's cgroup path for @subsystem through
 * lxc_cgroup_get_hierarchy_path_data().
 *
 * Returns NULL when @hdata is NULL, otherwise whatever the lookup
 * returns.
 */
static const char *cgfs_get_cgroup(void *hdata, const char *subsystem)
{
	struct cgfs_data *d = hdata;

	if (d)
		return lxc_cgroup_get_hierarchy_path_data(subsystem, d);

	return NULL;
}
2321
2ba7a429
TA
2322static const char *cgfs_canonical_path(void *hdata)
2323{
2324 struct cgfs_data *d = hdata;
2325 struct cgroup_process_info *info_ptr;
2326 char *path = NULL;
2327
2328 if (!d)
2329 return NULL;
2330
2331 for (info_ptr = d->info; info_ptr; info_ptr = info_ptr->next) {
2332 if (!path)
2333 path = info_ptr->cgroup_path;
2334 else if (strcmp(path, info_ptr->cgroup_path) != 0) {
2335 ERROR("not all paths match %s, %s has path %s", path,
2336 info_ptr->hierarchy->subsystems[0], info_ptr->cgroup_path);
2337 return NULL;
2338 }
2339 }
2340
2341 return path;
2342}
2343
/*
 * Thaw the container by writing "THAWED" to freezer.state in its freezer
 * cgroup.
 *
 * Returns true only if the freezer cgroup could be located and the write
 * returned 0; false if @hdata is NULL, the relative or absolute freezer
 * path cannot be determined, or the write fails.
 */
static bool cgfs_unfreeze(void *hdata)
{
	struct cgfs_data *d = hdata;
	const char *cgrelpath;
	char *cgabspath;
	int ret;

	if (!d)
		return false;

	/*
	 * NOTE(review): the lookup presumably yields NULL when the container
	 * has no freezer hierarchy - confirm. Bail out rather than handing a
	 * NULL group path to lxc_cgroup_find_abs_path().
	 */
	cgrelpath = lxc_cgroup_get_hierarchy_path_data("freezer", d);
	if (!cgrelpath)
		return false;

	cgabspath = lxc_cgroup_find_abs_path("freezer", cgrelpath, true, NULL);
	if (!cgabspath)
		return false;

	ret = do_cgroup_set(cgabspath, "freezer.state", "THAWED");
	free(cgabspath);
	return ret == 0;
}
2362
4fb3cba5
DE
/*
 * cgroup_ops adapter around do_setup_cgroup_limits(): apply the settings
 * in @cgroup_conf, restricted to device entries when @with_devices is
 * true and to all other entries when it is false.
 *
 * Returns true when @hdata is non-NULL and do_setup_cgroup_limits()
 * returned 0.
 */
static bool cgroupfs_setup_limits(void *hdata, struct lxc_list *cgroup_conf,
				  bool with_devices)
{
	struct cgfs_data *d = hdata;
	int rc;

	if (!d)
		return false;

	rc = do_setup_cgroup_limits(d, cgroup_conf, with_devices);
	return rc == 0;
}
2372
/*
 * Move the process @pid into the cgroups of the container @name found
 * under @lxcpath.
 *
 * The cgroup metadata is loaded only long enough to obtain the
 * container's process info, and the info is released again after the
 * move has been attempted.
 *
 * Returns true on success; on any failure an error is logged and false
 * is returned.
 */
static bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
{
	struct cgroup_meta_data *meta_data;
	struct cgroup_process_info *container_info;
	int ret;

	meta_data = lxc_cgroup_load_meta();
	if (!meta_data)
		goto fail;

	container_info = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
	lxc_cgroup_put_meta(meta_data);
	if (!container_info)
		goto fail;

	ret = lxc_cgroupfs_enter(container_info, pid, false);
	lxc_cgroup_process_info_free(container_info);
	if (ret < 0)
		goto fail;

	return true;

fail:
	ERROR("could not move attached process %d to cgroup of container", pid);
	return false;
}
2400
d4ef7c50 2401static struct cgroup_ops cgfs_ops = {
d4ef7c50 2402 .init = cgfs_init,
4fb3cba5 2403 .destroy = cgfs_destroy,
d4ef7c50
SH
2404 .create = cgfs_create,
2405 .enter = cgfs_enter,
2406 .create_legacy = cgfs_create_legacy,
2407 .get_cgroup = cgfs_get_cgroup,
2ba7a429 2408 .canonical_path = cgfs_canonical_path,
d4ef7c50
SH
2409 .get = lxc_cgroupfs_get,
2410 .set = lxc_cgroupfs_set,
4fb3cba5 2411 .unfreeze = cgfs_unfreeze,
9daf6f5d 2412 .setup_limits = cgroupfs_setup_limits,
d4ef7c50 2413 .name = "cgroupfs",
5d897655 2414 .attach = lxc_cgroupfs_attach,
0996e18a 2415 .chown = NULL,
c476bdce 2416 .mount_cgroup = cgroupfs_mount_cgroup,
4fb3cba5 2417 .nrtasks = cgfs_nrtasks,
d4ef7c50 2418};