]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroup.c
cgmanager: support lxc.mount.auto = cgroup
[mirror_lxc.git] / src / lxc / cgroup.c
CommitLineData
576f946d 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
576f946d 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
576f946d 22 */
d06245b8
NC
23#include "config.h"
24
576f946d 25#include <stdio.h>
576f946d 26#include <stdlib.h>
27#include <errno.h>
576f946d 28#include <unistd.h>
29#include <string.h>
341a9bd8 30#include <dirent.h>
576f946d 31#include <fcntl.h>
b98f7d6e 32#include <ctype.h>
576f946d 33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
aae1f3c4 37#include <sys/mount.h>
576f946d 38#include <netinet/in.h>
39#include <net/if.h>
40
e2bcd7db 41#include "error.h"
ae5c8b8e 42#include "commands.h"
b98f7d6e
SH
43#include "list.h"
44#include "conf.h"
33ad9f1a 45#include "utils.h"
740d1928 46#include "bdev.h"
f2363e38
ÇO
47#include "log.h"
48#include "cgroup.h"
49#include "start.h"
484ed030 50#include "state.h"
36eb9bde 51
edaf8b1b
SG
52#if IS_BIONIC
53#include <../include/lxcmntent.h>
54#else
55#include <mntent.h>
56#endif
57
36eb9bde 58lxc_log_define(lxc_cgroup, lxc);
576f946d 59
33ad9f1a
CS
60static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
61static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
62static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
63static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
64static bool is_valid_cgroup(const char *name);
33ad9f1a 65static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
603c64c2 66static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
33ad9f1a
CS
67static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
68static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
69static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
70static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
71static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
9daf6f5d 72static int do_setup_cgroup_limits(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
33ad9f1a
CS
73static int cgroup_recursive_task_count(const char *cgroup_path);
74static int count_lines(const char *fn);
1ea59ad2 75static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
d703c2b1 76static bool init_cpuset_if_needed(struct cgroup_mount_point *mp, const char *path);
33ad9f1a 77
d4ef7c50
SH
78static struct cgroup_ops cgfs_ops;
79struct cgroup_ops *active_cg_ops = &cgfs_ops;
80static void init_cg_ops(void);
81
82#ifdef HAVE_CGMANAGER
83/* this needs to be mutexed for api use */
84extern bool cgmanager_initialized;
85extern bool use_cgmanager;
86extern bool lxc_init_cgmanager(void);
87#else
88static bool cgmanager_initialized = false;
89static bool use_cgmanager = false;
90static bool lxc_init_cgmanager(void) { return false; }
91#endif
92
603c64c2
SH
/*
 * Recursively remove the cgroup directory @dirname.
 *
 * Every sub-directory found below @dirname is removed first (depth-first),
 * then @dirname itself is removed with rmdir(). Entries that are not
 * directories are never unlinked; only rmdir() is used.
 *
 * The walk is best-effort: a failure on one entry does not stop processing
 * of the remaining entries.
 *
 * Returns 0 if everything succeeded, -1 otherwise. On failure errno is set
 * to the error code of the first failure that was encountered.
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	int saved_errno = 0;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	/* readdir_r() is deprecated; each recursion level uses its own DIR
	 * stream, so plain readdir() is sufficient here.
	 */
	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			/* errno values are positive; this value is copied
			 * into errno at the end of the function.
			 */
			if (!saved_errno)
				saved_errno = ENAMETOOLONG;
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			SYSERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			if (!saved_errno)
				saved_errno = errno;
			continue;
		}

		if (S_ISDIR(mystat.st_mode)) {
			if (cgroup_rmdir(pathname) < 0) {
				if (!saved_errno)
					saved_errno = errno;
				failed = 1;
			}
		}
	}

	if (rmdir(dirname) < 0) {
		SYSERROR("%s: failed to delete %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		SYSERROR("%s: failed to close directory %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	errno = saved_errno;
	return failed ? -1 : 0;
}
161
33ad9f1a
CS
/*
 * Load the cgroup meta data, restricted to the hierarchies named in the
 * global "lxc.cgroup.use" configuration value (a comma separated list).
 *
 * When the configuration value is absent and no error was reported via
 * errno, no whitelist is applied. Returns NULL on failure; errno as left by
 * lxc_cgroup_load_meta2() is preserved across the cleanup.
 */
struct cgroup_meta_data *lxc_cgroup_load_meta()
{
	struct cgroup_meta_data *meta;
	char **use_list = NULL;
	const char *use_value;
	int err;

	/* errno tells "value not configured" apart from a lookup failure */
	errno = 0;
	use_value = lxc_global_config_value("lxc.cgroup.use");
	if (!use_value) {
		if (errno != 0)
			return NULL;
	} else {
		use_list = lxc_string_split_and_trim(use_value, ',');
		if (!use_list)
			return NULL;
	}

	meta = lxc_cgroup_load_meta2((const char **)use_list);

	/* releasing the list must not disturb the errno of the load */
	err = errno;
	lxc_free_array((void **)use_list, free);
	errno = err;

	return meta;
}
fd37327f 185
/*
 * Step 1: determine all kernel subsystems.
 *
 * Reads /proc/cgroups and appends a copy of the first tab-separated field
 * of every line whose second field is a plain decimal number to
 * *kernel_subsystems. Lines starting with '#', empty lines and lines that
 * do not have that shape are skipped.
 *
 * Returns true on success. On failure false is returned and entries already
 * appended to *kernel_subsystems are left in place.
 */
bool find_cgroup_subsystems(char ***kernel_subsystems)
{
	FILE *fp;
	char *buf = NULL;
	size_t buflen = 0;
	size_t count = 0;
	size_t capacity = 0;
	bool ok = false;

	fp = fopen_cloexec("/proc/cgroups", "r");
	if (!fp)
		return false;

	while (getline(&buf, &buflen, fp) != -1) {
		char *number;
		char *number_end;
		char *endp = NULL;

		if (buf[0] == '#' || buf[0] == '\0')
			continue;

		/* cut off the subsystem name */
		number = strchr(buf, '\t');
		if (!number)
			continue;
		*number++ = '\0';

		/* cut off the hierarchy number */
		number_end = strchr(number, '\t');
		if (!number_end)
			continue;
		*number_end = '\0';

		/* the value itself is not needed, only that the field is
		 * entirely numeric
		 */
		(void)strtoul(number, &endp, 10);
		if (!endp || *endp != '\0')
			continue;

		if (lxc_grow_array((void ***)kernel_subsystems, &capacity, count + 1, 12) < 0)
			goto out;

		(*kernel_subsystems)[count] = strdup(buf);
		if (!(*kernel_subsystems)[count])
			goto out;
		count++;
	}
	ok = true;

out:
	fclose(fp);
	free(buf);
	return ok;
}
241
242/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
243 * since mount points don't specify hierarchy number and
244 * /proc/cgroups does not contain named hierarchies
245 */
246static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
247 bool all_kernel_subsystems, bool all_named_subsystems,
248 const char **subsystem_whitelist)
249{
250 FILE *proc_self_cgroup;
251 char *line = NULL;
252 size_t sz = 0;
253 int r;
254 bool bret = false;
255 size_t hierarchy_capacity = 0;
ef6e34ee 256
33ad9f1a
CS
257 proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
258 /* if for some reason (because of setns() and pid namespace for example),
259 * /proc/self is not valid, we try /proc/1/cgroup... */
260 if (!proc_self_cgroup)
261 proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
262 if (!proc_self_cgroup)
b653309a 263 return false;
33ad9f1a
CS
264
265 while (getline(&line, &sz, proc_self_cgroup) != -1) {
266 /* file format: hierarchy:subsystems:group,
267 * we only extract hierarchy and subsystems
268 * here */
269 char *colon1;
270 char *colon2;
271 int hierarchy_number;
272 struct cgroup_hierarchy *h = NULL;
273 char **p;
274
275 if (!line[0])
276 continue;
ad08bbb7 277
33ad9f1a
CS
278 colon1 = strchr(line, ':');
279 if (!colon1)
8900b9eb 280 continue;
33ad9f1a
CS
281 *colon1++ = '\0';
282 colon2 = strchr(colon1, ':');
283 if (!colon2)
284 continue;
285 *colon2 = '\0';
ad08bbb7 286
33ad9f1a
CS
287 colon2 = NULL;
288 hierarchy_number = strtoul(line, &colon2, 10);
289 if (!colon2 || *colon2)
290 continue;
576f946d 291
33ad9f1a
CS
292 if (hierarchy_number > meta_data->maximum_hierarchy) {
293 /* lxc_grow_array will never shrink, so even if we find a lower
294 * hierarchy number here, the array will never be smaller
295 */
296 r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
297 if (r < 0)
b653309a 298 goto out;
5193cc3d 299
33ad9f1a
CS
300 meta_data->maximum_hierarchy = hierarchy_number;
301 }
fd37327f 302
33ad9f1a
CS
303 /* this shouldn't happen, we had this already */
304 if (meta_data->hierarchies[hierarchy_number])
b653309a 305 goto out;
33ad9f1a
CS
306
307 h = calloc(1, sizeof(struct cgroup_hierarchy));
308 if (!h)
b653309a 309 goto out;
33ad9f1a
CS
310
311 meta_data->hierarchies[hierarchy_number] = h;
312
313 h->index = hierarchy_number;
314 h->subsystems = lxc_string_split_and_trim(colon1, ',');
315 if (!h->subsystems)
b653309a 316 goto out;
33ad9f1a
CS
317 /* see if this hierarchy should be considered */
318 if (!all_kernel_subsystems || !all_named_subsystems) {
319 for (p = h->subsystems; *p; p++) {
320 if (!strncmp(*p, "name=", 5)) {
321 if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
322 h->used = true;
323 break;
324 }
325 } else {
326 if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
327 h->used = true;
328 break;
329 }
330 }
331 }
332 } else {
333 /* we want all hierarchy anyway */
334 h->used = true;
ae5c8b8e 335 }
ae5c8b8e 336 }
b653309a 337 bret = true;
0b9c21ab 338
b653309a 339out:
33ad9f1a 340 fclose(proc_self_cgroup);
0ccf7c2a 341 free(line);
b653309a
SH
342 return bret;
343}
344
345/* Step 3: determine all mount points of each hierarchy */
346static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
347{
348 bool bret = false;
349 FILE *proc_self_mountinfo;
350 char *line = NULL;
351 size_t sz = 0;
352 char **tokens = NULL;
353 size_t mount_point_count = 0;
354 size_t mount_point_capacity = 0;
355 size_t token_capacity = 0;
356 int r;
357
33ad9f1a
CS
358 proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
359 /* if for some reason (because of setns() and pid namespace for example),
360 * /proc/self is not valid, we try /proc/1/cgroup... */
361 if (!proc_self_mountinfo)
362 proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
363 if (!proc_self_mountinfo)
b653309a 364 return false;
33ad9f1a
CS
365
366 while (getline(&line, &sz, proc_self_mountinfo) != -1) {
178938fe 367 char *token, *line_tok, *saveptr = NULL;
33ad9f1a
CS
368 size_t i, j, k;
369 struct cgroup_mount_point *mount_point;
370 struct cgroup_hierarchy *h;
371 char **subsystems;
372
373 if (line[0] && line[strlen(line) - 1] == '\n')
374 line[strlen(line) - 1] = '\0';
375
178938fe 376 for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
33ad9f1a
CS
377 r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
378 if (r < 0)
b653309a 379 goto out;
33ad9f1a
CS
380 tokens[i++] = token;
381 }
b98f7d6e 382
33ad9f1a
CS
383 /* layout of /proc/self/mountinfo:
384 * 0: id
385 * 1: parent id
386 * 2: device major:minor
387 * 3: mount prefix
8900b9eb 388 * 4: mount point
33ad9f1a
CS
389 * 5: per-mount options
390 * [optional X]: additional data
391 * X+7: "-"
392 * X+8: type
393 * X+9: source
394 * X+10: per-superblock options
395 */
396 for (j = 6; j < i && tokens[j]; j++)
397 if (!strcmp(tokens[j], "-"))
398 break;
fd4f5a56 399
33ad9f1a
CS
400 /* could not find separator */
401 if (j >= i || !tokens[j])
402 continue;
403 /* there should be exactly three fields after
404 * the separator
405 */
406 if (i != j + 4)
407 continue;
fd4f5a56 408
33ad9f1a
CS
409 /* not a cgroup filesystem */
410 if (strcmp(tokens[j + 1], "cgroup") != 0)
411 continue;
b98f7d6e 412
33ad9f1a
CS
413 subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
414 if (!subsystems)
b653309a 415 goto out;
33ad9f1a
CS
416
417 h = NULL;
418 for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
419 if (meta_data->hierarchies[k] &&
420 meta_data->hierarchies[k]->subsystems[0] &&
421 lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
422 /* TODO: we could also check if the lists really match completely,
423 * just to have an additional sanity check */
424 h = meta_data->hierarchies[k];
b98f7d6e 425 break;
33ad9f1a 426 }
b98f7d6e 427 }
33ad9f1a
CS
428 lxc_free_array((void **)subsystems, free);
429
430 r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
431 if (r < 0)
b653309a 432 goto out;
33ad9f1a
CS
433
434 /* create mount point object */
435 mount_point = calloc(1, sizeof(*mount_point));
436 if (!mount_point)
b653309a 437 goto out;
33ad9f1a
CS
438
439 meta_data->mount_points[mount_point_count++] = mount_point;
440
441 mount_point->hierarchy = h;
442 mount_point->mount_point = strdup(tokens[4]);
443 mount_point->mount_prefix = strdup(tokens[3]);
444 if (!mount_point->mount_point || !mount_point->mount_prefix)
b653309a 445 goto out;
33ad9f1a
CS
446 mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
447
448 if (!strcmp(mount_point->mount_prefix, "/")) {
449 if (mount_point->read_only) {
450 if (!h->ro_absolute_mount_point)
451 h->ro_absolute_mount_point = mount_point;
452 } else {
453 if (!h->rw_absolute_mount_point)
454 h->rw_absolute_mount_point = mount_point;
455 }
b98f7d6e 456 }
ae5c8b8e 457
33ad9f1a
CS
458 k = lxc_array_len((void **)h->all_mount_points);
459 r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
460 if (r < 0)
b653309a 461 goto out;
33ad9f1a 462 h->all_mount_points[k] = mount_point;
fd4f5a56 463 }
b653309a
SH
464 bret = true;
465
466out:
b653309a 467 fclose(proc_self_mountinfo);
b653309a 468 free(tokens);
2cdafc54 469 free(line);
b653309a
SH
470 return bret;
471}
472
473struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
474{
475 bool all_kernel_subsystems = true;
476 bool all_named_subsystems = false;
477 struct cgroup_meta_data *meta_data = NULL;
478 char **kernel_subsystems = NULL;
479 int saved_errno = 0;
480
481 /* if the subsystem whitelist is not specified, include all
482 * hierarchies that contain kernel subsystems by default but
483 * no hierarchies that only contain named subsystems
484 *
485 * if it is specified, the specifier @all will select all
486 * hierarchies, @kernel will select all hierarchies with
487 * kernel subsystems and @named will select all named
488 * hierarchies
489 */
490 all_kernel_subsystems = subsystem_whitelist ?
491 (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
492 true;
493 all_named_subsystems = subsystem_whitelist ?
494 (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
495 false;
496
497 meta_data = calloc(1, sizeof(struct cgroup_meta_data));
498 if (!meta_data)
499 return NULL;
500 meta_data->ref = 1;
501
502 if (!find_cgroup_subsystems(&kernel_subsystems))
503 goto out_error;
504
505 if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
506 all_named_subsystems, subsystem_whitelist))
507 goto out_error;
508
509 if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
510 goto out_error;
fd4f5a56 511
33ad9f1a
CS
512 /* oops, we couldn't find anything */
513 if (!meta_data->hierarchies || !meta_data->mount_points) {
514 errno = EINVAL;
515 goto out_error;
ae5c8b8e 516 }
fd4f5a56 517
3a0abb3a 518 lxc_free_array((void **)kernel_subsystems, free);
33ad9f1a
CS
519 return meta_data;
520
521out_error:
522 saved_errno = errno;
33ad9f1a
CS
523 lxc_free_array((void **)kernel_subsystems, free);
524 lxc_cgroup_put_meta(meta_data);
525 errno = saved_errno;
526 return NULL;
fd4f5a56
DL
527}
528
33ad9f1a 529struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
e14f67a7 530{
33ad9f1a
CS
531 meta_data->ref++;
532 return meta_data;
533}
e14f67a7 534
33ad9f1a
CS
535struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
536{
537 size_t i;
538 if (!meta_data)
539 return NULL;
540 if (--meta_data->ref > 0)
541 return meta_data;
542 lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
543 if (meta_data->hierarchies) {
544 for (i = 0; i <= meta_data->maximum_hierarchy; i++)
545 lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
e14f67a7 546 }
33ad9f1a 547 free(meta_data->hierarchies);
178938fe 548 free(meta_data);
33ad9f1a 549 return NULL;
e14f67a7
U
550}
551
33ad9f1a 552struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
e14f67a7 553{
33ad9f1a
CS
554 size_t i;
555 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
556 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
557 if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
558 return h;
e14f67a7 559 }
e14f67a7
U
560 return NULL;
561}
562
33ad9f1a 563struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
b98f7d6e 564{
33ad9f1a
CS
565 struct cgroup_mount_point **mps;
566 struct cgroup_mount_point *current_result = NULL;
567 ssize_t quality = -1;
b98f7d6e 568
33ad9f1a
CS
569 /* trivial case */
570 if (hierarchy->rw_absolute_mount_point)
571 return hierarchy->rw_absolute_mount_point;
572 if (!should_be_writable && hierarchy->ro_absolute_mount_point)
573 return hierarchy->ro_absolute_mount_point;
b98f7d6e 574
33ad9f1a
CS
575 for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
576 struct cgroup_mount_point *mp = *mps;
577 size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
b98f7d6e 578
33ad9f1a
CS
579 if (prefix_len == 1 && mp->mount_prefix[0] == '/')
580 prefix_len = 0;
b98f7d6e 581
33ad9f1a
CS
582 if (should_be_writable && mp->read_only)
583 continue;
584
585 if (!prefix_len ||
586 (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
587 (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
588 /* search for the best quality match, i.e. the match with the
589 * shortest prefix where this group is still contained
590 */
591 if (quality == -1 || prefix_len < quality) {
592 current_result = mp;
593 quality = prefix_len;
594 }
b98f7d6e
SH
595 }
596 }
597
33ad9f1a
CS
598 if (!current_result)
599 errno = ENOENT;
600 return current_result;
b98f7d6e
SH
601}
602
/*
 * Resolve @group within the hierarchy of @subsystem to an absolute path,
 * with @suffix handed on to cgroup_to_absolute_path().
 *
 * The cgroup meta data is loaded for the duration of the call only.
 * Returns the path produced by cgroup_to_absolute_path(), or NULL on
 * failure with errno preserved across the cleanup.
 */
char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{
	struct cgroup_meta_data *meta;
	struct cgroup_hierarchy *hier;
	struct cgroup_mount_point *mount;
	char *abs_path = NULL;
	int err;

	meta = lxc_cgroup_load_meta();
	if (!meta)
		return NULL;

	hier = lxc_cgroup_find_hierarchy(meta, subsystem);
	if (hier) {
		mount = lxc_cgroup_find_mount_point(hier, group, should_be_writable);
		if (mount)
			abs_path = cgroup_to_absolute_path(mount, group, suffix);
	}

	if (abs_path) {
		lxc_cgroup_put_meta(meta);
		return abs_path;
	}

	/* failure: dropping the meta data must not clobber errno */
	err = errno;
	lxc_cgroup_put_meta(meta);
	errno = err;
	return NULL;
}
636
/*
 * Read the cgroup membership of process @pid from /proc/<pid>/cgroup.
 * The result is whatever lxc_cgroup_process_info_getx() returns for that file.
 */
struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
{
	char proc_path[32];

	snprintf(proc_path, sizeof(proc_path), "/proc/%lu/cgroup", (unsigned long)pid);

	return lxc_cgroup_process_info_getx(proc_path, meta);
}
643
/* Read the cgroup membership of the process with pid 1. */
struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	const pid_t init_pid = 1;

	return lxc_cgroup_process_info_get(init_pid, meta);
}
b98f7d6e 648
33ad9f1a
CS
/*
 * Read the cgroup membership of the calling process.
 *
 * /proc/self/cgroup is tried first; if that yields nothing, the lookup is
 * repeated through /proc/<getpid()>/cgroup.
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
{
	struct cgroup_process_info *info;

	info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
	if (info)
		return info;

	return lxc_cgroup_process_info_get(getpid(), meta);
}
ae5c8b8e 657
692ba18f
SH
/*
 * If a controller has the ns cgroup mounted, then in that cgroup the
 * container's init pid is already in a new cgroup named after the pid.
 * This helper renames that pid-named cgroup directory after the container.
 *
 * Example: with the hierarchy mounted at /cgroup, the task in cgroup /ab,
 * pid 2375 and container name c1:
 *      fulloldpath = /cgroup/ab/2375
 *      fullnewpath = /cgroup/ab/c1
 *      newname     = /ab/c1
 *
 * @mountpath: where the hierarchy is mounted
 * @oldname:   cgroup (relative to @mountpath) that holds the pid-named cgroup
 * @pid:       pid the existing cgroup directory is named after
 * @name:      container name to rename the directory to
 *
 * An already existing directory at the new location is removed with rmdir()
 * first; if that fails the rename is not attempted.
 *
 * Returns the new cgroup name ("<oldname>/<name>") as a malloc()ed string
 * the caller has to free, or NULL on failure.
 */
static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
{
	char *fulloldpath;
	char *newname, *fullnewpath;
	int len, newlen, ret;

	/* 22 leaves room for the two separators, the pid digits and the NUL */
	len = strlen(oldname) + strlen(mountpath) + 22;
	fulloldpath = alloca(len);
	/* the argument is cast to unsigned long, so the conversion must be %lu */
	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
	if (ret < 0 || ret >= len)
		return NULL;

	len = strlen(oldname) + strlen(name) + 2;
	newname = malloc(len);
	if (!newname) {
		SYSERROR("Out of memory");
		return NULL;
	}
	ret = snprintf(newname, len, "%s/%s", oldname, name);
	if (ret < 0 || ret >= len) {
		free(newname);
		return NULL;
	}

	newlen = strlen(mountpath) + len + 2;
	fullnewpath = alloca(newlen);
	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
	if (ret < 0 || ret >= newlen) {
		free(newname);
		return NULL;
	}

	/* a leftover directory of that name has to go before the rename */
	if (access(fullnewpath, F_OK) == 0) {
		if (rmdir(fullnewpath) != 0) {
			SYSERROR("container cgroup %s already exists.", fullnewpath);
			free(newname);
			return NULL;
		}
	}
	if (rename(fulloldpath, fullnewpath)) {
		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
		free(newname);
		return NULL;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return newname;
}
725
33ad9f1a 726/* create a new cgroup */
d4ef7c50 727struct cgroup_process_info *lxc_cgroupfs_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
33ad9f1a 728{
001b026e 729 char **cgroup_path_components = NULL;
33ad9f1a
CS
730 char **p = NULL;
731 char *path_so_far = NULL;
732 char **new_cgroup_paths = NULL;
733 char **new_cgroup_paths_sub = NULL;
734 struct cgroup_mount_point *mp;
735 struct cgroup_hierarchy *h;
736 struct cgroup_process_info *base_info = NULL;
737 struct cgroup_process_info *info_ptr;
738 int saved_errno;
739 int r;
740 unsigned suffix = 0;
741 bool had_sub_pattern = false;
742 size_t i;
ae5c8b8e 743
33ad9f1a
CS
744 if (!is_valid_cgroup(name)) {
745 ERROR("Invalid cgroup name: '%s'", name);
746 errno = EINVAL;
747 return NULL;
ae5c8b8e
SH
748 }
749
33ad9f1a
CS
750 if (!strstr(path_pattern, "%n")) {
751 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
752 errno = EINVAL;
753 return NULL;
754 }
fd37327f 755
33ad9f1a
CS
756 /* we will modify the result of this operation directly,
757 * so we don't have to copy the data structure
758 */
759 base_info = (path_pattern[0] == '/') ?
760 lxc_cgroup_process_info_get_init(meta_data) :
761 lxc_cgroup_process_info_get_self(meta_data);
762 if (!base_info)
763 return NULL;
c8f7c563 764
33ad9f1a
CS
765 new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
766 if (!new_cgroup_paths)
767 goto out_initial_error;
768
769 new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
770 if (!new_cgroup_paths_sub)
771 goto out_initial_error;
772
773 /* find mount points we can use */
774 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
775 h = info_ptr->hierarchy;
776 mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
777 if (!mp) {
778 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
779 goto out_initial_error;
780 }
781 info_ptr->designated_mount_point = mp;
460a1cf0 782
692ba18f
SH
783 if (lxc_string_in_array("ns", (const char **)h->subsystems))
784 continue;
2edb53c7
SH
785 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
786 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
33ad9f1a 787 goto out_initial_error;
2edb53c7 788 }
33ad9f1a 789 }
b98f7d6e 790
33ad9f1a
CS
791 /* normalize the path */
792 cgroup_path_components = lxc_normalize_path(path_pattern);
793 if (!cgroup_path_components)
794 goto out_initial_error;
795
796 /* go through the path components to see if we can create them */
797 for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
798 /* we only want to create the same component with -1, -2, etc.
799 * if the component contains the container name itself, otherwise
800 * it's not an error if it already exists
801 */
802 char *p_eff = *p ? *p : (char *)sub_pattern;
803 bool contains_name = strstr(p_eff, "%n");
804 char *current_component = NULL;
805 char *current_subpath = NULL;
806 char *current_entire_path = NULL;
807 char *parts[3];
808 size_t j = 0;
809 i = 0;
810
811 /* if we are processing the subpattern, we want to make sure
812 * loop is ended the next time around
813 */
814 if (!*p) {
815 had_sub_pattern = true;
816 p--;
817 }
b98f7d6e 818
33ad9f1a
CS
819 goto find_name_on_this_level;
820
821 cleanup_name_on_this_level:
822 /* This is reached if we found a name clash.
823 * In that case, remove the cgroup from all previous hierarchies
824 */
825 for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
603c64c2 826 r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
33ad9f1a
CS
827 if (r < 0)
828 WARN("could not clean up cgroup we created when trying to create container");
829 free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
830 info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
831 }
832 if (current_component != current_subpath)
833 free(current_subpath);
834 if (current_component != p_eff)
835 free(current_component);
836 current_component = current_subpath = NULL;
837 /* try again with another suffix */
838 ++suffix;
839
840 find_name_on_this_level:
841 /* determine name of the path component we should create */
842 if (contains_name && suffix > 0) {
843 char *buf = calloc(strlen(name) + 32, 1);
844 if (!buf)
845 goto out_initial_error;
846 snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
847 current_component = lxc_string_replace("%n", buf, p_eff);
848 free(buf);
849 } else {
850 current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
851 }
852 parts[0] = path_so_far;
853 parts[1] = current_component;
854 parts[2] = NULL;
855 current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
856
857 /* Now go through each hierarchy and try to create the
858 * corresponding cgroup
859 */
860 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
861 char *parts2[3];
692ba18f
SH
862
863 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
864 continue;
33ad9f1a
CS
865 current_entire_path = NULL;
866
867 parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
868 parts2[1] = current_subpath;
869 parts2[2] = NULL;
870 current_entire_path = lxc_string_join("/", (const char **)parts2, false);
871
872 if (!*p) {
873 /* we are processing the subpath, so only update that one */
874 free(new_cgroup_paths_sub[i]);
875 new_cgroup_paths_sub[i] = strdup(current_entire_path);
876 if (!new_cgroup_paths_sub[i])
877 goto cleanup_from_error;
878 } else {
879 /* remember which path was used on this controller */
880 free(new_cgroup_paths[i]);
881 new_cgroup_paths[i] = strdup(current_entire_path);
882 if (!new_cgroup_paths[i])
883 goto cleanup_from_error;
884 }
fd4f5a56 885
33ad9f1a
CS
886 r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
887 if (r < 0 && errno == EEXIST && contains_name) {
888 /* name clash => try new name with new suffix */
889 free(current_entire_path);
890 current_entire_path = NULL;
891 goto cleanup_name_on_this_level;
892 } else if (r < 0 && errno != EEXIST) {
893 SYSERROR("Could not create cgroup %s", current_entire_path);
894 goto cleanup_from_error;
895 } else if (r == 0) {
896 /* successfully created */
897 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
898 if (r < 0)
899 goto cleanup_from_error;
d703c2b1
RV
900 if (!init_cpuset_if_needed(info_ptr->designated_mount_point, current_entire_path)) {
901 ERROR("Failed to initialize cpuset in new '%s'.", current_entire_path);
902 goto cleanup_from_error;
903 }
33ad9f1a
CS
904 info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
905 } else {
906 /* if we didn't create the cgroup, then we have to make sure that
907 * further cgroups will be created properly
908 */
d703c2b1 909 if (handle_cgroup_settings(info_ptr->designated_mount_point, info_ptr->cgroup_path) < 0) {
f6ac3b9e 910 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
33ad9f1a 911 goto cleanup_from_error;
f6ac3b9e 912 }
d703c2b1
RV
913 if (!init_cpuset_if_needed(info_ptr->designated_mount_point, info_ptr->cgroup_path)) {
914 ERROR("Failed to initialize cpuset in pre-existing '%s'.", info_ptr->cgroup_path);
915 goto cleanup_from_error;
916 }
33ad9f1a
CS
917
918 /* already existed but path component of pattern didn't contain '%n',
919 * so this is not an error; but then we don't need current_entire_path
920 * anymore...
921 */
922 free(current_entire_path);
923 current_entire_path = NULL;
924 }
925 }
fd4f5a56 926
33ad9f1a
CS
927 /* save path so far */
928 free(path_so_far);
929 path_so_far = strdup(current_subpath);
930 if (!path_so_far)
931 goto cleanup_from_error;
932
933 /* cleanup */
934 if (current_component != current_subpath)
935 free(current_subpath);
936 if (current_component != p_eff)
937 free(current_component);
938 current_component = current_subpath = NULL;
939 continue;
940
941 cleanup_from_error:
942 /* called if an error occured in the loop, so we
943 * do some additional cleanup here
944 */
945 saved_errno = errno;
946 if (current_component != current_subpath)
947 free(current_subpath);
948 if (current_component != p_eff)
949 free(current_component);
950 free(current_entire_path);
951 errno = saved_errno;
952 goto out_initial_error;
fd4f5a56
DL
953 }
954
33ad9f1a
CS
955 /* we're done, now update the paths */
956 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
47d8fb3b
CS
957 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
958 * will take care of it
959 * Since we do a continue in above loop, new_cgroup_paths[i] is
960 * unset anyway, as is new_cgroup_paths_sub[i]
692ba18f 961 */
47d8fb3b
CS
962 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
963 continue;
964 free(info_ptr->cgroup_path);
965 info_ptr->cgroup_path = new_cgroup_paths[i];
966 info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
fd4f5a56 967 }
33ad9f1a
CS
968 /* don't use lxc_free_array since we used the array members
969 * to store them in our result...
970 */
971 free(new_cgroup_paths);
972 free(new_cgroup_paths_sub);
973 free(path_so_far);
974 lxc_free_array((void **)cgroup_path_components, free);
975 return base_info;
976
977out_initial_error:
978 saved_errno = errno;
979 free(path_so_far);
980 lxc_cgroup_process_info_free_and_remove(base_info);
981 lxc_free_array((void **)new_cgroup_paths, free);
982 lxc_free_array((void **)new_cgroup_paths_sub, free);
983 lxc_free_array((void **)cgroup_path_components, free);
984 errno = saved_errno;
985 return NULL;
c8f7c563
CS
986}
987
47d8fb3b
CS
988int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
989{
990 struct cgroup_process_info *info_ptr;
991 int r;
992
993 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
994 if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
995 continue;
996 /*
997 * For any path which has ns cgroup mounted, handler->pid is already
998 * moved into a container called '%d % (handler->pid)'. Rename it to
999 * the cgroup name and record that.
1000 */
1001 char *tmp = cgroup_rename_nsgroup((const char *)info_ptr->designated_mount_point->mount_point,
1002 info_ptr->cgroup_path, pid, name);
1003 if (!tmp)
1004 return -1;
1005 free(info_ptr->cgroup_path);
1006 info_ptr->cgroup_path = tmp;
1007 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
1008 if (r < 0)
1009 return -1;
1010 tmp = strdup(tmp);
1011 if (!tmp)
1012 return -1;
1013 info_ptr->created_paths[info_ptr->created_paths_count++] = tmp;
1014 }
1015 return 0;
1016}
1017
33ad9f1a
CS
1018/* get the cgroup membership of a given container */
1019struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
c8f7c563 1020{
33ad9f1a
CS
1021 struct cgroup_process_info *result = NULL;
1022 int saved_errno = 0;
1023 size_t i;
1024 struct cgroup_process_info **cptr = &result;
1025 struct cgroup_process_info *entry = NULL;
1026 char *path = NULL;
1027
1028 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
1029 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
1030 if (!h || !h->used)
1031 continue;
c8f7c563 1032
33ad9f1a
CS
1033 /* use the command interface to look for the cgroup */
1034 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
1035 if (!path)
1036 goto out_error;
1037
1038 entry = calloc(1, sizeof(struct cgroup_process_info));
1039 if (!entry)
1040 goto out_error;
1041 entry->meta_ref = lxc_cgroup_get_meta(meta_data);
1042 entry->hierarchy = h;
1043 entry->cgroup_path = path;
1044 path = NULL;
1045
1046 /* it is not an error if we don't find anything here,
1047 * it is up to the caller to decide what to do in that
1048 * case */
1049 entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
1050
1051 *cptr = entry;
1052 cptr = &entry->next;
1053 entry = NULL;
c8f7c563
CS
1054 }
1055
33ad9f1a
CS
1056 return result;
1057out_error:
1058 saved_errno = errno;
1059 free(path);
1060 lxc_cgroup_process_info_free(result);
1061 lxc_cgroup_process_info_free(entry);
1062 errno = saved_errno;
1063 return NULL;
fd4f5a56
DL
1064}
1065
33ad9f1a 1066/* move a processs to the cgroups specified by the membership */
d4ef7c50 1067int lxc_cgroupfs_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
4f17323e 1068{
33ad9f1a
CS
1069 char pid_buf[32];
1070 char *cgroup_tasks_fn;
1071 int r;
1072 struct cgroup_process_info *info_ptr;
1073
1074 snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
1075 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1076 char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
1077 info_ptr->cgroup_path_sub :
1078 info_ptr->cgroup_path;
1079
1080 if (!info_ptr->designated_mount_point) {
1081 info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
1082 if (!info_ptr->designated_mount_point) {
1083 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
1084 return -1;
1085 }
1086 }
4f17323e 1087
33ad9f1a
CS
1088 cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
1089 if (!cgroup_tasks_fn) {
1090 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1091 return -1;
1092 }
4f17323e 1093
33ad9f1a 1094 r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
5903da82 1095 free(cgroup_tasks_fn);
33ad9f1a
CS
1096 if (r < 0) {
1097 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1098 return -1;
1099 }
4f17323e
CS
1100 }
1101
33ad9f1a 1102 return 0;
4f17323e
CS
1103}
1104
33ad9f1a
CS
1105/* free process membership information */
1106void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
fc7de561 1107{
33ad9f1a
CS
1108 struct cgroup_process_info *next;
1109 if (!info)
b98f7d6e 1110 return;
33ad9f1a
CS
1111 next = info->next;
1112 lxc_cgroup_put_meta(info->meta_ref);
1113 free(info->cgroup_path);
1114 free(info->cgroup_path_sub);
1115 lxc_free_array((void **)info->created_paths, free);
1116 free(info);
1117 lxc_cgroup_process_info_free(next);
fc7de561
SH
1118}
1119
33ad9f1a
CS
1120/* free process membership information and remove cgroups that were created */
1121void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
b98f7d6e 1122{
33ad9f1a
CS
1123 struct cgroup_process_info *next;
1124 char **pp;
1125 if (!info)
1126 return;
1127 next = info->next;
603c64c2 1128 {
33ad9f1a
CS
1129 struct cgroup_mount_point *mp = info->designated_mount_point;
1130 if (!mp)
1131 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1132 if (mp)
1133 /* ignore return value here, perhaps we created the
1134 * '/lxc' cgroup in this container but another container
1135 * is still running (for example)
1136 */
603c64c2
SH
1137 (void)remove_cgroup(mp, info->cgroup_path, true);
1138 }
1139 for (pp = info->created_paths; pp && *pp; pp++);
1140 for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
33ad9f1a 1141 free(*pp);
b98f7d6e 1142 }
33ad9f1a
CS
1143 free(info->created_paths);
1144 lxc_cgroup_put_meta(info->meta_ref);
1145 free(info->cgroup_path);
1146 free(info->cgroup_path_sub);
1147 free(info);
9431aa65 1148 lxc_cgroup_process_info_free_and_remove(next);
33ad9f1a 1149}
b98f7d6e 1150
d4ef7c50 1151static char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
33ad9f1a 1152{
d4ef7c50
SH
1153 struct cgfs_data *d = handler->cgroup_info->data;
1154 struct cgroup_process_info *info = d->info;
1155 info = find_info_for_subsystem(info, subsystem);
33ad9f1a
CS
1156 if (!info)
1157 return NULL;
1158 return info->cgroup_path;
b98f7d6e
SH
1159}
1160
/*
 * Ask the command interface of container @name (in @lxcpath) for its
 * cgroup path in the hierarchy of @subsystem. Returns whatever
 * lxc_cmd_get_cgroup_path() returns; lxc_cgroup_path_get() free()s the
 * result after use.
 */
char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
{
	char *cgroup_path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);

	return cgroup_path;
}
1165
33ad9f1a 1166char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
b98f7d6e 1167{
d4ef7c50
SH
1168 struct cgfs_data *d = handler->cgroup_info->data;
1169 struct cgroup_process_info *info = d->info;
33ad9f1a 1170 struct cgroup_mount_point *mp = NULL;
d4ef7c50
SH
1171
1172 info = find_info_for_subsystem(info, subsystem);
33ad9f1a
CS
1173 if (!info)
1174 return NULL;
1175 if (info->designated_mount_point) {
8900b9eb 1176 mp = info->designated_mount_point;
33ad9f1a
CS
1177 } else {
1178 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1179 if (!mp)
1180 return NULL;
b98f7d6e 1181 }
33ad9f1a 1182 return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
b98f7d6e 1183}
55c76589 1184
33ad9f1a 1185char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
9a93d992 1186{
33ad9f1a
CS
1187 struct cgroup_meta_data *meta;
1188 struct cgroup_process_info *base_info, *info;
1189 struct cgroup_mount_point *mp;
1190 char *result = NULL;
33ad9f1a
CS
1191
1192 meta = lxc_cgroup_load_meta();
1193 if (!meta)
9a93d992 1194 return NULL;
33ad9f1a
CS
1195 base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
1196 if (!base_info)
178938fe 1197 goto out1;
33ad9f1a
CS
1198 info = find_info_for_subsystem(base_info, subsystem);
1199 if (!info)
178938fe 1200 goto out2;
33ad9f1a 1201 if (info->designated_mount_point) {
8900b9eb 1202 mp = info->designated_mount_point;
33ad9f1a
CS
1203 } else {
1204 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1205 if (!mp)
178938fe 1206 goto out3;
33ad9f1a
CS
1207 }
1208 result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
178938fe 1209out3:
178938fe 1210out2:
33ad9f1a 1211 lxc_cgroup_process_info_free(base_info);
178938fe 1212out1:
33ad9f1a 1213 lxc_cgroup_put_meta(meta);
33ad9f1a
CS
1214 return result;
1215}
9a93d992 1216
33ad9f1a
CS
/*
 * Write @value to the cgroup file @filename (e.g. "freezer.state") of
 * the container described by @handler.
 *
 * The subsystem is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * cannot be determined.
 */
int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *cgroup_dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
	if (!cgroup_dir)
		return -1;

	rc = do_cgroup_set(cgroup_dir, filename, value);
	free(cgroup_dir);
	return rc;
}
9a93d992 1234
33ad9f1a
CS
/*
 * Read the cgroup file @filename (e.g. "freezer.state") of the
 * container described by @handler into @value (at most @len bytes are
 * requested from do_cgroup_get()).
 *
 * The subsystem is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * cannot be determined.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *cgroup_dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
	if (!cgroup_dir)
		return -1;

	rc = do_cgroup_get(cgroup_dir, filename, value, len);
	free(cgroup_dir);
	return rc;
}
1252
/*
 * Write @value to the cgroup file @filename of the running container
 * @name in @lxcpath.
 *
 * The subsystem is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * cannot be determined.
 */
int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *cgroup_dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!cgroup_dir)
		return -1;

	rc = do_cgroup_set(cgroup_dir, filename, value);
	free(cgroup_dir);
	return rc;
}
1270
/*
 * Read the cgroup file @filename of the running container @name in
 * @lxcpath into @value (at most @len bytes are requested from
 * do_cgroup_get()).
 *
 * The subsystem is the part of @filename before the first '.'.
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * cannot be determined.
 */
int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *cgroup_dir;
	int rc;

	/* work on a stack copy so the subsystem name can be cut off */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!cgroup_dir)
		return -1;

	rc = do_cgroup_get(cgroup_dir, filename, value, len);
	free(cgroup_dir);
	return rc;
}
1288
33ad9f1a
CS
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @filename  : the file of interest (e.g. "freezer.state") or
 *              the subsystem name (e.g. "freezer") in which case
 *              the directory where the cgroup may be modified
 *              will be returned
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines cgpath from the
 * lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *filename, const char *name,
			  const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *file_suffix = NULL;
	char *dot;
	char *cgroup;
	char *result;

	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot) {
		/* a file was given: keep "/<filename>" as suffix and cut
		 * the copy down to the bare subsystem name
		 */
		*dot = '\0';
		file_suffix = alloca(name_len + 2);
		file_suffix[0] = '/';
		memcpy(file_suffix + 1, filename, name_len + 1);
	}

	cgroup = lxc_cgroup_get_hierarchy_path(controller, name, lxcpath);
	if (!cgroup)
		return NULL;

	/* file_suffix stays NULL when only a subsystem name was given */
	result = lxc_cgroup_find_abs_path(controller, cgroup, true, file_suffix);
	free(cgroup);
	return result;
}
1328
c476bdce
SH
1329static bool cgroupfs_mount_cgroup(const char *root,
1330 struct lxc_cgroup_info *cgroup_info, int type)
aae1f3c4
CS
1331{
1332 size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
1333 char *path = NULL;
1334 char **parts = NULL;
1335 char *dirname = NULL;
1336 char *abs_path = NULL;
1337 char *abs_path2 = NULL;
d4ef7c50
SH
1338 struct cgfs_data *cgfs_d;
1339 struct cgroup_process_info *info, *base_info;
aae1f3c4
CS
1340 int r, saved_errno = 0;
1341
d4ef7c50
SH
1342 init_cg_ops();
1343
d4ef7c50
SH
1344 cgfs_d = cgroup_info->data;
1345 base_info = cgfs_d->info;
1346
7997d7da
CS
1347 if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
1348 ERROR("could not mount cgroups into container: invalid type specified internally");
1349 errno = EINVAL;
c476bdce 1350 return false;
7997d7da
CS
1351 }
1352
aae1f3c4
CS
1353 path = calloc(1, bufsz);
1354 if (!path)
c476bdce 1355 return false;
aae1f3c4
CS
1356 snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
1357 r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
1358 if (r < 0) {
1359 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
c476bdce 1360 return false;
aae1f3c4
CS
1361 }
1362
1363 /* now mount all the hierarchies we care about */
1364 for (info = base_info; info; info = info->next) {
1365 size_t subsystem_count, i;
1366 struct cgroup_mount_point *mp = info->designated_mount_point;
1367 if (!mp)
1368 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1369 if (!mp) {
1370 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1371 goto out_error;
1372 }
1373
1374 subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
1375 parts = calloc(subsystem_count + 1, sizeof(char *));
1376 if (!parts)
1377 goto out_error;
1378
1379 for (i = 0; i < subsystem_count; i++) {
1380 if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
1381 parts[i] = info->hierarchy->subsystems[i] + 5;
1382 else
1383 parts[i] = info->hierarchy->subsystems[i];
1384 }
1385 dirname = lxc_string_join(",", (const char **)parts, false);
1386 if (!dirname)
1387 goto out_error;
1388
1389 /* create subsystem directory */
1390 abs_path = lxc_append_paths(path, dirname);
1391 if (!abs_path)
1392 goto out_error;
1393 r = mkdir_p(abs_path, 0755);
1394 if (r < 0 && errno != EEXIST) {
1395 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
1396 goto out_error;
1397 }
1398
aae1f3c4
CS
1399 abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
1400 if (!abs_path2)
1401 goto out_error;
aae1f3c4 1402
7997d7da
CS
1403 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1404 /* bind-mount the cgroup entire filesystem there */
1405 if (strcmp(mp->mount_prefix, "/") != 0) {
1406 /* FIXME: maybe we should just try to remount the entire hierarchy
1407 * with a regular mount command? may that works? */
1408 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
1409 goto out_error;
1410 }
1411 r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
1412 if (r < 0) {
1413 SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
1414 goto out_error;
1415 }
1416 /* main cgroup path should be read-only */
1417 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1418 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1419 if (r < 0) {
1420 SYSERROR("error re-mounting %s readonly", abs_path);
1421 goto out_error;
1422 }
1423 }
1424 /* own cgroup should be read-write */
1425 if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
1426 r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
1427 if (r < 0) {
1428 SYSERROR("error bind-mounting %s onto itself", abs_path2);
1429 goto out_error;
1430 }
1431 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
1432 if (r < 0) {
1433 SYSERROR("error re-mounting %s readwrite", abs_path2);
1434 goto out_error;
1435 }
1436 }
1437 } else {
1438 /* create path for container's cgroup */
1439 r = mkdir_p(abs_path2, 0755);
1440 if (r < 0 && errno != EEXIST) {
1441 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
1442 goto out_error;
1443 }
aae1f3c4 1444
7997d7da
CS
1445 free(abs_path);
1446 abs_path = NULL;
1447
1448 /* bind-mount container's cgroup to that directory */
1449 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1450 if (!abs_path)
1451 goto out_error;
1452 r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
1453 if (r < 0) {
1454 SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
1455 goto out_error;
1456 }
1457 if (type == LXC_AUTO_CGROUP_RO) {
1458 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1459 if (r < 0) {
1460 SYSERROR("error re-mounting %s readonly", abs_path2);
1461 goto out_error;
1462 }
1463 }
aae1f3c4
CS
1464 }
1465
1466 free(abs_path);
1467 free(abs_path2);
1468 abs_path = NULL;
1469 abs_path2 = NULL;
1470
1471 /* add symlinks for every single subsystem */
1472 if (subsystem_count > 1) {
1473 for (i = 0; i < subsystem_count; i++) {
1474 abs_path = lxc_append_paths(path, parts[i]);
1475 if (!abs_path)
1476 goto out_error;
1477 r = symlink(dirname, abs_path);
1478 if (r < 0)
1479 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
1480 free(abs_path);
1481 abs_path = NULL;
1482 }
1483 }
1484 free(dirname);
1485 free(parts);
1486 dirname = NULL;
1487 parts = NULL;
1488 }
1489
1490 /* try to remount the tmpfs readonly, since the container shouldn't
1491 * change anything (this will also make sure that trying to create
1492 * new cgroups outside the allowed area fails with an error instead
1493 * of simply causing this to create directories in the tmpfs itself)
1494 */
7997d7da
CS
1495 if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
1496 mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
aae1f3c4
CS
1497
1498 free(path);
1499
c476bdce 1500 return true;
aae1f3c4
CS
1501
1502out_error:
1503 saved_errno = errno;
1504 free(path);
1505 free(dirname);
1506 free(parts);
1507 free(abs_path);
1508 free(abs_path2);
1509 errno = saved_errno;
c476bdce 1510 return false;
aae1f3c4
CS
1511}
1512
33ad9f1a
CS
1513int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
1514{
d4ef7c50
SH
1515 struct cgfs_data *d = handler->cgroup_info->data;
1516 struct cgroup_process_info *info = d->info;
33ad9f1a
CS
1517 struct cgroup_mount_point *mp = NULL;
1518 char *abs_path = NULL;
1519 int ret;
460a1cf0 1520
33ad9f1a
CS
1521 if (!info) {
1522 errno = ENOENT;
1523 return -1;
b98f7d6e 1524 }
c8f7c563 1525
33ad9f1a 1526 if (info->designated_mount_point) {
8900b9eb 1527 mp = info->designated_mount_point;
33ad9f1a
CS
1528 } else {
1529 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
1530 if (!mp)
1531 return -1;
c8f7c563
CS
1532 }
1533
33ad9f1a
CS
1534 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1535 if (!abs_path)
1536 return -1;
1537
1538 ret = cgroup_recursive_task_count(abs_path);
1539 free(abs_path);
1540 return ret;
c8f7c563
CS
1541}
1542
574c4428
QH
1543static struct cgroup_process_info *
1544lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str,
1545 struct cgroup_meta_data *meta)
d08ba6ec 1546{
33ad9f1a
CS
1547 struct cgroup_process_info *result = NULL;
1548 FILE *proc_pid_cgroup = NULL;
1549 char *line = NULL;
1550 size_t sz = 0;
1551 int saved_errno = 0;
1552 struct cgroup_process_info **cptr = &result;
1553 struct cgroup_process_info *entry = NULL;
1554
1555 proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
1556 if (!proc_pid_cgroup)
b98f7d6e 1557 return NULL;
1ac470c0 1558
33ad9f1a
CS
1559 while (getline(&line, &sz, proc_pid_cgroup) != -1) {
1560 /* file format: hierarchy:subsystems:group */
1561 char *colon1;
1562 char *colon2;
1563 char *endptr;
1564 int hierarchy_number;
1565 struct cgroup_hierarchy *h = NULL;
fd4f5a56 1566
33ad9f1a 1567 if (!line[0])
ae5c8b8e 1568 continue;
b98f7d6e 1569
33ad9f1a
CS
1570 if (line[strlen(line) - 1] == '\n')
1571 line[strlen(line) - 1] = '\0';
1572
1573 colon1 = strchr(line, ':');
1574 if (!colon1)
8900b9eb 1575 continue;
33ad9f1a
CS
1576 *colon1++ = '\0';
1577 colon2 = strchr(colon1, ':');
1578 if (!colon2)
ae5c8b8e 1579 continue;
33ad9f1a 1580 *colon2++ = '\0';
e4659536 1581
33ad9f1a
CS
1582 endptr = NULL;
1583 hierarchy_number = strtoul(line, &endptr, 10);
1584 if (!endptr || *endptr)
9a93d992 1585 continue;
9a93d992 1586
33ad9f1a
CS
1587 if (hierarchy_number > meta->maximum_hierarchy) {
1588 /* we encountered a hierarchy we didn't have before,
1589 * so probably somebody remounted some stuff in the
1590 * mean time...
1591 */
1592 errno = EAGAIN;
1593 goto out_error;
b98f7d6e 1594 }
33ad9f1a
CS
1595
1596 h = meta->hierarchies[hierarchy_number];
1597 if (!h) {
1598 /* we encountered a hierarchy that was thought to be
1599 * dead before, so probably somebody remounted some
1600 * stuff in the mean time...
1601 */
1602 errno = EAGAIN;
1603 goto out_error;
b98f7d6e 1604 }
33ad9f1a
CS
1605
1606 /* we are told that we should ignore this hierarchy */
1607 if (!h->used)
b98f7d6e 1608 continue;
5193cc3d 1609
33ad9f1a
CS
1610 entry = calloc(1, sizeof(struct cgroup_process_info));
1611 if (!entry)
1612 goto out_error;
fd4f5a56 1613
33ad9f1a
CS
1614 entry->meta_ref = lxc_cgroup_get_meta(meta);
1615 entry->hierarchy = h;
1616 entry->cgroup_path = strdup(colon2);
1617 if (!entry->cgroup_path)
1618 goto out_error;
d08ba6ec 1619
33ad9f1a
CS
1620 *cptr = entry;
1621 cptr = &entry->next;
1622 entry = NULL;
b98f7d6e 1623 }
b98f7d6e 1624
33ad9f1a
CS
1625 fclose(proc_pid_cgroup);
1626 free(line);
1627 return result;
1628
1629out_error:
1630 saved_errno = errno;
1631 if (proc_pid_cgroup)
1632 fclose(proc_pid_cgroup);
1633 lxc_cgroup_process_info_free(result);
1634 lxc_cgroup_process_info_free(entry);
1635 free(line);
1636 errno = saved_errno;
ae5c8b8e 1637 return NULL;
36b86299
DL
1638}
1639
574c4428
QH
/*
 * Extract the cgroup subsystems from a comma-separated mount option
 * string.
 *
 * A token counts as a subsystem if it is contained in @kernel_list or
 * if it starts with "name=" (named hierarchy). Returns a
 * NULL-terminated array of newly allocated strings; NULL is returned
 * on allocation failure (errno preserved) and also when no token
 * matched.
 */
static char **subsystems_from_mount_options(const char *mount_options,
		char **kernel_list)
{
	size_t opts_len = strlen(mount_options);
	char *scratch = alloca(opts_len + 1);
	char *state = NULL;
	char *tok;
	char **subsystems = NULL;
	size_t capacity = 0;
	size_t count = 0;
	int saved_errno;

	/* strtok_r() modifies its input, so tokenize a stack copy */
	memcpy(scratch, mount_options, opts_len + 1);

	tok = strtok_r(scratch, ",", &state);
	while (tok) {
		if (strncmp(tok, "name=", 5) == 0 ||
		    lxc_string_in_array(tok, (const char **)kernel_list)) {
			if (lxc_grow_array((void ***)&subsystems, &capacity, count + 1, 12) < 0)
				goto fail;
			/* keep the array terminated before storing the copy */
			subsystems[count + 1] = NULL;
			subsystems[count] = strdup(tok);
			if (!subsystems[count])
				goto fail;
			count++;
		}
		tok = strtok_r(NULL, ",", &state);
	}

	return subsystems;

fail:
	saved_errno = errno;
	lxc_free_array((void**)subsystems, free);
	errno = saved_errno;
	return NULL;
}
1677
574c4428 1678static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
b98f7d6e 1679{
33ad9f1a
CS
1680 if (!mp)
1681 return;
1682 free(mp->mount_point);
1683 free(mp->mount_prefix);
1684 free(mp);
bcbd102c
SH
1685}
1686
574c4428 1687static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
341a9bd8 1688{
33ad9f1a
CS
1689 if (!h)
1690 return;
1691 lxc_free_array((void **)h->subsystems, free);
8bfcb981 1692 free(h->all_mount_points);
33ad9f1a
CS
1693 free(h);
1694}
341a9bd8 1695
/*
 * Check whether @name is acceptable as a single cgroup path component.
 *
 * The names "." and ".." are rejected, as is any name containing '/'
 * or a character outside the range 33..126. SPACE (32) is excluded
 * on purpose because it would break legacy lxc-ls.
 */
static bool is_valid_cgroup(const char *name)
{
	size_t i;

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return false;

	for (i = 0; name[i] != '\0'; i++) {
		char c = name[i];

		if (c <= 32 || c >= 127 || c == '/')
			return false;
	}

	return true;
}
341a9bd8 1709
574c4428
QH
/*
 * Create or remove the directory of cgroup @path below mount point
 * @mp.
 *
 * With @do_remove false the directory is created with mkdir(). With
 * @do_remove true it is removed, via cgroup_rmdir() when @recurse is
 * non-zero and via plain rmdir() otherwise.
 *
 * Returns the result of the underlying call (-1 if the absolute path
 * cannot be built); errno of that call is preserved.
 */
static int create_or_remove_cgroup(bool do_remove,
		struct cgroup_mount_point *mp, const char *path, int recurse)
{
	int rc;
	int saved_errno = 0;
	char *abs_dir = cgroup_to_absolute_path(mp, path, NULL);

	if (!abs_dir)
		return -1;

	if (!do_remove)
		rc = mkdir(abs_dir, 0777);
	else if (recurse)
		rc = cgroup_rmdir(abs_dir);
	else
		rc = rmdir(abs_dir);

	/* free() must not clobber the errno of the call above */
	saved_errno = errno;
	free(abs_dir);
	errno = saved_errno;
	return rc;
}
bcbd102c 1731
/*
 * Create the directory of cgroup @path below mount point @mp.
 * Returns the result of create_or_remove_cgroup().
 */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;
	const bool recurse = false;

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
1736
574c4428
QH
/*
 * Remove the directory of cgroup @path below mount point @mp; with
 * @recurse set the removal is done recursively. Returns the result of
 * create_or_remove_cgroup().
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
		const char *path, bool recurse)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path, recurse);
}
576f946d 1742
574c4428
QH
1743static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp,
1744 const char *path, const char *suffix)
33ad9f1a
CS
1745{
1746 /* first we have to make sure we subtract the mount point's prefix */
1747 char *prefix = mp->mount_prefix;
1748 char *buf;
1749 ssize_t len, rv;
1750
1751 /* we want to make sure only absolute paths to cgroups are passed to us */
1752 if (path[0] != '/') {
1753 errno = EINVAL;
1754 return NULL;
1755 }
b98f7d6e 1756
33ad9f1a
CS
1757 if (prefix && !strcmp(prefix, "/"))
1758 prefix = NULL;
b98f7d6e 1759
33ad9f1a
CS
1760 /* prefix doesn't match */
1761 if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
1762 errno = EINVAL;
1763 return NULL;
1764 }
1765 /* if prefix is /foo and path is /foobar */
1766 if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
1767 errno = EINVAL;
1768 return NULL;
1769 }
b98f7d6e 1770
33ad9f1a
CS
1771 /* remove prefix from path */
1772 path += prefix ? strlen(prefix) : 0;
b98f7d6e 1773
33ad9f1a
CS
1774 len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
1775 buf = calloc(len + 1, 1);
50266dc6
DE
1776 if (!buf)
1777 return NULL;
33ad9f1a 1778 rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
8900b9eb 1779 if (rv > len) {
33ad9f1a
CS
1780 free(buf);
1781 errno = ENOMEM;
8900b9eb 1782 return NULL;
8b92dc3a 1783 }
576f946d 1784
33ad9f1a 1785 return buf;
e0f888d9 1786}
283678ed 1787
574c4428
QH
1788static struct cgroup_process_info *
1789find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
283678ed 1790{
33ad9f1a
CS
1791 struct cgroup_process_info *info_ptr;
1792 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1793 struct cgroup_hierarchy *h = info_ptr->hierarchy;
1794 if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
1795 return info_ptr;
b98f7d6e 1796 }
33ad9f1a
CS
1797 errno = ENOENT;
1798 return NULL;
1799}
283678ed 1800
574c4428
QH
/*
 * Read the file @sub_filename inside the directory @cgroup_path into
 * @value via lxc_read_from_file(), passing @len through.
 *
 * Returns the result of lxc_read_from_file() (errno preserved), or -1
 * if the file name cannot be assembled.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
		char *value, size_t len)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *full_path;
	int rc, saved_errno;

	full_path = lxc_string_join("/", components, false);
	if (!full_path)
		return -1;

	rc = lxc_read_from_file(full_path, value, len);

	/* free() must not clobber the errno of the read */
	saved_errno = errno;
	free(full_path);
	errno = saved_errno;
	return rc;
}
b113383b 1822
574c4428
QH
/*
 * Write the string @value to the file @sub_filename inside the
 * directory @cgroup_path via lxc_write_to_file().
 *
 * Returns the result of lxc_write_to_file() (errno preserved), or -1
 * if the file name cannot be assembled.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
		const char *value)
{
	const char *components[3] = { cgroup_path, sub_filename, NULL };
	char *full_path;
	int rc, saved_errno;

	full_path = lxc_string_join("/", components, false);
	if (!full_path)
		return -1;

	rc = lxc_write_to_file(full_path, value, strlen(value), false);

	/* free() must not clobber the errno of the write */
	saved_errno = errno;
	free(full_path);
	errno = saved_errno;
	return rc;
}
1844
9daf6f5d 1845static int do_setup_cgroup_limits(struct lxc_handler *h,
574c4428 1846 struct lxc_list *cgroup_settings, bool do_devices)
b98f7d6e
SH
1847{
1848 struct lxc_list *iterator;
1849 struct lxc_cgroup *cg;
1850 int ret = -1;
1851
33ad9f1a 1852 if (lxc_list_empty(cgroup_settings))
b98f7d6e
SH
1853 return 0;
1854
33ad9f1a 1855 lxc_list_for_each(iterator, cgroup_settings) {
b98f7d6e
SH
1856 cg = iterator->elem;
1857
33ad9f1a 1858 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
b98f7d6e 1859 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
33ad9f1a 1860 cgroup_devices_has_allow_or_deny(h, cg->value, false))
b98f7d6e
SH
1861 continue;
1862 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
33ad9f1a 1863 cgroup_devices_has_allow_or_deny(h, cg->value, true))
b98f7d6e 1864 continue;
33ad9f1a 1865 if (lxc_cgroup_set_handler(cg->subsystem, cg->value, h)) {
b98f7d6e
SH
1866 ERROR("Error setting %s to %s for %s\n",
1867 cg->subsystem, cg->value, h->name);
1868 goto out;
1869 }
b113383b 1870 }
b98f7d6e
SH
1871
1872 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
b113383b
SH
1873 }
1874
b98f7d6e
SH
1875 ret = 0;
1876 INFO("cgroup has been setup");
1877out:
b113383b
SH
1878 return ret;
1879}
b98f7d6e 1880
574c4428
QH
/*
 * Check the container's devices.list to find out whether a devices.allow
 * or devices.deny rule is already in effect, so the caller can skip
 * writing it again.
 *
 * h:         handler of the container whose "devices" hierarchy is read
 * v:         the rule value from the configuration
 * for_allow: true to test an allow rule, false to test a deny rule
 *
 * For a deny rule only the values "a" and "a *:* rwm" are examined; the
 * result is true when devices.list does not contain "a *:* rwm".
 * For an allow rule the result is true when devices.list contains either
 * "a *:* rwm" or a line equal to `v`.
 *
 * Any failure (path lookup, allocation, open) yields false.
 */
static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h,
					     char *v, bool for_allow)
{
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	parts[0] = (const char *)lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
	if (!parts[0])
		return false;

	path = lxc_string_join("/", parts, false);
	/*
	 * The hierarchy path is heap-allocated and only needed to build
	 * `path`; release it on every path, not just when the join fails.
	 */
	free((void *)parts[0]);
	parts[0] = NULL;
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);

		/* compare without the trailing newline */
		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';

		if (strcmp(line, "a *:* rwm") == 0) {
			/* everything is allowed: allow is satisfied, deny is not */
			ret = for_allow;
			goto out;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			goto out;
		}
	}

out:
	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
1935
/*
 * Count the tasks in the cgroup directory `cgroup_path` and in all
 * cgroups below it, by summing the line counts of every "tasks" file.
 *
 * Returns the number of lines found, 0 if the directory cannot be opened,
 * or -1 if a path cannot be built or an entry cannot be stat()ed.
 * Sub-directories or "tasks" files that fail to be counted are ignored.
 *
 * readdir() is used instead of the deprecated readdir_r(): every
 * recursion level iterates its own DIR stream, and no caller-sized
 * dirent buffer is required.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *d;
	struct dirent *dent;
	int n = 0, r;

	d = opendir(cgroup_path);
	if (!d)
		return 0;

	while ((dent = readdir(d)) != NULL) {
		const char *parts[3] = {
			cgroup_path,
			dent->d_name,
			NULL
		};
		char *sub_path;
		struct stat st;

		if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
			continue;

		sub_path = lxc_string_join("/", parts, false);
		if (!sub_path) {
			closedir(d);
			return -1;
		}

		r = stat(sub_path, &st);
		if (r < 0) {
			closedir(d);
			free(sub_path);
			return -1;
		}

		if (S_ISDIR(st.st_mode)) {
			/* descend into the child cgroup */
			r = cgroup_recursive_task_count(sub_path);
			if (r >= 0)
				n += r;
		} else if (!strcmp(dent->d_name, "tasks")) {
			/* one line per task */
			r = count_lines(sub_path);
			if (r >= 0)
				n += r;
		}
		free(sub_path);
	}
	closedir(d);

	return n;
}
1998
/*
 * Return the number of lines getline() can read from the file `fn`,
 * or -1 if the file cannot be opened.
 */
static int count_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int lines = 0;
	FILE *fp;

	fp = fopen_cloexec(fn, "r");
	if (fp == NULL)
		return -1;

	for (; getline(&buf, &cap, fp) != -1; lines++)
		;

	free(buf);
	fclose(fp);
	return lines;
}
2017
574c4428
QH
2018static int handle_cgroup_settings(struct cgroup_mount_point *mp,
2019 char *cgroup_path)
b98f7d6e 2020{
33ad9f1a 2021 int r, saved_errno = 0;
7e7243e1 2022 char buf[2];
1ea59ad2 2023
934b1673
SH
2024 mp->need_cpuset_init = false;
2025
1ea59ad2
SH
2026 /* If this is the memory cgroup, we want to enforce hierarchy.
2027 * But don't fail if for some reason we can't.
2028 */
2edb53c7
SH
2029 if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
2030 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
2031 if (cc_path) {
2032 r = lxc_read_from_file(cc_path, buf, 1);
2033 if (r < 1 || buf[0] != '1') {
2034 r = lxc_write_to_file(cc_path, "1", 1, false);
2035 if (r < 0)
2036 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
2037 }
1ea59ad2
SH
2038 free(cc_path);
2039 }
2edb53c7 2040 }
1ea59ad2 2041
33ad9f1a
CS
2042 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2043 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2044 * and cpuset.cpus and then
2045 */
2edb53c7
SH
2046 if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
2047 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
d703c2b1
RV
2048 struct stat sb;
2049
33ad9f1a 2050 if (!cc_path)
2edb53c7 2051 return -1;
d703c2b1
RV
2052 /* cgroup.clone_children is not available when running under
2053 * older kernel versions; in this case, we'll initialize
2054 * cpuset.cpus and cpuset.mems later, after the new cgroup
2055 * was created
2056 */
2057 if (stat(cc_path, &sb) != 0 && errno == ENOENT) {
934b1673 2058 mp->need_cpuset_init = true;
d703c2b1
RV
2059 free(cc_path);
2060 return 0;
2061 }
7e7243e1
SH
2062 r = lxc_read_from_file(cc_path, buf, 1);
2063 if (r == 1 && buf[0] == '1') {
2064 free(cc_path);
2edb53c7 2065 return 0;
7e7243e1 2066 }
33ad9f1a 2067 r = lxc_write_to_file(cc_path, "1", 1, false);
2edb53c7
SH
2068 saved_errno = errno;
2069 free(cc_path);
2070 errno = saved_errno;
2071 return r < 0 ? -1 : 0;
33ad9f1a
CS
2072 }
2073 return 0;
b98f7d6e 2074}
484ed030 2075
/*
 * Read up to `bufsize` bytes of the file `fn` into `buf` and make sure the
 * result is NUL-terminated.
 *
 * Returns the value of lxc_read_from_file() (negative on error, which is
 * also logged).  When the read filled the whole buffer, the last byte is
 * overwritten with the terminator and the untruncated count is returned.
 * A `bufsize` of 0 with a 0-byte read is rejected with -1.
 */
static int cgroup_read_from_file(const char *fn, char buf[], size_t bufsize)
{
	int nread = lxc_read_from_file(fn, buf, bufsize);

	if (nread < 0) {
		SYSERROR("failed to read %s", fn);
		return nread;
	}

	if (nread != bufsize) {
		/* short read: there is room for the terminator */
		buf[nread] = '\0';
		return nread;
	}

	if (bufsize == 0) {
		/* Callers don't do this, but regression/sanity check */
		ERROR("%s: was not expecting 0 bufsize", __func__);
		return -1;
	}

	/* buffer completely filled: sacrifice the last byte */
	buf[bufsize-1] = '\0';
	return nread;
}
2096
/*
 * Initialise the cpuset file `name` (given with a leading '/') of the
 * cgroup `path` on mount point `mp` by copying the value of the same file
 * from the parent cgroup.
 *
 * A child file that already holds a non-empty value is left untouched and
 * counts as success.  Returns true on success, false on any failure
 * (path construction, read, over-long parent value, write).
 */
static bool do_init_cpuset_file(struct cgroup_mount_point *mp,
				const char *path, const char *name)
{
	char contents[1024];
	char *childfile;
	char *parent_cgroup = NULL;
	char *parentfile = NULL;
	char *slash;
	int nread;
	bool ok = false;

	childfile = cgroup_to_absolute_path(mp, path, name);
	if (childfile == NULL)
		return false;

	/* don't overwrite a non-empty value in the file */
	nread = cgroup_read_from_file(childfile, contents, sizeof(contents));
	if (nread < 0)
		goto out;
	if (contents[0] != '\0' && contents[0] != '\n') {
		ok = true;
		goto out;
	}

	/* derive the parent cgroup by cutting off the last path component */
	parent_cgroup = strdup(path);
	if (parent_cgroup == NULL)
		goto out;

	slash = strrchr(parent_cgroup, '/');
	if (slash == NULL)
		goto out;
	if (slash == parent_cgroup)
		slash++; /* keep the '/' at the start */
	*slash = '\0';

	/* path to the same name in the parent cgroup */
	parentfile = cgroup_to_absolute_path(mp, parent_cgroup, name);
	if (parentfile == NULL)
		goto out;

	/* copy from parent to child cgroup */
	nread = cgroup_read_from_file(parentfile, contents, sizeof(contents));
	if (nread < 0)
		goto out;
	if (nread == sizeof(contents)) {
		/* If anyone actually sees this error, we can address it */
		ERROR("parent cpuset value too long");
		goto out;
	}

	if (lxc_write_to_file(childfile, contents, strlen(contents), false) >= 0)
		ok = true;
	else
		SYSERROR("failed writing %s", childfile);

out:
	free(parent_cgroup);
	free(parentfile);
	free(childfile);
	return ok;
}
2154
2155static bool init_cpuset_if_needed(struct cgroup_mount_point *mp,
2156 const char *path)
2157{
2158 /* the files we have to handle here are only in cpuset hierarchies */
2159 if (!lxc_string_in_array("cpuset",
2160 (const char **)mp->hierarchy->subsystems))
2161 return true;
2162
b1dad6f6
RV
2163 if (!mp->need_cpuset_init)
2164 return true;
2165
d703c2b1
RV
2166 return (do_init_cpuset_file(mp, path, "/cpuset.cpus") &&
2167 do_init_cpuset_file(mp, path, "/cpuset.mems") );
2168}
2169
484ed030
SH
2170extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
2171 const char *lxcpath);
d4ef7c50 2172int do_unfreeze(int freeze, const char *name, const char *lxcpath)
484ed030 2173{
d4ef7c50
SH
2174 char v[100];
2175 const char *state = freeze ? "FROZEN" : "THAWED";
484ed030 2176
d4ef7c50
SH
2177 if (lxc_cgroup_set("freezer.state", state, name, lxcpath) < 0) {
2178 ERROR("Failed to freeze %s:%s", lxcpath, name);
484ed030
SH
2179 return -1;
2180 }
d4ef7c50
SH
2181 while (1) {
2182 if (lxc_cgroup_get("freezer.state", v, 100, name, lxcpath) < 0) {
2183 ERROR("Failed to get new freezer state for %s:%s", lxcpath, name);
2184 return -1;
2185 }
2186 if (v[strlen(v)-1] == '\n')
2187 v[strlen(v)-1] = '\0';
2188 if (strncmp(v, state, strlen(state)) == 0) {
2189 if (name)
2190 lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
2191 return 0;
2192 }
2193 sleep(1);
484ed030 2194 }
d4ef7c50 2195}
484ed030 2196
d4ef7c50
SH
/*
 * Public entry point for freezing or thawing a container; forwards to
 * do_unfreeze() with the arguments reordered and returns its result.
 */
int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
{
	int rc;

	rc = do_unfreeze(freeze, name, lxcpath);
	return rc;
}
484ed030 2201
d4ef7c50
SH
2202lxc_state_t freezer_state(const char *name, const char *lxcpath)
2203{
2204 char v[100];
e8d07ef2 2205 if (lxc_cgroup_get("freezer.state", v, 100, name, lxcpath) < 0)
d4ef7c50 2206 return -1;
484ed030 2207
d4ef7c50
SH
2208 if (v[strlen(v)-1] == '\n')
2209 v[strlen(v)-1] = '\0';
2210 return lxc_str2state(v);
2211}
484ed030 2212
d4ef7c50
SH
2213static void cgfs_destroy(struct lxc_handler *handler)
2214{
2215 struct cgfs_data *d = handler->cgroup_info->data;
2216 if (!d)
2217 return;
2218 if (d->info)
2219 lxc_cgroup_process_info_free_and_remove(d->info);
2220 if (d->meta)
2221 lxc_cgroup_put_meta(d->meta);
2222 free(d);
2223 handler->cgroup_info->data = NULL;
2224}
484ed030 2225
d4ef7c50
SH
2226static inline bool cgfs_init(struct lxc_handler *handler)
2227{
2228 struct cgfs_data *d = malloc(sizeof(*d));
2229 if (!d)
2230 return false;
2231 d->info = NULL;
2232 d->meta = lxc_cgroup_load_meta();
484ed030 2233
d4ef7c50
SH
2234 if (!d->meta) {
2235 ERROR("cgroupfs failed to detect cgroup metadata");
378a5729 2236 free(d);
d4ef7c50
SH
2237 return false;
2238 }
2239 handler->cgroup_info->data = d;
2240 return true;
2241}
484ed030 2242
d4ef7c50
SH
2243static inline bool cgfs_create(struct lxc_handler *handler)
2244{
2245 struct cgfs_data *d = handler->cgroup_info->data;
2246 struct cgroup_process_info *i;
2247 struct cgroup_meta_data *md = d->meta;
2248 i = lxc_cgroupfs_create(handler->name, handler->cgroup_info->cgroup_pattern, md, NULL);
2249 if (!i)
2250 return false;
2251 d->info = i;
2252 return true;
2253}
484ed030 2254
d4ef7c50
SH
2255static inline bool cgfs_enter(struct lxc_handler *handler)
2256{
2257 struct cgfs_data *d = handler->cgroup_info->data;
2258 struct cgroup_process_info *i = d->info;
2259 int ret;
2260
2261 ret = lxc_cgroupfs_enter(i, handler->pid, false);
484ed030 2262
d4ef7c50
SH
2263 return ret == 0;
2264}
2265
2266static inline bool cgfs_create_legacy(struct lxc_handler *handler)
2267{
2268 struct cgfs_data *d = handler->cgroup_info->data;
2269 struct cgroup_process_info *i = d->info;
2270 if (lxc_cgroup_create_legacy(i, handler->name, handler->pid) < 0) {
2271 ERROR("failed to create legacy ns cgroups for '%s'", handler->name);
2272 return false;
484ed030 2273 }
d4ef7c50
SH
2274 return true;
2275}
484ed030 2276
d4ef7c50
SH
/*
 * cgroupfs backend: return the container's cgroup path for `subsystem`
 * as reported by lxc_cgroup_get_hierarchy_path_handler().
 */
static char *cgfs_get_cgroup(struct lxc_handler *handler, const char *subsystem)
{
	char *cgroup;

	cgroup = lxc_cgroup_get_hierarchy_path_handler(subsystem, handler);
	return cgroup;
}
2281
0086f499
SH
/*
 * cgroupfs backend: thaw the container by writing "THAWED" to
 * freezer.state of its freezer cgroup.
 * Returns -1 if the absolute cgroup path cannot be resolved, otherwise
 * the result of do_cgroup_set().
 */
static int cgfs_unfreeze_fromhandler(struct lxc_handler *handler)
{
	char *rel_path, *abs_path;
	int rc;

	rel_path = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
	abs_path = lxc_cgroup_find_abs_path("freezer", rel_path, true, NULL);
	if (abs_path == NULL)
		return -1;

	rc = do_cgroup_set(abs_path, "freezer.state", "THAWED");
	free(abs_path);

	return rc;
}
2296
9daf6f5d
SH
2297bool cgroupfs_setup_limits(struct lxc_handler *h, bool with_devices)
2298{
2299 return do_setup_cgroup_limits(h, &h->conf->cgroup, with_devices) == 0;
2300}
2301
5d897655
SH
/*
 * cgroupfs backend: move the process `pid` into the cgroups of the
 * running container `name` under `lxcpath`.
 *
 * Loads the cgroup metadata, looks up the container's cgroup info and
 * enters it.  Returns false (after logging) if any of these steps fails.
 */
bool lxc_cgroupfs_attach(const char *name, const char *lxcpath, pid_t pid)
{
	struct cgroup_meta_data *meta;
	struct cgroup_process_info *info;
	int rc;

	meta = lxc_cgroup_load_meta();
	if (meta == NULL) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	/* the metadata reference is only needed for the lookup */
	info = lxc_cgroup_get_container_info(name, lxcpath, meta);
	lxc_cgroup_put_meta(meta);
	if (info == NULL) {
		ERROR("could not move attached process %d to cgroup of container", pid);
		return false;
	}

	rc = lxc_cgroupfs_enter(info, pid, false);
	lxc_cgroup_process_info_free(info);
	if (rc >= 0)
		return true;

	ERROR("could not move attached process %d to cgroup of container", pid);
	return false;
}
2329
d4ef7c50
SH
2330static struct cgroup_ops cgfs_ops = {
2331 .destroy = cgfs_destroy,
2332 .init = cgfs_init,
2333 .create = cgfs_create,
2334 .enter = cgfs_enter,
2335 .create_legacy = cgfs_create_legacy,
2336 .get_cgroup = cgfs_get_cgroup,
2337 .get = lxc_cgroupfs_get,
2338 .set = lxc_cgroupfs_set,
0086f499 2339 .unfreeze_fromhandler = cgfs_unfreeze_fromhandler,
9daf6f5d 2340 .setup_limits = cgroupfs_setup_limits,
d4ef7c50 2341 .name = "cgroupfs",
5d897655 2342 .attach = lxc_cgroupfs_attach,
0996e18a 2343 .chown = NULL,
c476bdce 2344 .mount_cgroup = cgroupfs_mount_cgroup,
d4ef7c50
SH
2345};
2346static void init_cg_ops(void)
484ed030 2347{
d4ef7c50
SH
2348 if (!use_cgmanager)
2349 return;
2350 if (cgmanager_initialized)
2351 return;
2352 if (!lxc_init_cgmanager()) {
2353 ERROR("Could not contact cgroup manager, falling back to cgroupfs");
2354 active_cg_ops = &cgfs_ops;
2355 }
2356}
484ed030 2357
d4ef7c50
SH
2358/*
2359 * These are the backend-independent cgroup handlers for container
2360 * start and stop
2361 */
484ed030 2362
d4ef7c50
SH
2363/* Free all cgroup info held by the handler */
2364void cgroup_destroy(struct lxc_handler *handler)
2365{
2366 if (!handler->cgroup_info)
2367 return;
2368 if (active_cg_ops)
2369 active_cg_ops->destroy(handler);
484ed030
SH
2370}
2371
d4ef7c50
SH
2372/*
2373 * Allocate a lxc_cgroup_info for the active cgroup
2374 * backend, and assign it to the handler
2375 */
2376bool cgroup_init(struct lxc_handler *handler)
484ed030 2377{
d4ef7c50
SH
2378 init_cg_ops();
2379 handler->cgroup_info = malloc(sizeof(struct lxc_cgroup_info));
2380 if (!handler->cgroup_info)
2381 return false;
2382 memset(handler->cgroup_info, 0, sizeof(struct lxc_cgroup_info));
2383 /* if we are running as root, use system cgroup pattern, otherwise
2384 * just create a cgroup under the current one. But also fall back to
2385 * that if for some reason reading the configuration fails and no
2386 * default value is available
2387 */
2388 if (geteuid() == 0)
2389 handler->cgroup_info->cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
2390 if (!handler->cgroup_info->cgroup_pattern)
2391 handler->cgroup_info->cgroup_pattern = "%n";
484ed030 2392
d4ef7c50
SH
2393 return active_cg_ops->init(handler);
2394}
484ed030 2395
d4ef7c50
SH
2396/* Create the container cgroups for all requested controllers */
2397bool cgroup_create(struct lxc_handler *handler)
2398{
2399 return active_cg_ops->create(handler);
2400}
484ed030 2401
d4ef7c50
SH
2402/*
2403 * Enter the container init into its new cgroups for all
2404 * requested controllers */
2405bool cgroup_enter(struct lxc_handler *handler)
2406{
2407 return active_cg_ops->enter(handler);
2408}
484ed030 2409
d4ef7c50
SH
2410bool cgroup_create_legacy(struct lxc_handler *handler)
2411{
2412 if (active_cg_ops->create_legacy)
2413 return active_cg_ops->create_legacy(handler);
2414 return true;
2415}
484ed030 2416
d4ef7c50
SH
2417char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem)
2418{
2419 return active_cg_ops->get_cgroup(handler, subsystem);
484ed030
SH
2420}
2421
d4ef7c50
SH
2422int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
2423{
2424 init_cg_ops();
2425 return active_cg_ops->set(filename, value, name, lxcpath);
2426}
2427
2428int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
2429{
2430 init_cg_ops();
2431 return active_cg_ops->get(filename, value, len, name, lxcpath);
2432}
0086f499
SH
2433
2434int lxc_unfreeze_fromhandler(struct lxc_handler *handler)
2435{
2436 return active_cg_ops->unfreeze_fromhandler(handler);
2437}
9daf6f5d
SH
2438
2439bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
2440{
2441 return active_cg_ops->setup_limits(handler, with_devices);
2442}
0996e18a
SH
2443
2444bool cgroup_chown(struct lxc_handler *handler)
2445{
2446 if (active_cg_ops->chown)
2447 return active_cg_ops->chown(handler);
2448 return true;
2449}
5d897655
SH
2450
2451bool lxc_cgroup_attach(const char *name, const char *lxcpath, pid_t pid)
2452{
2453 init_cg_ops();
2454 return active_cg_ops->attach(name, lxcpath, pid);
2455}
c476bdce
SH
2456
2457bool lxc_setup_mount_cgroup(const char *root,
2458 struct lxc_cgroup_info *cgroup_info, int type)
2459{
2460 return active_cg_ops->mount_cgroup(root, cgroup_info, type);
2461}