]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroup.c
Catch the python packages when building them.
[mirror_lxc.git] / src / lxc / cgroup.c
CommitLineData
576f946d 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
576f946d 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
576f946d 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
27#include <errno.h>
576f946d 28#include <unistd.h>
29#include <string.h>
341a9bd8 30#include <dirent.h>
576f946d 31#include <fcntl.h>
b98f7d6e 32#include <ctype.h>
576f946d 33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
aae1f3c4 37#include <sys/mount.h>
576f946d 38#include <netinet/in.h>
39#include <net/if.h>
40
e2bcd7db 41#include "error.h"
881450bb 42#include "config.h"
ae5c8b8e 43#include "commands.h"
b98f7d6e
SH
44#include "list.h"
45#include "conf.h"
33ad9f1a 46#include "utils.h"
740d1928 47#include "bdev.h"
025ed0f3 48#include "lxclock.h"
36eb9bde 49
36eb9bde 50#include <lxc/log.h>
00b3c2e2
CLG
51#include <lxc/cgroup.h>
52#include <lxc/start.h>
36eb9bde 53
edaf8b1b
SG
54#if IS_BIONIC
55#include <../include/lxcmntent.h>
56#else
57#include <mntent.h>
58#endif
59
120ce443
SG
60#ifndef HAVE_GETLINE
61#ifdef HAVE_FGETLN
62#include <../include/getline.h>
63#endif
64#endif
65
36eb9bde 66lxc_log_define(lxc_cgroup, lxc);
576f946d 67
33ad9f1a
CS
68static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
69static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
70static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
71static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
72static bool is_valid_cgroup(const char *name);
73static int create_or_remove_cgroup(bool remove, struct cgroup_mount_point *mp, const char *path);
74static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
75static int remove_cgroup(struct cgroup_mount_point *mp, const char *path);
76static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
77static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
78static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
79static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
80static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
81static int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
82static int cgroup_recursive_task_count(const char *cgroup_path);
83static int count_lines(const char *fn);
1ea59ad2 84static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
33ad9f1a
CS
85
/*
 * Load the cgroup meta data using the default subsystem selection.
 *
 * The string returned by default_cgroup_use() is split on ',' and handed
 * to lxc_cgroup_load_meta2() as the whitelist. If default_cgroup_use()
 * returns NULL without setting errno, a NULL whitelist is passed instead.
 *
 * Returns whatever lxc_cgroup_load_meta2() returns, or NULL if the default
 * could not be obtained or split. errno as left by lxc_cgroup_load_meta2()
 * is preserved across the cleanup of the temporary list.
 */
struct cgroup_meta_data *lxc_cgroup_load_meta()
{
	char **whitelist = NULL;
	struct cgroup_meta_data *result;
	const char *use;
	int err;

	/* clear errno so a NULL return can be told apart from a failure */
	errno = 0;
	use = default_cgroup_use();
	if (!use) {
		if (errno != 0)
			return NULL;
	} else {
		whitelist = lxc_string_split_and_trim(use, ',');
		if (!whitelist)
			return NULL;
	}

	result = lxc_cgroup_load_meta2((const char **)whitelist);

	/* freeing the list must not clobber the errno of the load */
	err = errno;
	lxc_free_array((void **)whitelist, free);
	errno = err;

	return result;
}
fd37327f 109
b653309a
SH
/*
 * Step 1: determine all kernel subsystems.
 *
 * Reads /proc/cgroups and appends a copy of the first tab-separated field
 * of every usable line to *kernel_subsystems. Lines starting with '#',
 * empty lines, lines with fewer than two tabs, and lines whose second
 * field is not a plain decimal number are ignored.
 *
 * Returns true when the whole file was processed. Returns false if the
 * file could not be opened or an allocation failed; entries appended
 * before the failure stay in *kernel_subsystems.
 */
static bool find_cgroup_subsystems(char ***kernel_subsystems)
{
	FILE *f;
	char *buf = NULL;
	size_t buf_len = 0;
	size_t count = 0;
	size_t capacity = 0;
	bool ok = false;

	process_lock();
	f = fopen_cloexec("/proc/cgroups", "r");
	process_unlock();
	if (!f)
		return false;

	while (getline(&buf, &buf_len, f) != -1) {
		char *name = buf;
		char *number;
		char *number_end;
		char *parse_end = NULL;

		/* header line or empty line */
		if (name[0] == '#' || name[0] == '\0')
			continue;

		/* terminate the subsystem name at the first tab */
		number = strchr(name, '\t');
		if (!number)
			continue;
		*number = '\0';
		number++;

		/* terminate the hierarchy number at the second tab */
		number_end = strchr(number, '\t');
		if (!number_end)
			continue;
		*number_end = '\0';

		/* the value itself is not needed, it only has to be a number */
		(void)strtoul(number, &parse_end, 10);
		if (!parse_end || *parse_end != '\0')
			continue;

		if (lxc_grow_array((void ***)kernel_subsystems, &capacity, count + 1, 12) < 0)
			goto out;

		(*kernel_subsystems)[count] = strdup(name);
		if (!(*kernel_subsystems)[count])
			goto out;
		count++;
	}
	ok = true;

out:
	process_lock();
	fclose(f);
	process_unlock();
	free(buf);
	return ok;
}
169
170/* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
171 * since mount points don't specify hierarchy number and
172 * /proc/cgroups does not contain named hierarchies
173 */
174static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
175 bool all_kernel_subsystems, bool all_named_subsystems,
176 const char **subsystem_whitelist)
177{
178 FILE *proc_self_cgroup;
179 char *line = NULL;
180 size_t sz = 0;
181 int r;
182 bool bret = false;
183 size_t hierarchy_capacity = 0;
ef6e34ee 184
025ed0f3 185 process_lock();
33ad9f1a
CS
186 proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
187 /* if for some reason (because of setns() and pid namespace for example),
188 * /proc/self is not valid, we try /proc/1/cgroup... */
189 if (!proc_self_cgroup)
190 proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
025ed0f3 191 process_unlock();
33ad9f1a 192 if (!proc_self_cgroup)
b653309a 193 return false;
33ad9f1a
CS
194
195 while (getline(&line, &sz, proc_self_cgroup) != -1) {
196 /* file format: hierarchy:subsystems:group,
197 * we only extract hierarchy and subsystems
198 * here */
199 char *colon1;
200 char *colon2;
201 int hierarchy_number;
202 struct cgroup_hierarchy *h = NULL;
203 char **p;
204
205 if (!line[0])
206 continue;
ad08bbb7 207
33ad9f1a
CS
208 colon1 = strchr(line, ':');
209 if (!colon1)
8900b9eb 210 continue;
33ad9f1a
CS
211 *colon1++ = '\0';
212 colon2 = strchr(colon1, ':');
213 if (!colon2)
214 continue;
215 *colon2 = '\0';
ad08bbb7 216
33ad9f1a
CS
217 colon2 = NULL;
218 hierarchy_number = strtoul(line, &colon2, 10);
219 if (!colon2 || *colon2)
220 continue;
576f946d 221
33ad9f1a
CS
222 if (hierarchy_number > meta_data->maximum_hierarchy) {
223 /* lxc_grow_array will never shrink, so even if we find a lower
224 * hierarchy number here, the array will never be smaller
225 */
226 r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
227 if (r < 0)
b653309a 228 goto out;
5193cc3d 229
33ad9f1a
CS
230 meta_data->maximum_hierarchy = hierarchy_number;
231 }
fd37327f 232
33ad9f1a
CS
233 /* this shouldn't happen, we had this already */
234 if (meta_data->hierarchies[hierarchy_number])
b653309a 235 goto out;
33ad9f1a
CS
236
237 h = calloc(1, sizeof(struct cgroup_hierarchy));
238 if (!h)
b653309a 239 goto out;
33ad9f1a
CS
240
241 meta_data->hierarchies[hierarchy_number] = h;
242
243 h->index = hierarchy_number;
244 h->subsystems = lxc_string_split_and_trim(colon1, ',');
245 if (!h->subsystems)
b653309a 246 goto out;
33ad9f1a
CS
247 /* see if this hierarchy should be considered */
248 if (!all_kernel_subsystems || !all_named_subsystems) {
249 for (p = h->subsystems; *p; p++) {
250 if (!strncmp(*p, "name=", 5)) {
251 if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
252 h->used = true;
253 break;
254 }
255 } else {
256 if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
257 h->used = true;
258 break;
259 }
260 }
261 }
262 } else {
263 /* we want all hierarchy anyway */
264 h->used = true;
ae5c8b8e 265 }
ae5c8b8e 266 }
b653309a 267 bret = true;
0b9c21ab 268
b653309a 269out:
025ed0f3 270 process_lock();
33ad9f1a 271 fclose(proc_self_cgroup);
025ed0f3 272 process_unlock();
0ccf7c2a 273 free(line);
b653309a
SH
274 return bret;
275}
276
277/* Step 3: determine all mount points of each hierarchy */
278static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
279{
280 bool bret = false;
281 FILE *proc_self_mountinfo;
282 char *line = NULL;
283 size_t sz = 0;
284 char **tokens = NULL;
285 size_t mount_point_count = 0;
286 size_t mount_point_capacity = 0;
287 size_t token_capacity = 0;
288 int r;
289
025ed0f3 290 process_lock();
33ad9f1a
CS
291 proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
292 /* if for some reason (because of setns() and pid namespace for example),
293 * /proc/self is not valid, we try /proc/1/cgroup... */
294 if (!proc_self_mountinfo)
295 proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
025ed0f3 296 process_unlock();
33ad9f1a 297 if (!proc_self_mountinfo)
b653309a 298 return false;
33ad9f1a
CS
299
300 while (getline(&line, &sz, proc_self_mountinfo) != -1) {
178938fe 301 char *token, *line_tok, *saveptr = NULL;
33ad9f1a
CS
302 size_t i, j, k;
303 struct cgroup_mount_point *mount_point;
304 struct cgroup_hierarchy *h;
305 char **subsystems;
306
307 if (line[0] && line[strlen(line) - 1] == '\n')
308 line[strlen(line) - 1] = '\0';
309
178938fe 310 for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
33ad9f1a
CS
311 r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
312 if (r < 0)
b653309a 313 goto out;
33ad9f1a
CS
314 tokens[i++] = token;
315 }
b98f7d6e 316
33ad9f1a
CS
317 /* layout of /proc/self/mountinfo:
318 * 0: id
319 * 1: parent id
320 * 2: device major:minor
321 * 3: mount prefix
8900b9eb 322 * 4: mount point
33ad9f1a
CS
323 * 5: per-mount options
324 * [optional X]: additional data
325 * X+7: "-"
326 * X+8: type
327 * X+9: source
328 * X+10: per-superblock options
329 */
330 for (j = 6; j < i && tokens[j]; j++)
331 if (!strcmp(tokens[j], "-"))
332 break;
fd4f5a56 333
33ad9f1a
CS
334 /* could not find separator */
335 if (j >= i || !tokens[j])
336 continue;
337 /* there should be exactly three fields after
338 * the separator
339 */
340 if (i != j + 4)
341 continue;
fd4f5a56 342
33ad9f1a
CS
343 /* not a cgroup filesystem */
344 if (strcmp(tokens[j + 1], "cgroup") != 0)
345 continue;
b98f7d6e 346
33ad9f1a
CS
347 subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
348 if (!subsystems)
b653309a 349 goto out;
33ad9f1a
CS
350
351 h = NULL;
352 for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
353 if (meta_data->hierarchies[k] &&
354 meta_data->hierarchies[k]->subsystems[0] &&
355 lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
356 /* TODO: we could also check if the lists really match completely,
357 * just to have an additional sanity check */
358 h = meta_data->hierarchies[k];
b98f7d6e 359 break;
33ad9f1a 360 }
b98f7d6e 361 }
33ad9f1a
CS
362 lxc_free_array((void **)subsystems, free);
363
364 r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
365 if (r < 0)
b653309a 366 goto out;
33ad9f1a
CS
367
368 /* create mount point object */
369 mount_point = calloc(1, sizeof(*mount_point));
370 if (!mount_point)
b653309a 371 goto out;
33ad9f1a
CS
372
373 meta_data->mount_points[mount_point_count++] = mount_point;
374
375 mount_point->hierarchy = h;
376 mount_point->mount_point = strdup(tokens[4]);
377 mount_point->mount_prefix = strdup(tokens[3]);
378 if (!mount_point->mount_point || !mount_point->mount_prefix)
b653309a 379 goto out;
33ad9f1a
CS
380 mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
381
382 if (!strcmp(mount_point->mount_prefix, "/")) {
383 if (mount_point->read_only) {
384 if (!h->ro_absolute_mount_point)
385 h->ro_absolute_mount_point = mount_point;
386 } else {
387 if (!h->rw_absolute_mount_point)
388 h->rw_absolute_mount_point = mount_point;
389 }
b98f7d6e 390 }
ae5c8b8e 391
33ad9f1a
CS
392 k = lxc_array_len((void **)h->all_mount_points);
393 r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
394 if (r < 0)
b653309a 395 goto out;
33ad9f1a 396 h->all_mount_points[k] = mount_point;
fd4f5a56 397 }
b653309a
SH
398 bret = true;
399
400out:
401 process_lock();
402 fclose(proc_self_mountinfo);
403 process_unlock();
404 free(tokens);
2cdafc54 405 free(line);
b653309a
SH
406 return bret;
407}
408
409struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
410{
411 bool all_kernel_subsystems = true;
412 bool all_named_subsystems = false;
413 struct cgroup_meta_data *meta_data = NULL;
414 char **kernel_subsystems = NULL;
415 int saved_errno = 0;
416
417 /* if the subsystem whitelist is not specified, include all
418 * hierarchies that contain kernel subsystems by default but
419 * no hierarchies that only contain named subsystems
420 *
421 * if it is specified, the specifier @all will select all
422 * hierarchies, @kernel will select all hierarchies with
423 * kernel subsystems and @named will select all named
424 * hierarchies
425 */
426 all_kernel_subsystems = subsystem_whitelist ?
427 (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
428 true;
429 all_named_subsystems = subsystem_whitelist ?
430 (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
431 false;
432
433 meta_data = calloc(1, sizeof(struct cgroup_meta_data));
434 if (!meta_data)
435 return NULL;
436 meta_data->ref = 1;
437
438 if (!find_cgroup_subsystems(&kernel_subsystems))
439 goto out_error;
440
441 if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
442 all_named_subsystems, subsystem_whitelist))
443 goto out_error;
444
445 if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
446 goto out_error;
fd4f5a56 447
33ad9f1a
CS
448 /* oops, we couldn't find anything */
449 if (!meta_data->hierarchies || !meta_data->mount_points) {
450 errno = EINVAL;
451 goto out_error;
ae5c8b8e 452 }
fd4f5a56 453
3a0abb3a 454 lxc_free_array((void **)kernel_subsystems, free);
33ad9f1a
CS
455 return meta_data;
456
457out_error:
458 saved_errno = errno;
33ad9f1a
CS
459 lxc_free_array((void **)kernel_subsystems, free);
460 lxc_cgroup_put_meta(meta_data);
461 errno = saved_errno;
462 return NULL;
fd4f5a56
DL
463}
464
33ad9f1a 465struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
e14f67a7 466{
33ad9f1a
CS
467 meta_data->ref++;
468 return meta_data;
469}
e14f67a7 470
33ad9f1a
CS
471struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
472{
473 size_t i;
474 if (!meta_data)
475 return NULL;
476 if (--meta_data->ref > 0)
477 return meta_data;
478 lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
479 if (meta_data->hierarchies) {
480 for (i = 0; i <= meta_data->maximum_hierarchy; i++)
481 lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
e14f67a7 482 }
33ad9f1a 483 free(meta_data->hierarchies);
178938fe 484 free(meta_data);
33ad9f1a 485 return NULL;
e14f67a7
U
486}
487
33ad9f1a 488struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
e14f67a7 489{
33ad9f1a
CS
490 size_t i;
491 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
492 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
493 if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
494 return h;
e14f67a7 495 }
e14f67a7
U
496 return NULL;
497}
498
33ad9f1a 499struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
b98f7d6e 500{
33ad9f1a
CS
501 struct cgroup_mount_point **mps;
502 struct cgroup_mount_point *current_result = NULL;
503 ssize_t quality = -1;
b98f7d6e 504
33ad9f1a
CS
505 /* trivial case */
506 if (hierarchy->rw_absolute_mount_point)
507 return hierarchy->rw_absolute_mount_point;
508 if (!should_be_writable && hierarchy->ro_absolute_mount_point)
509 return hierarchy->ro_absolute_mount_point;
b98f7d6e 510
33ad9f1a
CS
511 for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
512 struct cgroup_mount_point *mp = *mps;
513 size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
b98f7d6e 514
33ad9f1a
CS
515 if (prefix_len == 1 && mp->mount_prefix[0] == '/')
516 prefix_len = 0;
b98f7d6e 517
33ad9f1a
CS
518 if (should_be_writable && mp->read_only)
519 continue;
520
521 if (!prefix_len ||
522 (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
523 (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
524 /* search for the best quality match, i.e. the match with the
525 * shortest prefix where this group is still contained
526 */
527 if (quality == -1 || prefix_len < quality) {
528 current_result = mp;
529 quality = prefix_len;
530 }
b98f7d6e
SH
531 }
532 }
533
33ad9f1a
CS
534 if (!current_result)
535 errno = ENOENT;
536 return current_result;
b98f7d6e
SH
537}
538
/*
 * Resolve the absolute path of a cgroup (plus optional suffix) for the
 * given subsystem.
 *
 * Loads a fresh copy of the meta data, finds the hierarchy of subsystem,
 * picks a mount point for group (writable if should_be_writable) and lets
 * cgroup_to_absolute_path() build the result.
 *
 * Returns the string produced by cgroup_to_absolute_path(), or NULL if
 * any step fails. On failure after the meta data was loaded, errno is
 * restored to its value from before the meta data was released.
 */
char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{
	struct cgroup_meta_data *md;
	struct cgroup_hierarchy *hierarchy;
	struct cgroup_mount_point *mount = NULL;
	char *abs_path = NULL;
	int err;

	md = lxc_cgroup_load_meta();
	if (!md)
		return NULL;

	hierarchy = lxc_cgroup_find_hierarchy(md, subsystem);
	if (hierarchy)
		mount = lxc_cgroup_find_mount_point(hierarchy, group, should_be_writable);
	if (mount)
		abs_path = cgroup_to_absolute_path(mount, group, suffix);

	if (abs_path) {
		lxc_cgroup_put_meta(md);
		return abs_path;
	}

	/* releasing the meta data must not hide the original error */
	err = errno;
	lxc_cgroup_put_meta(md);
	errno = err;
	return NULL;
}
572
/*
 * Get the cgroup membership of the process with the given pid.
 *
 * Formats "/proc/<pid>/cgroup" and passes that file name on to
 * lxc_cgroup_process_info_getx(), whose result is returned unchanged.
 */
struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
{
	char proc_path[32];

	snprintf(proc_path, sizeof(proc_path), "/proc/%lu/cgroup", (unsigned long)pid);

	return lxc_cgroup_process_info_getx(proc_path, meta);
}
579
/*
 * Get the cgroup membership of pid 1.
 *
 * Thin wrapper around lxc_cgroup_process_info_get().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	const pid_t init_pid = 1;

	return lxc_cgroup_process_info_get(init_pid, meta);
}
b98f7d6e 584
33ad9f1a
CS
/*
 * Get the cgroup membership of the calling process.
 *
 * Tries /proc/self/cgroup first; if that yields nothing, retries through
 * /proc/<getpid()>/cgroup.
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
{
	struct cgroup_process_info *info;

	info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);
	if (info)
		return info;

	return lxc_cgroup_process_info_get(getpid(), meta);
}
ae5c8b8e 593
692ba18f
SH
/*
 * If a controller has ns cgroup mounted, then in that cgroup the task is
 * already in a new cgroup named after its pid. This renames that cgroup
 * directory after the container.
 *
 * Example: if the cgroup is mounted at /cgroup, the task is in cgroup /ab,
 * pid is 2375 and name is c1:
 *      fulloldpath = /cgroup/ab/2375
 *      fullnewpath = /cgroup/ab/c1
 *      newname     = /ab/c1
 *
 * mountpath: mount point of the hierarchy
 * oldname:   cgroup path (relative to mountpath) that contains the
 *            pid-named directory
 * pid:       pid the existing directory is named after
 * name:      container name to rename the directory to
 *
 * An already existing directory at the new location is removed with
 * rmdir() first; if that fails the rename is not attempted.
 *
 * Returns a malloc()ed string "<oldname>/<name>" that the caller has to
 * free, or NULL on failure.
 */
static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
{
	char *fulloldpath;
	char *newname, *fullnewpath;
	int len, newlen, ret;

	/* two '/' separators, up to 20 digits for the pid, terminating NUL */
	len = strlen(oldname) + strlen(mountpath) + 23;
	fulloldpath = alloca(len);
	/* the argument is cast to unsigned long, so it has to be printed with %lu */
	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
	if (ret < 0 || ret >= len)
		return NULL;

	/* one '/' separator and the terminating NUL */
	len = strlen(oldname) + strlen(name) + 2;
	newname = malloc(len);
	if (!newname) {
		SYSERROR("Out of memory");
		return NULL;
	}
	ret = snprintf(newname, len, "%s/%s", oldname, name);
	if (ret < 0 || ret >= len) {
		free(newname);
		return NULL;
	}

	newlen = strlen(mountpath) + len + 2;
	fullnewpath = alloca(newlen);
	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
	if (ret < 0 || ret >= newlen) {
		free(newname);
		return NULL;
	}

	/* get a leftover directory of the same name out of the way */
	if (access(fullnewpath, F_OK) == 0) {
		if (rmdir(fullnewpath) != 0) {
			SYSERROR("container cgroup %s already exists.", fullnewpath);
			free(newname);
			return NULL;
		}
	}
	if (rename(fulloldpath, fullnewpath)) {
		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
		free(newname);
		return NULL;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return newname;
}
661
33ad9f1a 662/* create a new cgroup */
47d8fb3b 663extern struct cgroup_process_info *lxc_cgroup_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
33ad9f1a 664{
001b026e 665 char **cgroup_path_components = NULL;
33ad9f1a
CS
666 char **p = NULL;
667 char *path_so_far = NULL;
668 char **new_cgroup_paths = NULL;
669 char **new_cgroup_paths_sub = NULL;
670 struct cgroup_mount_point *mp;
671 struct cgroup_hierarchy *h;
672 struct cgroup_process_info *base_info = NULL;
673 struct cgroup_process_info *info_ptr;
674 int saved_errno;
675 int r;
676 unsigned suffix = 0;
677 bool had_sub_pattern = false;
678 size_t i;
ae5c8b8e 679
33ad9f1a
CS
680 if (!is_valid_cgroup(name)) {
681 ERROR("Invalid cgroup name: '%s'", name);
682 errno = EINVAL;
683 return NULL;
ae5c8b8e
SH
684 }
685
33ad9f1a
CS
686 if (!strstr(path_pattern, "%n")) {
687 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
688 errno = EINVAL;
689 return NULL;
690 }
fd37327f 691
33ad9f1a
CS
692 /* we will modify the result of this operation directly,
693 * so we don't have to copy the data structure
694 */
695 base_info = (path_pattern[0] == '/') ?
696 lxc_cgroup_process_info_get_init(meta_data) :
697 lxc_cgroup_process_info_get_self(meta_data);
698 if (!base_info)
699 return NULL;
c8f7c563 700
33ad9f1a
CS
701 new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
702 if (!new_cgroup_paths)
703 goto out_initial_error;
704
705 new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
706 if (!new_cgroup_paths_sub)
707 goto out_initial_error;
708
709 /* find mount points we can use */
710 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
711 h = info_ptr->hierarchy;
712 mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
713 if (!mp) {
714 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
715 goto out_initial_error;
716 }
717 info_ptr->designated_mount_point = mp;
460a1cf0 718
692ba18f
SH
719 if (lxc_string_in_array("ns", (const char **)h->subsystems))
720 continue;
1ea59ad2 721 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
33ad9f1a
CS
722 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
723 goto out_initial_error;
724 }
725 }
b98f7d6e 726
33ad9f1a
CS
727 /* normalize the path */
728 cgroup_path_components = lxc_normalize_path(path_pattern);
729 if (!cgroup_path_components)
730 goto out_initial_error;
731
732 /* go through the path components to see if we can create them */
733 for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
734 /* we only want to create the same component with -1, -2, etc.
735 * if the component contains the container name itself, otherwise
736 * it's not an error if it already exists
737 */
738 char *p_eff = *p ? *p : (char *)sub_pattern;
739 bool contains_name = strstr(p_eff, "%n");
740 char *current_component = NULL;
741 char *current_subpath = NULL;
742 char *current_entire_path = NULL;
743 char *parts[3];
744 size_t j = 0;
745 i = 0;
746
747 /* if we are processing the subpattern, we want to make sure
748 * loop is ended the next time around
749 */
750 if (!*p) {
751 had_sub_pattern = true;
752 p--;
753 }
b98f7d6e 754
33ad9f1a
CS
755 goto find_name_on_this_level;
756
757 cleanup_name_on_this_level:
758 /* This is reached if we found a name clash.
759 * In that case, remove the cgroup from all previous hierarchies
760 */
761 for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
762 r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1]);
763 if (r < 0)
764 WARN("could not clean up cgroup we created when trying to create container");
765 free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
766 info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
767 }
768 if (current_component != current_subpath)
769 free(current_subpath);
770 if (current_component != p_eff)
771 free(current_component);
772 current_component = current_subpath = NULL;
773 /* try again with another suffix */
774 ++suffix;
775
776 find_name_on_this_level:
777 /* determine name of the path component we should create */
778 if (contains_name && suffix > 0) {
779 char *buf = calloc(strlen(name) + 32, 1);
780 if (!buf)
781 goto out_initial_error;
782 snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
783 current_component = lxc_string_replace("%n", buf, p_eff);
784 free(buf);
785 } else {
786 current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
787 }
788 parts[0] = path_so_far;
789 parts[1] = current_component;
790 parts[2] = NULL;
791 current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
792
793 /* Now go through each hierarchy and try to create the
794 * corresponding cgroup
795 */
796 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
797 char *parts2[3];
692ba18f
SH
798
799 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
800 continue;
33ad9f1a
CS
801 current_entire_path = NULL;
802
803 parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
804 parts2[1] = current_subpath;
805 parts2[2] = NULL;
806 current_entire_path = lxc_string_join("/", (const char **)parts2, false);
807
808 if (!*p) {
809 /* we are processing the subpath, so only update that one */
810 free(new_cgroup_paths_sub[i]);
811 new_cgroup_paths_sub[i] = strdup(current_entire_path);
812 if (!new_cgroup_paths_sub[i])
813 goto cleanup_from_error;
814 } else {
815 /* remember which path was used on this controller */
816 free(new_cgroup_paths[i]);
817 new_cgroup_paths[i] = strdup(current_entire_path);
818 if (!new_cgroup_paths[i])
819 goto cleanup_from_error;
820 }
fd4f5a56 821
33ad9f1a
CS
822 r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
823 if (r < 0 && errno == EEXIST && contains_name) {
824 /* name clash => try new name with new suffix */
825 free(current_entire_path);
826 current_entire_path = NULL;
827 goto cleanup_name_on_this_level;
828 } else if (r < 0 && errno != EEXIST) {
829 SYSERROR("Could not create cgroup %s", current_entire_path);
830 goto cleanup_from_error;
831 } else if (r == 0) {
832 /* successfully created */
833 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
834 if (r < 0)
835 goto cleanup_from_error;
836 info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
837 } else {
838 /* if we didn't create the cgroup, then we have to make sure that
839 * further cgroups will be created properly
840 */
1ea59ad2 841 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
33ad9f1a
CS
842 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
843 goto cleanup_from_error;
844 }
845
846 /* already existed but path component of pattern didn't contain '%n',
847 * so this is not an error; but then we don't need current_entire_path
848 * anymore...
849 */
850 free(current_entire_path);
851 current_entire_path = NULL;
852 }
853 }
fd4f5a56 854
33ad9f1a
CS
855 /* save path so far */
856 free(path_so_far);
857 path_so_far = strdup(current_subpath);
858 if (!path_so_far)
859 goto cleanup_from_error;
860
861 /* cleanup */
862 if (current_component != current_subpath)
863 free(current_subpath);
864 if (current_component != p_eff)
865 free(current_component);
866 current_component = current_subpath = NULL;
867 continue;
868
869 cleanup_from_error:
870 /* called if an error occured in the loop, so we
871 * do some additional cleanup here
872 */
873 saved_errno = errno;
874 if (current_component != current_subpath)
875 free(current_subpath);
876 if (current_component != p_eff)
877 free(current_component);
878 free(current_entire_path);
879 errno = saved_errno;
880 goto out_initial_error;
fd4f5a56
DL
881 }
882
33ad9f1a
CS
883 /* we're done, now update the paths */
884 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
47d8fb3b
CS
885 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
886 * will take care of it
887 * Since we do a continue in above loop, new_cgroup_paths[i] is
888 * unset anyway, as is new_cgroup_paths_sub[i]
692ba18f 889 */
47d8fb3b
CS
890 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
891 continue;
892 free(info_ptr->cgroup_path);
893 info_ptr->cgroup_path = new_cgroup_paths[i];
894 info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
fd4f5a56 895 }
33ad9f1a
CS
896 /* don't use lxc_free_array since we used the array members
897 * to store them in our result...
898 */
899 free(new_cgroup_paths);
900 free(new_cgroup_paths_sub);
901 free(path_so_far);
902 lxc_free_array((void **)cgroup_path_components, free);
903 return base_info;
904
905out_initial_error:
906 saved_errno = errno;
907 free(path_so_far);
908 lxc_cgroup_process_info_free_and_remove(base_info);
909 lxc_free_array((void **)new_cgroup_paths, free);
910 lxc_free_array((void **)new_cgroup_paths_sub, free);
911 lxc_free_array((void **)cgroup_path_components, free);
912 errno = saved_errno;
913 return NULL;
c8f7c563
CS
914}
915
47d8fb3b
CS
916int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
917{
918 struct cgroup_process_info *info_ptr;
919 int r;
920
921 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
922 if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
923 continue;
924 /*
925 * For any path which has ns cgroup mounted, handler->pid is already
926 * moved into a container called '%d % (handler->pid)'. Rename it to
927 * the cgroup name and record that.
928 */
929 char *tmp = cgroup_rename_nsgroup((const char *)info_ptr->designated_mount_point->mount_point,
930 info_ptr->cgroup_path, pid, name);
931 if (!tmp)
932 return -1;
933 free(info_ptr->cgroup_path);
934 info_ptr->cgroup_path = tmp;
935 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
936 if (r < 0)
937 return -1;
938 tmp = strdup(tmp);
939 if (!tmp)
940 return -1;
941 info_ptr->created_paths[info_ptr->created_paths_count++] = tmp;
942 }
943 return 0;
944}
945
33ad9f1a
CS
946/* get the cgroup membership of a given container */
947struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
c8f7c563 948{
33ad9f1a
CS
949 struct cgroup_process_info *result = NULL;
950 int saved_errno = 0;
951 size_t i;
952 struct cgroup_process_info **cptr = &result;
953 struct cgroup_process_info *entry = NULL;
954 char *path = NULL;
955
956 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
957 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
958 if (!h || !h->used)
959 continue;
c8f7c563 960
33ad9f1a
CS
961 /* use the command interface to look for the cgroup */
962 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
963 if (!path)
964 goto out_error;
965
966 entry = calloc(1, sizeof(struct cgroup_process_info));
967 if (!entry)
968 goto out_error;
969 entry->meta_ref = lxc_cgroup_get_meta(meta_data);
970 entry->hierarchy = h;
971 entry->cgroup_path = path;
972 path = NULL;
973
974 /* it is not an error if we don't find anything here,
975 * it is up to the caller to decide what to do in that
976 * case */
977 entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
978
979 *cptr = entry;
980 cptr = &entry->next;
981 entry = NULL;
c8f7c563
CS
982 }
983
33ad9f1a
CS
984 return result;
985out_error:
986 saved_errno = errno;
987 free(path);
988 lxc_cgroup_process_info_free(result);
989 lxc_cgroup_process_info_free(entry);
990 errno = saved_errno;
991 return NULL;
fd4f5a56
DL
992}
993
33ad9f1a
CS
994/* move a processs to the cgroups specified by the membership */
995int lxc_cgroup_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
4f17323e 996{
33ad9f1a
CS
997 char pid_buf[32];
998 char *cgroup_tasks_fn;
999 int r;
1000 struct cgroup_process_info *info_ptr;
1001
1002 snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
1003 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1004 char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
1005 info_ptr->cgroup_path_sub :
1006 info_ptr->cgroup_path;
1007
1008 if (!info_ptr->designated_mount_point) {
1009 info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
1010 if (!info_ptr->designated_mount_point) {
1011 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
1012 return -1;
1013 }
1014 }
4f17323e 1015
33ad9f1a
CS
1016 cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
1017 if (!cgroup_tasks_fn) {
1018 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1019 return -1;
1020 }
4f17323e 1021
33ad9f1a 1022 r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
5903da82 1023 free(cgroup_tasks_fn);
33ad9f1a
CS
1024 if (r < 0) {
1025 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1026 return -1;
1027 }
4f17323e
CS
1028 }
1029
33ad9f1a 1030 return 0;
4f17323e
CS
1031}
1032
33ad9f1a
CS
1033/* free process membership information */
1034void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
fc7de561 1035{
33ad9f1a
CS
1036 struct cgroup_process_info *next;
1037 if (!info)
b98f7d6e 1038 return;
33ad9f1a
CS
1039 next = info->next;
1040 lxc_cgroup_put_meta(info->meta_ref);
1041 free(info->cgroup_path);
1042 free(info->cgroup_path_sub);
1043 lxc_free_array((void **)info->created_paths, free);
1044 free(info);
1045 lxc_cgroup_process_info_free(next);
fc7de561
SH
1046}
1047
33ad9f1a
CS
1048/* free process membership information and remove cgroups that were created */
1049void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
b98f7d6e 1050{
33ad9f1a
CS
1051 struct cgroup_process_info *next;
1052 char **pp;
1053 if (!info)
1054 return;
1055 next = info->next;
1056 for (pp = info->created_paths; pp && *pp; pp++);
1057 for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
1058 struct cgroup_mount_point *mp = info->designated_mount_point;
1059 if (!mp)
1060 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1061 if (mp)
1062 /* ignore return value here, perhaps we created the
1063 * '/lxc' cgroup in this container but another container
1064 * is still running (for example)
1065 */
1066 (void)remove_cgroup(mp, *pp);
1067 free(*pp);
b98f7d6e 1068 }
33ad9f1a
CS
1069 free(info->created_paths);
1070 lxc_cgroup_put_meta(info->meta_ref);
1071 free(info->cgroup_path);
1072 free(info->cgroup_path_sub);
1073 free(info);
9431aa65 1074 lxc_cgroup_process_info_free_and_remove(next);
33ad9f1a 1075}
b98f7d6e 1076
33ad9f1a
CS
1077char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
1078{
1079 struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
1080 if (!info)
1081 return NULL;
1082 return info->cgroup_path;
b98f7d6e
SH
1083}
1084
/*
 * Query the cgroup path of container name (in lxcpath) for subsystem.
 * This simply forwards to lxc_cmd_get_cgroup_path() and returns its result.
 */
char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
{
	char *cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);

	return cgpath;
}
1089
33ad9f1a 1090char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
b98f7d6e 1091{
33ad9f1a
CS
1092 struct cgroup_mount_point *mp = NULL;
1093 struct cgroup_process_info *info = find_info_for_subsystem(handler->cgroup, subsystem);
1094 if (!info)
1095 return NULL;
1096 if (info->designated_mount_point) {
8900b9eb 1097 mp = info->designated_mount_point;
33ad9f1a
CS
1098 } else {
1099 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1100 if (!mp)
1101 return NULL;
b98f7d6e 1102 }
33ad9f1a 1103 return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
b98f7d6e 1104}
55c76589 1105
33ad9f1a 1106char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
9a93d992 1107{
33ad9f1a
CS
1108 struct cgroup_meta_data *meta;
1109 struct cgroup_process_info *base_info, *info;
1110 struct cgroup_mount_point *mp;
1111 char *result = NULL;
33ad9f1a
CS
1112
1113 meta = lxc_cgroup_load_meta();
1114 if (!meta)
9a93d992 1115 return NULL;
33ad9f1a
CS
1116 base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
1117 if (!base_info)
178938fe 1118 goto out1;
33ad9f1a
CS
1119 info = find_info_for_subsystem(base_info, subsystem);
1120 if (!info)
178938fe 1121 goto out2;
33ad9f1a 1122 if (info->designated_mount_point) {
8900b9eb 1123 mp = info->designated_mount_point;
33ad9f1a
CS
1124 } else {
1125 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1126 if (!mp)
178938fe 1127 goto out3;
33ad9f1a
CS
1128 }
1129 result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
178938fe 1130out3:
178938fe 1131out2:
33ad9f1a 1132 lxc_cgroup_process_info_free(base_info);
178938fe 1133out1:
33ad9f1a 1134 lxc_cgroup_put_meta(meta);
33ad9f1a
CS
1135 return result;
1136}
9a93d992 1137
33ad9f1a
CS
/*
 * Write value to the cgroup file filename, locating the cgroup through
 * handler.
 *
 * The subsystem is the part of filename before the first '.', or the
 * whole of filename if it contains no '.'.
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * could not be determined.
 */
int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{
	size_t sublen = strcspn(filename, ".");
	char *subsystem = alloca(sublen + 1);
	char *dir;
	int ret = -1;

	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
	if (dir != NULL) {
		ret = do_cgroup_set(dir, filename, value);
		free(dir);
	}

	return ret;
}
9a93d992 1155
33ad9f1a
CS
/*
 * Read the cgroup file filename into value (at most len bytes are passed
 * on to do_cgroup_get()), locating the cgroup through handler.
 *
 * The subsystem is the part of filename before the first '.', or the
 * whole of filename if it contains no '.'.
 *
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * could not be determined.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	size_t sublen = strcspn(filename, ".");
	char *subsystem = alloca(sublen + 1);
	char *dir;
	int ret = -1;

	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(subsystem, handler);
	if (dir != NULL) {
		ret = do_cgroup_get(dir, filename, value, len);
		free(dir);
	}

	return ret;
}
1173
/*
 * Write value to the cgroup file filename of container name running in
 * lxcpath.
 *
 * The subsystem is the part of filename before the first '.', or the
 * whole of filename if it contains no '.'.
 *
 * Returns the result of do_cgroup_set(), or -1 if the cgroup directory
 * could not be determined.
 */
int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t sublen = strcspn(filename, ".");
	char *subsystem = alloca(sublen + 1);
	char *dir;
	int ret = -1;

	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (dir != NULL) {
		ret = do_cgroup_set(dir, filename, value);
		free(dir);
	}

	return ret;
}
1191
/*
 * Read the cgroup file filename of container name running in lxcpath
 * into value (len is passed on to do_cgroup_get()).
 *
 * The subsystem is the part of filename before the first '.', or the
 * whole of filename if it contains no '.'.
 *
 * Returns the result of do_cgroup_get(), or -1 if the cgroup directory
 * could not be determined.
 */
int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t sublen = strcspn(filename, ".");
	char *subsystem = alloca(sublen + 1);
	char *dir;
	int ret = -1;

	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(subsystem, name, lxcpath);
	if (dir != NULL) {
		ret = do_cgroup_get(dir, filename, value, len);
		free(dir);
	}

	return ret;
}
1209
33ad9f1a
CS
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @filename  : the file of interest (e.g. "freezer.state") or
 *              the subsystem name (e.g. "freezer") in which case
 *              the directory where the cgroup may be modified
 *              will be returned
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines cgpath from the
 * lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *filename, const char *name,
			  const char *lxcpath)
{
	const char *dot = strchr(filename, '.');
	size_t sublen = dot ? (size_t)(dot - filename) : strlen(filename);
	char *subsystem = alloca(sublen + 1);
	char *longer_file = NULL;
	char *group;
	char *result;

	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	/* only a real file name (one containing a '.') gets a "/<filename>" suffix */
	if (dot != NULL) {
		longer_file = alloca(strlen(filename) + 2);
		longer_file[0] = '/';
		strcpy(longer_file + 1, filename);
	}

	group = lxc_cgroup_get_hierarchy_path(subsystem, name, lxcpath);
	if (group == NULL)
		return NULL;

	result = lxc_cgroup_find_abs_path(subsystem, group, true, longer_file);
	free(group);
	return result;
}
1249
33ad9f1a
CS
/*
 * Apply the cgroup settings via do_setup_cgroup() with do_devices set to
 * false.
 */
int lxc_setup_cgroup_without_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{
	const bool devices_pass = false;

	return do_setup_cgroup(h, cgroup_settings, devices_pass);
}
b98f7d6e 1254
/*
 * Apply the cgroup settings via do_setup_cgroup() with do_devices set to
 * true.
 */
int lxc_setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroup_settings)
{
	const bool devices_pass = true;

	return do_setup_cgroup(h, cgroup_settings, devices_pass);
}
fd37327f 1259
7997d7da 1260int lxc_setup_mount_cgroup(const char *root, struct cgroup_process_info *base_info, int type)
aae1f3c4
CS
1261{
1262 size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
1263 char *path = NULL;
1264 char **parts = NULL;
1265 char *dirname = NULL;
1266 char *abs_path = NULL;
1267 char *abs_path2 = NULL;
1268 struct cgroup_process_info *info;
1269 int r, saved_errno = 0;
1270
7997d7da
CS
1271 if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
1272 ERROR("could not mount cgroups into container: invalid type specified internally");
1273 errno = EINVAL;
1274 return -1;
1275 }
1276
aae1f3c4
CS
1277 path = calloc(1, bufsz);
1278 if (!path)
1279 return -1;
1280 snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
1281 r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
1282 if (r < 0) {
1283 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1284 return -1;
1285 }
1286
1287 /* now mount all the hierarchies we care about */
1288 for (info = base_info; info; info = info->next) {
1289 size_t subsystem_count, i;
1290 struct cgroup_mount_point *mp = info->designated_mount_point;
1291 if (!mp)
1292 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1293 if (!mp) {
1294 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1295 goto out_error;
1296 }
1297
1298 subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
1299 parts = calloc(subsystem_count + 1, sizeof(char *));
1300 if (!parts)
1301 goto out_error;
1302
1303 for (i = 0; i < subsystem_count; i++) {
1304 if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
1305 parts[i] = info->hierarchy->subsystems[i] + 5;
1306 else
1307 parts[i] = info->hierarchy->subsystems[i];
1308 }
1309 dirname = lxc_string_join(",", (const char **)parts, false);
1310 if (!dirname)
1311 goto out_error;
1312
1313 /* create subsystem directory */
1314 abs_path = lxc_append_paths(path, dirname);
1315 if (!abs_path)
1316 goto out_error;
1317 r = mkdir_p(abs_path, 0755);
1318 if (r < 0 && errno != EEXIST) {
1319 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
1320 goto out_error;
1321 }
1322
aae1f3c4
CS
1323 abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
1324 if (!abs_path2)
1325 goto out_error;
aae1f3c4 1326
7997d7da
CS
1327 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1328 /* bind-mount the cgroup entire filesystem there */
1329 if (strcmp(mp->mount_prefix, "/") != 0) {
1330 /* FIXME: maybe we should just try to remount the entire hierarchy
1331 * with a regular mount command? may that works? */
1332 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
1333 goto out_error;
1334 }
1335 r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
1336 if (r < 0) {
1337 SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
1338 goto out_error;
1339 }
1340 /* main cgroup path should be read-only */
1341 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1342 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1343 if (r < 0) {
1344 SYSERROR("error re-mounting %s readonly", abs_path);
1345 goto out_error;
1346 }
1347 }
1348 /* own cgroup should be read-write */
1349 if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
1350 r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
1351 if (r < 0) {
1352 SYSERROR("error bind-mounting %s onto itself", abs_path2);
1353 goto out_error;
1354 }
1355 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
1356 if (r < 0) {
1357 SYSERROR("error re-mounting %s readwrite", abs_path2);
1358 goto out_error;
1359 }
1360 }
1361 } else {
1362 /* create path for container's cgroup */
1363 r = mkdir_p(abs_path2, 0755);
1364 if (r < 0 && errno != EEXIST) {
1365 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
1366 goto out_error;
1367 }
aae1f3c4 1368
7997d7da
CS
1369 free(abs_path);
1370 abs_path = NULL;
1371
1372 /* bind-mount container's cgroup to that directory */
1373 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1374 if (!abs_path)
1375 goto out_error;
1376 r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
1377 if (r < 0) {
1378 SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
1379 goto out_error;
1380 }
1381 if (type == LXC_AUTO_CGROUP_RO) {
1382 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1383 if (r < 0) {
1384 SYSERROR("error re-mounting %s readonly", abs_path2);
1385 goto out_error;
1386 }
1387 }
aae1f3c4
CS
1388 }
1389
1390 free(abs_path);
1391 free(abs_path2);
1392 abs_path = NULL;
1393 abs_path2 = NULL;
1394
1395 /* add symlinks for every single subsystem */
1396 if (subsystem_count > 1) {
1397 for (i = 0; i < subsystem_count; i++) {
1398 abs_path = lxc_append_paths(path, parts[i]);
1399 if (!abs_path)
1400 goto out_error;
1401 r = symlink(dirname, abs_path);
1402 if (r < 0)
1403 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
1404 free(abs_path);
1405 abs_path = NULL;
1406 }
1407 }
1408 free(dirname);
1409 free(parts);
1410 dirname = NULL;
1411 parts = NULL;
1412 }
1413
1414 /* try to remount the tmpfs readonly, since the container shouldn't
1415 * change anything (this will also make sure that trying to create
1416 * new cgroups outside the allowed area fails with an error instead
1417 * of simply causing this to create directories in the tmpfs itself)
1418 */
7997d7da
CS
1419 if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
1420 mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
aae1f3c4
CS
1421
1422 free(path);
1423
1424 return 0;
1425
1426out_error:
1427 saved_errno = errno;
1428 free(path);
1429 free(dirname);
1430 free(parts);
1431 free(abs_path);
1432 free(abs_path2);
1433 errno = saved_errno;
1434 return -1;
1435}
1436
33ad9f1a
CS
1437int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
1438{
1439 struct cgroup_process_info *info = handler->cgroup;
1440 struct cgroup_mount_point *mp = NULL;
1441 char *abs_path = NULL;
1442 int ret;
460a1cf0 1443
33ad9f1a
CS
1444 if (!info) {
1445 errno = ENOENT;
1446 return -1;
b98f7d6e 1447 }
c8f7c563 1448
33ad9f1a 1449 if (info->designated_mount_point) {
8900b9eb 1450 mp = info->designated_mount_point;
33ad9f1a
CS
1451 } else {
1452 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
1453 if (!mp)
1454 return -1;
c8f7c563
CS
1455 }
1456
33ad9f1a
CS
1457 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1458 if (!abs_path)
1459 return -1;
1460
1461 ret = cgroup_recursive_task_count(abs_path);
1462 free(abs_path);
1463 return ret;
c8f7c563
CS
1464}
1465
33ad9f1a 1466struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta)
d08ba6ec 1467{
33ad9f1a
CS
1468 struct cgroup_process_info *result = NULL;
1469 FILE *proc_pid_cgroup = NULL;
1470 char *line = NULL;
1471 size_t sz = 0;
1472 int saved_errno = 0;
1473 struct cgroup_process_info **cptr = &result;
1474 struct cgroup_process_info *entry = NULL;
1475
025ed0f3 1476 process_lock();
33ad9f1a 1477 proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
025ed0f3 1478 process_unlock();
33ad9f1a 1479 if (!proc_pid_cgroup)
b98f7d6e 1480 return NULL;
1ac470c0 1481
33ad9f1a
CS
1482 while (getline(&line, &sz, proc_pid_cgroup) != -1) {
1483 /* file format: hierarchy:subsystems:group */
1484 char *colon1;
1485 char *colon2;
1486 char *endptr;
1487 int hierarchy_number;
1488 struct cgroup_hierarchy *h = NULL;
fd4f5a56 1489
33ad9f1a 1490 if (!line[0])
ae5c8b8e 1491 continue;
b98f7d6e 1492
33ad9f1a
CS
1493 if (line[strlen(line) - 1] == '\n')
1494 line[strlen(line) - 1] = '\0';
1495
1496 colon1 = strchr(line, ':');
1497 if (!colon1)
8900b9eb 1498 continue;
33ad9f1a
CS
1499 *colon1++ = '\0';
1500 colon2 = strchr(colon1, ':');
1501 if (!colon2)
ae5c8b8e 1502 continue;
33ad9f1a 1503 *colon2++ = '\0';
e4659536 1504
33ad9f1a
CS
1505 endptr = NULL;
1506 hierarchy_number = strtoul(line, &endptr, 10);
1507 if (!endptr || *endptr)
9a93d992 1508 continue;
9a93d992 1509
33ad9f1a
CS
1510 if (hierarchy_number > meta->maximum_hierarchy) {
1511 /* we encountered a hierarchy we didn't have before,
1512 * so probably somebody remounted some stuff in the
1513 * mean time...
1514 */
1515 errno = EAGAIN;
1516 goto out_error;
b98f7d6e 1517 }
33ad9f1a
CS
1518
1519 h = meta->hierarchies[hierarchy_number];
1520 if (!h) {
1521 /* we encountered a hierarchy that was thought to be
1522 * dead before, so probably somebody remounted some
1523 * stuff in the mean time...
1524 */
1525 errno = EAGAIN;
1526 goto out_error;
b98f7d6e 1527 }
33ad9f1a
CS
1528
1529 /* we are told that we should ignore this hierarchy */
1530 if (!h->used)
b98f7d6e 1531 continue;
5193cc3d 1532
33ad9f1a
CS
1533 entry = calloc(1, sizeof(struct cgroup_process_info));
1534 if (!entry)
1535 goto out_error;
fd4f5a56 1536
33ad9f1a
CS
1537 entry->meta_ref = lxc_cgroup_get_meta(meta);
1538 entry->hierarchy = h;
1539 entry->cgroup_path = strdup(colon2);
1540 if (!entry->cgroup_path)
1541 goto out_error;
d08ba6ec 1542
33ad9f1a
CS
1543 *cptr = entry;
1544 cptr = &entry->next;
1545 entry = NULL;
b98f7d6e 1546 }
b98f7d6e 1547
025ed0f3 1548 process_lock();
33ad9f1a 1549 fclose(proc_pid_cgroup);
025ed0f3 1550 process_unlock();
33ad9f1a
CS
1551 free(line);
1552 return result;
1553
1554out_error:
1555 saved_errno = errno;
025ed0f3 1556 process_lock();
33ad9f1a
CS
1557 if (proc_pid_cgroup)
1558 fclose(proc_pid_cgroup);
025ed0f3 1559 process_unlock();
33ad9f1a
CS
1560 lxc_cgroup_process_info_free(result);
1561 lxc_cgroup_process_info_free(entry);
1562 free(line);
1563 errno = saved_errno;
ae5c8b8e 1564 return NULL;
36b86299
DL
1565}
1566
/*
 * Extract the cgroup subsystems from a comma separated mount option
 * string.
 *
 * A token counts as a subsystem if it starts with "name=" (named
 * hierarchy) or if it is contained in kernel_list. mount_options itself is
 * not modified; a stack copy is tokenized.
 *
 * Returns a NULL-terminated array of newly allocated strings, or NULL if
 * no token matched. On allocation failure the partial result is freed,
 * errno is preserved and NULL is returned.
 */
char **subsystems_from_mount_options(const char *mount_options, char **kernel_list)
{
	char **found = NULL;
	size_t found_capacity = 0;
	size_t found_count = 0;
	char *state = NULL;
	char *copy;
	char *tok;
	int saved_errno;

	copy = alloca(strlen(mount_options) + 1);
	strcpy(copy, mount_options);

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		if (strncmp(tok, "name=", 5) == 0 || lxc_string_in_array(tok, (const char **)kernel_list)) {
			if (lxc_grow_array((void ***)&found, &found_capacity, found_count + 1, 12) < 0)
				goto fail;
			/* keep the array terminated before the new slot is filled */
			found[found_count + 1] = NULL;
			found[found_count] = strdup(tok);
			if (found[found_count] == NULL)
				goto fail;
			found_count++;
		}
		tok = strtok_r(NULL, ",", &state);
	}

	return found;

fail:
	saved_errno = errno;
	lxc_free_array((void **)found, free);
	errno = saved_errno;
	return NULL;
}
1603
33ad9f1a 1604void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
b98f7d6e 1605{
33ad9f1a
CS
1606 if (!mp)
1607 return;
1608 free(mp->mount_point);
1609 free(mp->mount_prefix);
1610 free(mp);
bcbd102c
SH
1611}
1612
33ad9f1a 1613void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
341a9bd8 1614{
33ad9f1a
CS
1615 if (!h)
1616 return;
1617 lxc_free_array((void **)h->subsystems, free);
8bfcb981 1618 free(h->all_mount_points);
33ad9f1a
CS
1619 free(h);
1620}
341a9bd8 1621
33ad9f1a
CS
/*
 * Check whether name is acceptable as a single cgroup path component.
 *
 * Rejected are names containing a character with a value below 32, the
 * DEL character (127) or '/', as well as the names "." and "..".
 * Everything else, including the empty string, is accepted.
 */
bool is_valid_cgroup(const char *name)
{
	const char *cursor = name;

	while (*cursor != '\0') {
		char c = *cursor++;

		if (c < 32 || c == 127 || c == '/')
			return false;
	}

	if (strcmp(name, ".") == 0)
		return false;
	return strcmp(name, "..") != 0;
}
341a9bd8 1631
33ad9f1a
CS
/*
 * Create (mkdir, mode 0777) or remove (rmdir) the directory that belongs
 * to cgroup path below mount point mp.
 *
 * Returns the result of mkdir()/rmdir(), or -1 if the absolute path could
 * not be built. errno of the directory operation is preserved.
 */
int create_or_remove_cgroup(bool do_remove, struct cgroup_mount_point *mp, const char *path)
{
	char *abs = cgroup_to_absolute_path(mp, path, NULL);
	int rc;
	int saved_errno = 0;

	if (abs == NULL)
		return -1;

	if (do_remove)
		rc = rmdir(abs);
	else
		rc = mkdir(abs, 0777);

	/* keep errno from the directory operation intact across free() */
	saved_errno = errno;
	free(abs);
	errno = saved_errno;
	return rc;
}
bcbd102c 1648
/* Create the cgroup directory for path below mp; see create_or_remove_cgroup(). */
int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = false;

	return create_or_remove_cgroup(do_remove, mp, path);
}
1653
/* Remove the cgroup directory for path below mp; see create_or_remove_cgroup(). */
int remove_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool do_remove = true;

	return create_or_remove_cgroup(do_remove, mp, path);
}
576f946d 1658
33ad9f1a
CS
1659char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix)
1660{
1661 /* first we have to make sure we subtract the mount point's prefix */
1662 char *prefix = mp->mount_prefix;
1663 char *buf;
1664 ssize_t len, rv;
1665
1666 /* we want to make sure only absolute paths to cgroups are passed to us */
1667 if (path[0] != '/') {
1668 errno = EINVAL;
1669 return NULL;
1670 }
b98f7d6e 1671
33ad9f1a
CS
1672 if (prefix && !strcmp(prefix, "/"))
1673 prefix = NULL;
b98f7d6e 1674
33ad9f1a
CS
1675 /* prefix doesn't match */
1676 if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
1677 errno = EINVAL;
1678 return NULL;
1679 }
1680 /* if prefix is /foo and path is /foobar */
1681 if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
1682 errno = EINVAL;
1683 return NULL;
1684 }
b98f7d6e 1685
33ad9f1a
CS
1686 /* remove prefix from path */
1687 path += prefix ? strlen(prefix) : 0;
b98f7d6e 1688
33ad9f1a
CS
1689 len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
1690 buf = calloc(len + 1, 1);
50266dc6
DE
1691 if (!buf)
1692 return NULL;
33ad9f1a 1693 rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
8900b9eb 1694 if (rv > len) {
33ad9f1a
CS
1695 free(buf);
1696 errno = ENOMEM;
8900b9eb 1697 return NULL;
8b92dc3a 1698 }
576f946d 1699
33ad9f1a 1700 return buf;
e0f888d9 1701}
283678ed 1702
33ad9f1a 1703struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
283678ed 1704{
33ad9f1a
CS
1705 struct cgroup_process_info *info_ptr;
1706 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1707 struct cgroup_hierarchy *h = info_ptr->hierarchy;
1708 if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
1709 return info_ptr;
b98f7d6e 1710 }
33ad9f1a
CS
1711 errno = ENOENT;
1712 return NULL;
1713}
283678ed 1714
33ad9f1a
CS
/*
 * Read the file "<cgroup_path>/<sub_filename>" into value.
 *
 * Returns the result of lxc_read_from_file() (called with value and len),
 * or -1 if the file name could not be built. errno of the read is
 * preserved.
 */
int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len)
{
	const char *pieces[3];
	char *full_name;
	int rc, saved_errno;

	pieces[0] = cgroup_path;
	pieces[1] = sub_filename;
	pieces[2] = NULL;

	full_name = lxc_string_join("/", pieces, false);
	if (full_name == NULL)
		return -1;

	rc = lxc_read_from_file(full_name, value, len);
	saved_errno = errno;
	free(full_name);
	errno = saved_errno;
	return rc;
}
b113383b 1735
/*
 * Write value (without its terminating NUL) to the file
 * "<cgroup_path>/<sub_filename>".
 *
 * Returns the result of lxc_write_to_file(), or -1 if the file name could
 * not be built. errno of the write is preserved.
 */
int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value)
{
	const char *pieces[3];
	char *full_name;
	int rc, saved_errno;

	pieces[0] = cgroup_path;
	pieces[1] = sub_filename;
	pieces[2] = NULL;

	full_name = lxc_string_join("/", pieces, false);
	if (full_name == NULL)
		return -1;

	rc = lxc_write_to_file(full_name, value, strlen(value), false);
	saved_errno = errno;
	free(full_name);
	errno = saved_errno;
	return rc;
}
1756
33ad9f1a 1757int do_setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices)
b98f7d6e
SH
1758{
1759 struct lxc_list *iterator;
1760 struct lxc_cgroup *cg;
1761 int ret = -1;
1762
33ad9f1a 1763 if (lxc_list_empty(cgroup_settings))
b98f7d6e
SH
1764 return 0;
1765
33ad9f1a 1766 lxc_list_for_each(iterator, cgroup_settings) {
b98f7d6e
SH
1767 cg = iterator->elem;
1768
33ad9f1a 1769 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
b98f7d6e 1770 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
33ad9f1a 1771 cgroup_devices_has_allow_or_deny(h, cg->value, false))
b98f7d6e
SH
1772 continue;
1773 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
33ad9f1a 1774 cgroup_devices_has_allow_or_deny(h, cg->value, true))
b98f7d6e 1775 continue;
33ad9f1a 1776 if (lxc_cgroup_set_handler(cg->subsystem, cg->value, h)) {
b98f7d6e
SH
1777 ERROR("Error setting %s to %s for %s\n",
1778 cg->subsystem, cg->value, h->name);
1779 goto out;
1780 }
b113383b 1781 }
b98f7d6e
SH
1782
1783 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
b113383b
SH
1784 }
1785
b98f7d6e
SH
1786 ret = 0;
1787 INFO("cgroup has been setup");
1788out:
b113383b
SH
1789 return ret;
1790}
b98f7d6e 1791
33ad9f1a
CS
/*
 * Check the container's "devices.list" to find out whether a
 * devices.allow / devices.deny setting can be skipped.
 *
 * @h         : handler of the container
 * @v         : value of the setting
 * @for_allow : true for a devices.allow entry, false for devices.deny
 *
 * For deny, only the values "a" and "a *:* rwm" are considered; any other
 * value yields false. The result is then
 *  - for allow: true if devices.list contains "a *:* rwm" or a line equal
 *    to v, false otherwise;
 *  - for deny: false if devices.list contains "a *:* rwm", true otherwise.
 * If the path cannot be determined or the file cannot be opened, false is
 * returned.
 *
 * The absolute cgroup path obtained for building the file name is released
 * as soon as the joined path exists.
 */
bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow)
{
	char *base;
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	base = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
	if (!base)
		return false;
	parts[0] = base;
	path = lxc_string_join("/", parts, false);
	/* the joined string is independent of base, so base can go now */
	free(base);
	if (!path)
		return false;

	process_lock();
	devices_list = fopen_cloexec(path, "r");
	process_unlock();
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);
		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			/* everything is allowed already */
			ret = for_allow;
			break;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			break;
		}
	}

	process_lock();
	fclose(devices_list);
	process_unlock();
	free(line);
	free(path);
	return ret;
}
1849
/*
 * Count the lines of all "tasks" files in cgroup_path and, recursively,
 * in all of its sub-directories.
 *
 * Sub-directories or tasks files whose count fails (negative result) are
 * simply not added to the total.
 *
 * Returns the total, 0 if cgroup_path cannot be opened as a directory, or
 * -1 if an allocation or a stat() on a directory entry fails.
 */
int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *dir;
	struct dirent *entry_buf;
	struct dirent *entry;
	ssize_t name_max;
	int total = 0;
	int rc;

	/* see man readdir_r(3) */
	name_max = pathconf(cgroup_path, _PC_NAME_MAX);
	if (name_max <= 0)
		name_max = 255;
	entry_buf = malloc(offsetof(struct dirent, d_name) + name_max + 1);
	if (entry_buf == NULL)
		return -1;

	process_lock();
	dir = opendir(cgroup_path);
	process_unlock();
	if (dir == NULL) {
		free(entry_buf);
		return 0;
	}

	while (readdir_r(dir, entry_buf, &entry) == 0 && entry) {
		const char *pieces[3];
		char *child;
		struct stat sb;

		if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
			continue;

		pieces[0] = cgroup_path;
		pieces[1] = entry->d_name;
		pieces[2] = NULL;
		child = lxc_string_join("/", pieces, false);
		if (child == NULL) {
			total = -1;
			break;
		}

		rc = stat(child, &sb);
		if (rc < 0) {
			free(child);
			total = -1;
			break;
		}

		if (S_ISDIR(sb.st_mode)) {
			rc = cgroup_recursive_task_count(child);
			if (rc >= 0)
				total += rc;
		} else if (strcmp(entry->d_name, "tasks") == 0) {
			rc = count_lines(child);
			if (rc >= 0)
				total += rc;
		}
		free(child);
	}

	/* common tail for the regular end of the walk and for the error exits */
	process_lock();
	closedir(dir);
	process_unlock();
	free(entry_buf);

	return total;
}
1920
/*
 * Count the lines of a file.
 *
 * @fn: path of the file to read.
 *
 * Each successful getline() call counts as one line.
 *
 * Returns the number of lines read, or -1 if the file cannot be opened.
 */
int count_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int lines;
	FILE *fp;

	process_lock();
	fp = fopen_cloexec(fn, "r");
	process_unlock();
	if (fp == NULL)
		return -1;

	/* getline() (re)allocates buf as needed; only the count matters. */
	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
		;
	free(buf);

	process_lock();
	fclose(fp);
	process_unlock();

	return lines;
}
1943
1ea59ad2 1944int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path)
b98f7d6e 1945{
33ad9f1a 1946 int r, saved_errno = 0;
1ea59ad2
SH
1947
1948 /* If this is the memory cgroup, we want to enforce hierarchy.
1949 * But don't fail if for some reason we can't.
1950 */
1951 if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
1952 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
1953 if (cc_path) {
1954 r = lxc_write_to_file(cc_path, "1", 1, false);
1955 if (r < 0)
1956 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
1957 free(cc_path);
1958 }
1959 }
1960
33ad9f1a
CS
1961 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
1962 * the base cgroup, otherwise containers will start with an empty cpuset.mems
1963 * and cpuset.cpus and then
1964 */
1965 if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
1966 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
1967 if (!cc_path)
1968 return -1;
1969 r = lxc_write_to_file(cc_path, "1", 1, false);
1970 saved_errno = errno;
1971 free(cc_path);
1972 errno = saved_errno;
1973 return r < 0 ? -1 : 0;
1974 }
1975 return 0;
b98f7d6e 1976}