]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cgroup.c
cgmanager: chown cgroups to the container root
[mirror_lxc.git] / src / lxc / cgroup.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 #define _GNU_SOURCE
24 #include <stdio.h>
25 #undef _GNU_SOURCE
26 #include <stdlib.h>
27 #include <errno.h>
28 #include <unistd.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <fcntl.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <sys/param.h>
36 #include <sys/inotify.h>
37 #include <sys/mount.h>
38 #include <netinet/in.h>
39 #include <net/if.h>
40
41 #include "error.h"
42 #include "config.h"
43 #include "commands.h"
44 #include "list.h"
45 #include "conf.h"
46 #include "utils.h"
47 #include "bdev.h"
48 #include "log.h"
49 #include "cgroup.h"
50 #include "start.h"
51 #include "state.h"
52
53 #if IS_BIONIC
54 #include <../include/lxcmntent.h>
55 #else
56 #include <mntent.h>
57 #endif
58
59 lxc_log_define(lxc_cgroup, lxc);
60
61 static struct cgroup_process_info *lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str, struct cgroup_meta_data *meta);
62 static char **subsystems_from_mount_options(const char *mount_options, char **kernel_list);
63 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp);
64 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h);
65 static bool is_valid_cgroup(const char *name);
66 static int create_cgroup(struct cgroup_mount_point *mp, const char *path);
67 static int remove_cgroup(struct cgroup_mount_point *mp, const char *path, bool recurse);
68 static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp, const char *path, const char *suffix);
69 static struct cgroup_process_info *find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem);
70 static int do_cgroup_get(const char *cgroup_path, const char *sub_filename, char *value, size_t len);
71 static int do_cgroup_set(const char *cgroup_path, const char *sub_filename, const char *value);
72 static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h, char *v, bool for_allow);
73 static int do_setup_cgroup_limits(struct lxc_handler *h, struct lxc_list *cgroup_settings, bool do_devices);
74 static int cgroup_recursive_task_count(const char *cgroup_path);
75 static int count_lines(const char *fn);
76 static int handle_cgroup_settings(struct cgroup_mount_point *mp, char *cgroup_path);
77
78 static struct cgroup_ops cgfs_ops;
79 struct cgroup_ops *active_cg_ops = &cgfs_ops;
80 static void init_cg_ops(void);
81
82 #ifdef HAVE_CGMANAGER
83 /* this needs to be mutexed for api use */
84 extern bool cgmanager_initialized;
85 extern bool use_cgmanager;
86 extern bool lxc_init_cgmanager(void);
87 #else
88 static bool cgmanager_initialized = false;
89 static bool use_cgmanager = false;
90 static bool lxc_init_cgmanager(void) { return false; }
91 #endif
92
/*
 * Recursively remove the cgroup directory @dirname.
 *
 * Every sub-directory found below @dirname is removed first (depth first),
 * then @dirname itself is removed with rmdir(). Entries that are not
 * directories are left untouched. The walk keeps going after a failure so
 * that as much as possible gets cleaned up.
 *
 * Returns 0 on success and -1 if any step failed. On failure errno is set
 * to the error code of the first failure that was recorded.
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	int saved_errno = 0;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	/* readdir() instead of the deprecated readdir_r(): the DIR stream is
	 * private to this call, and no caller-sized struct dirent is needed,
	 * which could be too small for long file names.
	 */
	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			/* errno values are positive */
			if (!saved_errno)
				saved_errno = ENOMEM;
			continue;
		}

		/* lstat(): do not follow symbolic links */
		ret = lstat(pathname, &mystat);
		if (ret) {
			SYSERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			if (!saved_errno)
				saved_errno = errno;
			continue;
		}

		if (S_ISDIR(mystat.st_mode)) {
			/* the recursive call leaves its first error in errno */
			if (cgroup_rmdir(pathname) < 0) {
				if (!saved_errno)
					saved_errno = errno;
				failed = 1;
			}
		}
	}

	if (rmdir(dirname) < 0) {
		SYSERROR("%s: failed to delete %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		SYSERROR("%s: failed to close directory %s", __func__, dirname);
		if (!saved_errno)
			saved_errno = errno;
		failed = 1;
	}

	errno = saved_errno;
	return failed ? -1 : 0;
}
161
/*
 * Load the cgroup meta data, using the global "lxc.cgroup.use"
 * configuration value (split on ',') as subsystem whitelist when it is set.
 * Without that value lxc_cgroup_load_meta2() is called with a NULL list.
 *
 * Returns whatever lxc_cgroup_load_meta2() returns, or NULL if the
 * configuration lookup failed with errno set or the value could not be
 * split. errno as left by lxc_cgroup_load_meta2() survives the cleanup.
 */
struct cgroup_meta_data *lxc_cgroup_load_meta()
{
	struct cgroup_meta_data *meta;
	char **use_list = NULL;
	const char *use_value;
	int err;

	/* errno tells "value not set" apart from "lookup failed" */
	errno = 0;
	use_value = lxc_global_config_value("lxc.cgroup.use");
	if (use_value) {
		use_list = lxc_string_split_and_trim(use_value, ',');
		if (!use_list)
			return NULL;
	} else if (errno != 0) {
		return NULL;
	}

	meta = lxc_cgroup_load_meta2((const char **)use_list);

	/* keep errno intact while the temporary list is released */
	err = errno;
	lxc_free_array((void **)use_list, free);
	errno = err;

	return meta;
}
185
/* Step 1: determine all kernel subsystems */
/*
 * Read /proc/cgroups and append a copy of the first tab separated field
 * (the subsystem name) of every usable line to *kernel_subsystems.
 *
 * Lines starting with '#', empty lines, lines with fewer than two tabs and
 * lines whose second field is not entirely numeric are skipped.
 *
 * Returns true once the whole file has been read, false if the file could
 * not be opened or an allocation failed. On failure *kernel_subsystems may
 * already hold entries; releasing them is left to the caller.
 */
bool find_cgroup_subsystems(char ***kernel_subsystems)
{
	FILE *fp;
	char *buf = NULL;
	size_t buflen = 0;
	size_t count = 0;
	size_t capacity = 0;
	bool ok = false;

	fp = fopen_cloexec("/proc/cgroups", "r");
	if (!fp)
		return false;

	for (;;) {
		char *name_end;
		char *number;
		char *number_end;
		char *parse_end = NULL;

		if (getline(&buf, &buflen, fp) == -1) {
			/* end of input: everything was processed */
			ok = true;
			break;
		}

		if (buf[0] == '#' || buf[0] == '\0')
			continue;

		/* terminate the subsystem name at the first tab */
		name_end = strchr(buf, '\t');
		if (!name_end)
			continue;
		*name_end = '\0';
		number = name_end + 1;

		/* terminate the hierarchy number at the second tab */
		number_end = strchr(number, '\t');
		if (!number_end)
			continue;
		*number_end = '\0';

		/* the value itself is not needed, it only has to parse fully */
		(void)strtoul(number, &parse_end, 10);
		if (!parse_end || *parse_end)
			continue;

		if (lxc_grow_array((void ***)kernel_subsystems, &capacity, count + 1, 12) < 0)
			break;

		(*kernel_subsystems)[count] = strdup(buf);
		if (!(*kernel_subsystems)[count])
			break;
		count++;
	}

	fclose(fp);
	free(buf);
	return ok;
}
241
242 /* Step 2: determine all hierarchies (by reading /proc/self/cgroup),
243 * since mount points don't specify hierarchy number and
244 * /proc/cgroups does not contain named hierarchies
245 */
246 static bool find_cgroup_hierarchies(struct cgroup_meta_data *meta_data,
247 bool all_kernel_subsystems, bool all_named_subsystems,
248 const char **subsystem_whitelist)
249 {
250 FILE *proc_self_cgroup;
251 char *line = NULL;
252 size_t sz = 0;
253 int r;
254 bool bret = false;
255 size_t hierarchy_capacity = 0;
256
257 proc_self_cgroup = fopen_cloexec("/proc/self/cgroup", "r");
258 /* if for some reason (because of setns() and pid namespace for example),
259 * /proc/self is not valid, we try /proc/1/cgroup... */
260 if (!proc_self_cgroup)
261 proc_self_cgroup = fopen_cloexec("/proc/1/cgroup", "r");
262 if (!proc_self_cgroup)
263 return false;
264
265 while (getline(&line, &sz, proc_self_cgroup) != -1) {
266 /* file format: hierarchy:subsystems:group,
267 * we only extract hierarchy and subsystems
268 * here */
269 char *colon1;
270 char *colon2;
271 int hierarchy_number;
272 struct cgroup_hierarchy *h = NULL;
273 char **p;
274
275 if (!line[0])
276 continue;
277
278 colon1 = strchr(line, ':');
279 if (!colon1)
280 continue;
281 *colon1++ = '\0';
282 colon2 = strchr(colon1, ':');
283 if (!colon2)
284 continue;
285 *colon2 = '\0';
286
287 colon2 = NULL;
288 hierarchy_number = strtoul(line, &colon2, 10);
289 if (!colon2 || *colon2)
290 continue;
291
292 if (hierarchy_number > meta_data->maximum_hierarchy) {
293 /* lxc_grow_array will never shrink, so even if we find a lower
294 * hierarchy number here, the array will never be smaller
295 */
296 r = lxc_grow_array((void ***)&meta_data->hierarchies, &hierarchy_capacity, hierarchy_number + 1, 12);
297 if (r < 0)
298 goto out;
299
300 meta_data->maximum_hierarchy = hierarchy_number;
301 }
302
303 /* this shouldn't happen, we had this already */
304 if (meta_data->hierarchies[hierarchy_number])
305 goto out;
306
307 h = calloc(1, sizeof(struct cgroup_hierarchy));
308 if (!h)
309 goto out;
310
311 meta_data->hierarchies[hierarchy_number] = h;
312
313 h->index = hierarchy_number;
314 h->subsystems = lxc_string_split_and_trim(colon1, ',');
315 if (!h->subsystems)
316 goto out;
317 /* see if this hierarchy should be considered */
318 if (!all_kernel_subsystems || !all_named_subsystems) {
319 for (p = h->subsystems; *p; p++) {
320 if (!strncmp(*p, "name=", 5)) {
321 if (all_named_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
322 h->used = true;
323 break;
324 }
325 } else {
326 if (all_kernel_subsystems || (subsystem_whitelist && lxc_string_in_array(*p, subsystem_whitelist))) {
327 h->used = true;
328 break;
329 }
330 }
331 }
332 } else {
333 /* we want all hierarchy anyway */
334 h->used = true;
335 }
336 }
337 bret = true;
338
339 out:
340 fclose(proc_self_cgroup);
341 free(line);
342 return bret;
343 }
344
345 /* Step 3: determine all mount points of each hierarchy */
346 static bool find_hierarchy_mountpts( struct cgroup_meta_data *meta_data, char **kernel_subsystems)
347 {
348 bool bret = false;
349 FILE *proc_self_mountinfo;
350 char *line = NULL;
351 size_t sz = 0;
352 char **tokens = NULL;
353 size_t mount_point_count = 0;
354 size_t mount_point_capacity = 0;
355 size_t token_capacity = 0;
356 int r;
357
358 proc_self_mountinfo = fopen_cloexec("/proc/self/mountinfo", "r");
359 /* if for some reason (because of setns() and pid namespace for example),
360 * /proc/self is not valid, we try /proc/1/cgroup... */
361 if (!proc_self_mountinfo)
362 proc_self_mountinfo = fopen_cloexec("/proc/1/mountinfo", "r");
363 if (!proc_self_mountinfo)
364 return false;
365
366 while (getline(&line, &sz, proc_self_mountinfo) != -1) {
367 char *token, *line_tok, *saveptr = NULL;
368 size_t i, j, k;
369 struct cgroup_mount_point *mount_point;
370 struct cgroup_hierarchy *h;
371 char **subsystems;
372
373 if (line[0] && line[strlen(line) - 1] == '\n')
374 line[strlen(line) - 1] = '\0';
375
376 for (i = 0, line_tok = line; (token = strtok_r(line_tok, " ", &saveptr)); line_tok = NULL) {
377 r = lxc_grow_array((void ***)&tokens, &token_capacity, i + 1, 64);
378 if (r < 0)
379 goto out;
380 tokens[i++] = token;
381 }
382
383 /* layout of /proc/self/mountinfo:
384 * 0: id
385 * 1: parent id
386 * 2: device major:minor
387 * 3: mount prefix
388 * 4: mount point
389 * 5: per-mount options
390 * [optional X]: additional data
391 * X+7: "-"
392 * X+8: type
393 * X+9: source
394 * X+10: per-superblock options
395 */
396 for (j = 6; j < i && tokens[j]; j++)
397 if (!strcmp(tokens[j], "-"))
398 break;
399
400 /* could not find separator */
401 if (j >= i || !tokens[j])
402 continue;
403 /* there should be exactly three fields after
404 * the separator
405 */
406 if (i != j + 4)
407 continue;
408
409 /* not a cgroup filesystem */
410 if (strcmp(tokens[j + 1], "cgroup") != 0)
411 continue;
412
413 subsystems = subsystems_from_mount_options(tokens[j + 3], kernel_subsystems);
414 if (!subsystems)
415 goto out;
416
417 h = NULL;
418 for (k = 1; k <= meta_data->maximum_hierarchy; k++) {
419 if (meta_data->hierarchies[k] &&
420 meta_data->hierarchies[k]->subsystems[0] &&
421 lxc_string_in_array(meta_data->hierarchies[k]->subsystems[0], (const char **)subsystems)) {
422 /* TODO: we could also check if the lists really match completely,
423 * just to have an additional sanity check */
424 h = meta_data->hierarchies[k];
425 break;
426 }
427 }
428 lxc_free_array((void **)subsystems, free);
429
430 r = lxc_grow_array((void ***)&meta_data->mount_points, &mount_point_capacity, mount_point_count + 1, 12);
431 if (r < 0)
432 goto out;
433
434 /* create mount point object */
435 mount_point = calloc(1, sizeof(*mount_point));
436 if (!mount_point)
437 goto out;
438
439 meta_data->mount_points[mount_point_count++] = mount_point;
440
441 mount_point->hierarchy = h;
442 mount_point->mount_point = strdup(tokens[4]);
443 mount_point->mount_prefix = strdup(tokens[3]);
444 if (!mount_point->mount_point || !mount_point->mount_prefix)
445 goto out;
446 mount_point->read_only = !lxc_string_in_list("rw", tokens[5], ',');
447
448 if (!strcmp(mount_point->mount_prefix, "/")) {
449 if (mount_point->read_only) {
450 if (!h->ro_absolute_mount_point)
451 h->ro_absolute_mount_point = mount_point;
452 } else {
453 if (!h->rw_absolute_mount_point)
454 h->rw_absolute_mount_point = mount_point;
455 }
456 }
457
458 k = lxc_array_len((void **)h->all_mount_points);
459 r = lxc_grow_array((void ***)&h->all_mount_points, &h->all_mount_point_capacity, k + 1, 4);
460 if (r < 0)
461 goto out;
462 h->all_mount_points[k] = mount_point;
463 }
464 bret = true;
465
466 out:
467 fclose(proc_self_mountinfo);
468 free(tokens);
469 free(line);
470 return bret;
471 }
472
473 struct cgroup_meta_data *lxc_cgroup_load_meta2(const char **subsystem_whitelist)
474 {
475 bool all_kernel_subsystems = true;
476 bool all_named_subsystems = false;
477 struct cgroup_meta_data *meta_data = NULL;
478 char **kernel_subsystems = NULL;
479 int saved_errno = 0;
480
481 /* if the subsystem whitelist is not specified, include all
482 * hierarchies that contain kernel subsystems by default but
483 * no hierarchies that only contain named subsystems
484 *
485 * if it is specified, the specifier @all will select all
486 * hierarchies, @kernel will select all hierarchies with
487 * kernel subsystems and @named will select all named
488 * hierarchies
489 */
490 all_kernel_subsystems = subsystem_whitelist ?
491 (lxc_string_in_array("@kernel", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
492 true;
493 all_named_subsystems = subsystem_whitelist ?
494 (lxc_string_in_array("@named", subsystem_whitelist) || lxc_string_in_array("@all", subsystem_whitelist)) :
495 false;
496
497 meta_data = calloc(1, sizeof(struct cgroup_meta_data));
498 if (!meta_data)
499 return NULL;
500 meta_data->ref = 1;
501
502 if (!find_cgroup_subsystems(&kernel_subsystems))
503 goto out_error;
504
505 if (!find_cgroup_hierarchies(meta_data, all_kernel_subsystems,
506 all_named_subsystems, subsystem_whitelist))
507 goto out_error;
508
509 if (!find_hierarchy_mountpts(meta_data, kernel_subsystems))
510 goto out_error;
511
512 /* oops, we couldn't find anything */
513 if (!meta_data->hierarchies || !meta_data->mount_points) {
514 errno = EINVAL;
515 goto out_error;
516 }
517
518 lxc_free_array((void **)kernel_subsystems, free);
519 return meta_data;
520
521 out_error:
522 saved_errno = errno;
523 lxc_free_array((void **)kernel_subsystems, free);
524 lxc_cgroup_put_meta(meta_data);
525 errno = saved_errno;
526 return NULL;
527 }
528
529 struct cgroup_meta_data *lxc_cgroup_get_meta(struct cgroup_meta_data *meta_data)
530 {
531 meta_data->ref++;
532 return meta_data;
533 }
534
535 struct cgroup_meta_data *lxc_cgroup_put_meta(struct cgroup_meta_data *meta_data)
536 {
537 size_t i;
538 if (!meta_data)
539 return NULL;
540 if (--meta_data->ref > 0)
541 return meta_data;
542 lxc_free_array((void **)meta_data->mount_points, (lxc_free_fn)lxc_cgroup_mount_point_free);
543 if (meta_data->hierarchies) {
544 for (i = 0; i <= meta_data->maximum_hierarchy; i++)
545 lxc_cgroup_hierarchy_free(meta_data->hierarchies[i]);
546 }
547 free(meta_data->hierarchies);
548 free(meta_data);
549 return NULL;
550 }
551
552 struct cgroup_hierarchy *lxc_cgroup_find_hierarchy(struct cgroup_meta_data *meta_data, const char *subsystem)
553 {
554 size_t i;
555 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
556 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
557 if (h && lxc_string_in_array(subsystem, (const char **)h->subsystems))
558 return h;
559 }
560 return NULL;
561 }
562
563 struct cgroup_mount_point *lxc_cgroup_find_mount_point(struct cgroup_hierarchy *hierarchy, const char *group, bool should_be_writable)
564 {
565 struct cgroup_mount_point **mps;
566 struct cgroup_mount_point *current_result = NULL;
567 ssize_t quality = -1;
568
569 /* trivial case */
570 if (hierarchy->rw_absolute_mount_point)
571 return hierarchy->rw_absolute_mount_point;
572 if (!should_be_writable && hierarchy->ro_absolute_mount_point)
573 return hierarchy->ro_absolute_mount_point;
574
575 for (mps = hierarchy->all_mount_points; mps && *mps; mps++) {
576 struct cgroup_mount_point *mp = *mps;
577 size_t prefix_len = mp->mount_prefix ? strlen(mp->mount_prefix) : 0;
578
579 if (prefix_len == 1 && mp->mount_prefix[0] == '/')
580 prefix_len = 0;
581
582 if (should_be_writable && mp->read_only)
583 continue;
584
585 if (!prefix_len ||
586 (strncmp(group, mp->mount_prefix, prefix_len) == 0 &&
587 (group[prefix_len] == '\0' || group[prefix_len] == '/'))) {
588 /* search for the best quality match, i.e. the match with the
589 * shortest prefix where this group is still contained
590 */
591 if (quality == -1 || prefix_len < quality) {
592 current_result = mp;
593 quality = prefix_len;
594 }
595 }
596 }
597
598 if (!current_result)
599 errno = ENOENT;
600 return current_result;
601 }
602
/*
 * Resolve group within the hierarchy that carries subsystem to an absolute
 * path, using freshly loaded meta data that is released again before
 * returning. suffix is passed through to cgroup_to_absolute_path().
 *
 * Returns the result of cgroup_to_absolute_path(), or NULL if the meta data
 * could not be loaded or no hierarchy, mount point or path was found. On
 * these later failures errno is kept as it was when the lookup failed.
 */
char *lxc_cgroup_find_abs_path(const char *subsystem, const char *group, bool should_be_writable, const char *suffix)
{
	struct cgroup_meta_data *md;
	struct cgroup_hierarchy *hier;
	struct cgroup_mount_point *mnt;
	char *abs_path = NULL;
	int err;

	md = lxc_cgroup_load_meta();
	if (!md)
		return NULL;

	hier = lxc_cgroup_find_hierarchy(md, subsystem);
	if (hier) {
		mnt = lxc_cgroup_find_mount_point(hier, group, should_be_writable);
		if (mnt)
			abs_path = cgroup_to_absolute_path(mnt, group, suffix);
	}

	if (abs_path) {
		lxc_cgroup_put_meta(md);
		return abs_path;
	}

	/* releasing the meta data must not clobber the reported error */
	err = errno;
	lxc_cgroup_put_meta(md);
	errno = err;
	return NULL;
}
636
/*
 * Get the cgroup membership of process pid by handing the path
 * "/proc/<pid>/cgroup" to lxc_cgroup_process_info_getx().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get(pid_t pid, struct cgroup_meta_data *meta)
{
	char proc_path[32];

	snprintf(proc_path, sizeof(proc_path), "/proc/%lu/cgroup", (unsigned long)pid);

	return lxc_cgroup_process_info_getx(proc_path, meta);
}
643
/* Get the cgroup membership of the process with pid 1. */
struct cgroup_process_info *lxc_cgroup_process_info_get_init(struct cgroup_meta_data *meta)
{
	const pid_t init_pid = 1;

	return lxc_cgroup_process_info_get(init_pid, meta);
}
648
/*
 * Get the cgroup membership of the calling process. /proc/self/cgroup is
 * tried first; if that yields nothing, the lookup is repeated through the
 * numeric pid returned by getpid().
 */
struct cgroup_process_info *lxc_cgroup_process_info_get_self(struct cgroup_meta_data *meta)
{
	struct cgroup_process_info *info = lxc_cgroup_process_info_getx("/proc/self/cgroup", meta);

	if (info)
		return info;

	return lxc_cgroup_process_info_get(getpid(), meta);
}
657
/*
 * If a controller has ns cgroup mounted, then in that cgroup the handler->pid
 * is already in a new cgroup named after the pid. 'mountpath' is the mount
 * point of the hierarchy and 'oldname' the cgroup (relative to it) that
 * contains the pid-named cgroup. Say the mount point is /sys/fs/cgroup,
 * oldname is lxc, the pid is 2975 and the container name is c1. We want to
 * rename the cgroup directory /sys/fs/cgroup/lxc/2975 to
 * /sys/fs/cgroup/lxc/c1, and return the string lxc/c1.
 *
 * An already existing target is removed with rmdir() first; if that fails
 * the rename is not attempted.
 *
 * Returns the new cgroup name ("<oldname>/<name>") in malloc()ed memory that
 * the caller has to free, or NULL on failure.
 */
static char *cgroup_rename_nsgroup(const char *mountpath, const char *oldname, pid_t pid, const char *name)
{
	char *fulloldpath;
	char *newname, *fullnewpath;
	int len, newlen, ret;

	/*
	 * if cgroup is mounted at /cgroup and task is in cgroup /ab/, pid 2375 and
	 * name is c1,
	 * fulloldpath = /cgroup/ab/2375
	 * fullnewpath = /cgroup/ab/c1
	 * newname = /ab/c1
	 */

	/* 22: two '/', the NUL byte and room for the decimal pid */
	len = strlen(oldname) + strlen(mountpath) + 22;
	fulloldpath = alloca(len);
	/* %lu to match the unsigned long the pid is cast to */
	ret = snprintf(fulloldpath, len, "%s/%s/%lu", mountpath, oldname, (unsigned long)pid);
	if (ret < 0 || ret >= len)
		return NULL;

	len = strlen(oldname) + strlen(name) + 2;
	newname = malloc(len);
	if (!newname) {
		SYSERROR("Out of memory");
		return NULL;
	}
	ret = snprintf(newname, len, "%s/%s", oldname, name);
	if (ret < 0 || ret >= len) {
		free(newname);
		return NULL;
	}

	newlen = strlen(mountpath) + len + 2;
	fullnewpath = alloca(newlen);
	ret = snprintf(fullnewpath, newlen, "%s/%s", mountpath, newname);
	if (ret < 0 || ret >= newlen) {
		free(newname);
		return NULL;
	}

	/* a leftover cgroup of that name may only be replaced if it is empty */
	if (access(fullnewpath, F_OK) == 0) {
		if (rmdir(fullnewpath) != 0) {
			SYSERROR("container cgroup %s already exists.", fullnewpath);
			free(newname);
			return NULL;
		}
	}
	if (rename(fulloldpath, fullnewpath)) {
		SYSERROR("failed to rename cgroup %s->%s", fulloldpath, fullnewpath);
		free(newname);
		return NULL;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return newname;
}
725
726 /* create a new cgroup */
727 struct cgroup_process_info *lxc_cgroupfs_create(const char *name, const char *path_pattern, struct cgroup_meta_data *meta_data, const char *sub_pattern)
728 {
729 char **cgroup_path_components = NULL;
730 char **p = NULL;
731 char *path_so_far = NULL;
732 char **new_cgroup_paths = NULL;
733 char **new_cgroup_paths_sub = NULL;
734 struct cgroup_mount_point *mp;
735 struct cgroup_hierarchy *h;
736 struct cgroup_process_info *base_info = NULL;
737 struct cgroup_process_info *info_ptr;
738 int saved_errno;
739 int r;
740 unsigned suffix = 0;
741 bool had_sub_pattern = false;
742 size_t i;
743
744 if (!is_valid_cgroup(name)) {
745 ERROR("Invalid cgroup name: '%s'", name);
746 errno = EINVAL;
747 return NULL;
748 }
749
750 if (!strstr(path_pattern, "%n")) {
751 ERROR("Invalid cgroup path pattern: '%s'; contains no %%n for specifying container name", path_pattern);
752 errno = EINVAL;
753 return NULL;
754 }
755
756 /* we will modify the result of this operation directly,
757 * so we don't have to copy the data structure
758 */
759 base_info = (path_pattern[0] == '/') ?
760 lxc_cgroup_process_info_get_init(meta_data) :
761 lxc_cgroup_process_info_get_self(meta_data);
762 if (!base_info)
763 return NULL;
764
765 new_cgroup_paths = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
766 if (!new_cgroup_paths)
767 goto out_initial_error;
768
769 new_cgroup_paths_sub = calloc(meta_data->maximum_hierarchy + 1, sizeof(char *));
770 if (!new_cgroup_paths_sub)
771 goto out_initial_error;
772
773 /* find mount points we can use */
774 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
775 h = info_ptr->hierarchy;
776 mp = lxc_cgroup_find_mount_point(h, info_ptr->cgroup_path, true);
777 if (!mp) {
778 ERROR("Could not find writable mount point for cgroup hierarchy %d while trying to create cgroup.", h->index);
779 goto out_initial_error;
780 }
781 info_ptr->designated_mount_point = mp;
782
783 if (lxc_string_in_array("ns", (const char **)h->subsystems))
784 continue;
785 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
786 ERROR("Could not set clone_children to 1 for cpuset hierarchy in parent cgroup.");
787 goto out_initial_error;
788 }
789 }
790
791 /* normalize the path */
792 cgroup_path_components = lxc_normalize_path(path_pattern);
793 if (!cgroup_path_components)
794 goto out_initial_error;
795
796 /* go through the path components to see if we can create them */
797 for (p = cgroup_path_components; *p || (sub_pattern && !had_sub_pattern); p++) {
798 /* we only want to create the same component with -1, -2, etc.
799 * if the component contains the container name itself, otherwise
800 * it's not an error if it already exists
801 */
802 char *p_eff = *p ? *p : (char *)sub_pattern;
803 bool contains_name = strstr(p_eff, "%n");
804 char *current_component = NULL;
805 char *current_subpath = NULL;
806 char *current_entire_path = NULL;
807 char *parts[3];
808 size_t j = 0;
809 i = 0;
810
811 /* if we are processing the subpattern, we want to make sure
812 * loop is ended the next time around
813 */
814 if (!*p) {
815 had_sub_pattern = true;
816 p--;
817 }
818
819 goto find_name_on_this_level;
820
821 cleanup_name_on_this_level:
822 /* This is reached if we found a name clash.
823 * In that case, remove the cgroup from all previous hierarchies
824 */
825 for (j = 0, info_ptr = base_info; j < i && info_ptr; info_ptr = info_ptr->next, j++) {
826 r = remove_cgroup(info_ptr->designated_mount_point, info_ptr->created_paths[info_ptr->created_paths_count - 1], false);
827 if (r < 0)
828 WARN("could not clean up cgroup we created when trying to create container");
829 free(info_ptr->created_paths[info_ptr->created_paths_count - 1]);
830 info_ptr->created_paths[--info_ptr->created_paths_count] = NULL;
831 }
832 if (current_component != current_subpath)
833 free(current_subpath);
834 if (current_component != p_eff)
835 free(current_component);
836 current_component = current_subpath = NULL;
837 /* try again with another suffix */
838 ++suffix;
839
840 find_name_on_this_level:
841 /* determine name of the path component we should create */
842 if (contains_name && suffix > 0) {
843 char *buf = calloc(strlen(name) + 32, 1);
844 if (!buf)
845 goto out_initial_error;
846 snprintf(buf, strlen(name) + 32, "%s-%u", name, suffix);
847 current_component = lxc_string_replace("%n", buf, p_eff);
848 free(buf);
849 } else {
850 current_component = contains_name ? lxc_string_replace("%n", name, p_eff) : p_eff;
851 }
852 parts[0] = path_so_far;
853 parts[1] = current_component;
854 parts[2] = NULL;
855 current_subpath = path_so_far ? lxc_string_join("/", (const char **)parts, false) : current_component;
856
857 /* Now go through each hierarchy and try to create the
858 * corresponding cgroup
859 */
860 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
861 char *parts2[3];
862
863 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
864 continue;
865 current_entire_path = NULL;
866
867 parts2[0] = !strcmp(info_ptr->cgroup_path, "/") ? "" : info_ptr->cgroup_path;
868 parts2[1] = current_subpath;
869 parts2[2] = NULL;
870 current_entire_path = lxc_string_join("/", (const char **)parts2, false);
871
872 if (!*p) {
873 /* we are processing the subpath, so only update that one */
874 free(new_cgroup_paths_sub[i]);
875 new_cgroup_paths_sub[i] = strdup(current_entire_path);
876 if (!new_cgroup_paths_sub[i])
877 goto cleanup_from_error;
878 } else {
879 /* remember which path was used on this controller */
880 free(new_cgroup_paths[i]);
881 new_cgroup_paths[i] = strdup(current_entire_path);
882 if (!new_cgroup_paths[i])
883 goto cleanup_from_error;
884 }
885
886 r = create_cgroup(info_ptr->designated_mount_point, current_entire_path);
887 if (r < 0 && errno == EEXIST && contains_name) {
888 /* name clash => try new name with new suffix */
889 free(current_entire_path);
890 current_entire_path = NULL;
891 goto cleanup_name_on_this_level;
892 } else if (r < 0 && errno != EEXIST) {
893 SYSERROR("Could not create cgroup %s", current_entire_path);
894 goto cleanup_from_error;
895 } else if (r == 0) {
896 /* successfully created */
897 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
898 if (r < 0)
899 goto cleanup_from_error;
900 info_ptr->created_paths[info_ptr->created_paths_count++] = current_entire_path;
901 } else {
902 /* if we didn't create the cgroup, then we have to make sure that
903 * further cgroups will be created properly
904 */
905 if (handle_cgroup_settings(mp, info_ptr->cgroup_path) < 0) {
906 ERROR("Could not set clone_children to 1 for cpuset hierarchy in pre-existing cgroup.");
907 goto cleanup_from_error;
908 }
909
910 /* already existed but path component of pattern didn't contain '%n',
911 * so this is not an error; but then we don't need current_entire_path
912 * anymore...
913 */
914 free(current_entire_path);
915 current_entire_path = NULL;
916 }
917 }
918
919 /* save path so far */
920 free(path_so_far);
921 path_so_far = strdup(current_subpath);
922 if (!path_so_far)
923 goto cleanup_from_error;
924
925 /* cleanup */
926 if (current_component != current_subpath)
927 free(current_subpath);
928 if (current_component != p_eff)
929 free(current_component);
930 current_component = current_subpath = NULL;
931 continue;
932
933 cleanup_from_error:
934 /* called if an error occured in the loop, so we
935 * do some additional cleanup here
936 */
937 saved_errno = errno;
938 if (current_component != current_subpath)
939 free(current_subpath);
940 if (current_component != p_eff)
941 free(current_component);
942 free(current_entire_path);
943 errno = saved_errno;
944 goto out_initial_error;
945 }
946
947 /* we're done, now update the paths */
948 for (i = 0, info_ptr = base_info; info_ptr; info_ptr = info_ptr->next, i++) {
949 /* ignore legacy 'ns' subsystem here, lxc_cgroup_create_legacy
950 * will take care of it
951 * Since we do a continue in above loop, new_cgroup_paths[i] is
952 * unset anyway, as is new_cgroup_paths_sub[i]
953 */
954 if (lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
955 continue;
956 free(info_ptr->cgroup_path);
957 info_ptr->cgroup_path = new_cgroup_paths[i];
958 info_ptr->cgroup_path_sub = new_cgroup_paths_sub[i];
959 }
960 /* don't use lxc_free_array since we used the array members
961 * to store them in our result...
962 */
963 free(new_cgroup_paths);
964 free(new_cgroup_paths_sub);
965 free(path_so_far);
966 lxc_free_array((void **)cgroup_path_components, free);
967 return base_info;
968
969 out_initial_error:
970 saved_errno = errno;
971 free(path_so_far);
972 lxc_cgroup_process_info_free_and_remove(base_info);
973 lxc_free_array((void **)new_cgroup_paths, free);
974 lxc_free_array((void **)new_cgroup_paths_sub, free);
975 lxc_free_array((void **)cgroup_path_components, free);
976 errno = saved_errno;
977 return NULL;
978 }
979
980 int lxc_cgroup_create_legacy(struct cgroup_process_info *base_info, const char *name, pid_t pid)
981 {
982 struct cgroup_process_info *info_ptr;
983 int r;
984
985 for (info_ptr = base_info; info_ptr; info_ptr = info_ptr->next) {
986 if (!lxc_string_in_array("ns", (const char **)info_ptr->hierarchy->subsystems))
987 continue;
988 /*
989 * For any path which has ns cgroup mounted, handler->pid is already
990 * moved into a container called '%d % (handler->pid)'. Rename it to
991 * the cgroup name and record that.
992 */
993 char *tmp = cgroup_rename_nsgroup((const char *)info_ptr->designated_mount_point->mount_point,
994 info_ptr->cgroup_path, pid, name);
995 if (!tmp)
996 return -1;
997 free(info_ptr->cgroup_path);
998 info_ptr->cgroup_path = tmp;
999 r = lxc_grow_array((void ***)&info_ptr->created_paths, &info_ptr->created_paths_capacity, info_ptr->created_paths_count + 1, 8);
1000 if (r < 0)
1001 return -1;
1002 tmp = strdup(tmp);
1003 if (!tmp)
1004 return -1;
1005 info_ptr->created_paths[info_ptr->created_paths_count++] = tmp;
1006 }
1007 return 0;
1008 }
1009
1010 /* get the cgroup membership of a given container */
1011 struct cgroup_process_info *lxc_cgroup_get_container_info(const char *name, const char *lxcpath, struct cgroup_meta_data *meta_data)
1012 {
1013 struct cgroup_process_info *result = NULL;
1014 int saved_errno = 0;
1015 size_t i;
1016 struct cgroup_process_info **cptr = &result;
1017 struct cgroup_process_info *entry = NULL;
1018 char *path = NULL;
1019
1020 for (i = 0; i <= meta_data->maximum_hierarchy; i++) {
1021 struct cgroup_hierarchy *h = meta_data->hierarchies[i];
1022 if (!h || !h->used)
1023 continue;
1024
1025 /* use the command interface to look for the cgroup */
1026 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->subsystems[0]);
1027 if (!path)
1028 goto out_error;
1029
1030 entry = calloc(1, sizeof(struct cgroup_process_info));
1031 if (!entry)
1032 goto out_error;
1033 entry->meta_ref = lxc_cgroup_get_meta(meta_data);
1034 entry->hierarchy = h;
1035 entry->cgroup_path = path;
1036 path = NULL;
1037
1038 /* it is not an error if we don't find anything here,
1039 * it is up to the caller to decide what to do in that
1040 * case */
1041 entry->designated_mount_point = lxc_cgroup_find_mount_point(h, entry->cgroup_path, true);
1042
1043 *cptr = entry;
1044 cptr = &entry->next;
1045 entry = NULL;
1046 }
1047
1048 return result;
1049 out_error:
1050 saved_errno = errno;
1051 free(path);
1052 lxc_cgroup_process_info_free(result);
1053 lxc_cgroup_process_info_free(entry);
1054 errno = saved_errno;
1055 return NULL;
1056 }
1057
1058 /* move a processs to the cgroups specified by the membership */
1059 int lxc_cgroupfs_enter(struct cgroup_process_info *info, pid_t pid, bool enter_sub)
1060 {
1061 char pid_buf[32];
1062 char *cgroup_tasks_fn;
1063 int r;
1064 struct cgroup_process_info *info_ptr;
1065
1066 snprintf(pid_buf, 32, "%lu", (unsigned long)pid);
1067 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1068 char *cgroup_path = (enter_sub && info_ptr->cgroup_path_sub) ?
1069 info_ptr->cgroup_path_sub :
1070 info_ptr->cgroup_path;
1071
1072 if (!info_ptr->designated_mount_point) {
1073 info_ptr->designated_mount_point = lxc_cgroup_find_mount_point(info_ptr->hierarchy, cgroup_path, true);
1074 if (!info_ptr->designated_mount_point) {
1075 SYSERROR("Could not add pid %lu to cgroup %s: internal error (couldn't find any writable mountpoint to cgroup filesystem)", (unsigned long)pid, cgroup_path);
1076 return -1;
1077 }
1078 }
1079
1080 cgroup_tasks_fn = cgroup_to_absolute_path(info_ptr->designated_mount_point, cgroup_path, "/tasks");
1081 if (!cgroup_tasks_fn) {
1082 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1083 return -1;
1084 }
1085
1086 r = lxc_write_to_file(cgroup_tasks_fn, pid_buf, strlen(pid_buf), false);
1087 free(cgroup_tasks_fn);
1088 if (r < 0) {
1089 SYSERROR("Could not add pid %lu to cgroup %s: internal error", (unsigned long)pid, cgroup_path);
1090 return -1;
1091 }
1092 }
1093
1094 return 0;
1095 }
1096
1097 /* free process membership information */
1098 void lxc_cgroup_process_info_free(struct cgroup_process_info *info)
1099 {
1100 struct cgroup_process_info *next;
1101 if (!info)
1102 return;
1103 next = info->next;
1104 lxc_cgroup_put_meta(info->meta_ref);
1105 free(info->cgroup_path);
1106 free(info->cgroup_path_sub);
1107 lxc_free_array((void **)info->created_paths, free);
1108 free(info);
1109 lxc_cgroup_process_info_free(next);
1110 }
1111
1112 /* free process membership information and remove cgroups that were created */
1113 void lxc_cgroup_process_info_free_and_remove(struct cgroup_process_info *info)
1114 {
1115 struct cgroup_process_info *next;
1116 char **pp;
1117 if (!info)
1118 return;
1119 next = info->next;
1120 {
1121 struct cgroup_mount_point *mp = info->designated_mount_point;
1122 if (!mp)
1123 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1124 if (mp)
1125 /* ignore return value here, perhaps we created the
1126 * '/lxc' cgroup in this container but another container
1127 * is still running (for example)
1128 */
1129 (void)remove_cgroup(mp, info->cgroup_path, true);
1130 }
1131 for (pp = info->created_paths; pp && *pp; pp++);
1132 for ((void)(pp && --pp); info->created_paths && pp >= info->created_paths; --pp) {
1133 free(*pp);
1134 }
1135 free(info->created_paths);
1136 lxc_cgroup_put_meta(info->meta_ref);
1137 free(info->cgroup_path);
1138 free(info->cgroup_path_sub);
1139 free(info);
1140 lxc_cgroup_process_info_free_and_remove(next);
1141 }
1142
1143 static char *lxc_cgroup_get_hierarchy_path_handler(const char *subsystem, struct lxc_handler *handler)
1144 {
1145 struct cgfs_data *d = handler->cgroup_info->data;
1146 struct cgroup_process_info *info = d->info;
1147 info = find_info_for_subsystem(info, subsystem);
1148 if (!info)
1149 return NULL;
1150 return info->cgroup_path;
1151 }
1152
/*
 * Ask the command interface of container @name in @lxcpath for its cgroup
 * path in the hierarchy of @subsystem. The result of the command call is
 * handed back unchanged; the caller in this file free()s it.
 */
char *lxc_cgroup_get_hierarchy_path(const char *subsystem, const char *name, const char *lxcpath)
{
	char *cgroup_path = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);

	return cgroup_path;
}
1157
1158 char *lxc_cgroup_get_hierarchy_abs_path_handler(const char *subsystem, struct lxc_handler *handler)
1159 {
1160 struct cgfs_data *d = handler->cgroup_info->data;
1161 struct cgroup_process_info *info = d->info;
1162 struct cgroup_mount_point *mp = NULL;
1163
1164 info = find_info_for_subsystem(info, subsystem);
1165 if (!info)
1166 return NULL;
1167 if (info->designated_mount_point) {
1168 mp = info->designated_mount_point;
1169 } else {
1170 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1171 if (!mp)
1172 return NULL;
1173 }
1174 return cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1175 }
1176
1177 char *lxc_cgroup_get_hierarchy_abs_path(const char *subsystem, const char *name, const char *lxcpath)
1178 {
1179 struct cgroup_meta_data *meta;
1180 struct cgroup_process_info *base_info, *info;
1181 struct cgroup_mount_point *mp;
1182 char *result = NULL;
1183
1184 meta = lxc_cgroup_load_meta();
1185 if (!meta)
1186 return NULL;
1187 base_info = lxc_cgroup_get_container_info(name, lxcpath, meta);
1188 if (!base_info)
1189 goto out1;
1190 info = find_info_for_subsystem(base_info, subsystem);
1191 if (!info)
1192 goto out2;
1193 if (info->designated_mount_point) {
1194 mp = info->designated_mount_point;
1195 } else {
1196 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1197 if (!mp)
1198 goto out3;
1199 }
1200 result = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1201 out3:
1202 out2:
1203 lxc_cgroup_process_info_free(base_info);
1204 out1:
1205 lxc_cgroup_put_meta(meta);
1206 return result;
1207 }
1208
/*
 * Write @value to the cgroup file @filename (e.g. "freezer.state") of the
 * container described by @handler.
 *
 * The subsystem is the part of @filename before the first '.'. Returns the
 * result of the write helper, or -1 if the cgroup directory could not be
 * determined.
 */
int lxc_cgroup_set_handler(const char *filename, const char *value, struct lxc_handler *handler)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *dir;
	int rc;

	/* private copy of the file name, cut at the first dot */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
1226
/*
 * Read the cgroup file @filename (e.g. "freezer.state") of the container
 * described by @handler into @value, passing @len on to the read helper.
 *
 * The subsystem is the part of @filename before the first '.'. Returns the
 * result of the read helper, or -1 if the cgroup directory could not be
 * determined.
 */
int lxc_cgroup_get_handler(const char *filename, char *value, size_t len, struct lxc_handler *handler)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *dir;
	int rc;

	/* private copy of the file name, cut at the first dot */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path_handler(controller, handler);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
1244
/*
 * Write @value to the cgroup file @filename of the container @name that
 * runs in @lxcpath.
 *
 * The subsystem is the part of @filename before the first '.'. Returns the
 * result of the write helper, or -1 if the cgroup directory could not be
 * determined.
 */
int lxc_cgroupfs_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *dir;
	int rc;

	/* private copy of the file name, cut at the first dot */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_set(dir, filename, value);
	free(dir);
	return rc;
}
1262
/*
 * Read the cgroup file @filename of the container @name that runs in
 * @lxcpath into @value, passing @len on to the read helper.
 *
 * The subsystem is the part of @filename before the first '.'. Returns the
 * result of the read helper, or -1 if the cgroup directory could not be
 * determined.
 */
int lxc_cgroupfs_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *dot;
	char *dir;
	int rc;

	/* private copy of the file name, cut at the first dot */
	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot)
		*dot = '\0';

	dir = lxc_cgroup_get_hierarchy_abs_path(controller, name, lxcpath);
	if (!dir)
		return -1;

	rc = do_cgroup_get(dir, filename, value, len);
	free(dir);
	return rc;
}
1280
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @filename  : the file of interest (e.g. "freezer.state") or
 *              the subsystem name (e.g. "freezer"), in which case
 *              the directory where the cgroup may be modified
 *              is returned
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines the cgroup path from
 * the lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *filename, const char *name,
			  const char *lxcpath)
{
	size_t name_len = strlen(filename);
	char *controller = alloca(name_len + 1);
	char *file_suffix = NULL;
	char *dot;
	char *cgroup;
	char *result;

	memcpy(controller, filename, name_len + 1);
	dot = strchr(controller, '.');
	if (dot) {
		/* a file was requested: keep the subsystem part and
		 * prepare "/<filename>" as suffix for the final path
		 */
		*dot = '\0';
		file_suffix = alloca(name_len + 2);
		file_suffix[0] = '/';
		memcpy(file_suffix + 1, filename, name_len + 1);
	}

	cgroup = lxc_cgroup_get_hierarchy_path(controller, name, lxcpath);
	if (!cgroup)
		return NULL;

	/* file_suffix stays NULL when only a subsystem name was given */
	result = lxc_cgroup_find_abs_path(controller, cgroup, true, file_suffix);
	free(cgroup);
	return result;
}
1320
1321 int lxc_setup_mount_cgroup(const char *root, struct lxc_cgroup_info *cgroup_info, int type)
1322 {
1323 size_t bufsz = strlen(root) + sizeof("/sys/fs/cgroup");
1324 char *path = NULL;
1325 char **parts = NULL;
1326 char *dirname = NULL;
1327 char *abs_path = NULL;
1328 char *abs_path2 = NULL;
1329 struct cgfs_data *cgfs_d;
1330 struct cgroup_process_info *info, *base_info;
1331 int r, saved_errno = 0;
1332
1333 init_cg_ops();
1334
1335 if (strcmp(active_cg_ops->name, "cgmanager") == 0) {
1336 // todo - offer to bind-mount /sys/fs/cgroup/cgmanager/
1337 return 0;
1338 }
1339
1340 cgfs_d = cgroup_info->data;
1341 base_info = cgfs_d->info;
1342
1343 if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
1344 ERROR("could not mount cgroups into container: invalid type specified internally");
1345 errno = EINVAL;
1346 return -1;
1347 }
1348
1349 path = calloc(1, bufsz);
1350 if (!path)
1351 return -1;
1352 snprintf(path, bufsz, "%s/sys/fs/cgroup", root);
1353 r = mount("cgroup_root", path, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME, "size=10240k,mode=755");
1354 if (r < 0) {
1355 SYSERROR("could not mount tmpfs to /sys/fs/cgroup in the container");
1356 return -1;
1357 }
1358
1359 /* now mount all the hierarchies we care about */
1360 for (info = base_info; info; info = info->next) {
1361 size_t subsystem_count, i;
1362 struct cgroup_mount_point *mp = info->designated_mount_point;
1363 if (!mp)
1364 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, true);
1365 if (!mp) {
1366 SYSERROR("could not find original mount point for cgroup hierarchy while trying to mount cgroup filesystem");
1367 goto out_error;
1368 }
1369
1370 subsystem_count = lxc_array_len((void **)info->hierarchy->subsystems);
1371 parts = calloc(subsystem_count + 1, sizeof(char *));
1372 if (!parts)
1373 goto out_error;
1374
1375 for (i = 0; i < subsystem_count; i++) {
1376 if (!strncmp(info->hierarchy->subsystems[i], "name=", 5))
1377 parts[i] = info->hierarchy->subsystems[i] + 5;
1378 else
1379 parts[i] = info->hierarchy->subsystems[i];
1380 }
1381 dirname = lxc_string_join(",", (const char **)parts, false);
1382 if (!dirname)
1383 goto out_error;
1384
1385 /* create subsystem directory */
1386 abs_path = lxc_append_paths(path, dirname);
1387 if (!abs_path)
1388 goto out_error;
1389 r = mkdir_p(abs_path, 0755);
1390 if (r < 0 && errno != EEXIST) {
1391 SYSERROR("could not create cgroup subsystem directory /sys/fs/cgroup/%s", dirname);
1392 goto out_error;
1393 }
1394
1395 abs_path2 = lxc_append_paths(abs_path, info->cgroup_path);
1396 if (!abs_path2)
1397 goto out_error;
1398
1399 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_RW || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1400 /* bind-mount the cgroup entire filesystem there */
1401 if (strcmp(mp->mount_prefix, "/") != 0) {
1402 /* FIXME: maybe we should just try to remount the entire hierarchy
1403 * with a regular mount command? may that works? */
1404 ERROR("could not automatically mount cgroup-full to /sys/fs/cgroup/%s: host has no mount point for this cgroup filesystem that has access to the root cgroup", dirname);
1405 goto out_error;
1406 }
1407 r = mount(mp->mount_point, abs_path, "none", MS_BIND, 0);
1408 if (r < 0) {
1409 SYSERROR("error bind-mounting %s to %s", mp->mount_point, abs_path);
1410 goto out_error;
1411 }
1412 /* main cgroup path should be read-only */
1413 if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_FULL_MIXED) {
1414 r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1415 if (r < 0) {
1416 SYSERROR("error re-mounting %s readonly", abs_path);
1417 goto out_error;
1418 }
1419 }
1420 /* own cgroup should be read-write */
1421 if (type == LXC_AUTO_CGROUP_FULL_MIXED) {
1422 r = mount(abs_path2, abs_path2, NULL, MS_BIND, NULL);
1423 if (r < 0) {
1424 SYSERROR("error bind-mounting %s onto itself", abs_path2);
1425 goto out_error;
1426 }
1427 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND, NULL);
1428 if (r < 0) {
1429 SYSERROR("error re-mounting %s readwrite", abs_path2);
1430 goto out_error;
1431 }
1432 }
1433 } else {
1434 /* create path for container's cgroup */
1435 r = mkdir_p(abs_path2, 0755);
1436 if (r < 0 && errno != EEXIST) {
1437 SYSERROR("could not create cgroup directory /sys/fs/cgroup/%s%s", dirname, info->cgroup_path);
1438 goto out_error;
1439 }
1440
1441 free(abs_path);
1442 abs_path = NULL;
1443
1444 /* bind-mount container's cgroup to that directory */
1445 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1446 if (!abs_path)
1447 goto out_error;
1448 r = mount(abs_path, abs_path2, "none", MS_BIND, 0);
1449 if (r < 0) {
1450 SYSERROR("error bind-mounting %s to %s", abs_path, abs_path2);
1451 goto out_error;
1452 }
1453 if (type == LXC_AUTO_CGROUP_RO) {
1454 r = mount(NULL, abs_path2, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
1455 if (r < 0) {
1456 SYSERROR("error re-mounting %s readonly", abs_path2);
1457 goto out_error;
1458 }
1459 }
1460 }
1461
1462 free(abs_path);
1463 free(abs_path2);
1464 abs_path = NULL;
1465 abs_path2 = NULL;
1466
1467 /* add symlinks for every single subsystem */
1468 if (subsystem_count > 1) {
1469 for (i = 0; i < subsystem_count; i++) {
1470 abs_path = lxc_append_paths(path, parts[i]);
1471 if (!abs_path)
1472 goto out_error;
1473 r = symlink(dirname, abs_path);
1474 if (r < 0)
1475 WARN("could not create symlink %s -> %s in /sys/fs/cgroup of container", parts[i], dirname);
1476 free(abs_path);
1477 abs_path = NULL;
1478 }
1479 }
1480 free(dirname);
1481 free(parts);
1482 dirname = NULL;
1483 parts = NULL;
1484 }
1485
1486 /* try to remount the tmpfs readonly, since the container shouldn't
1487 * change anything (this will also make sure that trying to create
1488 * new cgroups outside the allowed area fails with an error instead
1489 * of simply causing this to create directories in the tmpfs itself)
1490 */
1491 if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
1492 mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
1493
1494 free(path);
1495
1496 return 0;
1497
1498 out_error:
1499 saved_errno = errno;
1500 free(path);
1501 free(dirname);
1502 free(parts);
1503 free(abs_path);
1504 free(abs_path2);
1505 errno = saved_errno;
1506 return -1;
1507 }
1508
1509 int lxc_cgroup_nrtasks_handler(struct lxc_handler *handler)
1510 {
1511 struct cgfs_data *d = handler->cgroup_info->data;
1512 struct cgroup_process_info *info = d->info;
1513 struct cgroup_mount_point *mp = NULL;
1514 char *abs_path = NULL;
1515 int ret;
1516
1517 if (!info) {
1518 errno = ENOENT;
1519 return -1;
1520 }
1521
1522 if (info->designated_mount_point) {
1523 mp = info->designated_mount_point;
1524 } else {
1525 mp = lxc_cgroup_find_mount_point(info->hierarchy, info->cgroup_path, false);
1526 if (!mp)
1527 return -1;
1528 }
1529
1530 abs_path = cgroup_to_absolute_path(mp, info->cgroup_path, NULL);
1531 if (!abs_path)
1532 return -1;
1533
1534 ret = cgroup_recursive_task_count(abs_path);
1535 free(abs_path);
1536 return ret;
1537 }
1538
1539 static struct cgroup_process_info *
1540 lxc_cgroup_process_info_getx(const char *proc_pid_cgroup_str,
1541 struct cgroup_meta_data *meta)
1542 {
1543 struct cgroup_process_info *result = NULL;
1544 FILE *proc_pid_cgroup = NULL;
1545 char *line = NULL;
1546 size_t sz = 0;
1547 int saved_errno = 0;
1548 struct cgroup_process_info **cptr = &result;
1549 struct cgroup_process_info *entry = NULL;
1550
1551 proc_pid_cgroup = fopen_cloexec(proc_pid_cgroup_str, "r");
1552 if (!proc_pid_cgroup)
1553 return NULL;
1554
1555 while (getline(&line, &sz, proc_pid_cgroup) != -1) {
1556 /* file format: hierarchy:subsystems:group */
1557 char *colon1;
1558 char *colon2;
1559 char *endptr;
1560 int hierarchy_number;
1561 struct cgroup_hierarchy *h = NULL;
1562
1563 if (!line[0])
1564 continue;
1565
1566 if (line[strlen(line) - 1] == '\n')
1567 line[strlen(line) - 1] = '\0';
1568
1569 colon1 = strchr(line, ':');
1570 if (!colon1)
1571 continue;
1572 *colon1++ = '\0';
1573 colon2 = strchr(colon1, ':');
1574 if (!colon2)
1575 continue;
1576 *colon2++ = '\0';
1577
1578 endptr = NULL;
1579 hierarchy_number = strtoul(line, &endptr, 10);
1580 if (!endptr || *endptr)
1581 continue;
1582
1583 if (hierarchy_number > meta->maximum_hierarchy) {
1584 /* we encountered a hierarchy we didn't have before,
1585 * so probably somebody remounted some stuff in the
1586 * mean time...
1587 */
1588 errno = EAGAIN;
1589 goto out_error;
1590 }
1591
1592 h = meta->hierarchies[hierarchy_number];
1593 if (!h) {
1594 /* we encountered a hierarchy that was thought to be
1595 * dead before, so probably somebody remounted some
1596 * stuff in the mean time...
1597 */
1598 errno = EAGAIN;
1599 goto out_error;
1600 }
1601
1602 /* we are told that we should ignore this hierarchy */
1603 if (!h->used)
1604 continue;
1605
1606 entry = calloc(1, sizeof(struct cgroup_process_info));
1607 if (!entry)
1608 goto out_error;
1609
1610 entry->meta_ref = lxc_cgroup_get_meta(meta);
1611 entry->hierarchy = h;
1612 entry->cgroup_path = strdup(colon2);
1613 if (!entry->cgroup_path)
1614 goto out_error;
1615
1616 *cptr = entry;
1617 cptr = &entry->next;
1618 entry = NULL;
1619 }
1620
1621 fclose(proc_pid_cgroup);
1622 free(line);
1623 return result;
1624
1625 out_error:
1626 saved_errno = errno;
1627 if (proc_pid_cgroup)
1628 fclose(proc_pid_cgroup);
1629 lxc_cgroup_process_info_free(result);
1630 lxc_cgroup_process_info_free(entry);
1631 free(line);
1632 errno = saved_errno;
1633 return NULL;
1634 }
1635
/*
 * Extract the cgroup subsystems from a comma-separated mount option
 * string.
 *
 * An option counts as a subsystem if it starts with "name=" (named
 * hierarchy) or if it is contained in @kernel_list. The result is a
 * NULL-terminated array of newly allocated strings; it is NULL when no
 * option matched. On allocation failure NULL is returned with errno
 * preserved.
 */
static char **subsystems_from_mount_options(const char *mount_options,
					    char **kernel_list)
{
	size_t opts_len = strlen(mount_options);
	char *opts = alloca(opts_len + 1);
	char *state = NULL;
	char *opt;
	char **found = NULL;
	size_t capacity = 0;
	size_t count = 0;
	int err;

	/* strtok_r modifies its input, so work on a private copy */
	memcpy(opts, mount_options, opts_len + 1);

	opt = strtok_r(opts, ",", &state);
	while (opt) {
		if (strncmp(opt, "name=", 5) == 0 ||
		    lxc_string_in_array(opt, (const char **)kernel_list)) {
			if (lxc_grow_array((void ***)&found, &capacity, count + 1, 12) < 0)
				goto fail;
			/* terminate first so the array stays consistent for
			 * the cleanup path if strdup fails
			 */
			found[count + 1] = NULL;
			found[count] = strdup(opt);
			if (!found[count])
				goto fail;
			count++;
		}
		opt = strtok_r(NULL, ",", &state);
	}

	return found;

fail:
	err = errno;
	lxc_free_array((void **)found, free);
	errno = err;
	return NULL;
}
1673
1674 static void lxc_cgroup_mount_point_free(struct cgroup_mount_point *mp)
1675 {
1676 if (!mp)
1677 return;
1678 free(mp->mount_point);
1679 free(mp->mount_prefix);
1680 free(mp);
1681 }
1682
1683 static void lxc_cgroup_hierarchy_free(struct cgroup_hierarchy *h)
1684 {
1685 if (!h)
1686 return;
1687 lxc_free_array((void **)h->subsystems, free);
1688 free(h->all_mount_points);
1689 free(h);
1690 }
1691
/*
 * Check whether @name is acceptable as a single cgroup path component.
 *
 * "." and ".." are rejected, as is any name containing a '/' or a byte
 * outside the printable ASCII range 33..126. SPACE (32) is excluded on
 * purpose because it would break legacy lxc-ls.
 */
static bool is_valid_cgroup(const char *name)
{
	size_t i;

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return false;

	for (i = 0; name[i] != '\0'; i++) {
		/* look at the raw byte value so the result does not depend
		 * on whether plain char is signed
		 */
		unsigned char c = (unsigned char)name[i];

		if (c <= 32 || c >= 127 || c == '/')
			return false;
	}

	return true;
}
1705
/*
 * Create or remove the cgroup @path below mount point @mp.
 *
 * @do_remove : false creates the directory with mode 0777, true removes it
 * @recurse   : only used for removal; selects cgroup_rmdir() instead of a
 *              plain rmdir()
 *
 * Returns the result of the underlying call, or -1 if the absolute path
 * could not be built. errno of the underlying call is preserved.
 */
static int create_or_remove_cgroup(bool do_remove,
		struct cgroup_mount_point *mp, const char *path, int recurse)
{
	char *target = cgroup_to_absolute_path(mp, path, NULL);
	int rc;
	int err;

	if (!target)
		return -1;

	if (!do_remove)
		rc = mkdir(target, 0777);
	else if (recurse)
		rc = cgroup_rmdir(target);
	else
		rc = rmdir(target);

	/* free() must not disturb the errno the caller will look at */
	err = errno;
	free(target);
	errno = err;
	return rc;
}
1727
/* Create the cgroup @path below mount point @mp (no recursion). */
static int create_cgroup(struct cgroup_mount_point *mp, const char *path)
{
	const bool remove_it = false;

	return create_or_remove_cgroup(remove_it, mp, path, false);
}
1732
/* Remove the cgroup @path below mount point @mp; @recurse is handed on to
 * the common create/remove helper.
 */
static int remove_cgroup(struct cgroup_mount_point *mp,
			 const char *path, bool recurse)
{
	const bool remove_it = true;

	return create_or_remove_cgroup(remove_it, mp, path, recurse);
}
1738
1739 static char *cgroup_to_absolute_path(struct cgroup_mount_point *mp,
1740 const char *path, const char *suffix)
1741 {
1742 /* first we have to make sure we subtract the mount point's prefix */
1743 char *prefix = mp->mount_prefix;
1744 char *buf;
1745 ssize_t len, rv;
1746
1747 /* we want to make sure only absolute paths to cgroups are passed to us */
1748 if (path[0] != '/') {
1749 errno = EINVAL;
1750 return NULL;
1751 }
1752
1753 if (prefix && !strcmp(prefix, "/"))
1754 prefix = NULL;
1755
1756 /* prefix doesn't match */
1757 if (prefix && strncmp(prefix, path, strlen(prefix)) != 0) {
1758 errno = EINVAL;
1759 return NULL;
1760 }
1761 /* if prefix is /foo and path is /foobar */
1762 if (prefix && path[strlen(prefix)] != '/' && path[strlen(prefix)] != '\0') {
1763 errno = EINVAL;
1764 return NULL;
1765 }
1766
1767 /* remove prefix from path */
1768 path += prefix ? strlen(prefix) : 0;
1769
1770 len = strlen(mp->mount_point) + strlen(path) + (suffix ? strlen(suffix) : 0);
1771 buf = calloc(len + 1, 1);
1772 if (!buf)
1773 return NULL;
1774 rv = snprintf(buf, len + 1, "%s%s%s", mp->mount_point, path, suffix ? suffix : "");
1775 if (rv > len) {
1776 free(buf);
1777 errno = ENOMEM;
1778 return NULL;
1779 }
1780
1781 return buf;
1782 }
1783
1784 static struct cgroup_process_info *
1785 find_info_for_subsystem(struct cgroup_process_info *info, const char *subsystem)
1786 {
1787 struct cgroup_process_info *info_ptr;
1788 for (info_ptr = info; info_ptr; info_ptr = info_ptr->next) {
1789 struct cgroup_hierarchy *h = info_ptr->hierarchy;
1790 if (lxc_string_in_array(subsystem, (const char **)h->subsystems))
1791 return info_ptr;
1792 }
1793 errno = ENOENT;
1794 return NULL;
1795 }
1796
/*
 * Read the file @sub_filename inside the cgroup directory @cgroup_path
 * into @value, passing @len on to the read helper.
 *
 * Returns the result of the read helper, or -1 if the file name could not
 * be built. errno of the read is preserved.
 */
static int do_cgroup_get(const char *cgroup_path, const char *sub_filename,
			 char *value, size_t len)
{
	const char *components[] = { cgroup_path, sub_filename, NULL };
	char *file;
	int rc, err;

	file = lxc_string_join("/", components, false);
	if (!file)
		return -1;

	rc = lxc_read_from_file(file, value, len);

	err = errno;
	free(file);
	errno = err;

	return rc;
}
1818
/*
 * Write the string @value to the file @sub_filename inside the cgroup
 * directory @cgroup_path.
 *
 * Returns the result of the write helper, or -1 if the file name could
 * not be built. errno of the write is preserved.
 */
static int do_cgroup_set(const char *cgroup_path, const char *sub_filename,
			 const char *value)
{
	const char *components[] = { cgroup_path, sub_filename, NULL };
	char *file;
	int rc, err;

	file = lxc_string_join("/", components, false);
	if (!file)
		return -1;

	rc = lxc_write_to_file(file, value, strlen(value), false);

	err = errno;
	free(file);
	errno = err;

	return rc;
}
1840
1841 static int do_setup_cgroup_limits(struct lxc_handler *h,
1842 struct lxc_list *cgroup_settings, bool do_devices)
1843 {
1844 struct lxc_list *iterator;
1845 struct lxc_cgroup *cg;
1846 int ret = -1;
1847
1848 if (lxc_list_empty(cgroup_settings))
1849 return 0;
1850
1851 lxc_list_for_each(iterator, cgroup_settings) {
1852 cg = iterator->elem;
1853
1854 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
1855 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
1856 cgroup_devices_has_allow_or_deny(h, cg->value, false))
1857 continue;
1858 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
1859 cgroup_devices_has_allow_or_deny(h, cg->value, true))
1860 continue;
1861 if (lxc_cgroup_set_handler(cg->subsystem, cg->value, h)) {
1862 ERROR("Error setting %s to %s for %s\n",
1863 cg->subsystem, cg->value, h->name);
1864 goto out;
1865 }
1866 }
1867
1868 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
1869 }
1870
1871 ret = 0;
1872 INFO("cgroup has been setup");
1873 out:
1874 return ret;
1875 }
1876
/*
 * Check the container's devices.list to find out whether a devices.allow
 * or devices.deny setting is already in effect and can be skipped.
 *
 * @h         : handler of the container
 * @v         : the value that is about to be written
 * @for_allow : true when @v is meant for devices.allow, false for
 *              devices.deny
 *
 * For allow: returns true if devices.list contains "a *:* rwm" or a line
 * equal to @v. For deny: only the values "a" and "a *:* rwm" are
 * considered; returns true if devices.list does not contain "a *:* rwm".
 * Returns false whenever devices.list cannot be located or opened.
 */
static bool cgroup_devices_has_allow_or_deny(struct lxc_handler *h,
					      char *v, bool for_allow)
{
	char *cgroup_dir;
	char *path;
	FILE *devices_list;
	char *line = NULL;
	size_t sz = 0;
	bool ret = !for_allow;
	const char *parts[3] = {
		NULL,
		"devices.list",
		NULL
	};

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (!for_allow && strcmp(v, "a") != 0 && strcmp(v, "a *:* rwm") != 0)
		return false;

	cgroup_dir = lxc_cgroup_get_hierarchy_abs_path_handler("devices", h);
	if (!cgroup_dir)
		return false;
	parts[0] = cgroup_dir;
	path = lxc_string_join("/", parts, false);
	/* The joined string is a separate allocation, so the directory
	 * string is no longer needed on any path from here on. Releasing it
	 * right away avoids leaking it on the success and fopen-failure
	 * paths.
	 */
	parts[0] = NULL;
	free(cgroup_dir);
	if (!path)
		return false;

	devices_list = fopen_cloexec(path, "r");
	if (!devices_list) {
		free(path);
		return false;
	}

	while (getline(&line, &sz, devices_list) != -1) {
		size_t len = strlen(line);
		if (len > 0 && line[len-1] == '\n')
			line[len-1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			/* everything is allowed: an allow is redundant, a
			 * deny-all is not
			 */
			ret = for_allow;
			goto out;
		} else if (for_allow && strcmp(line, v) == 0) {
			ret = true;
			goto out;
		}
	}

out:
	fclose(devices_list);
	free(line);
	free(path);
	return ret;
}
1931
/*
 * Count the lines of the "tasks" file in the directory @cgroup_path and
 * in all directories below it.
 *
 * Returns the accumulated count. A directory that cannot be opened counts
 * as 0; sub-directories or tasks files that report an error are left out
 * of the sum. -1 is returned if an allocation or a stat() of a directory
 * entry fails.
 */
static int cgroup_recursive_task_count(const char *cgroup_path)
{
	DIR *dir;
	struct dirent *entry_buf;
	struct dirent *entry;
	ssize_t max_name;
	int total = 0;

	/* see man readdir_r(3) */
	max_name = pathconf(cgroup_path, _PC_NAME_MAX);
	if (max_name <= 0)
		max_name = 255;
	entry_buf = malloc(offsetof(struct dirent, d_name) + max_name + 1);
	if (!entry_buf)
		return -1;

	dir = opendir(cgroup_path);
	if (!dir) {
		free(entry_buf);
		return 0;
	}

	for (;;) {
		const char *components[3];
		char *child;
		struct stat sb;
		int sub;

		if (readdir_r(dir, entry_buf, &entry) != 0 || !entry)
			break;

		if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
			continue;

		components[0] = cgroup_path;
		components[1] = entry->d_name;
		components[2] = NULL;
		child = lxc_string_join("/", components, false);
		if (!child) {
			total = -1;
			break;
		}

		if (stat(child, &sb) < 0) {
			free(child);
			total = -1;
			break;
		}

		if (S_ISDIR(sb.st_mode)) {
			sub = cgroup_recursive_task_count(child);
			if (sub >= 0)
				total += sub;
		} else if (strcmp(entry->d_name, "tasks") == 0) {
			sub = count_lines(child);
			if (sub >= 0)
				total += sub;
		}
		free(child);
	}

	closedir(dir);
	free(entry_buf);

	return total;
}
1994
/* Count the lines of file @fn as delivered by getline().
 * Returns the count, or -1 if the file cannot be opened.
 */
static int count_lines(const char *fn)
{
	FILE *fp = fopen_cloexec(fn, "r");
	char *buf = NULL;
	size_t cap = 0;
	int lines;

	if (!fp)
		return -1;

	for (lines = 0; getline(&buf, &cap, fp) != -1; lines++)
		;

	free(buf);
	fclose(fp);
	return lines;
}
2013
2014 static int handle_cgroup_settings(struct cgroup_mount_point *mp,
2015 char *cgroup_path)
2016 {
2017 int r, saved_errno = 0;
2018 char buf[2];
2019
2020 /* If this is the memory cgroup, we want to enforce hierarchy.
2021 * But don't fail if for some reason we can't.
2022 */
2023 if (lxc_string_in_array("memory", (const char **)mp->hierarchy->subsystems)) {
2024 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/memory.use_hierarchy");
2025 if (cc_path) {
2026 r = lxc_read_from_file(cc_path, buf, 1);
2027 if (r < 1 || buf[0] != '1') {
2028 r = lxc_write_to_file(cc_path, "1", 1, false);
2029 if (r < 0)
2030 SYSERROR("failed to set memory.use_hiararchy to 1; continuing");
2031 }
2032 free(cc_path);
2033 }
2034 }
2035
2036 /* if this is a cpuset hierarchy, we have to set cgroup.clone_children in
2037 * the base cgroup, otherwise containers will start with an empty cpuset.mems
2038 * and cpuset.cpus and then
2039 */
2040 if (lxc_string_in_array("cpuset", (const char **)mp->hierarchy->subsystems)) {
2041 char *cc_path = cgroup_to_absolute_path(mp, cgroup_path, "/cgroup.clone_children");
2042 if (!cc_path)
2043 return -1;
2044 r = lxc_read_from_file(cc_path, buf, 1);
2045 if (r == 1 && buf[0] == '1') {
2046 free(cc_path);
2047 return 0;
2048 }
2049 r = lxc_write_to_file(cc_path, "1", 1, false);
2050 saved_errno = errno;
2051 free(cc_path);
2052 errno = saved_errno;
2053 return r < 0 ? -1 : 0;
2054 }
2055 return 0;
2056 }
2057
2058 extern void lxc_monitor_send_state(const char *name, lxc_state_t state,
2059 const char *lxcpath);
2060 int do_unfreeze(int freeze, const char *name, const char *lxcpath)
2061 {
2062 char v[100];
2063 const char *state = freeze ? "FROZEN" : "THAWED";
2064
2065 if (lxc_cgroup_set("freezer.state", state, name, lxcpath) < 0) {
2066 ERROR("Failed to freeze %s:%s", lxcpath, name);
2067 return -1;
2068 }
2069 while (1) {
2070 if (lxc_cgroup_get("freezer.state", v, 100, name, lxcpath) < 0) {
2071 ERROR("Failed to get new freezer state for %s:%s", lxcpath, name);
2072 return -1;
2073 }
2074 if (v[strlen(v)-1] == '\n')
2075 v[strlen(v)-1] = '\0';
2076 if (strncmp(v, state, strlen(state)) == 0) {
2077 if (name)
2078 lxc_monitor_send_state(name, freeze ? FROZEN : THAWED, lxcpath);
2079 return 0;
2080 }
2081 sleep(1);
2082 }
2083 }
2084
/* Freeze (@freeze != 0) or thaw (@freeze == 0) container @name in
 * @lxcpath; forwards to do_unfreeze(), which takes the flag first.
 */
int freeze_unfreeze(const char *name, int freeze, const char *lxcpath)
{
	int rc = do_unfreeze(freeze, name, lxcpath);

	return rc;
}
2089
2090 lxc_state_t freezer_state(const char *name, const char *lxcpath)
2091 {
2092 char v[100];
2093 if (lxc_cgroup_get("freezer.state", v, 100, name, lxcpath) < 0)
2094 return -1;
2095
2096 if (v[strlen(v)-1] == '\n')
2097 v[strlen(v)-1] = '\0';
2098 return lxc_str2state(v);
2099 }
2100
2101 static void cgfs_destroy(struct lxc_handler *handler)
2102 {
2103 struct cgfs_data *d = handler->cgroup_info->data;
2104 if (!d)
2105 return;
2106 if (d->info)
2107 lxc_cgroup_process_info_free_and_remove(d->info);
2108 if (d->meta)
2109 lxc_cgroup_put_meta(d->meta);
2110 free(d);
2111 handler->cgroup_info->data = NULL;
2112 }
2113
2114 static inline bool cgfs_init(struct lxc_handler *handler)
2115 {
2116 struct cgfs_data *d = malloc(sizeof(*d));
2117 if (!d)
2118 return false;
2119 d->info = NULL;
2120 d->meta = lxc_cgroup_load_meta();
2121
2122 if (!d->meta) {
2123 ERROR("cgroupfs failed to detect cgroup metadata");
2124 free(d);
2125 return false;
2126 }
2127 handler->cgroup_info->data = d;
2128 return true;
2129 }
2130
2131 static inline bool cgfs_create(struct lxc_handler *handler)
2132 {
2133 struct cgfs_data *d = handler->cgroup_info->data;
2134 struct cgroup_process_info *i;
2135 struct cgroup_meta_data *md = d->meta;
2136 i = lxc_cgroupfs_create(handler->name, handler->cgroup_info->cgroup_pattern, md, NULL);
2137 if (!i)
2138 return false;
2139 d->info = i;
2140 return true;
2141 }
2142
2143 static inline bool cgfs_enter(struct lxc_handler *handler)
2144 {
2145 struct cgfs_data *d = handler->cgroup_info->data;
2146 struct cgroup_process_info *i = d->info;
2147 int ret;
2148
2149 ret = lxc_cgroupfs_enter(i, handler->pid, false);
2150
2151 return ret == 0;
2152 }
2153
2154 static inline bool cgfs_create_legacy(struct lxc_handler *handler)
2155 {
2156 struct cgfs_data *d = handler->cgroup_info->data;
2157 struct cgroup_process_info *i = d->info;
2158 if (lxc_cgroup_create_legacy(i, handler->name, handler->pid) < 0) {
2159 ERROR("failed to create legacy ns cgroups for '%s'", handler->name);
2160 return false;
2161 }
2162 return true;
2163 }
2164
/*
 * cgroupfs backend get_cgroup: return whatever
 * lxc_cgroup_get_hierarchy_path_handler() yields for this subsystem and
 * handler (note the swapped argument order).
 */
static char *cgfs_get_cgroup(struct lxc_handler *handler, const char *subsystem)
{
	char *path = lxc_cgroup_get_hierarchy_path_handler(subsystem, handler);

	return path;
}
2169
/*
 * cgroupfs backend unfreeze: resolve the absolute path of the handler's
 * "freezer" cgroup and write "THAWED" to its freezer.state.
 *
 * Returns -1 when no absolute path could be resolved, otherwise the result
 * of do_cgroup_set().
 */
static int cgfs_unfreeze_fromhandler(struct lxc_handler *handler)
{
	char *rel_path = lxc_cgroup_get_hierarchy_path_handler("freezer", handler);
	char *abs_path = lxc_cgroup_find_abs_path("freezer", rel_path, true, NULL);
	int rc;

	if (abs_path == NULL)
		return -1;

	rc = do_cgroup_set(abs_path, "freezer.state", "THAWED");
	/* only the absolute path is released here */
	free(abs_path);
	return rc;
}
2184
2185 bool cgroupfs_setup_limits(struct lxc_handler *h, bool with_devices)
2186 {
2187 return do_setup_cgroup_limits(h, &h->conf->cgroup, with_devices) == 0;
2188 }
2189
2190 static struct cgroup_ops cgfs_ops = {
2191 .destroy = cgfs_destroy,
2192 .init = cgfs_init,
2193 .create = cgfs_create,
2194 .enter = cgfs_enter,
2195 .create_legacy = cgfs_create_legacy,
2196 .get_cgroup = cgfs_get_cgroup,
2197 .get = lxc_cgroupfs_get,
2198 .set = lxc_cgroupfs_set,
2199 .unfreeze_fromhandler = cgfs_unfreeze_fromhandler,
2200 .setup_limits = cgroupfs_setup_limits,
2201 .name = "cgroupfs",
2202 .chown = NULL,
2203 };
2204 static void init_cg_ops(void)
2205 {
2206 if (!use_cgmanager)
2207 return;
2208 if (cgmanager_initialized)
2209 return;
2210 if (!lxc_init_cgmanager()) {
2211 ERROR("Could not contact cgroup manager, falling back to cgroupfs");
2212 active_cg_ops = &cgfs_ops;
2213 }
2214 }
2215
2216 /*
2217 * These are the backend-independent cgroup handlers for container
2218 * start and stop
2219 */
2220
2221 /* Free all cgroup info held by the handler */
2222 void cgroup_destroy(struct lxc_handler *handler)
2223 {
2224 if (!handler->cgroup_info)
2225 return;
2226 if (active_cg_ops)
2227 active_cg_ops->destroy(handler);
2228 }
2229
2230 /*
2231 * Allocate a lxc_cgroup_info for the active cgroup
2232 * backend, and assign it to the handler
2233 */
2234 bool cgroup_init(struct lxc_handler *handler)
2235 {
2236 init_cg_ops();
2237 handler->cgroup_info = malloc(sizeof(struct lxc_cgroup_info));
2238 if (!handler->cgroup_info)
2239 return false;
2240 memset(handler->cgroup_info, 0, sizeof(struct lxc_cgroup_info));
2241 /* if we are running as root, use system cgroup pattern, otherwise
2242 * just create a cgroup under the current one. But also fall back to
2243 * that if for some reason reading the configuration fails and no
2244 * default value is available
2245 */
2246 if (geteuid() == 0)
2247 handler->cgroup_info->cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
2248 if (!handler->cgroup_info->cgroup_pattern)
2249 handler->cgroup_info->cgroup_pattern = "%n";
2250
2251 return active_cg_ops->init(handler);
2252 }
2253
2254 /* Create the container cgroups for all requested controllers */
2255 bool cgroup_create(struct lxc_handler *handler)
2256 {
2257 return active_cg_ops->create(handler);
2258 }
2259
2260 /*
2261 * Enter the container init into its new cgroups for all
2262 * requested controllers */
2263 bool cgroup_enter(struct lxc_handler *handler)
2264 {
2265 return active_cg_ops->enter(handler);
2266 }
2267
2268 bool cgroup_create_legacy(struct lxc_handler *handler)
2269 {
2270 if (active_cg_ops->create_legacy)
2271 return active_cg_ops->create_legacy(handler);
2272 return true;
2273 }
2274
2275 char *cgroup_get_cgroup(struct lxc_handler *handler, const char *subsystem)
2276 {
2277 return active_cg_ops->get_cgroup(handler, subsystem);
2278 }
2279
2280 int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath)
2281 {
2282 init_cg_ops();
2283 return active_cg_ops->set(filename, value, name, lxcpath);
2284 }
2285
2286 int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
2287 {
2288 init_cg_ops();
2289 return active_cg_ops->get(filename, value, len, name, lxcpath);
2290 }
2291
2292 int lxc_unfreeze_fromhandler(struct lxc_handler *handler)
2293 {
2294 return active_cg_ops->unfreeze_fromhandler(handler);
2295 }
2296
2297 bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
2298 {
2299 return active_cg_ops->setup_limits(handler, with_devices);
2300 }
2301
2302 bool cgroup_chown(struct lxc_handler *handler)
2303 {
2304 if (active_cg_ops->chown)
2305 return active_cg_ops->chown(handler);
2306 return true;
2307 }