]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cgroup.c
Makefile.am: use right .h file name for seccomp
[mirror_lxc.git] / src / lxc / cgroup.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23 #define _GNU_SOURCE
24 #include <stdio.h>
25 #undef _GNU_SOURCE
26 #include <stdlib.h>
27 #include <errno.h>
28 #include <mntent.h>
29 #include <unistd.h>
30 #include <string.h>
31 #include <dirent.h>
32 #include <fcntl.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <sys/param.h>
36 #include <sys/inotify.h>
37 #include <netinet/in.h>
38 #include <net/if.h>
39
40 #include "error.h"
41 #include "config.h"
42
43 #include <lxc/log.h>
44 #include <lxc/cgroup.h>
45 #include <lxc/start.h>
46
47 lxc_log_define(lxc_cgroup, lxc);
48
49 #define MTAB "/proc/mounts"
50
/*
 * Flags describing how a cgroup hierarchy is mounted.  They are OR-ed
 * together into a bitmask, so each one occupies its own bit.
 */
enum {
	CGROUP_NS_CGROUP      = 1,	/* mount carries the "ns" option */
	CGROUP_CLONE_CHILDREN = 2,	/* mount carries the "clone_children" option */
};
55
/*
 * Check whether a mount is a cgroup hierarchy for any subsystem that the
 * kernel lists in /proc/cgroups.
 *
 * mntent: mount entry whose options are inspected.
 *
 * Returns the hasmntopt() match for the first listed subsystem that is
 * present in the mount options, or NULL if no subsystem matches or
 * /proc/cgroups cannot be read.
 */
static char *mount_has_subsystem(const struct mntent *mntent)
{
	FILE *f;
	char *c;
	char *ret = NULL;	/* stays NULL when no subsystem line is seen */
	char line[MAXPATHLEN];

	/* read the list of subsystems from the kernel */
	f = fopen("/proc/cgroups", "r");
	if (!f)
		return NULL;

	/* skip the first line, which contains column headings */
	if (!fgets(line, sizeof(line), f))
		goto out;

	while (fgets(line, sizeof(line), f)) {
		/* the subsystem name is everything up to the first tab */
		c = strchr(line, '\t');
		if (!c)
			continue;
		*c = '\0';

		ret = hasmntopt(mntent, line);
		if (ret)
			break;
	}

out:
	fclose(f);
	return ret;
}
88
/*
 * get_init_cgroup: get the cgroup init is in, as listed in /proc/1/cgroup.
 *
 * subsystem: the exact subsystem field to look up (may be NULL)
 * mntent: a mntent (from getmntent) whose mount options must contain the
 *         subsystem field of the matching line (may be NULL)
 * dsg: preallocated output buffer; callers in this file pass MAXPATHLEN
 *      bytes
 *
 * subsystem and mntent can both be NULL, in which case the first
 * well-formed entry in /proc/1/cgroup is used.
 *
 * Returns dsg, which holds the cgroup path without its trailing newline,
 * or "" when the file cannot be opened or no line matches.
 */
static char *get_init_cgroup(const char *subsystem, struct mntent *mntent,
			     char *dsg)
{
	FILE *f;
	char *c, *c2;
	size_t len;
	char line[MAXPATHLEN];

	*dsg = '\0';
	f = fopen("/proc/1/cgroup", "r");
	if (!f)
		return dsg;

	while (fgets(line, sizeof(line), f)) {
		/* a line has three ':'-separated fields; c is the 2nd, c2 the 3rd */
		c = strchr(line, ':');
		if (!c)
			continue;
		c++;
		c2 = strchr(c, ':');
		if (!c2)
			continue;
		*c2 = '\0';
		c2++;

		if (subsystem && strcmp(c, subsystem) != 0)
			continue;
		if (mntent && !hasmntopt(mntent, c))
			continue;

		/* strip the trailing newline; the path field may be empty */
		len = strlen(c2);
		if (len > 0 && c2[len - 1] == '\n')
			c2[--len] = '\0';

		DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n",
		      subsystem ? subsystem : "(NULL)", c2);

		/* c2 lies inside line[MAXPATHLEN], so it fits the caller's buffer */
		memcpy(dsg, c2, len + 1);
		break;
	}

	fclose(f);
	return dsg;
}
143
144 static int get_cgroup_flags(struct mntent *mntent)
145 {
146 int flags = 0;
147
148
149 if (hasmntopt(mntent, "ns"))
150 flags |= CGROUP_NS_CGROUP;
151
152 if (hasmntopt(mntent, "clone_children"))
153 flags |= CGROUP_CLONE_CHILDREN;
154
155 DEBUG("cgroup %s has flags 0x%x", mntent->mnt_dir, flags);
156 return flags;
157 }
158
159 static int get_cgroup_mount(const char *subsystem, char *mnt)
160 {
161 struct mntent *mntent;
162 char initcgroup[MAXPATHLEN];
163 FILE *file = NULL;
164 int ret, flags, err = -1;
165
166 file = setmntent(MTAB, "r");
167 if (!file) {
168 SYSERROR("failed to open %s", MTAB);
169 return -1;
170 }
171
172 while ((mntent = getmntent(file))) {
173 if (strcmp(mntent->mnt_type, "cgroup"))
174 continue;
175
176 if (subsystem) {
177 if (!hasmntopt(mntent, subsystem))
178 continue;
179 }
180 else {
181 if (!mount_has_subsystem(mntent))
182 continue;
183 }
184
185 flags = get_cgroup_flags(mntent);
186 ret = snprintf(mnt, MAXPATHLEN, "%s%s%s", mntent->mnt_dir,
187 get_init_cgroup(subsystem, NULL, initcgroup),
188 (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
189 if (ret < 0 || ret >= MAXPATHLEN)
190 goto fail;
191
192 DEBUG("using cgroup mounted at '%s'", mnt);
193 err = 0;
194 goto out;
195 };
196
197 fail:
198 DEBUG("Failed to find cgroup for %s\n",
199 subsystem ? subsystem : "(NULL)");
200 out:
201 endmntent(file);
202 return err;
203 }
204
/*
 * Report whether a cgroup hierarchy carrying the "ns" mount option can be
 * resolved.  Returns 1 if so, 0 otherwise.
 */
int lxc_ns_is_mounted(void)
{
	static char path[MAXPATHLEN];	/* scratch: the resolved path is not used */
	int rc;

	rc = get_cgroup_mount("ns", path);
	return rc == 0 ? 1 : 0;
}
211
/*
 * Rename the cgroup directory "<mnt>/<pid>" to "<mnt>/<name>".
 *
 * mnt: directory containing the cgroup to rename
 * name: new directory name
 * pid: pid whose decimal value is the current directory name
 *
 * Returns 0 on success, -1 if a path does not fit in MAXPATHLEN or the
 * rename fails.
 */
static int cgroup_rename_nsgroup(const char *mnt, const char *name, pid_t pid)
{
	char oldname[MAXPATHLEN];
	char newname[MAXPATHLEN];
	int ret;

	ret = snprintf(oldname, MAXPATHLEN, "%s/%d", mnt, (int)pid);
	if (ret < 0 || ret >= MAXPATHLEN) {
		ERROR("pathname too long");
		return -1;
	}

	ret = snprintf(newname, MAXPATHLEN, "%s/%s", mnt, name);
	if (ret < 0 || ret >= MAXPATHLEN) {
		ERROR("pathname too long");
		return -1;
	}

	if (rename(oldname, newname)) {
		SYSERROR("failed to rename cgroup %s->%s", oldname, newname);
		return -1;
	}

	DEBUG("'%s' renamed to '%s'", oldname, newname);

	return 0;
}
235
/*
 * Write "1" to the file at 'path' (the caller passes a
 * cgroup.clone_children file).
 *
 * Returns 0 on success, -1 if the file cannot be opened or the write
 * fails.
 */
static int cgroup_enable_clone_children(const char *path)
{
	FILE *f;
	int ret = 0;

	f = fopen(path, "w");
	if (!f) {
		SYSERROR("failed to open '%s'", path);
		return -1;
	}

	if (fprintf(f, "1") < 1) {
		ERROR("failed to write flag to '%s'", path);
		ret = -1;
	}

	/*
	 * stdio buffers the data, so the actual write(2) usually happens
	 * inside fclose(); its result is the only place a rejected write
	 * becomes visible.
	 */
	if (fclose(f) != 0 && ret == 0) {
		SYSERROR("failed to write flag to '%s'", path);
		ret = -1;
	}

	return ret;
}
256
257 static int lxc_one_cgroup_attach(const char *name,
258 struct mntent *mntent, pid_t pid)
259 {
260 FILE *f;
261 char tasks[MAXPATHLEN], initcgroup[MAXPATHLEN];
262 char *cgmnt = mntent->mnt_dir;
263 int flags, ret = 0;
264 int rc;
265
266 flags = get_cgroup_flags(mntent);
267
268 rc = snprintf(tasks, MAXPATHLEN, "%s%s%s/%s/tasks", cgmnt,
269 get_init_cgroup(NULL, mntent, initcgroup),
270 (flags & CGROUP_NS_CGROUP) ? "" : "/lxc",
271 name);
272 if (rc < 0 || rc >= MAXPATHLEN) {
273 ERROR("pathname too long");
274 return -1;
275 }
276
277 f = fopen(tasks, "w");
278 if (!f) {
279 SYSERROR("failed to open '%s'", tasks);
280 return -1;
281 }
282
283 if (fprintf(f, "%d", pid) <= 0) {
284 SYSERROR("failed to write pid '%d' to '%s'", pid, tasks);
285 ret = -1;
286 }
287
288 fclose(f);
289
290 return ret;
291 }
292
293 /*
294 * for each mounted cgroup, attach a pid to the cgroup for the container
295 */
296 int lxc_cgroup_attach(const char *name, pid_t pid)
297 {
298 struct mntent *mntent;
299 FILE *file = NULL;
300 int err = -1;
301 int found = 0;
302
303 file = setmntent(MTAB, "r");
304 if (!file) {
305 SYSERROR("failed to open %s", MTAB);
306 return -1;
307 }
308
309 while ((mntent = getmntent(file))) {
310 DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
311
312 if (strcmp(mntent->mnt_type, "cgroup"))
313 continue;
314 if (!mount_has_subsystem(mntent))
315 continue;
316
317 INFO("[%d] found cgroup mounted at '%s',opts='%s'",
318 ++found, mntent->mnt_dir, mntent->mnt_opts);
319
320 err = lxc_one_cgroup_attach(name, mntent, pid);
321 if (err)
322 goto out;
323 };
324
325 if (!found)
326 ERROR("No cgroup mounted on the system");
327
328 out:
329 endmntent(file);
330 return err;
331 }
332
/*
 * Rename cgname, which is under cgparent, to a new name starting
 * with 'cgparent/dead'.  That way cgname can be reused.
 *
 * Returns 0 on success, -1 on failure.  On a failed rename errno is left
 * as rename() set it, so the caller can report it.
 */
int try_to_move_cgname(char *cgparent, char *cgname)
{
	char *newdir;
	int ret = -1;
	int saved_errno;

	/* tempnam problems don't matter here - cgroupfs will prevent
	 * duplicates if we race, and we'll just fail at that (unlikely)
	 * point
	 */
	newdir = tempnam(cgparent, "dead");
	if (!newdir)
		return -1;

	if (rename(cgname, newdir) == 0) {
		WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
		     cgname, newdir);
		ret = 0;
	}

	/* tempnam() hands back malloc()ed storage; release it without
	 * disturbing the errno of a failed rename()
	 */
	saved_errno = errno;
	free(newdir);
	errno = saved_errno;

	return ret;
}
357
358 /*
359 * create a cgroup for the container in a particular subsystem.
360 */
361 static int lxc_one_cgroup_create(const char *name,
362 struct mntent *mntent, pid_t pid)
363 {
364 char cginit[MAXPATHLEN], cgname[MAXPATHLEN], cgparent[MAXPATHLEN];
365 char clonechild[MAXPATHLEN];
366 char initcgroup[MAXPATHLEN];
367 int flags, ret;
368
369 /* cgparent is the parent dir, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */
370 /* (remember get_init_cgroup() returns a path starting with '/') */
371 /* cgname is the full name, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc/name */
372 ret = snprintf(cginit, MAXPATHLEN, "%s%s", mntent->mnt_dir,
373 get_init_cgroup(NULL, mntent, initcgroup));
374 if (ret < 0 || ret >= MAXPATHLEN) {
375 SYSERROR("Failed creating pathname for init's cgroup (%d)\n", ret);
376 return -1;
377 }
378
379 flags = get_cgroup_flags(mntent);
380
381 ret = snprintf(cgparent, MAXPATHLEN, "%s%s", cginit,
382 (flags & CGROUP_NS_CGROUP) ? "" : "/lxc");
383 if (ret < 0 || ret >= MAXPATHLEN) {
384 SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret);
385 return -1;
386 }
387 ret = snprintf(cgname, MAXPATHLEN, "%s/%s", cgparent, name);
388 if (ret < 0 || ret >= MAXPATHLEN) {
389 SYSERROR("Failed creating pathname for cgroup (%d)\n", ret);
390 return -1;
391 }
392
393 /* Do we have the deprecated ns_cgroup subsystem? */
394 if (flags & CGROUP_NS_CGROUP) {
395 WARN("using deprecated ns_cgroup");
396 return cgroup_rename_nsgroup(cginit, name, pid);
397 }
398
399 ret = snprintf(clonechild, MAXPATHLEN, "%s/cgroup.clone_children",
400 cginit);
401 if (ret < 0 || ret >= MAXPATHLEN) {
402 SYSERROR("Failed creating pathname for clone_children (%d)\n", ret);
403 return -1;
404 }
405
406 /* we check if the kernel has clone_children, at this point if there
407 * no clone_children neither ns_cgroup, that means the cgroup is mounted
408 * without the ns_cgroup and it has not the compatibility flag
409 */
410 if (access(clonechild, F_OK)) {
411 ERROR("no ns_cgroup option specified");
412 return -1;
413 }
414
415 /* enable the clone_children flag of the cgroup */
416 if (cgroup_enable_clone_children(clonechild)) {
417 SYSERROR("failed to enable 'clone_children flag");
418 return -1;
419 }
420
421 /* if cgparent does not exist, create it */
422 if (access(cgparent, F_OK)) {
423 ret = mkdir(cgparent, 0755);
424 if (ret == -1 && errno == EEXIST) {
425 SYSERROR("failed to create '%s' directory", cgparent);
426 return -1;
427 }
428 }
429
430 /*
431 * There is a previous cgroup. Try to delete it. If that fails
432 * (i.e. it is not empty) try to move it out of the way.
433 */
434 if (!access(cgname, F_OK) && rmdir(cgname)) {
435 if (try_to_move_cgname(cgparent, cgname)) {
436 SYSERROR("failed to remove previous cgroup '%s'", cgname);
437 return -1;
438 }
439 }
440
441 /* Let's create the cgroup */
442 if (mkdir(cgname, 0755)) {
443 SYSERROR("failed to create '%s' directory", cgname);
444 return -1;
445 }
446
447 INFO("created cgroup '%s'", cgname);
448
449 return 0;
450 }
451
452 /*
453 * for each mounted cgroup, create a cgroup for the container and attach a pid
454 */
455 int lxc_cgroup_create(const char *name, pid_t pid)
456 {
457 struct mntent *mntent;
458 FILE *file = NULL;
459 int err = -1;
460 int found = 0;
461
462 file = setmntent(MTAB, "r");
463 if (!file) {
464 SYSERROR("failed to open %s", MTAB);
465 return -1;
466 }
467
468 while ((mntent = getmntent(file))) {
469 DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
470
471 if (strcmp(mntent->mnt_type, "cgroup"))
472 continue;
473 if (!mount_has_subsystem(mntent))
474 continue;
475
476 INFO("[%d] found cgroup mounted at '%s',opts='%s'",
477 ++found, mntent->mnt_dir, mntent->mnt_opts);
478
479 err = lxc_one_cgroup_create(name, mntent, pid);
480 if (err)
481 goto out;
482
483 err = lxc_one_cgroup_attach(name, mntent, pid);
484 if (err)
485 goto out;
486 };
487
488 if (!found)
489 ERROR("No cgroup mounted on the system");
490
491 out:
492 endmntent(file);
493 return err;
494 }
495
/*
 * Remove the directory 'dirname' after recursively removing every
 * sub-directory below it.  Only rmdir() is used: non-directory entries
 * are never unlinked.  Failures on sub-directories are not reported;
 * only the final rmdir() of 'dirname' decides the result.
 *
 * Returns the result of rmdir(dirname) (0 on success, -1 with errno set
 * on failure), or -1 if the directory cannot be opened.
 */
int recursive_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	int ret, saved_errno;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		WARN("failed to open directory: %m");
		return -1;
	}

	/* readdir() replaces the deprecated readdir_r(); each DIR stream
	 * has its own entry buffer, so recursing on a different stream
	 * is safe
	 */
	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			continue;
		}
		if (stat(pathname, &mystat))
			continue;
		if (S_ISDIR(mystat.st_mode))
			recursive_rmdir(pathname);
	}

	ret = rmdir(dirname);
	saved_errno = errno;

	if (closedir(dir))
		ERROR("failed to close directory");

	/* keep rmdir()'s errno so the caller can report it */
	errno = saved_errno;
	return ret;
}
540
541 int lxc_one_cgroup_destroy(struct mntent *mntent, const char *name)
542 {
543 char cgname[MAXPATHLEN], initcgroup[MAXPATHLEN];
544 char *cgmnt = mntent->mnt_dir;
545 int flags = get_cgroup_flags(mntent);
546 int rc;
547
548 rc = snprintf(cgname, MAXPATHLEN, "%s%s%s/%s", cgmnt,
549 get_init_cgroup(NULL, mntent, initcgroup),
550 (flags & CGROUP_NS_CGROUP) ? "" : "/lxc", name);
551 if (rc < 0 || rc >= MAXPATHLEN) {
552 ERROR("name too long");
553 return -1;
554 }
555 DEBUG("destroying %s\n", cgname);
556 if (recursive_rmdir(cgname)) {
557 SYSERROR("failed to remove cgroup '%s'", cgname);
558 return -1;
559 }
560
561 DEBUG("'%s' unlinked", cgname);
562
563 return 0;
564 }
565
566 /*
567 * for each mounted cgroup, destroy the cgroup for the container
568 */
569 int lxc_cgroup_destroy(const char *name)
570 {
571 struct mntent *mntent;
572 FILE *file = NULL;
573 int err = -1;
574
575 file = setmntent(MTAB, "r");
576 if (!file) {
577 SYSERROR("failed to open %s", MTAB);
578 return -1;
579 }
580
581 while ((mntent = getmntent(file))) {
582 if (strcmp(mntent->mnt_type, "cgroup"))
583 continue;
584 if (!mount_has_subsystem(mntent))
585 continue;
586
587 err = lxc_one_cgroup_destroy(mntent, name);
588 if (err)
589 break;
590 }
591
592 endmntent(file);
593 return err;
594 }
/*
 * lxc_cgroup_path_get: put into *path the pathname for
 * %subsystem and cgroup %name.  If %subsystem is NULL, then
 * the first mounted cgroup will be used (for nr_tasks).
 *
 * %subsystem may be a full state-object name such as "<subsys>.<file>";
 * everything from the first '.' on is ignored.
 *
 * On success *path points at a static buffer that is overwritten by the
 * next call.  Returns 0 on success, -1 on failure.
 */
int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name)
{
	static char buf[MAXPATHLEN];
	static char retbuf[MAXPATHLEN];
	char *s;
	int rc;

	/* lxc_cgroup_set passes a state object for the subsystem,
	 * so trim it to just the subsystem part */
	if (subsystem) {
		rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("subsystem name too long");
			return -1;
		}
		s = strchr(retbuf, '.');
		if (s)
			*s = '\0';
		DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, name);
	}

	/* retbuf temporarily holds the trimmed subsystem name */
	if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) {
		ERROR("cgroup is not mounted");
		return -1;
	}

	rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, name);
	if (rc < 0 || rc >= MAXPATHLEN) {
		ERROR("name too long");
		return -1;
	}

	/* subsystem may legitimately be NULL; never hand NULL to %s */
	DEBUG("%s: returning %s for subsystem %s", __func__, retbuf,
	      subsystem ? subsystem : "(NULL)");

	*path = retbuf;
	return 0;
}
635
/*
 * Write 'value' to the cgroup file 'filename' of container 'name'.
 *
 * name: container (cgroup) name
 * filename: state-object name, e.g. "<subsys>.<file>"; it also selects the
 *           hierarchy through lxc_cgroup_path_get()
 * value: NUL-terminated string written without its terminator
 *
 * Returns 0 on success, -1 if the path cannot be resolved, the file
 * cannot be opened, or the value is not written completely.
 */
int lxc_cgroup_set(const char *name, const char *filename, const char *value)
{
	int fd, ret;
	char *dirpath;
	char path[MAXPATHLEN];
	size_t len;
	ssize_t written;
	int rc;

	ret = lxc_cgroup_path_get(&dirpath, filename, name);
	if (ret)
		return -1;

	rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
	if (rc < 0 || rc >= MAXPATHLEN) {
		ERROR("pathname too long");
		return -1;
	}

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		ERROR("open %s : %s", path, strerror(errno));
		return -1;
	}

	len = strlen(value);
	written = write(fd, value, len);
	if (written < 0) {
		ERROR("write %s : %s", path, strerror(errno));
		ret = -1;
	} else if ((size_t)written != len) {
		/* a truncated value must not be reported as success */
		ERROR("write %s : short write", path);
		ret = -1;
	} else {
		ret = 0;
	}

	close(fd);
	return ret;
}
670
/*
 * Read up to 'len' bytes of the cgroup file 'filename' of container
 * 'name' into 'value'.  This function does not add a NUL terminator.
 *
 * Returns the byte count reported by read() (negative on a read error),
 * or -1 if the path cannot be resolved or the file cannot be opened.
 */
int lxc_cgroup_get(const char *name, const char *filename,
		   char *value, size_t len)
{
	char path[MAXPATHLEN];
	char *dirpath;
	int fd, n;

	if (lxc_cgroup_path_get(&dirpath, filename, name) != 0)
		return -1;

	n = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename);
	if (n < 0 || n >= MAXPATHLEN) {
		ERROR("pathname too long");
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		ERROR("open %s : %s", path, strerror(errno));
		return -1;
	}

	n = read(fd, value, len);
	if (n < 0)
		ERROR("read %s : %s", path, strerror(errno));

	close(fd);
	return n;
}
702
/*
 * Count the entries in the "tasks" file of container 'name', using the
 * first mounted cgroup hierarchy.
 *
 * Returns the number of integers read from the file, or -1 if the path
 * cannot be resolved or the file cannot be opened.
 */
int lxc_cgroup_nrtasks(const char *name)
{
	char *dpath;
	char path[MAXPATHLEN];
	int pid, ret, count = 0;
	FILE *file;
	int rc;

	ret = lxc_cgroup_path_get(&dpath, NULL, name);
	if (ret)
		return -1;

	rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath);
	if (rc < 0 || rc >= MAXPATHLEN) {
		ERROR("pathname too long");
		return -1;
	}

	file = fopen(path, "r");
	if (!file) {
		SYSERROR("fopen '%s' failed", path);
		return -1;
	}

	/* stop on EOF *and* on a matching failure: fscanf() returns 0 for
	 * non-numeric input without consuming it, which would otherwise
	 * spin forever
	 */
	while (fscanf(file, "%d", &pid) == 1)
		count++;

	fclose(file);

	return count;
}
733 }