]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cgroup.c
2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/inotify.h>
37 #include <netinet/in.h>
44 #include <lxc/cgroup.h>
45 #include <lxc/start.h>
47 lxc_log_define(lxc_cgroup
, lxc
);
49 #define MTAB "/proc/mounts"
53 CGROUP_CLONE_CHILDREN
,
56 /* Check if a mount is a cgroup hierarchy for any subsystem.
57 * Return the first subsystem found (or NULL if none).
59 static char *mount_has_subsystem(const struct mntent
*mntent
)
63 char line
[MAXPATHLEN
];
65 /* read the list of subsystems from the kernel */
66 f
= fopen("/proc/cgroups", "r");
70 /* skip the first line, which contains column headings */
71 if (!fgets(line
, MAXPATHLEN
, f
))
74 while (fgets(line
, MAXPATHLEN
, f
)) {
75 c
= strchr(line
, '\t');
80 ret
= hasmntopt(mntent
, line
);
90 * get_init_cgroup: get the cgroup init is in.
91 * dsg: preallocated buffer to put the output in
92 * subsystem: the exact cgroup subsystem to look up
93 * mntent: a mntent (from getmntent) whose mntopts contains the
94 * subsystem to look up.
96 * subsystem and mntent can both be NULL, in which case we return
97 * the first entry in /proc/1/cgroup.
99 * Returns a pointer to the answer, which may be "".
101 static char *get_init_cgroup(const char *subsystem
, struct mntent
*mntent
,
106 char line
[MAXPATHLEN
];
109 f
= fopen("/proc/1/cgroup", "r");
113 while (fgets(line
, MAXPATHLEN
, f
)) {
114 c
= index(line
, ':');
123 if (!subsystem
&& !mntent
)
125 if (subsystem
&& strcmp(c
, subsystem
) != 0)
127 if (mntent
&& !hasmntopt(mntent
, c
))
130 DEBUG("get_init_cgroup: found init cgroup for subsys %s at %s\n",
132 strncpy(dsg
, c2
, MAXPATHLEN
);
133 c
= &dsg
[strlen(dsg
)-1];
144 static int get_cgroup_flags(struct mntent
*mntent
)
149 if (hasmntopt(mntent
, "ns"))
150 flags
|= CGROUP_NS_CGROUP
;
152 if (hasmntopt(mntent
, "clone_children"))
153 flags
|= CGROUP_CLONE_CHILDREN
;
155 DEBUG("cgroup %s has flags 0x%x", mntent
->mnt_dir
, flags
);
159 static int get_cgroup_mount(const char *subsystem
, char *mnt
)
161 struct mntent
*mntent
;
162 char initcgroup
[MAXPATHLEN
];
164 int ret
, flags
, err
= -1;
166 file
= setmntent(MTAB
, "r");
168 SYSERROR("failed to open %s", MTAB
);
172 while ((mntent
= getmntent(file
))) {
173 if (strcmp(mntent
->mnt_type
, "cgroup"))
177 if (!hasmntopt(mntent
, subsystem
))
181 if (!mount_has_subsystem(mntent
))
185 flags
= get_cgroup_flags(mntent
);
186 ret
= snprintf(mnt
, MAXPATHLEN
, "%s%s%s", mntent
->mnt_dir
,
187 get_init_cgroup(subsystem
, NULL
, initcgroup
),
188 (flags
& CGROUP_NS_CGROUP
) ? "" : "/lxc");
189 if (ret
< 0 || ret
>= MAXPATHLEN
)
192 DEBUG("using cgroup mounted at '%s'", mnt
);
198 DEBUG("Failed to find cgroup for %s\n",
199 subsystem
? subsystem
: "(NULL)");
205 int lxc_ns_is_mounted(void)
207 static char buf
[MAXPATHLEN
];
209 return (get_cgroup_mount("ns", buf
) == 0);
212 static int cgroup_rename_nsgroup(const char *mnt
, const char *name
, pid_t pid
)
214 char oldname
[MAXPATHLEN
];
215 char newname
[MAXPATHLEN
];
218 ret
= snprintf(oldname
, MAXPATHLEN
, "%s/%d", mnt
, pid
);
219 if (ret
>= MAXPATHLEN
)
222 ret
= snprintf(newname
, MAXPATHLEN
, "%s/%s", mnt
, name
);
223 if (ret
>= MAXPATHLEN
)
226 if (rename(oldname
, newname
)) {
227 SYSERROR("failed to rename cgroup %s->%s", oldname
, newname
);
231 DEBUG("'%s' renamed to '%s'", oldname
, newname
);
236 static int cgroup_enable_clone_children(const char *path
)
241 f
= fopen(path
, "w");
243 SYSERROR("failed to open '%s'", path
);
247 if (fprintf(f
, "1") < 1) {
248 ERROR("failed to write flag to '%s'", path
);
257 static int lxc_one_cgroup_attach(const char *name
,
258 struct mntent
*mntent
, pid_t pid
)
261 char tasks
[MAXPATHLEN
], initcgroup
[MAXPATHLEN
];
262 char *cgmnt
= mntent
->mnt_dir
;
266 flags
= get_cgroup_flags(mntent
);
268 rc
= snprintf(tasks
, MAXPATHLEN
, "%s%s%s/%s/tasks", cgmnt
,
269 get_init_cgroup(NULL
, mntent
, initcgroup
),
270 (flags
& CGROUP_NS_CGROUP
) ? "" : "/lxc",
272 if (rc
< 0 || rc
>= MAXPATHLEN
) {
273 ERROR("pathname too long");
277 f
= fopen(tasks
, "w");
279 SYSERROR("failed to open '%s'", tasks
);
283 if (fprintf(f
, "%d", pid
) <= 0) {
284 SYSERROR("failed to write pid '%d' to '%s'", pid
, tasks
);
294 * for each mounted cgroup, attach a pid to the cgroup for the container
296 int lxc_cgroup_attach(const char *name
, pid_t pid
)
298 struct mntent
*mntent
;
303 file
= setmntent(MTAB
, "r");
305 SYSERROR("failed to open %s", MTAB
);
309 while ((mntent
= getmntent(file
))) {
310 DEBUG("checking '%s' (%s)", mntent
->mnt_dir
, mntent
->mnt_type
);
312 if (strcmp(mntent
->mnt_type
, "cgroup"))
314 if (!mount_has_subsystem(mntent
))
317 INFO("[%d] found cgroup mounted at '%s',opts='%s'",
318 ++found
, mntent
->mnt_dir
, mntent
->mnt_opts
);
320 err
= lxc_one_cgroup_attach(name
, mntent
, pid
);
326 ERROR("No cgroup mounted on the system");
334 * rename cgname, which is under cgparent, to a new name starting
335 * with 'cgparent/dead'. That way cgname can be reused. Return
336 * 0 on success, -1 on failure.
338 int try_to_move_cgname(char *cgparent
, char *cgname
)
342 /* tempnam problems don't matter here - cgroupfs will prevent
343 * duplicates if we race, and we'll just fail at that (unlikely)
347 newdir
= tempnam(cgparent
, "dead");
350 if (rename(cgname
, newdir
))
352 WARN("non-empty cgroup %s renamed to %s, please manually inspect it\n",
359 * create a cgroup for the container in a particular subsystem.
361 static int lxc_one_cgroup_create(const char *name
,
362 struct mntent
*mntent
, pid_t pid
)
364 char cginit
[MAXPATHLEN
], cgname
[MAXPATHLEN
], cgparent
[MAXPATHLEN
];
365 char clonechild
[MAXPATHLEN
];
366 char initcgroup
[MAXPATHLEN
];
369 /* cgparent is the parent dir, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc */
370 /* (remember get_init_cgroup() returns a path starting with '/') */
371 /* cgname is the full name, e.g., /sys/fs/cgroup/<cgroup>/<init-cgroup>/lxc/name */
372 ret
= snprintf(cginit
, MAXPATHLEN
, "%s%s", mntent
->mnt_dir
,
373 get_init_cgroup(NULL
, mntent
, initcgroup
));
374 if (ret
< 0 || ret
>= MAXPATHLEN
) {
375 SYSERROR("Failed creating pathname for init's cgroup (%d)\n", ret
);
379 flags
= get_cgroup_flags(mntent
);
381 ret
= snprintf(cgparent
, MAXPATHLEN
, "%s%s", cginit
,
382 (flags
& CGROUP_NS_CGROUP
) ? "" : "/lxc");
383 if (ret
< 0 || ret
>= MAXPATHLEN
) {
384 SYSERROR("Failed creating pathname for cgroup parent (%d)\n", ret
);
387 ret
= snprintf(cgname
, MAXPATHLEN
, "%s/%s", cgparent
, name
);
388 if (ret
< 0 || ret
>= MAXPATHLEN
) {
389 SYSERROR("Failed creating pathname for cgroup (%d)\n", ret
);
393 /* Do we have the deprecated ns_cgroup subsystem? */
394 if (flags
& CGROUP_NS_CGROUP
) {
395 WARN("using deprecated ns_cgroup");
396 return cgroup_rename_nsgroup(cginit
, name
, pid
);
399 ret
= snprintf(clonechild
, MAXPATHLEN
, "%s/cgroup.clone_children",
401 if (ret
< 0 || ret
>= MAXPATHLEN
) {
402 SYSERROR("Failed creating pathname for clone_children (%d)\n", ret
);
406 /* Check whether the kernel provides clone_children. At this point,
407 * if there is neither clone_children nor ns_cgroup, the cgroup is mounted
408 * without the ns_cgroup option and lacks the compatibility flag.
410 if (access(clonechild
, F_OK
)) {
411 ERROR("no ns_cgroup option specified");
415 /* enable the clone_children flag of the cgroup */
416 if (cgroup_enable_clone_children(clonechild
)) {
417 SYSERROR("failed to enable 'clone_children flag");
421 /* if cgparent does not exist, create it */
422 if (access(cgparent
, F_OK
)) {
423 ret
= mkdir(cgparent
, 0755);
424 if (ret
== -1 && errno
== EEXIST
) {
425 SYSERROR("failed to create '%s' directory", cgparent
);
431 * There is a previous cgroup. Try to delete it. If that fails
432 * (i.e. it is not empty) try to move it out of the way.
434 if (!access(cgname
, F_OK
) && rmdir(cgname
)) {
435 if (try_to_move_cgname(cgparent
, cgname
)) {
436 SYSERROR("failed to remove previous cgroup '%s'", cgname
);
441 /* Let's create the cgroup */
442 if (mkdir(cgname
, 0755)) {
443 SYSERROR("failed to create '%s' directory", cgname
);
447 INFO("created cgroup '%s'", cgname
);
453 * for each mounted cgroup, create a cgroup for the container and attach a pid
455 int lxc_cgroup_create(const char *name
, pid_t pid
)
457 struct mntent
*mntent
;
462 file
= setmntent(MTAB
, "r");
464 SYSERROR("failed to open %s", MTAB
);
468 while ((mntent
= getmntent(file
))) {
469 DEBUG("checking '%s' (%s)", mntent
->mnt_dir
, mntent
->mnt_type
);
471 if (strcmp(mntent
->mnt_type
, "cgroup"))
473 if (!mount_has_subsystem(mntent
))
476 INFO("[%d] found cgroup mounted at '%s',opts='%s'",
477 ++found
, mntent
->mnt_dir
, mntent
->mnt_opts
);
479 err
= lxc_one_cgroup_create(name
, mntent
, pid
);
483 err
= lxc_one_cgroup_attach(name
, mntent
, pid
);
489 ERROR("No cgroup mounted on the system");
496 int recursive_rmdir(char *dirname
)
498 struct dirent dirent
, *direntp
;
501 char pathname
[MAXPATHLEN
];
503 dir
= opendir(dirname
);
505 WARN("failed to open directory: %m");
509 while (!readdir_r(dir
, &dirent
, &direntp
)) {
516 if (!strcmp(direntp
->d_name
, ".") ||
517 !strcmp(direntp
->d_name
, ".."))
520 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
521 if (rc
< 0 || rc
>= MAXPATHLEN
) {
522 ERROR("pathname too long");
525 ret
= stat(pathname
, &mystat
);
528 if (S_ISDIR(mystat
.st_mode
))
529 recursive_rmdir(pathname
);
532 ret
= rmdir(dirname
);
535 ERROR("failed to close directory");
541 int lxc_one_cgroup_destroy(struct mntent
*mntent
, const char *name
)
543 char cgname
[MAXPATHLEN
], initcgroup
[MAXPATHLEN
];
544 char *cgmnt
= mntent
->mnt_dir
;
545 int flags
= get_cgroup_flags(mntent
);
548 rc
= snprintf(cgname
, MAXPATHLEN
, "%s%s%s/%s", cgmnt
,
549 get_init_cgroup(NULL
, mntent
, initcgroup
),
550 (flags
& CGROUP_NS_CGROUP
) ? "" : "/lxc", name
);
551 if (rc
< 0 || rc
>= MAXPATHLEN
) {
552 ERROR("name too long");
555 DEBUG("destroying %s\n", cgname
);
556 if (recursive_rmdir(cgname
)) {
557 SYSERROR("failed to remove cgroup '%s'", cgname
);
561 DEBUG("'%s' unlinked", cgname
);
567 * for each mounted cgroup, destroy the cgroup for the container
569 int lxc_cgroup_destroy(const char *name
)
571 struct mntent
*mntent
;
575 file
= setmntent(MTAB
, "r");
577 SYSERROR("failed to open %s", MTAB
);
581 while ((mntent
= getmntent(file
))) {
582 if (strcmp(mntent
->mnt_type
, "cgroup"))
584 if (!mount_has_subsystem(mntent
))
587 err
= lxc_one_cgroup_destroy(mntent
, name
);
596 * lxc_cgroup_path_get: put into *path the pathname for
597 * %subsystem and cgroup %name. If %subsystem is NULL, then
598 * the first mounted cgroup will be used (for nr_tasks)
600 int lxc_cgroup_path_get(char **path
, const char *subsystem
, const char *name
)
602 static char buf
[MAXPATHLEN
];
603 static char retbuf
[MAXPATHLEN
];
606 /* lxc_cgroup_set passes a state object for the subsystem,
607 * so trim it to just the subsystem part */
609 rc
= snprintf(retbuf
, MAXPATHLEN
, "%s", subsystem
);
610 if (rc
< 0 || rc
>= MAXPATHLEN
) {
611 ERROR("subsystem name too long");
614 char *s
= index(retbuf
, '.');
617 DEBUG("%s: called for subsys %s name %s\n", __func__
, retbuf
, name
);
619 if (get_cgroup_mount(subsystem
? retbuf
: NULL
, buf
)) {
620 ERROR("cgroup is not mounted");
624 rc
= snprintf(retbuf
, MAXPATHLEN
, "%s/%s", buf
, name
);
625 if (rc
< 0 || rc
>= MAXPATHLEN
) {
626 ERROR("name too long");
630 DEBUG("%s: returning %s for subsystem %s", __func__
, retbuf
, subsystem
);
636 int lxc_cgroup_set(const char *name
, const char *filename
, const char *value
)
640 char path
[MAXPATHLEN
];
643 ret
= lxc_cgroup_path_get(&dirpath
, filename
, name
);
647 rc
= snprintf(path
, MAXPATHLEN
, "%s/%s", dirpath
, filename
);
648 if (rc
< 0 || rc
>= MAXPATHLEN
) {
649 ERROR("pathname too long");
653 fd
= open(path
, O_WRONLY
);
655 ERROR("open %s : %s", path
, strerror(errno
));
659 ret
= write(fd
, value
, strlen(value
));
661 ERROR("write %s : %s", path
, strerror(errno
));
671 int lxc_cgroup_get(const char *name
, const char *filename
,
672 char *value
, size_t len
)
676 char path
[MAXPATHLEN
];
679 ret
= lxc_cgroup_path_get(&dirpath
, filename
, name
);
683 rc
= snprintf(path
, MAXPATHLEN
, "%s/%s", dirpath
, filename
);
684 if (rc
< 0 || rc
>= MAXPATHLEN
) {
685 ERROR("pathname too long");
689 fd
= open(path
, O_RDONLY
);
691 ERROR("open %s : %s", path
, strerror(errno
));
695 ret
= read(fd
, value
, len
);
697 ERROR("read %s : %s", path
, strerror(errno
));
703 int lxc_cgroup_nrtasks(const char *name
)
706 char path
[MAXPATHLEN
];
707 int pid
, ret
, count
= 0;
711 ret
= lxc_cgroup_path_get(&dpath
, NULL
, name
);
715 rc
= snprintf(path
, MAXPATHLEN
, "%s/tasks", dpath
);
716 if (rc
< 0 || rc
>= MAXPATHLEN
) {
717 ERROR("pathname too long");
721 file
= fopen(path
, "r");
723 SYSERROR("fopen '%s' failed", path
);
727 while (fscanf(file
, "%d", &pid
) != EOF
)