]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cgroup.c
2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/inotify.h>
36 #include <netinet/in.h>
44 #include <lxc/cgroup.h>
45 #include <lxc/start.h>
48 #include <../include/lxcmntent.h>
53 lxc_log_define(lxc_cgroup
, lxc
);
55 #define MTAB "/proc/mounts"
57 /* Check if a mount is a cgroup hierarchy for any subsystem.
58 * Return the first subsystem found (or NULL if none).
60 static char *mount_has_subsystem(const struct mntent
*mntent
)
64 char line
[MAXPATHLEN
];
66 /* read the list of subsystems from the kernel */
67 f
= fopen("/proc/cgroups", "r");
71 /* skip the first line, which contains column headings */
72 if (!fgets(line
, MAXPATHLEN
, f
))
75 while (fgets(line
, MAXPATHLEN
, f
)) {
76 c
= strchr(line
, '\t');
81 ret
= hasmntopt(mntent
, line
);
91 * Determine mountpoint for a cgroup subsystem.
92 * @subsystem: cgroup subsystem (e.g. freezer). If this is NULL, the first
93 * cgroup mountpoint with any subsystems is used.
94 * @mnt: a passed-in buffer of at least size MAXPATHLEN into which the path
97 * Returns 0 on success, -1 on error.
99 static int get_cgroup_mount(const char *subsystem
, char *mnt
)
101 struct mntent
*mntent
;
105 file
= setmntent(MTAB
, "r");
107 SYSERROR("failed to open %s", MTAB
);
111 while ((mntent
= getmntent(file
))) {
112 if (strcmp(mntent
->mnt_type
, "cgroup"))
116 if (!hasmntopt(mntent
, subsystem
))
119 if (!mount_has_subsystem(mntent
))
123 ret
= snprintf(mnt
, MAXPATHLEN
, "%s", mntent
->mnt_dir
);
124 if (ret
< 0 || ret
>= MAXPATHLEN
)
127 DEBUG("using cgroup mounted at '%s'", mnt
);
133 DEBUG("Failed to find cgroup for %s\n",
134 subsystem
? subsystem
: "(NULL)");
141 * cgroup_path_get: Calculate the full path for a particular subsystem, plus
142 * a passed-in (to be appended) relative cgpath for a container.
143 * @path: a char** into which a pointer to the answer is copied
144 * @subsystem: subsystem of interest (e.g. freezer).
145 * @cgpath: a container's (relative) cgroup path, e.g. "/lxc/c1".
147 * Returns 0 on success, -1 on error.
149 * The answer is written in a static char[MAXPATHLEN] in this function and
150 * should not be freed.
152 extern int cgroup_path_get(char **path
, const char *subsystem
, const char *cgpath
)
154 static char buf
[MAXPATHLEN
];
155 static char retbuf
[MAXPATHLEN
];
158 /* lxc_cgroup_set passes a state object for the subsystem,
159 * so trim it to just the subsystem part */
161 rc
= snprintf(retbuf
, MAXPATHLEN
, "%s", subsystem
);
162 if (rc
< 0 || rc
>= MAXPATHLEN
) {
163 ERROR("subsystem name too long");
166 char *s
= index(retbuf
, '.');
169 DEBUG("%s: called for subsys %s name %s\n", __func__
, retbuf
, cgpath
);
171 if (get_cgroup_mount(subsystem
? retbuf
: NULL
, buf
)) {
172 ERROR("cgroup is not mounted");
176 rc
= snprintf(retbuf
, MAXPATHLEN
, "%s/%s", buf
, cgpath
);
177 if (rc
< 0 || rc
>= MAXPATHLEN
) {
178 ERROR("name too long");
182 DEBUG("%s: returning %s for subsystem %s", __func__
, retbuf
, subsystem
);
189 * Calculate a container's cgroup path for a particular subsystem. This
190 * is the cgroup path relative to the root of the cgroup filesystem.
191 * @path: A char ** into which we copy the char* containing the answer
192 * @subsystem: the cgroup subsystem of interest (e.g. freezer)
193 * @name: container name
194 * @lxcpath: the lxcpath in which the container is running.
196 * Returns 0 on success, -1 on error.
198 * Note that the char* copied into *path is a static char[MAXPATHLEN] in
199 * commands.c:receive_answer(). It should not be freed.
201 extern int lxc_get_cgpath(const char **path
, const char *subsystem
, const char *name
, const char *lxcpath
)
203 struct lxc_command command
= {
204 .request
= { .type
= LXC_COMMAND_CGROUP
},
207 int ret
, stopped
= 0;
209 ret
= lxc_command(name
, &command
, &stopped
, lxcpath
);
212 ERROR("failed to send command");
217 WARN("'%s' has stopped before sending its state", name
);
221 if (command
.answer
.ret
< 0 || command
.answer
.pathlen
< 0) {
222 ERROR("failed to get state for '%s': %s",
223 name
, strerror(-command
.answer
.ret
));
227 *path
= command
.answer
.path
;
233 * lxc_cgroup_path_get: determine full pathname for a cgroup
234 * file for a specific container.
235 * @path: char ** used to return the answer. The char * will point
236 * into the static buffer retbuf in cgroup_path_get() (so no need
238 * @subsystem: cgroup subsystem (e.g. "freezer") for which to
239 * return an answer. If NULL, then the first cgroup entry in
242 * This is the exported function, which determines cgpath from the
243 * monitor running in lxcpath.
245 * Returns 0 on success, < 0 on error.
247 int lxc_cgroup_path_get(char **path
, const char *subsystem
, const char *name
, const char *lxcpath
)
251 if (lxc_get_cgpath(&cgpath
, subsystem
, name
, lxcpath
) < 0)
254 return cgroup_path_get(path
, subsystem
, cgpath
);
258 * small helper which simply writes a value into a (cgroup) file
260 static int do_cgroup_set(const char *path
, const char *value
)
264 if ((fd
= open(path
, O_WRONLY
)) < 0) {
265 SYSERROR("open %s : %s", path
, strerror(errno
));
269 if ((ret
= write(fd
, value
, strlen(value
))) < 0) {
271 SYSERROR("write %s : %s", path
, strerror(errno
));
275 if ((ret
= close(fd
)) < 0) {
276 SYSERROR("close %s : %s", path
, strerror(errno
));
283 * small helper to write a value into a file in a particular directory.
284 * @cgpath: the directory in which to find the file
285 * @filename: the file (under cgpath) to which to write
286 * @value: what to write
288 * Returns 0 on success, < 0 on error.
290 int lxc_cgroup_set_bypath(const char *cgpath
, const char *filename
, const char *value
)
294 char path
[MAXPATHLEN
];
296 ret
= cgroup_path_get(&dirpath
, filename
, cgpath
);
300 ret
= snprintf(path
, MAXPATHLEN
, "%s/%s", dirpath
, filename
);
301 if (ret
< 0 || ret
>= MAXPATHLEN
) {
302 ERROR("pathname too long");
306 return do_cgroup_set(path
, value
);
310 * set a cgroup value for a container
312 * @name: name of the container
313 * @filename: the cgroup file (e.g. freezer.state) whose value to change
314 * @value: the value to write to the file
315 * @lxcpath: the lxcpath under which the container is running.
317 * Returns 0 on success, < 0 on error.
320 int lxc_cgroup_set(const char *name
, const char *filename
, const char *value
,
325 char path
[MAXPATHLEN
];
327 ret
= lxc_cgroup_path_get(&dirpath
, filename
, name
, lxcpath
);
331 ret
= snprintf(path
, MAXPATHLEN
, "%s/%s", dirpath
, filename
);
332 if (ret
< 0 || ret
>= MAXPATHLEN
) {
333 ERROR("pathname too long");
337 return do_cgroup_set(path
, value
);
341 * Get value of a cgroup setting for a container.
343 * @name: name of the container
344 * @filename: the cgroup file to read (e.g. 'freezer.state')
345 * @value: a preallocated char* into which to copy the answer
346 * @len: the length of pre-allocated @value
347 * @lxcpath: the lxcpath in which the container is running (i.e.
350 * Returns < 0 on error, or the number of bytes read.
352 * If you pass in NULL value or 0 len, then you are asking for the size of the
355 * Note that we can't get the file size quickly through stat or lseek.
356 * Therefore if you pass in len > 0 but less than the file size, your only
357 * indication will be that the return value will be equal to the passed-in len.
358 * We will not return the actual full file size.
360 int lxc_cgroup_get(const char *name
, const char *filename
, char *value
,
361 size_t len
, const char *lxcpath
)
365 char path
[MAXPATHLEN
];
368 ret
= lxc_cgroup_path_get(&dirpath
, filename
, name
, lxcpath
);
372 rc
= snprintf(path
, MAXPATHLEN
, "%s/%s", dirpath
, filename
);
373 if (rc
< 0 || rc
>= MAXPATHLEN
) {
374 ERROR("pathname too long");
378 fd
= open(path
, O_RDONLY
);
380 ERROR("open %s : %s", path
, strerror(errno
));
384 if (!len
|| !value
) {
387 while ((ret
= read(fd
, buf
, 100)) > 0)
392 memset(value
, 0, len
);
393 ret
= read(fd
, value
, len
);
397 ERROR("read %s : %s", path
, strerror(errno
));
403 int lxc_cgroup_nrtasks(const char *cgpath
)
406 char path
[MAXPATHLEN
];
407 int pid
, ret
, count
= 0;
411 ret
= cgroup_path_get(&dpath
, NULL
, cgpath
);
415 rc
= snprintf(path
, MAXPATHLEN
, "%s/tasks", dpath
);
416 if (rc
< 0 || rc
>= MAXPATHLEN
) {
417 ERROR("pathname too long");
421 file
= fopen(path
, "r");
423 SYSERROR("fopen '%s' failed", path
);
427 while (fscanf(file
, "%d", &pid
) != EOF
)
436 * If first creating the /sys/fs/cgroup/$subsys/lxc container, then
437 * try to set clone_children to 1. Some kernels don't support
438 * clone_children, and cgroup maintainer wants to deprecate it. So
439 * XXX TODO we should instead after each cgroup mkdir (here and in
440 * hooks/mountcgroup) check if cpuset is in the subsystems, and if so
441 * manually copy over mems and cpus.
443 static void set_clone_children(const char *mntdir
)
445 char path
[MAXPATHLEN
];
449 ret
= snprintf(path
, MAXPATHLEN
, "%s/cgroup.clone_children", mntdir
);
450 INFO("writing to %s\n", path
);
451 if (ret
< 0 || ret
> MAXPATHLEN
)
453 fout
= fopen(path
, "w");
456 fprintf(fout
, "1\n");
461 * Make sure the 'cgroup group' exists, so that we don't have to worry about
464 * @lxcgroup: the cgroup group, i.e. 'lxc' by default.
466 * See detailed comments at lxc_cgroup_path_create for more information.
468 * Returns 0 on success, -1 on error.
470 static int create_lxcgroups(const char *lxcgroup
)
473 struct mntent
*mntent
;
475 char path
[MAXPATHLEN
];
477 file
= setmntent(MTAB
, "r");
479 SYSERROR("failed to open %s", MTAB
);
483 while ((mntent
= getmntent(file
))) {
485 if (strcmp(mntent
->mnt_type
, "cgroup"))
487 if (!mount_has_subsystem(mntent
))
491 * TODO - handle case where lxcgroup has subdirs? (e.g. build/l1)
492 * We probably only want to support that for /users/joe
494 ret
= snprintf(path
, MAXPATHLEN
, "%s/%s",
495 mntent
->mnt_dir
, lxcgroup
? lxcgroup
: "lxc");
496 if (ret
< 0 || ret
>= MAXPATHLEN
)
498 if (access(path
, F_OK
)) {
499 set_clone_children(mntent
->mnt_dir
);
500 ret
= mkdir(path
, 0755);
501 if (ret
== -1 && errno
!= EEXIST
) {
502 SYSERROR("failed to create '%s' directory", path
);
516 * For a new container, find a cgroup path which is unique in all cgroup mounts.
517 * E.g. if r1 is already running, then /lxc/r1-1 may be used.
519 * @lxcgroup: the cgroup 'group' the container should run in. By default, this
520 * is just 'lxc'. Admins may wish to group some containers into other groups,
521 * i.e. 'build', to take advantage of cgroup hierarchy to simplify group
522 * administration. Also, unprivileged users who are placed into a cgroup by
523 * libcgroup_pam will be using that cgroup rather than the system-wide 'lxc'
525 * @name: the name of the container
527 * The chosen cgpath is returned as a strdup'd string. The caller will have to
528 * free that eventually, however the lxc monitor will keep that string so as to
529 * return it in response to a LXC_COMMAND_CGROUP query.
531 * Note the path is relative to cgroup mounts. E.g. if the freezer subsystem
532 * is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the
533 * freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/.
535 * XXX This should probably be locked globally
537 * Races won't be detrimental; you'll just end up with leftover unused cgroups
539 char *lxc_cgroup_path_create(const char *lxcgroup
, const char *name
)
542 char *retpath
, path
[MAXPATHLEN
];
545 struct mntent
*mntent
;
547 if (create_lxcgroups(lxcgroup
) < 0)
551 file
= setmntent(MTAB
, "r");
553 SYSERROR("failed to open %s", MTAB
);
558 snprintf(tail
, 12, "-%d", i
);
562 while ((mntent
= getmntent(file
))) {
564 if (strcmp(mntent
->mnt_type
, "cgroup"))
566 if (!mount_has_subsystem(mntent
))
569 /* find unused mnt_dir + lxcgroup + name + -$i */
570 ret
= snprintf(path
, MAXPATHLEN
, "%s/%s/%s%s", mntent
->mnt_dir
,
571 lxcgroup
? lxcgroup
: "lxc", name
, tail
);
572 if (ret
< 0 || ret
>= MAXPATHLEN
)
575 if (access(path
, F_OK
) == 0) goto next
;
577 if (mkdir(path
, 0755)) {
578 ERROR("Error creating cgroups");
586 // print out the cgpath part
587 ret
= snprintf(path
, MAXPATHLEN
, "%s/%s%s",
588 lxcgroup
? lxcgroup
: "lxc", name
, tail
);
589 if (ret
< 0 || ret
>= MAXPATHLEN
) // can't happen
592 retpath
= strdup(path
);
606 int lxc_cgroup_enter(const char *cgpath
, pid_t pid
)
608 char path
[MAXPATHLEN
];
609 FILE *file
= NULL
, *fout
;
610 struct mntent
*mntent
;
613 file
= setmntent(MTAB
, "r");
615 SYSERROR("failed to open %s", MTAB
);
619 while ((mntent
= getmntent(file
))) {
620 if (strcmp(mntent
->mnt_type
, "cgroup"))
622 if (!mount_has_subsystem(mntent
))
624 ret
= snprintf(path
, MAXPATHLEN
, "%s/%s/tasks",
625 mntent
->mnt_dir
, cgpath
);
626 if (ret
< 0 || ret
>= MAXPATHLEN
) {
627 ERROR("entering cgroup");
630 fout
= fopen(path
, "w");
632 ERROR("entering cgroup");
635 fprintf(fout
, "%d\n", (int)pid
);
645 int recursive_rmdir(char *dirname
)
647 struct dirent dirent
, *direntp
;
650 char pathname
[MAXPATHLEN
];
652 dir
= opendir(dirname
);
654 WARN("failed to open directory: %m");
658 while (!readdir_r(dir
, &dirent
, &direntp
)) {
665 if (!strcmp(direntp
->d_name
, ".") ||
666 !strcmp(direntp
->d_name
, ".."))
669 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
670 if (rc
< 0 || rc
>= MAXPATHLEN
) {
671 ERROR("pathname too long");
674 ret
= stat(pathname
, &mystat
);
677 if (S_ISDIR(mystat
.st_mode
))
678 recursive_rmdir(pathname
);
681 ret
= rmdir(dirname
);
684 ERROR("failed to close directory");
690 static int lxc_one_cgroup_destroy(struct mntent
*mntent
, const char *cgpath
)
692 char cgname
[MAXPATHLEN
];
693 char *cgmnt
= mntent
->mnt_dir
;
696 rc
= snprintf(cgname
, MAXPATHLEN
, "%s/%s", cgmnt
, cgpath
);
697 if (rc
< 0 || rc
>= MAXPATHLEN
) {
698 ERROR("name too long");
701 DEBUG("destroying %s\n", cgname
);
702 if (recursive_rmdir(cgname
)) {
703 SYSERROR("failed to remove cgroup '%s'", cgname
);
707 DEBUG("'%s' unlinked", cgname
);
713 * for each mounted cgroup, destroy the cgroup for the container
715 int lxc_cgroup_destroy(const char *cgpath
)
717 struct mntent
*mntent
;
721 file
= setmntent(MTAB
, "r");
723 SYSERROR("failed to open %s", MTAB
);
727 while ((mntent
= getmntent(file
))) {
728 if (strcmp(mntent
->mnt_type
, "cgroup"))
730 if (!mount_has_subsystem(mntent
))
733 err
= lxc_one_cgroup_destroy(mntent
, cgpath
);
734 if (err
) // keep trying to clean up the others
742 int lxc_cgroup_attach(pid_t pid
, const char *name
, const char *lxcpath
)
746 if (lxc_get_cgpath(&dirpath
, NULL
, name
, lxcpath
) < 0) {
747 ERROR("Error getting cgroup for container %s: %s", lxcpath
, name
);
750 INFO("joining pid %d to cgroup %s", pid
, dirpath
);
752 return lxc_cgroup_enter(dirpath
, pid
);