]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cgmanager.c
2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/inotify.h>
38 #include <sys/mount.h>
39 #include <netinet/in.h>
54 lxc_log_define(lxc_cgmanager
, lxc
);
56 #include <nih-dbus/dbus_connection.h>
57 #include <cgmanager-client/cgmanager-client.h>
58 #include <nih/alloc.h>
59 #include <nih/error.h>
60 #include <nih/string.h>
61 NihDBusProxy
*cgroup_manager
= NULL
;
63 extern struct cgroup_ops
*active_cg_ops
;
64 bool cgmanager_initialized
= false;
65 bool use_cgmanager
= true;
66 static struct cgroup_ops cgmanager_ops
;
67 static int nr_subsystems
;
68 static char **subsystems
;
70 bool lxc_init_cgmanager(void);
71 static void cgmanager_disconnected(DBusConnection
*connection
)
73 WARN("Cgroup manager connection was terminated");
74 cgroup_manager
= NULL
;
75 cgmanager_initialized
= false;
76 if (lxc_init_cgmanager()) {
77 cgmanager_initialized
= true;
78 INFO("New cgroup manager connection was opened");
82 static int send_creds(int sock
, int rpid
, int ruid
, int rgid
)
84 struct msghdr msg
= { 0 };
92 char cmsgbuf
[CMSG_SPACE(sizeof(cred
))];
96 msg
.msg_control
= cmsgbuf
;
97 msg
.msg_controllen
= sizeof(cmsgbuf
);
99 cmsg
= CMSG_FIRSTHDR(&msg
);
100 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
101 cmsg
->cmsg_level
= SOL_SOCKET
;
102 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
103 memcpy(CMSG_DATA(cmsg
), &cred
, sizeof(cred
));
109 iov
.iov_len
= sizeof(buf
);
113 if (sendmsg(sock
, &msg
, 0) < 0)
118 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
119 bool lxc_init_cgmanager(void)
121 DBusError dbus_error
;
122 DBusConnection
*connection
;
123 dbus_error_init(&dbus_error
);
125 connection
= nih_dbus_connect(CGMANAGER_DBUS_SOCK
, cgmanager_disconnected
);
128 nerr
= nih_error_get();
129 ERROR("Error opening cgmanager connection at %s: %s", CGMANAGER_DBUS_SOCK
,
132 dbus_error_free(&dbus_error
);
135 dbus_connection_set_exit_on_disconnect(connection
, FALSE
);
136 dbus_error_free(&dbus_error
);
137 cgroup_manager
= nih_dbus_proxy_new(NULL
, connection
,
139 "/org/linuxcontainers/cgmanager", NULL
, NULL
);
140 dbus_connection_unref(connection
);
141 if (!cgroup_manager
) {
143 nerr
= nih_error_get();
144 ERROR("Error opening cgmanager proxy: %s", nerr
->message
);
148 active_cg_ops
= &cgmanager_ops
;
149 // force fd passing negotiation
150 if (cgmanager_ping_sync(NULL
, cgroup_manager
, 0) != 0) {
152 nerr
= nih_error_get();
153 ERROR("Error pinging cgroup manager: %s", nerr
->message
);
159 static bool lxc_cgmanager_create(const char *controller
, const char *cgroup_path
, int32_t *existed
)
161 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
162 cgroup_path
, existed
) != 0) {
164 nerr
= nih_error_get();
165 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
167 ERROR("Failed to create %s:%s", controller
, cgroup_path
);
175 const char *controller
;
176 const char *cgroup_path
;
180 static int do_chown_cgroup(const char *controller
, const char *cgroup_path
,
183 int sv
[2] = {-1, -1}, optval
= 1, ret
= -1;
186 uid_t caller_nsuid
= get_ns_uid(origuid
);
188 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sv
) < 0) {
189 SYSERROR("Error creating socketpair");
192 if (setsockopt(sv
[1], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
193 SYSERROR("setsockopt failed");
196 if (setsockopt(sv
[0], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
197 SYSERROR("setsockopt failed");
200 if ( cgmanager_chown_scm_sync(NULL
, cgroup_manager
, controller
,
201 cgroup_path
, sv
[1]) != 0) {
203 nerr
= nih_error_get();
204 ERROR("call to cgmanager_chown_scm_sync failed: %s", nerr
->message
);
208 /* now send credentials */
212 FD_SET(sv
[0], &rfds
);
213 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
214 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
217 if (read(sv
[0], &buf
, 1) != 1) {
218 ERROR("Error getting reply from server over socketpair");
221 if (send_creds(sv
[0], getpid(), getuid(), getgid())) {
222 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
226 FD_SET(sv
[0], &rfds
);
227 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
228 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
231 if (read(sv
[0], &buf
, 1) != 1) {
232 ERROR("Error getting reply from server over socketpair");
235 if (send_creds(sv
[0], getpid(), caller_nsuid
, 0)) {
236 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
240 FD_SET(sv
[0], &rfds
);
241 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
242 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
245 ret
= read(sv
[0], buf
, 1);
249 if (ret
== 1 && *buf
== '1')
254 static int chown_cgroup_wrapper(void *data
)
256 struct chown_data
*arg
= data
;
258 if (setresgid(0,0,0) < 0)
259 SYSERROR("Failed to setgid to 0");
260 if (setresuid(0,0,0) < 0)
261 SYSERROR("Failed to setuid to 0");
262 if (setgroups(0, NULL
) < 0)
263 SYSERROR("Failed to clear groups");
264 return do_chown_cgroup(arg
->controller
, arg
->cgroup_path
, arg
->origuid
);
267 static bool chown_cgroup(const char *controller
, const char *cgroup_path
,
268 struct lxc_conf
*conf
)
270 struct chown_data data
;
272 if (lxc_list_empty(&conf
->id_map
))
273 /* If there's no mapping then we don't need to chown */
276 data
.controller
= controller
;
277 data
.cgroup_path
= cgroup_path
;
278 data
.origuid
= geteuid();
280 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
) < 0) {
281 ERROR("Error requesting cgroup chown in new namespace");
287 #define CG_REMOVE_RECURSIVE 1
288 static void cgm_remove_cgroup(const char *controller
, const char *path
)
291 if ( cgmanager_remove_sync(NULL
, cgroup_manager
, controller
,
292 path
, CG_REMOVE_RECURSIVE
, &existed
) != 0) {
294 nerr
= nih_error_get();
295 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
297 ERROR("Error removing %s:%s", controller
, path
);
300 INFO("cgroup removal attempt: %s:%s did not exist", controller
, path
);
303 static void cgm_destroy(struct lxc_handler
*handler
)
305 char *cgroup_path
= handler
->cgroup_info
->data
;
311 for (i
= 0; i
< nr_subsystems
; i
++)
312 cgm_remove_cgroup(subsystems
[i
], cgroup_path
);
315 handler
->cgroup_info
->data
= NULL
;
319 * remove all the cgroups created
321 static inline void cleanup_cgroups(char *path
)
324 for (i
= 0; i
< nr_subsystems
; i
++)
325 cgm_remove_cgroup(subsystems
[i
], path
);
328 static inline bool cgm_create(struct lxc_handler
*handler
)
330 int i
, index
=0, baselen
, ret
;
332 char result
[MAXPATHLEN
], *tmp
;
333 char *cgroup_path
= handler
->cgroup_info
->data
;
335 // XXX we should send a hint to the cgmanager that when these
336 // cgroups become empty they should be deleted. Requires a cgmanager
339 memset(result
, 0, MAXPATHLEN
);
340 tmp
= lxc_string_replace("%n", handler
->name
, handler
->cgroup_info
->cgroup_pattern
);
343 if (strlen(tmp
) > MAXPATHLEN
)
346 baselen
= strlen(result
);
352 if (index
== 100) { // turn this into a warn later
353 ERROR("cgroup error? 100 cgroups with this name already running");
357 ret
= snprintf(result
+baselen
, MAXPATHLEN
-baselen
, "-%d", index
);
358 if (ret
< 0 || ret
>= MAXPATHLEN
-baselen
)
362 for (i
= 0; i
< nr_subsystems
; i
++) {
363 if (!lxc_cgmanager_create(subsystems
[i
], tmp
, &existed
)) {
364 ERROR("Error creating cgroup %s:%s", subsystems
[i
], result
);
365 cleanup_cgroups(tmp
);
372 cgroup_path
= strdup(tmp
);
374 cleanup_cgroups(tmp
);
377 handler
->cgroup_info
->data
= cgroup_path
;
380 cleanup_cgroups(tmp
);
386 * Use the cgmanager to move a task into a cgroup for a particular
388 * All the subsystems in this hierarchy are co-mounted, so we only
389 * need to transition the task into one of the cgroups
391 static bool lxc_cgmanager_enter(pid_t pid
, const char *controller
,
392 const char *cgroup_path
)
394 if (cgmanager_move_pid_sync(NULL
, cgroup_manager
, controller
,
395 cgroup_path
, pid
) != 0) {
397 nerr
= nih_error_get();
398 ERROR("call to cgmanager_move_pid_sync failed: %s", nerr
->message
);
405 static bool do_cgm_enter(pid_t pid
, const char *cgroup_path
)
409 for (i
= 0; i
< nr_subsystems
; i
++) {
410 if (!lxc_cgmanager_enter(pid
, subsystems
[i
], cgroup_path
))
416 static inline bool cgm_enter(struct lxc_handler
*handler
)
418 char *cgroup_path
= handler
->cgroup_info
->data
;
419 return do_cgm_enter(handler
->pid
, cgroup_path
);
422 static char *cgm_get_cgroup(struct lxc_handler
*handler
, const char *subsystem
)
424 char *cgroup_path
= handler
->cgroup_info
->data
;
428 int cgm_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
430 char *result
, *controller
, *key
, *cgroup
;
433 controller
= alloca(strlen(filename
)+1);
434 strcpy(controller
, filename
);
435 key
= strchr(controller
, '.');
440 /* use the command interface to look for the cgroup */
441 cgroup
= lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
444 if (cgmanager_get_value_sync(NULL
, cgroup_manager
, controller
, cgroup
, filename
, &result
) != 0) {
446 * must consume the nih error
447 * However don't print out an error as the key may simply not exist
451 nerr
= nih_error_get();
457 newlen
= strlen(result
);
459 // user queries the size
464 strncpy(value
, result
, len
);
468 } else if (newlen
+1 < len
) {
469 // cgmanager doesn't add eol to last entry
470 value
[newlen
++] = '\n';
471 value
[newlen
] = '\0';
477 static int cgm_do_set(const char *controller
, const char *file
,
478 const char *cgroup
, const char *value
)
481 ret
= cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
,
482 cgroup
, file
, value
);
485 nerr
= nih_error_get();
486 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
488 ERROR("Error setting cgroup %s limit %s", file
, cgroup
);
493 int cgm_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
495 char *controller
, *key
, *cgroup
;
498 controller
= alloca(strlen(filename
)+1);
499 strcpy(controller
, filename
);
500 key
= strchr(controller
, '.');
505 /* use the command interface to look for the cgroup */
506 cgroup
= lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
508 ERROR("Failed to get cgroup for controller %s for %s:%s",
509 controller
, lxcpath
, name
);
512 ret
= cgm_do_set(controller
, filename
, cgroup
, value
);
517 static bool collect_subsytems(void)
519 char *line
= NULL
, *tab1
;
523 if (subsystems
) // already initialized
526 f
= fopen_cloexec("/proc/cgroups", "r");
529 while (getline(&line
, &sz
, f
) != -1) {
535 tab1
= strchr(line
, '\t');
539 tmp
= realloc(subsystems
, (nr_subsystems
+1)*sizeof(char *));
544 tmp
[nr_subsystems
] = strdup(line
);
545 if (!tmp
[nr_subsystems
])
551 if (!nr_subsystems
) {
552 ERROR("No cgroup subsystems found");
560 for (i
= 0; i
< nr_subsystems
; i
++)
568 static inline bool cgm_init(struct lxc_handler
*handler
)
570 return collect_subsytems();
573 static bool cgm_unfreeze_fromhandler(struct lxc_handler
*handler
)
575 char *cgroup_path
= handler
->cgroup_info
->data
;
577 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, "freezer", cgroup_path
,
578 "freezer.state", "THAWED") != 0) {
580 nerr
= nih_error_get();
581 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
583 ERROR("Error unfreezing %s", cgroup_path
);
589 static bool setup_limits(struct lxc_handler
*h
, bool do_devices
)
591 struct lxc_list
*iterator
;
592 struct lxc_cgroup
*cg
;
594 struct lxc_list
*cgroup_settings
= &h
->conf
->cgroup
;
595 char *cgroup_path
= h
->cgroup_info
->data
;
597 if (lxc_list_empty(cgroup_settings
))
600 lxc_list_for_each(iterator
, cgroup_settings
) {
601 char controller
[100], *p
;
603 if (do_devices
!= !strncmp("devices", cg
->subsystem
, 7))
605 if (strlen(cg
->subsystem
) > 100) // i smell a rat
607 strcpy(controller
, cg
->subsystem
);
608 p
= strchr(controller
, '.');
611 if (cgm_do_set(controller
, cg
->subsystem
, cgroup_path
613 ERROR("Error setting %s to %s for %s\n",
614 cg
->subsystem
, cg
->value
, h
->name
);
618 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
622 INFO("cgroup limits have been setup");
627 static bool cgm_setup_limits(struct lxc_handler
*handler
, bool with_devices
)
629 return setup_limits(handler
, with_devices
);
632 static bool cgm_chown(struct lxc_handler
*handler
)
634 char *cgroup_path
= handler
->cgroup_info
->data
;
637 for (i
= 0; i
< nr_subsystems
; i
++) {
638 if (!chown_cgroup(subsystems
[i
], cgroup_path
, handler
->conf
))
639 WARN("Failed to chown %s:%s to container root",
640 subsystems
[i
], cgroup_path
);
646 * TODO: this should be re-written to use the get_config_item("lxc.id_map")
647 * cmd api instead of getting the idmap from c->lxc_conf. The reason is
648 * that the id_maps may be different if the container was started with a
650 * The reason I'm punting on that is because we'll need to parse the
653 static bool cgm_attach(const char *name
, const char *lxcpath
, pid_t pid
)
657 struct lxc_container
*c
;
659 c
= lxc_container_new(name
, lxcpath
);
661 ERROR("Could not load container %s:%s", lxcpath
, name
);
664 if (!collect_subsytems()) {
665 ERROR("Error collecting cgroup subsystems");
668 // cgm_create makes sure that we have the same cgroup name for all
669 // subsystems, so since this is a slow command over the cmd socket,
670 // just get the cgroup name for the first one.
671 cgroup
= lxc_cmd_get_cgroup_path(name
, lxcpath
, subsystems
[0]);
673 ERROR("Failed to get cgroup for controller %s", subsystems
[0]);
677 if (!(pass
= do_cgm_enter(pid
, cgroup
)))
678 ERROR("Failed to enter group %s", cgroup
);
682 lxc_container_put(c
);
686 static bool cgm_bind_dir(const char *root
, const char *dirname
)
688 nih_local
char *cgpath
= NULL
;
690 /* /sys should have been mounted by now */
691 cgpath
= NIH_MUST( nih_strdup(NULL
, root
) );
692 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/sys/fs/cgroup") );
694 if (!dir_exists(cgpath
)) {
695 ERROR("%s does not exist", cgpath
);
699 /* mount a tmpfs there so we can create subdirs */
700 if (mount("cgroup", cgpath
, "tmpfs", 0, "size=10000")) {
701 SYSERROR("Failed to mount tmpfs at %s", cgpath
);
704 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/cgmanager") );
706 if (mkdir(cgpath
, 0755) < 0) {
707 SYSERROR("Failed to create %s", cgpath
);
711 if (mount(dirname
, cgpath
, "none", MS_BIND
, 0)) {
712 SYSERROR("Failed to bind mount %s to %s", dirname
, cgpath
);
721 * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
722 * /sys/fs/cgroup/cgmanager/ in the container.
723 * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
726 #define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
727 #define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
728 static bool cgm_mount_cgroup(const char *root
,
729 struct lxc_cgroup_info
*cgroup_info
, int type
)
731 if (dir_exists(CGMANAGER_LOWER_SOCK
))
732 return cgm_bind_dir(root
, CGMANAGER_LOWER_SOCK
);
733 if (dir_exists(CGMANAGER_UPPER_SOCK
))
734 return cgm_bind_dir(root
, CGMANAGER_UPPER_SOCK
);
735 // Host doesn't have cgmanager running? Then how did we get here?
739 static struct cgroup_ops cgmanager_ops
= {
740 .destroy
= cgm_destroy
,
742 .create
= cgm_create
,
744 .create_legacy
= NULL
,
745 .get_cgroup
= cgm_get_cgroup
,
748 .unfreeze_fromhandler
= cgm_unfreeze_fromhandler
,
749 .setup_limits
= cgm_setup_limits
,
752 .attach
= cgm_attach
,
753 .mount_cgroup
= cgm_mount_cgroup
,