]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cgmanager.c
2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/inotify.h>
38 #include <sys/mount.h>
39 #include <netinet/in.h>
54 lxc_log_define(lxc_cgmanager
, lxc
);
56 #include <nih-dbus/dbus_connection.h>
57 #include <cgmanager/cgmanager-client.h>
58 #include <nih/alloc.h>
59 #include <nih/error.h>
60 #include <nih/string.h>
65 const char *cgroup_pattern
;
68 static NihDBusProxy
*cgroup_manager
= NULL
;
69 static struct cgroup_ops cgmanager_ops
;
70 static int nr_subsystems
;
71 static char **subsystems
;
73 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
74 static void cgm_dbus_disconnected(DBusConnection
*connection
);
75 static bool cgm_dbus_connect(void)
78 DBusConnection
*connection
;
79 dbus_error_init(&dbus_error
);
81 connection
= nih_dbus_connect(CGMANAGER_DBUS_SOCK
, cgm_dbus_disconnected
);
84 nerr
= nih_error_get();
85 DEBUG("Unable to open cgmanager connection at %s: %s", CGMANAGER_DBUS_SOCK
,
88 dbus_error_free(&dbus_error
);
91 dbus_connection_set_exit_on_disconnect(connection
, FALSE
);
92 dbus_error_free(&dbus_error
);
93 cgroup_manager
= nih_dbus_proxy_new(NULL
, connection
,
95 "/org/linuxcontainers/cgmanager", NULL
, NULL
);
96 dbus_connection_unref(connection
);
97 if (!cgroup_manager
) {
99 nerr
= nih_error_get();
100 ERROR("Error opening cgmanager proxy: %s", nerr
->message
);
105 // force fd passing negotiation
106 if (cgmanager_ping_sync(NULL
, cgroup_manager
, 0) != 0) {
108 nerr
= nih_error_get();
109 ERROR("Error pinging cgroup manager: %s", nerr
->message
);
115 static void cgm_dbus_disconnect(void)
117 nih_free(cgroup_manager
);
118 cgroup_manager
= NULL
;
121 static void cgm_dbus_disconnected(DBusConnection
*connection
)
123 WARN("Cgroup manager connection was terminated");
124 cgroup_manager
= NULL
;
125 if (cgm_dbus_connect()) {
126 INFO("New cgroup manager connection was opened");
128 WARN("Cgroup manager unable to re-open connection");
132 static int send_creds(int sock
, int rpid
, int ruid
, int rgid
)
134 struct msghdr msg
= { 0 };
136 struct cmsghdr
*cmsg
;
137 struct ucred cred
= {
142 char cmsgbuf
[CMSG_SPACE(sizeof(cred
))];
146 msg
.msg_control
= cmsgbuf
;
147 msg
.msg_controllen
= sizeof(cmsgbuf
);
149 cmsg
= CMSG_FIRSTHDR(&msg
);
150 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
151 cmsg
->cmsg_level
= SOL_SOCKET
;
152 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
153 memcpy(CMSG_DATA(cmsg
), &cred
, sizeof(cred
));
159 iov
.iov_len
= sizeof(buf
);
163 if (sendmsg(sock
, &msg
, 0) < 0)
168 static bool lxc_cgmanager_create(const char *controller
, const char *cgroup_path
, int32_t *existed
)
170 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
171 cgroup_path
, existed
) != 0) {
173 nerr
= nih_error_get();
174 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
176 ERROR("Failed to create %s:%s", controller
, cgroup_path
);
183 static bool lxc_cgmanager_escape(void)
187 for (i
= 0; i
< nr_subsystems
; i
++) {
188 if (cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
189 subsystems
[i
], "/", me
) != 0) {
191 nerr
= nih_error_get();
192 ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: %s",
193 subsystems
[i
], nerr
->message
);
203 const char *controller
;
204 const char *cgroup_path
;
208 static int do_chown_cgroup(const char *controller
, const char *cgroup_path
,
211 int sv
[2] = {-1, -1}, optval
= 1, ret
= -1;
214 uid_t caller_nsuid
= get_ns_uid(origuid
);
216 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sv
) < 0) {
217 SYSERROR("Error creating socketpair");
220 if (setsockopt(sv
[1], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
221 SYSERROR("setsockopt failed");
224 if (setsockopt(sv
[0], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
225 SYSERROR("setsockopt failed");
228 if ( cgmanager_chown_scm_sync(NULL
, cgroup_manager
, controller
,
229 cgroup_path
, sv
[1]) != 0) {
231 nerr
= nih_error_get();
232 ERROR("call to cgmanager_chown_scm_sync failed: %s", nerr
->message
);
236 /* now send credentials */
240 FD_SET(sv
[0], &rfds
);
241 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
242 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
245 if (read(sv
[0], &buf
, 1) != 1) {
246 ERROR("Error getting reply from server over socketpair");
249 if (send_creds(sv
[0], getpid(), getuid(), getgid())) {
250 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
254 FD_SET(sv
[0], &rfds
);
255 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
256 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
259 if (read(sv
[0], &buf
, 1) != 1) {
260 ERROR("Error getting reply from server over socketpair");
263 if (send_creds(sv
[0], getpid(), caller_nsuid
, 0)) {
264 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
268 FD_SET(sv
[0], &rfds
);
269 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
270 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
273 ret
= read(sv
[0], buf
, 1);
277 if (ret
== 1 && *buf
== '1')
282 static int chown_cgroup_wrapper(void *data
)
284 struct chown_data
*arg
= data
;
286 if (setresgid(0,0,0) < 0)
287 SYSERROR("Failed to setgid to 0");
288 if (setresuid(0,0,0) < 0)
289 SYSERROR("Failed to setuid to 0");
290 if (setgroups(0, NULL
) < 0)
291 SYSERROR("Failed to clear groups");
292 return do_chown_cgroup(arg
->controller
, arg
->cgroup_path
, arg
->origuid
);
295 static bool lxc_cgmanager_chmod(const char *controller
,
296 const char *cgroup_path
, const char *file
, int mode
)
298 if (cgmanager_chmod_sync(NULL
, cgroup_manager
, controller
,
299 cgroup_path
, file
, mode
) != 0) {
301 nerr
= nih_error_get();
302 ERROR("call to cgmanager_chmod_sync failed: %s", nerr
->message
);
309 static bool chown_cgroup(const char *controller
, const char *cgroup_path
,
310 struct lxc_conf
*conf
)
312 struct chown_data data
;
314 if (lxc_list_empty(&conf
->id_map
))
315 /* If there's no mapping then we don't need to chown */
318 data
.controller
= controller
;
319 data
.cgroup_path
= cgroup_path
;
320 data
.origuid
= geteuid();
322 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
) < 0) {
323 ERROR("Error requesting cgroup chown in new namespace");
327 /* now chmod 775 the directory else the container cannot create cgroups */
328 if (!lxc_cgmanager_chmod(controller
, cgroup_path
, "", 0775))
330 if (!lxc_cgmanager_chmod(controller
, cgroup_path
, "tasks", 0775))
332 if (!lxc_cgmanager_chmod(controller
, cgroup_path
, "cgroup.procs", 0775))
337 #define CG_REMOVE_RECURSIVE 1
338 static void cgm_remove_cgroup(const char *controller
, const char *path
)
341 if ( cgmanager_remove_sync(NULL
, cgroup_manager
, controller
,
342 path
, CG_REMOVE_RECURSIVE
, &existed
) != 0) {
344 nerr
= nih_error_get();
345 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
347 ERROR("Error removing %s:%s", controller
, path
);
350 INFO("cgroup removal attempt: %s:%s did not exist", controller
, path
);
353 static void *cgm_init(const char *name
)
357 d
= malloc(sizeof(*d
));
361 memset(d
, 0, sizeof(*d
));
362 d
->name
= strdup(name
);
366 /* if we are running as root, use system cgroup pattern, otherwise
367 * just create a cgroup under the current one. But also fall back to
368 * that if for some reason reading the configuration fails and no
369 * default value is available
372 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
373 if (!d
->cgroup_pattern
)
374 d
->cgroup_pattern
= "%n";
382 static void cgm_destroy(void *hdata
)
384 struct cgm_data
*d
= hdata
;
389 for (i
= 0; i
< nr_subsystems
; i
++)
390 cgm_remove_cgroup(subsystems
[i
], d
->cgroup_path
);
394 free(d
->cgroup_path
);
399 * remove all the cgroups created
401 static inline void cleanup_cgroups(char *path
)
404 for (i
= 0; i
< nr_subsystems
; i
++)
405 cgm_remove_cgroup(subsystems
[i
], path
);
408 static inline bool cgm_create(void *hdata
)
410 struct cgm_data
*d
= hdata
;
411 int i
, index
=0, baselen
, ret
;
413 char result
[MAXPATHLEN
], *tmp
, *cgroup_path
;
417 // XXX we should send a hint to the cgmanager that when these
418 // cgroups become empty they should be deleted. Requires a cgmanager
421 memset(result
, 0, MAXPATHLEN
);
422 tmp
= lxc_string_replace("%n", d
->name
, d
->cgroup_pattern
);
425 if (strlen(tmp
) > MAXPATHLEN
)
428 baselen
= strlen(result
);
434 if (index
== 100) { // turn this into a warn later
435 ERROR("cgroup error? 100 cgroups with this name already running");
439 ret
= snprintf(result
+baselen
, MAXPATHLEN
-baselen
, "-%d", index
);
440 if (ret
< 0 || ret
>= MAXPATHLEN
-baselen
)
444 for (i
= 0; i
< nr_subsystems
; i
++) {
445 if (!lxc_cgmanager_create(subsystems
[i
], tmp
, &existed
)) {
446 ERROR("Error creating cgroup %s:%s", subsystems
[i
], result
);
447 cleanup_cgroups(tmp
);
454 cgroup_path
= strdup(tmp
);
456 cleanup_cgroups(tmp
);
459 d
->cgroup_path
= cgroup_path
;
462 cleanup_cgroups(tmp
);
468 * Use the cgmanager to move a task into a cgroup for a particular
470 * All the subsystems in this hierarchy are co-mounted, so we only
471 * need to transition the task into one of the cgroups
473 static bool lxc_cgmanager_enter(pid_t pid
, const char *controller
,
474 const char *cgroup_path
)
476 if (cgmanager_move_pid_sync(NULL
, cgroup_manager
, controller
,
477 cgroup_path
, pid
) != 0) {
479 nerr
= nih_error_get();
480 ERROR("call to cgmanager_move_pid_sync failed: %s", nerr
->message
);
487 static bool do_cgm_enter(pid_t pid
, const char *cgroup_path
)
491 for (i
= 0; i
< nr_subsystems
; i
++) {
492 if (!lxc_cgmanager_enter(pid
, subsystems
[i
], cgroup_path
))
498 static inline bool cgm_enter(void *hdata
, pid_t pid
)
500 struct cgm_data
*d
= hdata
;
502 if (!d
|| !d
->cgroup_path
)
504 return do_cgm_enter(pid
, d
->cgroup_path
);
507 static const char *cgm_get_cgroup(void *hdata
, const char *subsystem
)
509 struct cgm_data
*d
= hdata
;
511 if (!d
|| !d
->cgroup_path
)
513 return d
->cgroup_path
;
516 static int cgm_get_nrtasks(void *hdata
)
518 struct cgm_data
*d
= hdata
;
522 if (!d
|| !d
->cgroup_path
)
525 if (cgmanager_get_tasks_sync(NULL
, cgroup_manager
, subsystems
[0],
526 d
->cgroup_path
, &pids
, &pids_len
) != 0) {
528 nerr
= nih_error_get();
529 ERROR("call to cgmanager_get_tasks_sync failed: %s", nerr
->message
);
537 static int cgm_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
539 char *result
, *controller
, *key
, *cgroup
;
542 controller
= alloca(strlen(filename
)+1);
543 strcpy(controller
, filename
);
544 key
= strchr(controller
, '.');
549 /* use the command interface to look for the cgroup */
550 cgroup
= lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
553 if (cgmanager_get_value_sync(NULL
, cgroup_manager
, controller
, cgroup
, filename
, &result
) != 0) {
555 * must consume the nih error
556 * However don't print out an error as the key may simply not exist
560 nerr
= nih_error_get();
566 newlen
= strlen(result
);
568 // user queries the size
573 strncpy(value
, result
, len
);
577 } else if (newlen
+1 < len
) {
578 // cgmanager doesn't add eol to last entry
579 value
[newlen
++] = '\n';
580 value
[newlen
] = '\0';
586 static int cgm_do_set(const char *controller
, const char *file
,
587 const char *cgroup
, const char *value
)
590 ret
= cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
,
591 cgroup
, file
, value
);
594 nerr
= nih_error_get();
595 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
597 ERROR("Error setting cgroup %s limit %s", file
, cgroup
);
602 static int cgm_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
604 char *controller
, *key
, *cgroup
;
607 controller
= alloca(strlen(filename
)+1);
608 strcpy(controller
, filename
);
609 key
= strchr(controller
, '.');
614 /* use the command interface to look for the cgroup */
615 cgroup
= lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
617 ERROR("Failed to get cgroup for controller %s for %s:%s",
618 controller
, lxcpath
, name
);
621 ret
= cgm_do_set(controller
, filename
, cgroup
, value
);
626 static void free_subsystems(void)
630 for (i
= 0; i
< nr_subsystems
; i
++)
637 static bool collect_subsytems(void)
639 char *line
= NULL
, *tab1
;
643 if (subsystems
) // already initialized
646 f
= fopen_cloexec("/proc/cgroups", "r");
649 while (getline(&line
, &sz
, f
) != -1) {
655 tab1
= strchr(line
, '\t');
659 tmp
= realloc(subsystems
, (nr_subsystems
+1)*sizeof(char *));
664 tmp
[nr_subsystems
] = strdup(line
);
665 if (!tmp
[nr_subsystems
])
671 if (!nr_subsystems
) {
672 ERROR("No cgroup subsystems found");
684 struct cgroup_ops
*cgm_ops_init(void)
686 if (!collect_subsytems())
688 if (!cgm_dbus_connect())
691 // root; try to escape to root cgroup
692 if (geteuid() == 0 && !lxc_cgmanager_escape())
695 return &cgmanager_ops
;
698 cgm_dbus_disconnect();
704 static bool cgm_unfreeze(void *hdata
)
706 struct cgm_data
*d
= hdata
;
708 if (!d
|| !d
->cgroup_path
)
711 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, "freezer", d
->cgroup_path
,
712 "freezer.state", "THAWED") != 0) {
714 nerr
= nih_error_get();
715 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
717 ERROR("Error unfreezing %s", d
->cgroup_path
);
723 static bool cgm_setup_limits(void *hdata
, struct lxc_list
*cgroup_settings
, bool do_devices
)
725 struct cgm_data
*d
= hdata
;
726 struct lxc_list
*iterator
;
727 struct lxc_cgroup
*cg
;
730 if (lxc_list_empty(cgroup_settings
))
733 if (!d
|| !d
->cgroup_path
)
736 lxc_list_for_each(iterator
, cgroup_settings
) {
737 char controller
[100], *p
;
739 if (do_devices
!= !strncmp("devices", cg
->subsystem
, 7))
741 if (strlen(cg
->subsystem
) > 100) // i smell a rat
743 strcpy(controller
, cg
->subsystem
);
744 p
= strchr(controller
, '.');
747 if (cgm_do_set(controller
, cg
->subsystem
, d
->cgroup_path
749 ERROR("Error setting %s to %s for %s",
750 cg
->subsystem
, cg
->value
, d
->name
);
754 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
758 INFO("cgroup limits have been setup");
763 static bool cgm_chown(void *hdata
, struct lxc_conf
*conf
)
765 struct cgm_data
*d
= hdata
;
768 if (!d
|| !d
->cgroup_path
)
770 for (i
= 0; i
< nr_subsystems
; i
++) {
771 if (!chown_cgroup(subsystems
[i
], d
->cgroup_path
, conf
))
772 WARN("Failed to chown %s:%s to container root",
773 subsystems
[i
], d
->cgroup_path
);
779 * TODO: this should be re-written to use the get_config_item("lxc.id_map")
780 * cmd api instead of getting the idmap from c->lxc_conf. The reason is
781 * that the id_maps may be different if the container was started with a
783 * The reason I'm punting on that is because we'll need to parse the
786 static bool cgm_attach(const char *name
, const char *lxcpath
, pid_t pid
)
790 struct lxc_container
*c
;
792 c
= lxc_container_new(name
, lxcpath
);
794 ERROR("Could not load container %s:%s", lxcpath
, name
);
797 if (!collect_subsytems()) {
798 ERROR("Error collecting cgroup subsystems");
801 // cgm_create makes sure that we have the same cgroup name for all
802 // subsystems, so since this is a slow command over the cmd socket,
803 // just get the cgroup name for the first one.
804 cgroup
= lxc_cmd_get_cgroup_path(name
, lxcpath
, subsystems
[0]);
806 ERROR("Failed to get cgroup for controller %s", subsystems
[0]);
810 if (!(pass
= do_cgm_enter(pid
, cgroup
)))
811 ERROR("Failed to enter group %s", cgroup
);
815 lxc_container_put(c
);
819 static bool cgm_bind_dir(const char *root
, const char *dirname
)
821 nih_local
char *cgpath
= NULL
;
823 /* /sys should have been mounted by now */
824 cgpath
= NIH_MUST( nih_strdup(NULL
, root
) );
825 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/sys/fs/cgroup") );
827 if (!dir_exists(cgpath
)) {
828 ERROR("%s does not exist", cgpath
);
832 /* mount a tmpfs there so we can create subdirs */
833 if (mount("cgroup", cgpath
, "tmpfs", 0, "size=10000,mode=755")) {
834 SYSERROR("Failed to mount tmpfs at %s", cgpath
);
837 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/cgmanager") );
839 if (mkdir(cgpath
, 0755) < 0) {
840 SYSERROR("Failed to create %s", cgpath
);
844 if (mount(dirname
, cgpath
, "none", MS_BIND
, 0)) {
845 SYSERROR("Failed to bind mount %s to %s", dirname
, cgpath
);
854 * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
855 * /sys/fs/cgroup/cgmanager/ in the container.
856 * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
859 #define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
860 #define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
861 static bool cgm_mount_cgroup(void *hdata
, const char *root
, int type
)
863 if (dir_exists(CGMANAGER_LOWER_SOCK
))
864 return cgm_bind_dir(root
, CGMANAGER_LOWER_SOCK
);
865 if (dir_exists(CGMANAGER_UPPER_SOCK
))
866 return cgm_bind_dir(root
, CGMANAGER_UPPER_SOCK
);
867 // Host doesn't have cgmanager running? Then how did we get here?
871 static struct cgroup_ops cgmanager_ops
= {
873 .destroy
= cgm_destroy
,
874 .create
= cgm_create
,
876 .create_legacy
= NULL
,
877 .get_cgroup
= cgm_get_cgroup
,
880 .unfreeze
= cgm_unfreeze
,
881 .setup_limits
= cgm_setup_limits
,
884 .attach
= cgm_attach
,
885 .mount_cgroup
= cgm_mount_cgroup
,
886 .nrtasks
= cgm_get_nrtasks
,