2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/inotify.h>
39 #include <sys/mount.h>
40 #include <netinet/in.h>
47 #include "namespace.h"
56 #define CGM_SUPPORTS_GET_ABS 3
57 #define CGM_SUPPORTS_NAMED 4
58 #define CGM_SUPPORTS_MULT_CONTROLLERS 10
61 lxc_log_define(lxc_cgmanager
, lxc
);
63 #include <nih-dbus/dbus_connection.h>
64 #include <cgmanager/cgmanager-client.h>
65 #include <nih/alloc.h>
66 #include <nih/error.h>
67 #include <nih/string.h>
72 const char *cgroup_pattern
;
75 static pthread_mutex_t cgm_mutex
= PTHREAD_MUTEX_INITIALIZER
;
77 static void lock_mutex(pthread_mutex_t
*l
)
81 if ((ret
= pthread_mutex_lock(l
)) != 0) {
82 fprintf(stderr
, "pthread_mutex_lock returned:%d %s\n", ret
, strerror(ret
));
87 static void unlock_mutex(pthread_mutex_t
*l
)
91 if ((ret
= pthread_mutex_unlock(l
)) != 0) {
92 fprintf(stderr
, "%s: pthread_mutex_unlock returned:%d %s\n",
93 __FILE__
, ret
, strerror(ret
));
100 lock_mutex(&cgm_mutex
);
103 void cgm_unlock(void)
105 unlock_mutex(&cgm_mutex
);
108 #ifdef HAVE_PTHREAD_ATFORK
109 __attribute__((constructor
))
110 static void process_lock_setup_atfork(void)
112 pthread_atfork(cgm_lock
, cgm_unlock
, cgm_unlock
);
116 static NihDBusProxy
*cgroup_manager
= NULL
;
117 static int32_t api_version
;
119 static struct cgroup_ops cgmanager_ops
;
120 static int nr_subsystems
;
121 static char **subsystems
, **subsystems_inone
;
122 static bool dbus_threads_initialized
= false;
123 static void cull_user_controllers(void);
125 static void cgm_dbus_disconnect(void)
127 if (cgroup_manager
) {
128 dbus_connection_flush(cgroup_manager
->connection
);
129 dbus_connection_close(cgroup_manager
->connection
);
130 nih_free(cgroup_manager
);
132 cgroup_manager
= NULL
;
136 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
137 static bool cgm_dbus_connect(void)
139 DBusError dbus_error
;
140 static DBusConnection
*connection
;
143 if (!dbus_threads_initialized
) {
144 /* tell dbus to do struct locking for thread safety */
145 dbus_threads_init_default();
146 dbus_threads_initialized
= true;
149 dbus_error_init(&dbus_error
);
151 connection
= dbus_connection_open_private(CGMANAGER_DBUS_SOCK
, &dbus_error
);
153 DEBUG("Failed opening dbus connection: %s: %s",
154 dbus_error
.name
, dbus_error
.message
);
155 dbus_error_free(&dbus_error
);
159 dbus_connection_set_exit_on_disconnect(connection
, FALSE
);
160 dbus_error_free(&dbus_error
);
161 cgroup_manager
= nih_dbus_proxy_new(NULL
, connection
,
163 "/org/linuxcontainers/cgmanager", NULL
, NULL
);
164 dbus_connection_unref(connection
);
165 if (!cgroup_manager
) {
167 nerr
= nih_error_get();
168 ERROR("Error opening cgmanager proxy: %s", nerr
->message
);
170 cgm_dbus_disconnect();
174 /* get the api version */
175 if (cgmanager_get_api_version_sync(NULL
, cgroup_manager
, &api_version
) != 0) {
177 nerr
= nih_error_get();
178 ERROR("Error cgroup manager api version: %s", nerr
->message
);
180 cgm_dbus_disconnect();
183 if (api_version
< CGM_SUPPORTS_NAMED
)
184 cull_user_controllers();
188 static bool cgm_all_controllers_same
;
191 * Check whether we can use "all" when talking to cgmanager.
192 * We check two things:
193 * 1. whether cgmanager is new enough to support this.
194 * 2. whether the task we are interested in is in the same
195 * cgroup for all controllers.
196 * In cgm_init (before an lxc-start) we care about our own
197 * cgroup. In cgm_attach, we care about the target task's
200 static void check_supports_multiple_controllers(pid_t pid
)
203 char *line
= NULL
, *prevpath
= NULL
;
207 cgm_all_controllers_same
= false;
210 sprintf(path
, "/proc/self/cgroup");
212 sprintf(path
, "/proc/%d/cgroup", pid
);
213 f
= fopen(path
, "r");
217 cgm_all_controllers_same
= true;
219 while (getline(&line
, &sz
, f
) != -1) {
220 /* file format: hierarchy:subsystems:group */
225 colon
= strchr(line
, ':');
228 colon
= strchr(colon
+1, ':');
233 prevpath
= alloca(strlen(colon
)+1);
234 strcpy(prevpath
, colon
);
237 if (strcmp(prevpath
, colon
) != 0) {
238 cgm_all_controllers_same
= false;
247 static int send_creds(int sock
, int rpid
, int ruid
, int rgid
)
249 struct msghdr msg
= { 0 };
251 struct cmsghdr
*cmsg
;
252 struct ucred cred
= {
257 char cmsgbuf
[CMSG_SPACE(sizeof(cred
))];
261 msg
.msg_control
= cmsgbuf
;
262 msg
.msg_controllen
= sizeof(cmsgbuf
);
264 cmsg
= CMSG_FIRSTHDR(&msg
);
265 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
266 cmsg
->cmsg_level
= SOL_SOCKET
;
267 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
268 memcpy(CMSG_DATA(cmsg
), &cred
, sizeof(cred
));
274 iov
.iov_len
= sizeof(buf
);
278 if (sendmsg(sock
, &msg
, 0) < 0)
283 static bool lxc_cgmanager_create(const char *controller
, const char *cgroup_path
, int32_t *existed
)
286 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
287 cgroup_path
, existed
) != 0) {
289 nerr
= nih_error_get();
290 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
292 ERROR("Failed to create %s:%s", controller
, cgroup_path
);
300 * Escape to the root cgroup if we are root, so that the container will
301 * be in "/lxc/c1" rather than "/user/..../c1"
302 * called internally with connection already open
304 static bool cgm_escape(void *hdata
)
306 bool ret
= true, cgm_needs_disconnect
= false;
307 pid_t me
= lxc_raw_getpid();
308 char **slist
= subsystems
;
311 if (!cgroup_manager
) {
312 if (!cgm_dbus_connect()) {
313 ERROR("Error connecting to cgroup manager");
316 cgm_needs_disconnect
= true;
320 if (cgm_all_controllers_same
)
321 slist
= subsystems_inone
;
323 for (i
= 0; slist
[i
]; i
++) {
324 if (cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
325 slist
[i
], "/", me
) != 0) {
327 nerr
= nih_error_get();
328 ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: %s",
329 slist
[i
], nerr
->message
);
336 if (cgm_needs_disconnect
)
337 cgm_dbus_disconnect();
342 static int cgm_num_hierarchies(void)
344 /* not implemented */
348 static bool cgm_get_hierarchies(int i
, char ***out
)
350 /* not implemented */
355 const char *cgroup_path
;
359 static int do_chown_cgroup(const char *controller
, const char *cgroup_path
,
362 int sv
[2] = {-1, -1}, optval
= 1, ret
= -1;
367 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sv
) < 0) {
368 SYSERROR("Error creating socketpair");
371 if (setsockopt(sv
[1], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
372 SYSERROR("setsockopt failed");
375 if (setsockopt(sv
[0], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
376 SYSERROR("setsockopt failed");
379 if ( cgmanager_chown_scm_sync(NULL
, cgroup_manager
, controller
,
380 cgroup_path
, sv
[1]) != 0) {
382 nerr
= nih_error_get();
383 ERROR("call to cgmanager_chown_scm_sync failed: %s", nerr
->message
);
387 /* now send credentials */
392 if (poll(&fds
, 1, -1) <= 0) {
393 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
396 if (read(sv
[0], &buf
, 1) != 1) {
397 ERROR("Error getting reply from server over socketpair");
401 pid_self
= lxc_raw_getpid();
402 if (send_creds(sv
[0], pid_self
, getuid(), getgid())) {
403 SYSERROR("Error sending pid over SCM_CREDENTIAL");
409 if (poll(&fds
, 1, -1) <= 0) {
410 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
413 if (read(sv
[0], &buf
, 1) != 1) {
414 ERROR("Error getting reply from server over socketpair");
417 if (send_creds(sv
[0], pid_self
, newuid
, 0)) {
418 SYSERROR("Error sending pid over SCM_CREDENTIAL");
424 if (poll(&fds
, 1, -1) <= 0) {
425 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
428 ret
= read(sv
[0], buf
, 1);
432 if (ret
== 1 && *buf
== '1')
437 static int chown_cgroup_wrapper(void *data
)
439 struct chown_data
*arg
= data
;
440 char **slist
= subsystems
;
444 if (setresgid(0,0,0) < 0)
445 SYSERROR("Failed to setgid to 0");
446 if (setresuid(0,0,0) < 0)
447 SYSERROR("Failed to setuid to 0");
448 if (setgroups(0, NULL
) < 0)
449 SYSERROR("Failed to clear groups");
450 cgm_dbus_disconnect();
451 if (!cgm_dbus_connect()) {
452 ERROR("Error connecting to cgroup manager");
455 destuid
= get_ns_uid(arg
->origuid
);
457 if (cgm_all_controllers_same
)
458 slist
= subsystems_inone
;
460 for (i
= 0; slist
[i
]; i
++) {
461 if (do_chown_cgroup(slist
[i
], arg
->cgroup_path
, destuid
) < 0) {
462 ERROR("Failed to chown %s:%s to container root",
463 slist
[i
], arg
->cgroup_path
);
469 cgm_dbus_disconnect();
473 /* Internal helper. Must be called with the cgmanager dbus socket open */
474 static bool lxc_cgmanager_chmod(const char *controller
,
475 const char *cgroup_path
, const char *file
, int mode
)
477 if (cgmanager_chmod_sync(NULL
, cgroup_manager
, controller
,
478 cgroup_path
, file
, mode
) != 0) {
480 nerr
= nih_error_get();
481 ERROR("call to cgmanager_chmod_sync failed: %s", nerr
->message
);
488 /* Internal helper. Must be called with the cgmanager dbus socket open */
489 static bool chown_cgroup(const char *cgroup_path
, struct lxc_conf
*conf
)
491 struct chown_data data
;
492 char **slist
= subsystems
;
495 if (lxc_list_empty(&conf
->id_map
))
496 /* If there's no mapping then we don't need to chown */
499 data
.cgroup_path
= cgroup_path
;
500 data
.origuid
= geteuid();
502 /* Unpriv users can't chown it themselves, so chown from
503 * a child namespace mapping both our own and the target uid
505 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
,
506 "chown_cgroup_wrapper") < 0) {
507 ERROR("Error requesting cgroup chown in new namespace");
512 * Now chmod 775 the directory else the container cannot create cgroups.
513 * This can't be done in the child namespace because it only group-owns
516 if (cgm_all_controllers_same
)
517 slist
= subsystems_inone
;
519 for (i
= 0; slist
[i
]; i
++) {
520 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "", 0775))
522 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "tasks", 0664))
524 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "cgroup.procs", 0664))
531 #define CG_REMOVE_RECURSIVE 1
532 /* Internal helper. Must be called with the cgmanager dbus socket open */
533 static void cgm_remove_cgroup(const char *controller
, const char *path
)
536 if ( cgmanager_remove_sync(NULL
, cgroup_manager
, controller
,
537 path
, CG_REMOVE_RECURSIVE
, &existed
) != 0) {
539 nerr
= nih_error_get();
540 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
542 ERROR("Error removing %s:%s", controller
, path
);
545 INFO("cgroup removal attempt: %s:%s did not exist", controller
, path
);
548 static void *cgm_init(struct lxc_handler
*handler
)
552 d
= malloc(sizeof(*d
));
556 if (!cgm_dbus_connect()) {
557 ERROR("Error connecting to cgroup manager");
561 memset(d
, 0, sizeof(*d
));
562 d
->name
= strdup(handler
->name
);
564 cgm_dbus_disconnect();
568 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
570 /* cgm_create immediately gets called so keep the connection open */
578 /* Called after a failed container startup */
579 static void cgm_destroy(void *hdata
, struct lxc_conf
*conf
)
581 struct cgm_data
*d
= hdata
;
582 char **slist
= subsystems
;
585 if (!d
|| !d
->cgroup_path
)
587 if (!cgm_dbus_connect()) {
588 ERROR("Error connecting to cgroup manager");
592 if (cgm_all_controllers_same
)
593 slist
= subsystems_inone
;
594 for (i
= 0; slist
[i
]; i
++)
595 cgm_remove_cgroup(slist
[i
], d
->cgroup_path
);
598 free(d
->cgroup_path
);
600 cgm_dbus_disconnect();
604 * remove all the cgroups created
605 * called internally with dbus connection open
607 static inline void cleanup_cgroups(char *path
)
610 char **slist
= subsystems
;
612 if (cgm_all_controllers_same
)
613 slist
= subsystems_inone
;
614 for (i
= 0; slist
[i
]; i
++)
615 cgm_remove_cgroup(slist
[i
], path
);
618 static inline bool cgm_create(void *hdata
)
620 struct cgm_data
*d
= hdata
;
621 char **slist
= subsystems
;
622 int i
, index
=0, baselen
, ret
;
624 char result
[MAXPATHLEN
], *tmp
, *cgroup_path
;
629 /* XXX we should send a hint to the cgmanager that when these cgroups
630 * become empty they should be deleted. Requires a cgmanager extension.
632 memset(result
, 0, MAXPATHLEN
);
633 tmp
= lxc_string_replace("%n", d
->name
, d
->cgroup_pattern
);
636 if (strlen(tmp
) >= MAXPATHLEN
) {
641 baselen
= strlen(result
);
647 if (index
== 100) { /* turn this into a warn later */
648 ERROR("cgroup error? 100 cgroups with this name already running");
652 ret
= snprintf(result
+baselen
, MAXPATHLEN
-baselen
, "-%d", index
);
653 if (ret
< 0 || ret
>= MAXPATHLEN
-baselen
)
658 if (cgm_all_controllers_same
)
659 slist
= subsystems_inone
;
661 for (i
= 0; slist
[i
]; i
++) {
662 if (!lxc_cgmanager_create(slist
[i
], tmp
, &existed
)) {
663 ERROR("Error creating cgroup %s:%s", slist
[i
], result
);
664 cleanup_cgroups(tmp
);
671 cgroup_path
= strdup(tmp
);
673 cleanup_cgroups(tmp
);
676 d
->cgroup_path
= cgroup_path
;
677 cgm_dbus_disconnect();
684 cgm_dbus_disconnect();
689 * Use the cgmanager to move a task into a cgroup for a particular
691 * All the subsystems in this hierarchy are co-mounted, so we only
692 * need to transition the task into one of the cgroups
694 * Internal helper, must be called with cgmanager dbus socket open
696 static bool lxc_cgmanager_enter(pid_t pid
, const char *controller
,
697 const char *cgroup_path
, bool abs
)
702 ret
= cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
703 controller
, cgroup_path
, pid
);
705 ret
= cgmanager_move_pid_sync(NULL
, cgroup_manager
,
706 controller
, cgroup_path
, pid
);
709 nerr
= nih_error_get();
710 WARN("call to cgmanager_move_pid_%ssync failed: %s",
711 abs
? "abs_" : "", nerr
->message
);
718 static inline bool cgm_enter(void *hdata
, pid_t pid
)
720 struct cgm_data
*d
= hdata
;
721 char **slist
= subsystems
;
725 if (!d
|| !d
->cgroup_path
)
728 if (!cgm_dbus_connect()) {
729 ERROR("Error connecting to cgroup manager");
733 if (cgm_all_controllers_same
)
734 slist
= subsystems_inone
;
736 for (i
= 0; slist
[i
]; i
++) {
737 if (!lxc_cgmanager_enter(pid
, slist
[i
], d
->cgroup_path
, false))
742 cgm_dbus_disconnect();
746 static const char *cgm_get_cgroup(void *hdata
, const char *subsystem
)
748 struct cgm_data
*d
= hdata
;
750 if (!d
|| !d
->cgroup_path
)
752 return d
->cgroup_path
;
755 #if HAVE_CGMANAGER_GET_PID_CGROUP_ABS_SYNC
756 static inline bool abs_cgroup_supported(void) {
757 return api_version
>= CGM_SUPPORTS_GET_ABS
;
760 static inline bool abs_cgroup_supported(void) {
763 #define cgmanager_get_pid_cgroup_abs_sync(...) -1
766 static char *try_get_abs_cgroup(const char *name
, const char *lxcpath
,
767 const char *controller
)
771 if (abs_cgroup_supported()) {
772 /* get the container init pid and ask for its abs cgroup */
773 pid_t pid
= lxc_cmd_get_init_pid(name
, lxcpath
);
776 if (cgmanager_get_pid_cgroup_abs_sync(NULL
, cgroup_manager
,
777 controller
, pid
, &cgroup
) != 0) {
780 nerr
= nih_error_get();
783 prune_init_scope(cgroup
);
787 /* use the command interface to look for the cgroup */
788 return lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
792 * nrtasks is called by the utmp helper by the container monitor.
793 * cgmanager socket was closed after cgroup setup was complete, so we need
796 * Return -1 on error.
798 static int cgm_get_nrtasks(void *hdata
)
800 struct cgm_data
*d
= hdata
;
804 if (!d
|| !d
->cgroup_path
)
807 if (!cgm_dbus_connect()) {
808 ERROR("Error connecting to cgroup manager");
811 if (cgmanager_get_tasks_sync(NULL
, cgroup_manager
, subsystems
[0],
812 d
->cgroup_path
, &pids
, &pids_len
) != 0) {
814 nerr
= nih_error_get();
815 ERROR("call to cgmanager_get_tasks_sync failed: %s", nerr
->message
);
822 cgm_dbus_disconnect();
826 #if HAVE_CGMANAGER_LIST_CONTROLLERS
827 static bool lxc_list_controllers(char ***list
)
829 if (!cgm_dbus_connect()) {
830 ERROR("Error connecting to cgroup manager");
833 if (cgmanager_list_controllers_sync(NULL
, cgroup_manager
, list
) != 0) {
835 nerr
= nih_error_get();
836 ERROR("call to cgmanager_list_controllers_sync failed: %s", nerr
->message
);
838 cgm_dbus_disconnect();
842 cgm_dbus_disconnect();
846 static bool lxc_list_controllers(char ***list
)
852 static inline void free_abs_cgroup(char *cgroup
)
856 if (abs_cgroup_supported())
862 static void do_cgm_get(const char *name
, const char *lxcpath
, const char *filename
, int outp
, bool sendvalue
)
864 char *controller
, *key
, *cgroup
= NULL
, *cglast
;
867 nih_local
char *result
= NULL
;
869 controller
= alloca(strlen(filename
)+1);
870 strcpy(controller
, filename
);
871 key
= strchr(controller
, '.');
873 ret
= write(outp
, &len
, sizeof(len
));
874 if (ret
!= sizeof(len
))
875 WARN("Failed to warn cgm_get of error; parent may hang");
880 if (!cgm_dbus_connect()) {
881 ERROR("Error connecting to cgroup manager");
882 ret
= write(outp
, &len
, sizeof(len
));
883 if (ret
!= sizeof(len
))
884 WARN("Failed to warn cgm_get of error; parent may hang");
887 cgroup
= try_get_abs_cgroup(name
, lxcpath
, controller
);
889 cgm_dbus_disconnect();
890 ret
= write(outp
, &len
, sizeof(len
));
891 if (ret
!= sizeof(len
))
892 WARN("Failed to warn cgm_get of error; parent may hang");
895 cglast
= strrchr(cgroup
, '/');
897 cgm_dbus_disconnect();
898 free_abs_cgroup(cgroup
);
899 ret
= write(outp
, &len
, sizeof(len
));
900 if (ret
!= sizeof(len
))
901 WARN("Failed to warn cgm_get of error; parent may hang");
905 if (!lxc_cgmanager_enter(lxc_raw_getpid(), controller
, cgroup
, abs_cgroup_supported())) {
906 WARN("Failed to enter container cgroup %s:%s", controller
, cgroup
);
907 ret
= write(outp
, &len
, sizeof(len
));
908 if (ret
!= sizeof(len
))
909 WARN("Failed to warn cgm_get of error; parent may hang");
910 cgm_dbus_disconnect();
911 free_abs_cgroup(cgroup
);
914 if (cgmanager_get_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, &result
) != 0) {
916 nerr
= nih_error_get();
918 free_abs_cgroup(cgroup
);
919 cgm_dbus_disconnect();
920 ret
= write(outp
, &len
, sizeof(len
));
921 if (ret
!= sizeof(len
))
922 WARN("Failed to warn cgm_get of error; parent may hang");
925 free_abs_cgroup(cgroup
);
926 cgm_dbus_disconnect();
927 len
= strlen(result
);
928 ret
= write(outp
, &len
, sizeof(len
));
929 if (ret
!= sizeof(len
)) {
930 WARN("Failed to send length to parent");
933 if (!len
|| !sendvalue
) {
936 ret
= write(outp
, result
, len
);
942 /* cgm_get is called to get container cgroup settings, not during startup */
943 static int cgm_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
946 int p
[2], ret
, newlen
, readlen
;
950 if ((pid
= fork()) < 0) {
955 if (!pid
) /* do_cgm_get exits */
956 do_cgm_get(name
, lxcpath
, filename
, p
[1], len
&& value
);
958 ret
= read(p
[0], &newlen
, sizeof(newlen
));
959 if (ret
!= sizeof(newlen
)) {
964 if (!len
|| !value
) {
969 memset(value
, 0, len
);
970 if (newlen
< 0) { /* child is reporting an error */
975 if (newlen
== 0) { /* empty read */
980 readlen
= newlen
> len
? len
: newlen
;
981 ret
= read(p
[0], value
, readlen
);
983 if (ret
!= readlen
) {
990 } else if (newlen
+1 < len
) {
991 /* cgmanager doesn't add eol to last entry */
992 value
[newlen
++] = '\n';
993 value
[newlen
] = '\0';
997 if (wait_for_pid(pid
))
998 WARN("do_cgm_get exited with error");
1002 static void do_cgm_set(const char *name
, const char *lxcpath
, const char *filename
, const char *value
, int outp
)
1004 char *controller
, *key
, *cgroup
= NULL
;
1005 int retval
= 0; /* value we are sending to the parent over outp */
1009 controller
= alloca(strlen(filename
)+1);
1010 strcpy(controller
, filename
);
1011 key
= strchr(controller
, '.');
1013 ret
= write(outp
, &retval
, sizeof(retval
));
1014 if (ret
!= sizeof(retval
))
1015 WARN("Failed to warn cgm_set of error; parent may hang");
1020 if (!cgm_dbus_connect()) {
1021 ERROR("Error connecting to cgroup manager");
1022 ret
= write(outp
, &retval
, sizeof(retval
));
1023 if (ret
!= sizeof(retval
))
1024 WARN("Failed to warn cgm_set of error; parent may hang");
1027 cgroup
= try_get_abs_cgroup(name
, lxcpath
, controller
);
1029 cgm_dbus_disconnect();
1030 ret
= write(outp
, &retval
, sizeof(retval
));
1031 if (ret
!= sizeof(retval
))
1032 WARN("Failed to warn cgm_set of error; parent may hang");
1035 cglast
= strrchr(cgroup
, '/');
1037 cgm_dbus_disconnect();
1038 free_abs_cgroup(cgroup
);
1039 ret
= write(outp
, &retval
, sizeof(retval
));
1040 if (ret
!= sizeof(retval
))
1041 WARN("Failed to warn cgm_set of error; parent may hang");
1045 if (!lxc_cgmanager_enter(lxc_raw_getpid(), controller
, cgroup
, abs_cgroup_supported())) {
1046 ERROR("Failed to enter container cgroup %s:%s", controller
, cgroup
);
1047 ret
= write(outp
, &retval
, sizeof(retval
));
1048 if (ret
!= sizeof(retval
))
1049 WARN("Failed to warn cgm_set of error; parent may hang");
1050 cgm_dbus_disconnect();
1051 free_abs_cgroup(cgroup
);
1054 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, value
) != 0) {
1056 nerr
= nih_error_get();
1057 ERROR("Error setting cgroup value %s for %s:%s", filename
, controller
, cgroup
);
1058 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1060 free_abs_cgroup(cgroup
);
1061 cgm_dbus_disconnect();
1062 ret
= write(outp
, &retval
, sizeof(retval
));
1063 if (ret
!= sizeof(retval
))
1064 WARN("Failed to warn cgm_set of error; parent may hang");
1067 free_abs_cgroup(cgroup
);
1068 cgm_dbus_disconnect();
1069 /* tell parent that we are done */
1071 ret
= write(outp
, &retval
, sizeof(retval
));
1072 if (ret
!= sizeof(retval
)) {
1078 /* cgm_set is called to change cgroup settings, not during startup */
1079 static int cgm_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
1086 if ((pid
= fork()) < 0) {
1091 if (!pid
) /* do_cgm_set exits */
1092 do_cgm_set(name
, lxcpath
, filename
, value
, p
[1]);
1094 ret
= read(p
[0], &v
, sizeof(v
));
1096 if (wait_for_pid(pid
))
1097 WARN("do_cgm_set exited with error");
1098 if (ret
!= sizeof(v
) || !v
)
1103 static void free_subsystems(void)
1107 for (i
= 0; i
< nr_subsystems
; i
++)
1108 free(subsystems
[i
]);
1114 static void cull_user_controllers(void)
1118 for (i
= 0; i
< nr_subsystems
; i
++) {
1119 if (strncmp(subsystems
[i
], "name=", 5) != 0)
1121 for (j
= i
; j
< nr_subsystems
-1; j
++)
1122 subsystems
[j
] = subsystems
[j
+1];
1128 * return true if inword is in the comma-delimited list cgroup_use
1130 static bool in_comma_list(const char *inword
, const char *cgroup_use
)
1133 size_t inlen
= strlen(inword
), len
;
1136 e
= strchr(cgroup_use
, ',');
1137 len
= e
? e
- cgroup_use
: strlen(cgroup_use
);
1138 if (len
== inlen
&& strncmp(inword
, cgroup_use
, len
) == 0)
1147 * inlist is a comma-delimited list of cgroups; so is checklist. Return
1148 * true if any member of inlist is in checklist.
1150 static bool any_in_comma_list(const char *inlist
, const char *checklist
)
1152 char *tmp
= alloca(strlen(inlist
) + 1), *tok
, *saveptr
= NULL
;
1154 strcpy(tmp
, inlist
);
1155 for (tok
= strtok_r(tmp
, ",", &saveptr
); tok
; tok
= strtok_r(NULL
, ",", &saveptr
)) {
1156 if (in_comma_list(tok
, checklist
))
1163 static bool in_subsystem_list(const char *c
)
1167 for (i
= 0; i
< nr_subsystems
; i
++) {
1168 if (strcmp(c
, subsystems
[i
]) == 0)
1176 * If /etc/lxc/lxc.conf specifies lxc.cgroup.use = "freezer,memory",
1177 * then clear out any other subsystems, and make sure that freezer
1178 * and memory are both enabled
1180 static bool verify_and_prune(const char *cgroup_use
)
1186 for (p
= cgroup_use
; p
&& *p
; p
= e
+ 1) {
1191 if (!in_subsystem_list(p
)) {
1192 ERROR("Controller %s required by lxc.cgroup.use but not available\n", p
);
1202 for (i
= 0; i
< nr_subsystems
;) {
1203 if (in_comma_list(subsystems
[i
], cgroup_use
)) {
1207 free(subsystems
[i
]);
1208 for (j
= i
; j
< nr_subsystems
-1; j
++)
1209 subsystems
[j
] = subsystems
[j
+1];
1210 subsystems
[nr_subsystems
-1] = NULL
;
1217 static void drop_subsystem(int which
)
1221 if (which
< 0 || which
>= nr_subsystems
) {
1222 ERROR("code error: dropping invalid subsystem index\n");
1226 free(subsystems
[which
]);
1227 /* note - we have nr_subsystems+1 entries, last one a NULL */
1228 for (i
= which
; i
< nr_subsystems
; i
++)
1229 subsystems
[i
] = subsystems
[i
+1];
1234 * Check whether we can create the cgroups we would want
1236 static bool subsys_is_writeable(const char *controller
, const char *probe
)
1241 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
1242 probe
, &existed
) != 0) {
1244 nerr
= nih_error_get();
1245 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
1247 ERROR("Failed to create %s:%s", controller
, probe
);
1254 static char *get_last_controller_in_list(char *list
)
1258 while ((p
= strchr(list
, ',')) != NULL
)
1265 * Make sure that all the controllers are writeable.
1266 * If any are not, then
1267 * - if they are listed in lxc.cgroup.use, refuse to start
1268 * - else if they are crucial subsystems, refuse to start
1269 * - else warn and do not use them
1271 static bool verify_final_subsystems(const char *cgroup_use
)
1274 bool dropped_any
= false;
1276 const char *cgroup_pattern
;
1277 char tmpnam
[50], *probe
;
1279 if (!cgm_dbus_connect()) {
1280 ERROR("Error connecting to cgroup manager");
1284 cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
1285 i
= snprintf(tmpnam
, 50, "lxcprobe-%d", lxc_raw_getpid());
1286 if (i
< 0 || i
>= 50) {
1287 ERROR("Attack - format string modified?");
1290 probe
= lxc_string_replace("%n", tmpnam
, cgroup_pattern
);
1295 while (i
< nr_subsystems
) {
1296 char *p
= get_last_controller_in_list(subsystems
[i
]);
1298 if (!subsys_is_writeable(p
, probe
)) {
1299 if (is_crucial_cgroup_subsystem(p
)) {
1300 ERROR("Cannot write to crucial subsystem %s\n",
1304 if (cgroup_use
&& any_in_comma_list(subsystems
[i
], cgroup_use
)) {
1305 ERROR("Cannot write to subsystem %s which is requested in lxc.cgroup.use\n",
1309 WARN("Cannot write to subsystem %s, continuing with out it\n",
1314 cgm_remove_cgroup(subsystems
[i
], probe
);
1320 cgm_all_controllers_same
= false;
1325 cgm_dbus_disconnect();
1329 static bool collect_subsystems(void)
1332 nih_local
char **cgm_subsys_list
= NULL
;
1336 if (subsystems
) /* already initialized */
1339 subsystems_inone
= malloc(2 * sizeof(char *));
1340 if (!subsystems_inone
)
1342 subsystems_inone
[0] = "all";
1343 subsystems_inone
[1] = NULL
;
1345 if (lxc_list_controllers(&cgm_subsys_list
)) {
1346 while (cgm_subsys_list
[nr_subsystems
]) {
1347 char **tmp
= NIH_MUST( realloc(subsystems
,
1348 (nr_subsystems
+2)*sizeof(char *)) );
1349 tmp
[nr_subsystems
] = NIH_MUST(
1350 strdup(cgm_subsys_list
[nr_subsystems
++]) );
1354 subsystems
[nr_subsystems
] = NULL
;
1358 INFO("cgmanager_list_controllers failed, falling back to /proc/self/cgroups");
1359 f
= fopen_cloexec("/proc/self/cgroup", "r");
1361 f
= fopen_cloexec("/proc/1/cgroup", "r");
1365 while (getline(&line
, &sz
, f
) != -1) {
1366 /* file format: hierarchy:subsystems:group,
1367 * with multiple subsystems being ,-separated */
1368 char *slist
, *end
, *p
, *saveptr
= NULL
, **tmp
;
1373 slist
= strchr(line
, ':');
1377 end
= strchr(slist
, ':');
1382 for (p
= strtok_r(slist
, ",", &saveptr
);
1384 p
= strtok_r(NULL
, ",", &saveptr
)) {
1385 tmp
= realloc(subsystems
, (nr_subsystems
+2)*sizeof(char *));
1390 tmp
[nr_subsystems
] = strdup(p
);
1391 tmp
[nr_subsystems
+1] = NULL
;
1392 if (!tmp
[nr_subsystems
])
1404 if (!nr_subsystems
) {
1405 ERROR("No cgroup subsystems found");
1409 /* make sure that cgroup.use can be and is honored */
1410 const char *cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
1411 if (!cgroup_use
&& errno
!= 0)
1414 if (!verify_and_prune(cgroup_use
)) {
1418 subsystems_inone
[0] = NIH_MUST( strdup(cgroup_use
) );
1419 cgm_all_controllers_same
= false;
1423 return verify_final_subsystems(cgroup_use
);
1434 * called during cgroup.c:cgroup_ops_init(), at startup. No threads.
1435 * We check whether we can talk to cgmanager, escape to root cgroup if
1436 * we are root, then close the connection.
1438 struct cgroup_ops
*cgm_ops_init(void)
1440 check_supports_multiple_controllers(-1);
1441 if (!collect_subsystems())
1444 if (api_version
< CGM_SUPPORTS_MULT_CONTROLLERS
)
1445 cgm_all_controllers_same
= false;
1447 /* if root, try to escape to root cgroup */
1448 if (geteuid() == 0 && !cgm_escape(NULL
)) {
1453 return &cgmanager_ops
;
1456 /* unfreeze is called by the command api after killing a container. */
1457 static bool cgm_unfreeze(void *hdata
)
1459 struct cgm_data
*d
= hdata
;
1462 if (!d
|| !d
->cgroup_path
)
1465 if (!cgm_dbus_connect()) {
1466 ERROR("Error connecting to cgroup manager");
1469 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, "freezer", d
->cgroup_path
,
1470 "freezer.state", "THAWED") != 0) {
1472 nerr
= nih_error_get();
1473 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1475 ERROR("Error unfreezing %s", d
->cgroup_path
);
1478 cgm_dbus_disconnect();
1482 static bool cgm_setup_limits(void *hdata
, struct lxc_list
*cgroup_settings
, bool do_devices
)
1484 struct cgm_data
*d
= hdata
;
1485 struct lxc_list
*iterator
, *sorted_cgroup_settings
, *next
;
1486 struct lxc_cgroup
*cg
;
1489 if (lxc_list_empty(cgroup_settings
))
1492 if (!d
|| !d
->cgroup_path
)
1495 if (!cgm_dbus_connect()) {
1496 ERROR("Error connecting to cgroup manager");
1500 sorted_cgroup_settings
= sort_cgroup_settings(cgroup_settings
);
1501 if (!sorted_cgroup_settings
) {
1505 lxc_list_for_each(iterator
, sorted_cgroup_settings
) {
1506 char controller
[100], *p
;
1507 cg
= iterator
->elem
;
1508 if (do_devices
!= !strncmp("devices", cg
->subsystem
, 7))
1510 if (strlen(cg
->subsystem
) > 100) /* i smell a rat */
1512 strcpy(controller
, cg
->subsystem
);
1513 p
= strchr(controller
, '.');
1516 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
,
1517 d
->cgroup_path
, cg
->subsystem
, cg
->value
) != 0) {
1519 nerr
= nih_error_get();
1521 WARN("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1523 WARN("Error setting cgroup %s:%s limit type %s", controller
,
1524 d
->cgroup_path
, cg
->subsystem
);
1528 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1530 ERROR("Error setting cgroup %s:%s limit type %s", controller
,
1531 d
->cgroup_path
, cg
->subsystem
);
1535 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1539 INFO("cgroup limits have been setup");
1541 lxc_list_for_each_safe(iterator
, sorted_cgroup_settings
, next
) {
1542 lxc_list_del(iterator
);
1545 free(sorted_cgroup_settings
);
1546 cgm_dbus_disconnect();
1550 static bool cgm_chown(void *hdata
, struct lxc_conf
*conf
)
1552 struct cgm_data
*d
= hdata
;
1554 if (!d
|| !d
->cgroup_path
)
1556 if (!cgm_dbus_connect()) {
1557 ERROR("Error connecting to cgroup manager");
1560 if (!chown_cgroup(d
->cgroup_path
, conf
))
1561 WARN("Failed to chown %s to container root", d
->cgroup_path
);
1562 cgm_dbus_disconnect();
1567 * TODO: this should be re-written to use the get_config_item("lxc.idmap")
1568 * cmd api instead of getting the idmap from c->lxc_conf. The reason is
1569 * that the id_maps may be different if the container was started with a
1570 * -f or -s argument.
1571 * The reason I'm punting on that is because we'll need to parse the
1574 static bool cgm_attach(const char *name
, const char *lxcpath
, pid_t pid
)
1577 char *cgroup
= NULL
;
1578 char **slist
= subsystems
;
1581 if (!cgm_dbus_connect()) {
1582 ERROR("Error connecting to cgroup manager");
1586 for (i
= 0; slist
[i
]; i
++) {
1587 cgroup
= try_get_abs_cgroup(name
, lxcpath
, slist
[i
]);
1589 ERROR("Failed to get cgroup for controller %s", slist
[i
]);
1590 cgm_dbus_disconnect();
1594 if (!lxc_cgmanager_enter(pid
, slist
[i
], cgroup
, abs_cgroup_supported())) {
1600 cgm_dbus_disconnect();
1602 ERROR("Failed to enter group %s", cgroup
);
1604 free_abs_cgroup(cgroup
);
1608 static bool cgm_bind_dir(const char *root
, const char *dirname
)
1610 nih_local
char *cgpath
= NULL
;
1612 /* /sys should have been mounted by now */
1613 cgpath
= NIH_MUST( nih_strdup(NULL
, root
) );
1614 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/sys/fs/cgroup") );
1616 if (!dir_exists(cgpath
)) {
1617 ERROR("%s does not exist", cgpath
);
1621 /* mount a tmpfs there so we can create subdirs */
1622 if (safe_mount("cgroup", cgpath
, "tmpfs", 0, "size=10000,mode=755", root
)) {
1623 SYSERROR("Failed to mount tmpfs at %s", cgpath
);
1626 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/cgmanager") );
1628 if (mkdir(cgpath
, 0755) < 0) {
1629 SYSERROR("Failed to create %s", cgpath
);
1633 if (safe_mount(dirname
, cgpath
, "none", MS_BIND
, 0, root
)) {
1634 SYSERROR("Failed to bind mount %s to %s", dirname
, cgpath
);
1643 * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
1644 * /sys/fs/cgroup/cgmanager/ in the container.
1645 * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
1648 #define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
1649 #define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
1650 static bool cgm_mount_cgroup(void *hdata
, const char *root
, int type
)
1652 if (dir_exists(CGMANAGER_LOWER_SOCK
))
1653 return cgm_bind_dir(root
, CGMANAGER_LOWER_SOCK
);
1654 if (dir_exists(CGMANAGER_UPPER_SOCK
))
1655 return cgm_bind_dir(root
, CGMANAGER_UPPER_SOCK
);
1656 /* Host doesn't have cgmanager running? Then how did we get here? */
1660 static struct cgroup_ops cgmanager_ops
= {
1662 .destroy
= cgm_destroy
,
1663 .create
= cgm_create
,
1665 .create_legacy
= NULL
,
1666 .get_cgroup
= cgm_get_cgroup
,
1667 .escape
= cgm_escape
,
1668 .num_hierarchies
= cgm_num_hierarchies
,
1669 .get_hierarchies
= cgm_get_hierarchies
,
1672 .unfreeze
= cgm_unfreeze
,
1673 .setup_limits
= cgm_setup_limits
,
1674 .name
= "cgmanager",
1676 .attach
= cgm_attach
,
1677 .mount_cgroup
= cgm_mount_cgroup
,
1678 .nrtasks
= cgm_get_nrtasks
,
1680 .driver
= CGMANAGER
,