2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/inotify.h>
39 #include <sys/mount.h>
40 #include <netinet/in.h>
55 #define CGM_SUPPORTS_GET_ABS 3
56 #define CGM_SUPPORTS_NAMED 4
57 #define CGM_SUPPORTS_MULT_CONTROLLERS 10
60 lxc_log_define(lxc_cgmanager
, lxc
);
62 #include <nih-dbus/dbus_connection.h>
63 #include <cgmanager/cgmanager-client.h>
64 #include <nih/alloc.h>
65 #include <nih/error.h>
66 #include <nih/string.h>
71 const char *cgroup_pattern
;
74 static pthread_mutex_t cgm_mutex
= PTHREAD_MUTEX_INITIALIZER
;
76 static void lock_mutex(pthread_mutex_t
*l
)
80 if ((ret
= pthread_mutex_lock(l
)) != 0) {
81 fprintf(stderr
, "pthread_mutex_lock returned:%d %s\n", ret
, strerror(ret
));
86 static void unlock_mutex(pthread_mutex_t
*l
)
90 if ((ret
= pthread_mutex_unlock(l
)) != 0) {
91 fprintf(stderr
, "pthread_mutex_unlock returned:%d %s\n", ret
, strerror(ret
));
98 lock_mutex(&cgm_mutex
);
101 void cgm_unlock(void)
103 unlock_mutex(&cgm_mutex
);
106 #ifdef HAVE_PTHREAD_ATFORK
107 __attribute__((constructor
))
108 static void process_lock_setup_atfork(void)
110 pthread_atfork(cgm_lock
, cgm_unlock
, cgm_unlock
);
114 static NihDBusProxy
*cgroup_manager
= NULL
;
115 static int32_t api_version
;
117 static struct cgroup_ops cgmanager_ops
;
118 static int nr_subsystems
;
119 static char **subsystems
, **subsystems_inone
;
120 static bool dbus_threads_initialized
= false;
121 static void cull_user_controllers(void);
123 static void cgm_dbus_disconnect(void)
125 if (cgroup_manager
) {
126 dbus_connection_flush(cgroup_manager
->connection
);
127 dbus_connection_close(cgroup_manager
->connection
);
128 nih_free(cgroup_manager
);
130 cgroup_manager
= NULL
;
134 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
135 static bool cgm_dbus_connect(void)
137 DBusError dbus_error
;
138 static DBusConnection
*connection
;
141 if (!dbus_threads_initialized
) {
142 // tell dbus to do struct locking for thread safety
143 dbus_threads_init_default();
144 dbus_threads_initialized
= true;
147 dbus_error_init(&dbus_error
);
149 connection
= dbus_connection_open_private(CGMANAGER_DBUS_SOCK
, &dbus_error
);
151 DEBUG("Failed opening dbus connection: %s: %s",
152 dbus_error
.name
, dbus_error
.message
);
153 dbus_error_free(&dbus_error
);
157 dbus_connection_set_exit_on_disconnect(connection
, FALSE
);
158 dbus_error_free(&dbus_error
);
159 cgroup_manager
= nih_dbus_proxy_new(NULL
, connection
,
161 "/org/linuxcontainers/cgmanager", NULL
, NULL
);
162 dbus_connection_unref(connection
);
163 if (!cgroup_manager
) {
165 nerr
= nih_error_get();
166 ERROR("Error opening cgmanager proxy: %s", nerr
->message
);
168 cgm_dbus_disconnect();
172 // get the api version
173 if (cgmanager_get_api_version_sync(NULL
, cgroup_manager
, &api_version
) != 0) {
175 nerr
= nih_error_get();
176 ERROR("Error cgroup manager api version: %s", nerr
->message
);
178 cgm_dbus_disconnect();
181 if (api_version
< CGM_SUPPORTS_NAMED
)
182 cull_user_controllers();
186 static bool cgm_all_controllers_same
;
189 * Check whether we can use "all" when talking to cgmanager.
190 * We check two things:
191 * 1. whether cgmanager is new enough to support this.
192 * 2. whether the task we are interested in is in the same
193 * cgroup for all controllers.
194 * In cgm_init (before an lxc-start) we care about our own
195 * cgroup. In cgm_attach, we care about the target task's
198 static void check_supports_multiple_controllers(pid_t pid
)
201 char *line
= NULL
, *prevpath
= NULL
;
205 cgm_all_controllers_same
= false;
208 sprintf(path
, "/proc/self/cgroup");
210 sprintf(path
, "/proc/%d/cgroup", pid
);
211 f
= fopen(path
, "r");
215 cgm_all_controllers_same
= true;
217 while (getline(&line
, &sz
, f
) != -1) {
218 /* file format: hierarchy:subsystems:group */
223 colon
= strchr(line
, ':');
226 colon
= strchr(colon
+1, ':');
231 prevpath
= alloca(strlen(colon
)+1);
232 strcpy(prevpath
, colon
);
235 if (strcmp(prevpath
, colon
) != 0) {
236 cgm_all_controllers_same
= false;
245 static int send_creds(int sock
, int rpid
, int ruid
, int rgid
)
247 struct msghdr msg
= { 0 };
249 struct cmsghdr
*cmsg
;
250 struct ucred cred
= {
255 char cmsgbuf
[CMSG_SPACE(sizeof(cred
))];
259 msg
.msg_control
= cmsgbuf
;
260 msg
.msg_controllen
= sizeof(cmsgbuf
);
262 cmsg
= CMSG_FIRSTHDR(&msg
);
263 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
264 cmsg
->cmsg_level
= SOL_SOCKET
;
265 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
266 memcpy(CMSG_DATA(cmsg
), &cred
, sizeof(cred
));
272 iov
.iov_len
= sizeof(buf
);
276 if (sendmsg(sock
, &msg
, 0) < 0)
281 static bool lxc_cgmanager_create(const char *controller
, const char *cgroup_path
, int32_t *existed
)
284 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
285 cgroup_path
, existed
) != 0) {
287 nerr
= nih_error_get();
288 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
290 ERROR("Failed to create %s:%s", controller
, cgroup_path
);
298 * Escape to the root cgroup if we are root, so that the container will
299 * be in "/lxc/c1" rather than "/user/..../c1"
300 * called internally with connection already open
302 static bool cgm_escape(void *hdata
)
304 bool ret
= true, cgm_needs_disconnect
= false;
306 char **slist
= subsystems
;
309 if (!cgroup_manager
) {
310 if (!cgm_dbus_connect()) {
311 ERROR("Error connecting to cgroup manager");
314 cgm_needs_disconnect
= true;
318 if (cgm_all_controllers_same
)
319 slist
= subsystems_inone
;
321 for (i
= 0; slist
[i
]; i
++) {
322 if (cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
323 slist
[i
], "/", me
) != 0) {
325 nerr
= nih_error_get();
326 ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: %s",
327 slist
[i
], nerr
->message
);
334 if (cgm_needs_disconnect
)
335 cgm_dbus_disconnect();
340 static int cgm_num_hierarchies(void)
342 /* not implemented */
346 static bool cgm_get_hierarchies(int i
, char ***out
)
348 /* not implemented */
353 const char *cgroup_path
;
357 static int do_chown_cgroup(const char *controller
, const char *cgroup_path
,
360 int sv
[2] = {-1, -1}, optval
= 1, ret
= -1;
364 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sv
) < 0) {
365 SYSERROR("Error creating socketpair");
368 if (setsockopt(sv
[1], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
369 SYSERROR("setsockopt failed");
372 if (setsockopt(sv
[0], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
373 SYSERROR("setsockopt failed");
376 if ( cgmanager_chown_scm_sync(NULL
, cgroup_manager
, controller
,
377 cgroup_path
, sv
[1]) != 0) {
379 nerr
= nih_error_get();
380 ERROR("call to cgmanager_chown_scm_sync failed: %s", nerr
->message
);
384 /* now send credentials */
389 if (poll(&fds
, 1, -1) <= 0) {
390 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
393 if (read(sv
[0], &buf
, 1) != 1) {
394 ERROR("Error getting reply from server over socketpair");
397 if (send_creds(sv
[0], getpid(), getuid(), getgid())) {
398 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
404 if (poll(&fds
, 1, -1) <= 0) {
405 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
408 if (read(sv
[0], &buf
, 1) != 1) {
409 ERROR("Error getting reply from server over socketpair");
412 if (send_creds(sv
[0], getpid(), newuid
, 0)) {
413 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
419 if (poll(&fds
, 1, -1) <= 0) {
420 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
423 ret
= read(sv
[0], buf
, 1);
427 if (ret
== 1 && *buf
== '1')
432 static int chown_cgroup_wrapper(void *data
)
434 struct chown_data
*arg
= data
;
435 char **slist
= subsystems
;
439 if (setresgid(0,0,0) < 0)
440 SYSERROR("Failed to setgid to 0");
441 if (setresuid(0,0,0) < 0)
442 SYSERROR("Failed to setuid to 0");
443 if (setgroups(0, NULL
) < 0)
444 SYSERROR("Failed to clear groups");
445 cgm_dbus_disconnect();
446 if (!cgm_dbus_connect()) {
447 ERROR("Error connecting to cgroup manager");
450 destuid
= get_ns_uid(arg
->origuid
);
452 if (cgm_all_controllers_same
)
453 slist
= subsystems_inone
;
455 for (i
= 0; slist
[i
]; i
++) {
456 if (do_chown_cgroup(slist
[i
], arg
->cgroup_path
, destuid
) < 0) {
457 ERROR("Failed to chown %s:%s to container root",
458 slist
[i
], arg
->cgroup_path
);
464 cgm_dbus_disconnect();
468 /* Internal helper. Must be called with the cgmanager dbus socket open */
469 static bool lxc_cgmanager_chmod(const char *controller
,
470 const char *cgroup_path
, const char *file
, int mode
)
472 if (cgmanager_chmod_sync(NULL
, cgroup_manager
, controller
,
473 cgroup_path
, file
, mode
) != 0) {
475 nerr
= nih_error_get();
476 ERROR("call to cgmanager_chmod_sync failed: %s", nerr
->message
);
483 /* Internal helper. Must be called with the cgmanager dbus socket open */
484 static bool chown_cgroup(const char *cgroup_path
, struct lxc_conf
*conf
)
486 struct chown_data data
;
487 char **slist
= subsystems
;
490 if (lxc_list_empty(&conf
->id_map
))
491 /* If there's no mapping then we don't need to chown */
494 data
.cgroup_path
= cgroup_path
;
495 data
.origuid
= geteuid();
497 /* Unpriv users can't chown it themselves, so chown from
498 * a child namespace mapping both our own and the target uid
500 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
,
501 "chown_cgroup_wrapper") < 0) {
502 ERROR("Error requesting cgroup chown in new namespace");
507 * Now chmod 775 the directory else the container cannot create cgroups.
508 * This can't be done in the child namespace because it only group-owns
511 if (cgm_all_controllers_same
)
512 slist
= subsystems_inone
;
514 for (i
= 0; slist
[i
]; i
++) {
515 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "", 0775))
517 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "tasks", 0664))
519 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "cgroup.procs", 0664))
526 #define CG_REMOVE_RECURSIVE 1
527 /* Internal helper. Must be called with the cgmanager dbus socket open */
528 static void cgm_remove_cgroup(const char *controller
, const char *path
)
531 if ( cgmanager_remove_sync(NULL
, cgroup_manager
, controller
,
532 path
, CG_REMOVE_RECURSIVE
, &existed
) != 0) {
534 nerr
= nih_error_get();
535 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
537 ERROR("Error removing %s:%s", controller
, path
);
540 INFO("cgroup removal attempt: %s:%s did not exist", controller
, path
);
543 static void *cgm_init(const char *name
)
547 d
= malloc(sizeof(*d
));
551 if (!cgm_dbus_connect()) {
552 ERROR("Error connecting to cgroup manager");
556 memset(d
, 0, sizeof(*d
));
557 d
->name
= strdup(name
);
559 cgm_dbus_disconnect();
563 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
565 // cgm_create immediately gets called so keep the connection open
573 /* Called after a failed container startup */
574 static void cgm_destroy(void *hdata
, struct lxc_conf
*conf
)
576 struct cgm_data
*d
= hdata
;
577 char **slist
= subsystems
;
580 if (!d
|| !d
->cgroup_path
)
582 if (!cgm_dbus_connect()) {
583 ERROR("Error connecting to cgroup manager");
587 if (cgm_all_controllers_same
)
588 slist
= subsystems_inone
;
589 for (i
= 0; slist
[i
]; i
++)
590 cgm_remove_cgroup(slist
[i
], d
->cgroup_path
);
593 free(d
->cgroup_path
);
595 cgm_dbus_disconnect();
599 * remove all the cgroups created
600 * called internally with dbus connection open
602 static inline void cleanup_cgroups(char *path
)
605 char **slist
= subsystems
;
607 if (cgm_all_controllers_same
)
608 slist
= subsystems_inone
;
609 for (i
= 0; slist
[i
]; i
++)
610 cgm_remove_cgroup(slist
[i
], path
);
613 static inline bool cgm_create(void *hdata
)
615 struct cgm_data
*d
= hdata
;
616 char **slist
= subsystems
;
617 int i
, index
=0, baselen
, ret
;
619 char result
[MAXPATHLEN
], *tmp
, *cgroup_path
;
623 // XXX we should send a hint to the cgmanager that when these
624 // cgroups become empty they should be deleted. Requires a cgmanager
627 memset(result
, 0, MAXPATHLEN
);
628 tmp
= lxc_string_replace("%n", d
->name
, d
->cgroup_pattern
);
631 if (strlen(tmp
) >= MAXPATHLEN
) {
636 baselen
= strlen(result
);
642 if (index
== 100) { // turn this into a warn later
643 ERROR("cgroup error? 100 cgroups with this name already running");
647 ret
= snprintf(result
+baselen
, MAXPATHLEN
-baselen
, "-%d", index
);
648 if (ret
< 0 || ret
>= MAXPATHLEN
-baselen
)
653 if (cgm_all_controllers_same
)
654 slist
= subsystems_inone
;
656 for (i
= 0; slist
[i
]; i
++) {
657 if (!lxc_cgmanager_create(slist
[i
], tmp
, &existed
)) {
658 ERROR("Error creating cgroup %s:%s", slist
[i
], result
);
659 cleanup_cgroups(tmp
);
666 cgroup_path
= strdup(tmp
);
668 cleanup_cgroups(tmp
);
671 d
->cgroup_path
= cgroup_path
;
672 cgm_dbus_disconnect();
679 cgm_dbus_disconnect();
684 * Use the cgmanager to move a task into a cgroup for a particular
686 * All the subsystems in this hierarchy are co-mounted, so we only
687 * need to transition the task into one of the cgroups
689 * Internal helper, must be called with cgmanager dbus socket open
691 static bool lxc_cgmanager_enter(pid_t pid
, const char *controller
,
692 const char *cgroup_path
, bool abs
)
697 ret
= cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
698 controller
, cgroup_path
, pid
);
700 ret
= cgmanager_move_pid_sync(NULL
, cgroup_manager
,
701 controller
, cgroup_path
, pid
);
704 nerr
= nih_error_get();
705 WARN("call to cgmanager_move_pid_%ssync failed: %s",
706 abs
? "abs_" : "", nerr
->message
);
713 static inline bool cgm_enter(void *hdata
, pid_t pid
)
715 struct cgm_data
*d
= hdata
;
716 char **slist
= subsystems
;
720 if (!d
|| !d
->cgroup_path
)
723 if (!cgm_dbus_connect()) {
724 ERROR("Error connecting to cgroup manager");
728 if (cgm_all_controllers_same
)
729 slist
= subsystems_inone
;
731 for (i
= 0; slist
[i
]; i
++) {
732 if (!lxc_cgmanager_enter(pid
, slist
[i
], d
->cgroup_path
, false))
737 cgm_dbus_disconnect();
741 static const char *cgm_get_cgroup(void *hdata
, const char *subsystem
)
743 struct cgm_data
*d
= hdata
;
745 if (!d
|| !d
->cgroup_path
)
747 return d
->cgroup_path
;
750 #if HAVE_CGMANAGER_GET_PID_CGROUP_ABS_SYNC
751 static inline bool abs_cgroup_supported(void) {
752 return api_version
>= CGM_SUPPORTS_GET_ABS
;
755 static inline bool abs_cgroup_supported(void) {
758 #define cgmanager_get_pid_cgroup_abs_sync(...) -1
761 static char *try_get_abs_cgroup(const char *name
, const char *lxcpath
,
762 const char *controller
)
766 if (abs_cgroup_supported()) {
767 /* get the container init pid and ask for its abs cgroup */
768 pid_t pid
= lxc_cmd_get_init_pid(name
, lxcpath
);
771 if (cgmanager_get_pid_cgroup_abs_sync(NULL
, cgroup_manager
,
772 controller
, pid
, &cgroup
) != 0) {
775 nerr
= nih_error_get();
778 prune_init_scope(cgroup
);
782 /* use the command interface to look for the cgroup */
783 return lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
787 * nrtasks is called by the utmp helper by the container monitor.
788 * cgmanager socket was closed after cgroup setup was complete, so we need
791 * Return -1 on error.
793 static int cgm_get_nrtasks(void *hdata
)
795 struct cgm_data
*d
= hdata
;
799 if (!d
|| !d
->cgroup_path
)
802 if (!cgm_dbus_connect()) {
803 ERROR("Error connecting to cgroup manager");
806 if (cgmanager_get_tasks_sync(NULL
, cgroup_manager
, subsystems
[0],
807 d
->cgroup_path
, &pids
, &pids_len
) != 0) {
809 nerr
= nih_error_get();
810 ERROR("call to cgmanager_get_tasks_sync failed: %s", nerr
->message
);
817 cgm_dbus_disconnect();
821 #if HAVE_CGMANAGER_LIST_CONTROLLERS
822 static bool lxc_list_controllers(char ***list
)
824 if (!cgm_dbus_connect()) {
825 ERROR("Error connecting to cgroup manager");
828 if (cgmanager_list_controllers_sync(NULL
, cgroup_manager
, list
) != 0) {
830 nerr
= nih_error_get();
831 ERROR("call to cgmanager_list_controllers_sync failed: %s", nerr
->message
);
833 cgm_dbus_disconnect();
837 cgm_dbus_disconnect();
841 static bool lxc_list_controllers(char ***list
)
847 static inline void free_abs_cgroup(char *cgroup
)
851 if (abs_cgroup_supported())
857 static void do_cgm_get(const char *name
, const char *lxcpath
, const char *filename
, int outp
, bool sendvalue
)
859 char *controller
, *key
, *cgroup
= NULL
, *cglast
;
862 nih_local
char *result
= NULL
;
864 controller
= alloca(strlen(filename
)+1);
865 strcpy(controller
, filename
);
866 key
= strchr(controller
, '.');
868 ret
= write(outp
, &len
, sizeof(len
));
869 if (ret
!= sizeof(len
))
870 WARN("Failed to warn cgm_get of error; parent may hang");
875 if (!cgm_dbus_connect()) {
876 ERROR("Error connecting to cgroup manager");
877 ret
= write(outp
, &len
, sizeof(len
));
878 if (ret
!= sizeof(len
))
879 WARN("Failed to warn cgm_get of error; parent may hang");
882 cgroup
= try_get_abs_cgroup(name
, lxcpath
, controller
);
884 cgm_dbus_disconnect();
885 ret
= write(outp
, &len
, sizeof(len
));
886 if (ret
!= sizeof(len
))
887 WARN("Failed to warn cgm_get of error; parent may hang");
890 cglast
= strrchr(cgroup
, '/');
892 cgm_dbus_disconnect();
893 free_abs_cgroup(cgroup
);
894 ret
= write(outp
, &len
, sizeof(len
));
895 if (ret
!= sizeof(len
))
896 WARN("Failed to warn cgm_get of error; parent may hang");
900 if (!lxc_cgmanager_enter(getpid(), controller
, cgroup
, abs_cgroup_supported())) {
901 WARN("Failed to enter container cgroup %s:%s", controller
, cgroup
);
902 ret
= write(outp
, &len
, sizeof(len
));
903 if (ret
!= sizeof(len
))
904 WARN("Failed to warn cgm_get of error; parent may hang");
905 cgm_dbus_disconnect();
906 free_abs_cgroup(cgroup
);
909 if (cgmanager_get_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, &result
) != 0) {
911 nerr
= nih_error_get();
913 free_abs_cgroup(cgroup
);
914 cgm_dbus_disconnect();
915 ret
= write(outp
, &len
, sizeof(len
));
916 if (ret
!= sizeof(len
))
917 WARN("Failed to warn cgm_get of error; parent may hang");
920 free_abs_cgroup(cgroup
);
921 cgm_dbus_disconnect();
922 len
= strlen(result
);
923 ret
= write(outp
, &len
, sizeof(len
));
924 if (ret
!= sizeof(len
)) {
925 WARN("Failed to send length to parent");
928 if (!len
|| !sendvalue
) {
931 ret
= write(outp
, result
, len
);
937 /* cgm_get is called to get container cgroup settings, not during startup */
938 static int cgm_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
941 int p
[2], ret
, newlen
, readlen
;
945 if ((pid
= fork()) < 0) {
950 if (!pid
) // do_cgm_get exits
951 do_cgm_get(name
, lxcpath
, filename
, p
[1], len
&& value
);
953 ret
= read(p
[0], &newlen
, sizeof(newlen
));
954 if (ret
!= sizeof(newlen
)) {
959 if (!len
|| !value
) {
964 memset(value
, 0, len
);
965 if (newlen
< 0) { // child is reporting an error
970 if (newlen
== 0) { // empty read
975 readlen
= newlen
> len
? len
: newlen
;
976 ret
= read(p
[0], value
, readlen
);
978 if (ret
!= readlen
) {
985 } else if (newlen
+1 < len
) {
986 // cgmanager doesn't add eol to last entry
987 value
[newlen
++] = '\n';
988 value
[newlen
] = '\0';
992 if (wait_for_pid(pid
))
993 WARN("do_cgm_get exited with error");
997 static void do_cgm_set(const char *name
, const char *lxcpath
, const char *filename
, const char *value
, int outp
)
999 char *controller
, *key
, *cgroup
= NULL
;
1000 int retval
= 0; // value we are sending to the parent over outp
1004 controller
= alloca(strlen(filename
)+1);
1005 strcpy(controller
, filename
);
1006 key
= strchr(controller
, '.');
1008 ret
= write(outp
, &retval
, sizeof(retval
));
1009 if (ret
!= sizeof(retval
))
1010 WARN("Failed to warn cgm_set of error; parent may hang");
1015 if (!cgm_dbus_connect()) {
1016 ERROR("Error connecting to cgroup manager");
1017 ret
= write(outp
, &retval
, sizeof(retval
));
1018 if (ret
!= sizeof(retval
))
1019 WARN("Failed to warn cgm_set of error; parent may hang");
1022 cgroup
= try_get_abs_cgroup(name
, lxcpath
, controller
);
1024 cgm_dbus_disconnect();
1025 ret
= write(outp
, &retval
, sizeof(retval
));
1026 if (ret
!= sizeof(retval
))
1027 WARN("Failed to warn cgm_set of error; parent may hang");
1030 cglast
= strrchr(cgroup
, '/');
1032 cgm_dbus_disconnect();
1033 free_abs_cgroup(cgroup
);
1034 ret
= write(outp
, &retval
, sizeof(retval
));
1035 if (ret
!= sizeof(retval
))
1036 WARN("Failed to warn cgm_set of error; parent may hang");
1040 if (!lxc_cgmanager_enter(getpid(), controller
, cgroup
, abs_cgroup_supported())) {
1041 ERROR("Failed to enter container cgroup %s:%s", controller
, cgroup
);
1042 ret
= write(outp
, &retval
, sizeof(retval
));
1043 if (ret
!= sizeof(retval
))
1044 WARN("Failed to warn cgm_set of error; parent may hang");
1045 cgm_dbus_disconnect();
1046 free_abs_cgroup(cgroup
);
1049 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, value
) != 0) {
1051 nerr
= nih_error_get();
1052 ERROR("Error setting cgroup value %s for %s:%s", filename
, controller
, cgroup
);
1053 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1055 free_abs_cgroup(cgroup
);
1056 cgm_dbus_disconnect();
1057 ret
= write(outp
, &retval
, sizeof(retval
));
1058 if (ret
!= sizeof(retval
))
1059 WARN("Failed to warn cgm_set of error; parent may hang");
1062 free_abs_cgroup(cgroup
);
1063 cgm_dbus_disconnect();
1064 /* tell parent that we are done */
1066 ret
= write(outp
, &retval
, sizeof(retval
));
1067 if (ret
!= sizeof(retval
)) {
1073 /* cgm_set is called to change cgroup settings, not during startup */
1074 static int cgm_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
1081 if ((pid
= fork()) < 0) {
1086 if (!pid
) // do_cgm_set exits
1087 do_cgm_set(name
, lxcpath
, filename
, value
, p
[1]);
1089 ret
= read(p
[0], &v
, sizeof(v
));
1091 if (wait_for_pid(pid
))
1092 WARN("do_cgm_set exited with error");
1093 if (ret
!= sizeof(v
) || !v
)
1098 static void free_subsystems(void)
1102 for (i
= 0; i
< nr_subsystems
; i
++)
1103 free(subsystems
[i
]);
1109 static void cull_user_controllers(void)
1113 for (i
= 0; i
< nr_subsystems
; i
++) {
1114 if (strncmp(subsystems
[i
], "name=", 5) != 0)
1116 for (j
= i
; j
< nr_subsystems
-1; j
++)
1117 subsystems
[j
] = subsystems
[j
+1];
1123 * return true if inword is in the comma-delimited list cgroup_use
1125 static bool in_comma_list(const char *inword
, const char *cgroup_use
)
1128 size_t inlen
= strlen(inword
), len
;
1131 e
= strchr(cgroup_use
, ',');
1132 len
= e
? e
- cgroup_use
: strlen(cgroup_use
);
1133 if (len
== inlen
&& strncmp(inword
, cgroup_use
, len
) == 0)
1142 * inlist is a comma-delimited list of cgroups; so is checklist. Return
1143 * true if any member of inlist is in checklist.
1145 static bool any_in_comma_list(const char *inlist
, const char *checklist
)
1147 char *tmp
= alloca(strlen(inlist
) + 1), *tok
, *saveptr
= NULL
;
1149 strcpy(tmp
, inlist
);
1150 for (tok
= strtok_r(tmp
, ",", &saveptr
); tok
; tok
= strtok_r(NULL
, ",", &saveptr
)) {
1151 if (in_comma_list(tok
, checklist
))
1158 static bool in_subsystem_list(const char *c
)
1162 for (i
= 0; i
< nr_subsystems
; i
++) {
1163 if (strcmp(c
, subsystems
[i
]) == 0)
1171 * If /etc/lxc/lxc.conf specifies lxc.cgroup.use = "freezer,memory",
1172 * then clear out any other subsystems, and make sure that freezer
1173 * and memory are both enabled
1175 static bool verify_and_prune(const char *cgroup_use
)
1181 for (p
= cgroup_use
; p
&& *p
; p
= e
+ 1) {
1186 if (!in_subsystem_list(p
)) {
1187 ERROR("Controller %s required by lxc.cgroup.use but not available\n", p
);
1197 for (i
= 0; i
< nr_subsystems
;) {
1198 if (in_comma_list(subsystems
[i
], cgroup_use
)) {
1202 free(subsystems
[i
]);
1203 for (j
= i
; j
< nr_subsystems
-1; j
++)
1204 subsystems
[j
] = subsystems
[j
+1];
1205 subsystems
[nr_subsystems
-1] = NULL
;
1212 static void drop_subsystem(int which
)
1216 if (which
< 0 || which
>= nr_subsystems
) {
1217 ERROR("code error: dropping invalid subsystem index\n");
1221 free(subsystems
[which
]);
1222 /* note - we have nr_subsystems+1 entries, last one a NULL */
1223 for (i
= which
; i
< nr_subsystems
; i
++)
1224 subsystems
[i
] = subsystems
[i
+1];
1229 * Check whether we can create the cgroups we would want
1231 static bool subsys_is_writeable(const char *controller
, const char *probe
)
1236 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
1237 probe
, &existed
) != 0) {
1239 nerr
= nih_error_get();
1240 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
1242 ERROR("Failed to create %s:%s", controller
, probe
);
1249 static char *get_last_controller_in_list(char *list
)
1253 while ((p
= strchr(list
, ',')) != NULL
)
1260 * Make sure that all the controllers are writeable.
1261 * If any are not, then
1262 * - if they are listed in lxc.cgroup.use, refuse to start
1263 * - else if they are crucial subsystems, refuse to start
1264 * - else warn and do not use them
1266 static bool verify_final_subsystems(const char *cgroup_use
)
1269 bool dropped_any
= false;
1271 const char *cgroup_pattern
;
1272 char tmpnam
[50], *probe
;
1274 if (!cgm_dbus_connect()) {
1275 ERROR("Error connecting to cgroup manager");
1279 cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
1280 i
= snprintf(tmpnam
, 50, "lxcprobe-%d", getpid());
1281 if (i
< 0 || i
>= 50) {
1282 ERROR("Attack - format string modified?");
1285 probe
= lxc_string_replace("%n", tmpnam
, cgroup_pattern
);
1290 while (i
< nr_subsystems
) {
1291 char *p
= get_last_controller_in_list(subsystems
[i
]);
1293 if (!subsys_is_writeable(p
, probe
)) {
1294 if (is_crucial_cgroup_subsystem(p
)) {
1295 ERROR("Cannot write to crucial subsystem %s\n",
1299 if (cgroup_use
&& any_in_comma_list(subsystems
[i
], cgroup_use
)) {
1300 ERROR("Cannot write to subsystem %s which is requested in lxc.cgroup.use\n",
1304 WARN("Cannot write to subsystem %s, continuing with out it\n",
1309 cgm_remove_cgroup(subsystems
[i
], probe
);
1315 cgm_all_controllers_same
= false;
1320 cgm_dbus_disconnect();
1324 static bool collect_subsystems(void)
1327 nih_local
char **cgm_subsys_list
= NULL
;
1331 if (subsystems
) // already initialized
1334 subsystems_inone
= malloc(2 * sizeof(char *));
1335 if (!subsystems_inone
)
1337 subsystems_inone
[0] = "all";
1338 subsystems_inone
[1] = NULL
;
1340 if (lxc_list_controllers(&cgm_subsys_list
)) {
1341 while (cgm_subsys_list
[nr_subsystems
]) {
1342 char **tmp
= NIH_MUST( realloc(subsystems
,
1343 (nr_subsystems
+2)*sizeof(char *)) );
1344 tmp
[nr_subsystems
] = NIH_MUST(
1345 strdup(cgm_subsys_list
[nr_subsystems
++]) );
1349 subsystems
[nr_subsystems
] = NULL
;
1353 INFO("cgmanager_list_controllers failed, falling back to /proc/self/cgroups");
1354 f
= fopen_cloexec("/proc/self/cgroup", "r");
1356 f
= fopen_cloexec("/proc/1/cgroup", "r");
1360 while (getline(&line
, &sz
, f
) != -1) {
1361 /* file format: hierarchy:subsystems:group,
1362 * with multiple subsystems being ,-separated */
1363 char *slist
, *end
, *p
, *saveptr
= NULL
, **tmp
;
1368 slist
= strchr(line
, ':');
1372 end
= strchr(slist
, ':');
1377 for (p
= strtok_r(slist
, ",", &saveptr
);
1379 p
= strtok_r(NULL
, ",", &saveptr
)) {
1380 tmp
= realloc(subsystems
, (nr_subsystems
+2)*sizeof(char *));
1385 tmp
[nr_subsystems
] = strdup(p
);
1386 tmp
[nr_subsystems
+1] = NULL
;
1387 if (!tmp
[nr_subsystems
])
1399 if (!nr_subsystems
) {
1400 ERROR("No cgroup subsystems found");
1404 /* make sure that cgroup.use can be and is honored */
1405 const char *cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
1406 if (!cgroup_use
&& errno
!= 0)
1409 if (!verify_and_prune(cgroup_use
)) {
1413 subsystems_inone
[0] = NIH_MUST( strdup(cgroup_use
) );
1414 cgm_all_controllers_same
= false;
1418 return verify_final_subsystems(cgroup_use
);
1429 * called during cgroup.c:cgroup_ops_init(), at startup. No threads.
1430 * We check whether we can talk to cgmanager, escape to root cgroup if
1431 * we are root, then close the connection.
1433 struct cgroup_ops
*cgm_ops_init(void)
1435 check_supports_multiple_controllers(-1);
1436 if (!collect_subsystems())
1439 if (api_version
< CGM_SUPPORTS_MULT_CONTROLLERS
)
1440 cgm_all_controllers_same
= false;
1442 // if root, try to escape to root cgroup
1443 if (geteuid() == 0 && !cgm_escape(NULL
)) {
1448 return &cgmanager_ops
;
1451 /* unfreeze is called by the command api after killing a container. */
1452 static bool cgm_unfreeze(void *hdata
)
1454 struct cgm_data
*d
= hdata
;
1457 if (!d
|| !d
->cgroup_path
)
1460 if (!cgm_dbus_connect()) {
1461 ERROR("Error connecting to cgroup manager");
1464 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, "freezer", d
->cgroup_path
,
1465 "freezer.state", "THAWED") != 0) {
1467 nerr
= nih_error_get();
1468 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1470 ERROR("Error unfreezing %s", d
->cgroup_path
);
1473 cgm_dbus_disconnect();
1477 static bool cgm_setup_limits(void *hdata
, struct lxc_list
*cgroup_settings
, bool do_devices
)
1479 struct cgm_data
*d
= hdata
;
1480 struct lxc_list
*iterator
, *sorted_cgroup_settings
, *next
;
1481 struct lxc_cgroup
*cg
;
1484 if (lxc_list_empty(cgroup_settings
))
1487 if (!d
|| !d
->cgroup_path
)
1490 if (!cgm_dbus_connect()) {
1491 ERROR("Error connecting to cgroup manager");
1495 sorted_cgroup_settings
= sort_cgroup_settings(cgroup_settings
);
1496 if (!sorted_cgroup_settings
) {
1500 lxc_list_for_each(iterator
, sorted_cgroup_settings
) {
1501 char controller
[100], *p
;
1502 cg
= iterator
->elem
;
1503 if (do_devices
!= !strncmp("devices", cg
->subsystem
, 7))
1505 if (strlen(cg
->subsystem
) > 100) // i smell a rat
1507 strcpy(controller
, cg
->subsystem
);
1508 p
= strchr(controller
, '.');
1511 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
,
1512 d
->cgroup_path
, cg
->subsystem
, cg
->value
) != 0) {
1514 nerr
= nih_error_get();
1516 WARN("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1518 WARN("Error setting cgroup %s:%s limit type %s", controller
,
1519 d
->cgroup_path
, cg
->subsystem
);
1523 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1525 ERROR("Error setting cgroup %s:%s limit type %s", controller
,
1526 d
->cgroup_path
, cg
->subsystem
);
1530 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1534 INFO("cgroup limits have been setup");
1536 lxc_list_for_each_safe(iterator
, sorted_cgroup_settings
, next
) {
1537 lxc_list_del(iterator
);
1540 free(sorted_cgroup_settings
);
1541 cgm_dbus_disconnect();
1545 static bool cgm_chown(void *hdata
, struct lxc_conf
*conf
)
1547 struct cgm_data
*d
= hdata
;
1549 if (!d
|| !d
->cgroup_path
)
1551 if (!cgm_dbus_connect()) {
1552 ERROR("Error connecting to cgroup manager");
1555 if (!chown_cgroup(d
->cgroup_path
, conf
))
1556 WARN("Failed to chown %s to container root", d
->cgroup_path
);
1557 cgm_dbus_disconnect();
1562 * TODO: this should be re-written to use the get_config_item("lxc.id_map")
1563 * cmd api instead of getting the idmap from c->lxc_conf. The reason is
1564 * that the id_maps may be different if the container was started with a
1565 * -f or -s argument.
1566 * The reason I'm punting on that is because we'll need to parse the
1569 static bool cgm_attach(const char *name
, const char *lxcpath
, pid_t pid
)
1572 char *cgroup
= NULL
;
1573 char **slist
= subsystems
;
1576 if (!cgm_dbus_connect()) {
1577 ERROR("Error connecting to cgroup manager");
1581 for (i
= 0; slist
[i
]; i
++) {
1582 cgroup
= try_get_abs_cgroup(name
, lxcpath
, slist
[i
]);
1584 ERROR("Failed to get cgroup for controller %s", slist
[i
]);
1585 cgm_dbus_disconnect();
1589 if (!lxc_cgmanager_enter(pid
, slist
[i
], cgroup
, abs_cgroup_supported())) {
1595 cgm_dbus_disconnect();
1597 ERROR("Failed to enter group %s", cgroup
);
1599 free_abs_cgroup(cgroup
);
1603 static bool cgm_bind_dir(const char *root
, const char *dirname
)
1605 nih_local
char *cgpath
= NULL
;
1607 /* /sys should have been mounted by now */
1608 cgpath
= NIH_MUST( nih_strdup(NULL
, root
) );
1609 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/sys/fs/cgroup") );
1611 if (!dir_exists(cgpath
)) {
1612 ERROR("%s does not exist", cgpath
);
1616 /* mount a tmpfs there so we can create subdirs */
1617 if (safe_mount("cgroup", cgpath
, "tmpfs", 0, "size=10000,mode=755", root
)) {
1618 SYSERROR("Failed to mount tmpfs at %s", cgpath
);
1621 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/cgmanager") );
1623 if (mkdir(cgpath
, 0755) < 0) {
1624 SYSERROR("Failed to create %s", cgpath
);
1628 if (safe_mount(dirname
, cgpath
, "none", MS_BIND
, 0, root
)) {
1629 SYSERROR("Failed to bind mount %s to %s", dirname
, cgpath
);
1638 * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
1639 * /sys/fs/cgroup/cgmanager/ in the container.
1640 * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
1643 #define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
1644 #define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
1645 static bool cgm_mount_cgroup(void *hdata
, const char *root
, int type
)
1647 if (dir_exists(CGMANAGER_LOWER_SOCK
))
1648 return cgm_bind_dir(root
, CGMANAGER_LOWER_SOCK
);
1649 if (dir_exists(CGMANAGER_UPPER_SOCK
))
1650 return cgm_bind_dir(root
, CGMANAGER_UPPER_SOCK
);
1651 // Host doesn't have cgmanager running? Then how did we get here?
1655 static struct cgroup_ops cgmanager_ops
= {
1657 .destroy
= cgm_destroy
,
1658 .create
= cgm_create
,
1660 .create_legacy
= NULL
,
1661 .get_cgroup
= cgm_get_cgroup
,
1662 .escape
= cgm_escape
,
1663 .num_hierarchies
= cgm_num_hierarchies
,
1664 .get_hierarchies
= cgm_get_hierarchies
,
1667 .unfreeze
= cgm_unfreeze
,
1668 .setup_limits
= cgm_setup_limits
,
1669 .name
= "cgmanager",
1671 .attach
= cgm_attach
,
1672 .mount_cgroup
= cgm_mount_cgroup
,
1673 .nrtasks
= cgm_get_nrtasks
,
1675 .driver
= CGMANAGER
,