/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2008
 *
 * Daniel Lezcano <daniel.lezcano at free.fr>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <alloca.h>
#include <errno.h>
#include <grp.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/inotify.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
54 #define CGM_SUPPORTS_GET_ABS 3
55 #define CGM_SUPPORTS_NAMED 4
56 #define CGM_SUPPORTS_MULT_CONTROLLERS 10
59 lxc_log_define(lxc_cgmanager
, lxc
);
61 #include <nih-dbus/dbus_connection.h>
62 #include <cgmanager/cgmanager-client.h>
63 #include <nih/alloc.h>
64 #include <nih/error.h>
65 #include <nih/string.h>
70 const char *cgroup_pattern
;
73 static pthread_mutex_t cgm_mutex
= PTHREAD_MUTEX_INITIALIZER
;
/*
 * Lock mutex l.  A locking failure is treated as fatal: the error is
 * printed to stderr and the process exits with status 1.
 */
static void lock_mutex(pthread_mutex_t *l)
{
	int ret;

	if ((ret = pthread_mutex_lock(l)) != 0) {
		fprintf(stderr, "pthread_mutex_lock returned:%d %s\n", ret, strerror(ret));
		exit(1);
	}
}
/*
 * Unlock mutex l.  An unlocking failure is treated as fatal: the error is
 * printed to stderr and the process exits with status 1.
 */
static void unlock_mutex(pthread_mutex_t *l)
{
	int ret;

	if ((ret = pthread_mutex_unlock(l)) != 0) {
		fprintf(stderr, "pthread_mutex_unlock returned:%d %s\n", ret, strerror(ret));
		exit(1);
	}
}
97 lock_mutex(&cgm_mutex
);
100 void cgm_unlock(void)
102 unlock_mutex(&cgm_mutex
);
#ifdef HAVE_PTHREAD_ATFORK
/*
 * Constructor: register fork handlers so the cgmanager mutex is taken
 * before fork() and released again in both the parent and the child.
 */
__attribute__((constructor))
static void process_lock_setup_atfork(void)
{
	pthread_atfork(cgm_lock, cgm_unlock, cgm_unlock);
}
#endif
113 static NihDBusProxy
*cgroup_manager
= NULL
;
114 static int32_t api_version
;
116 static struct cgroup_ops cgmanager_ops
;
117 static int nr_subsystems
;
118 static char **subsystems
, **subsystems_inone
;
119 static bool dbus_threads_initialized
= false;
120 static void cull_user_controllers(void);
122 static void cgm_dbus_disconnect(void)
124 if (cgroup_manager
) {
125 dbus_connection_flush(cgroup_manager
->connection
);
126 dbus_connection_close(cgroup_manager
->connection
);
127 nih_free(cgroup_manager
);
129 cgroup_manager
= NULL
;
133 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
134 static bool cgm_dbus_connect(void)
136 DBusError dbus_error
;
137 static DBusConnection
*connection
;
140 if (!dbus_threads_initialized
) {
141 // tell dbus to do struct locking for thread safety
142 dbus_threads_init_default();
143 dbus_threads_initialized
= true;
146 dbus_error_init(&dbus_error
);
148 connection
= dbus_connection_open_private(CGMANAGER_DBUS_SOCK
, &dbus_error
);
150 DEBUG("Failed opening dbus connection: %s: %s",
151 dbus_error
.name
, dbus_error
.message
);
152 dbus_error_free(&dbus_error
);
156 dbus_connection_set_exit_on_disconnect(connection
, FALSE
);
157 dbus_error_free(&dbus_error
);
158 cgroup_manager
= nih_dbus_proxy_new(NULL
, connection
,
160 "/org/linuxcontainers/cgmanager", NULL
, NULL
);
161 dbus_connection_unref(connection
);
162 if (!cgroup_manager
) {
164 nerr
= nih_error_get();
165 ERROR("Error opening cgmanager proxy: %s", nerr
->message
);
167 cgm_dbus_disconnect();
171 // get the api version
172 if (cgmanager_get_api_version_sync(NULL
, cgroup_manager
, &api_version
) != 0) {
174 nerr
= nih_error_get();
175 ERROR("Error cgroup manager api version: %s", nerr
->message
);
177 cgm_dbus_disconnect();
180 if (api_version
< CGM_SUPPORTS_NAMED
)
181 cull_user_controllers();
185 static inline bool cgm_supports_multiple_controllers(void)
187 return api_version
>= CGM_SUPPORTS_MULT_CONTROLLERS
;
190 static int send_creds(int sock
, int rpid
, int ruid
, int rgid
)
192 struct msghdr msg
= { 0 };
194 struct cmsghdr
*cmsg
;
195 struct ucred cred
= {
200 char cmsgbuf
[CMSG_SPACE(sizeof(cred
))];
204 msg
.msg_control
= cmsgbuf
;
205 msg
.msg_controllen
= sizeof(cmsgbuf
);
207 cmsg
= CMSG_FIRSTHDR(&msg
);
208 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
209 cmsg
->cmsg_level
= SOL_SOCKET
;
210 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
211 memcpy(CMSG_DATA(cmsg
), &cred
, sizeof(cred
));
217 iov
.iov_len
= sizeof(buf
);
221 if (sendmsg(sock
, &msg
, 0) < 0)
226 static bool lxc_cgmanager_create(const char *controller
, const char *cgroup_path
, int32_t *existed
)
229 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
230 cgroup_path
, existed
) != 0) {
232 nerr
= nih_error_get();
233 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
235 ERROR("Failed to create %s:%s", controller
, cgroup_path
);
243 * Escape to the root cgroup if we are root, so that the container will
244 * be in "/lxc/c1" rather than "/user/..../c1"
245 * called internally with connection already open
247 static bool lxc_cgmanager_escape(void)
251 char **slist
= subsystems
;
254 if (cgm_supports_multiple_controllers())
255 slist
= subsystems_inone
;
257 for (i
= 0; slist
[i
]; i
++) {
258 if (cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
259 slist
[i
], "/", me
) != 0) {
261 nerr
= nih_error_get();
262 ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: %s",
263 slist
[i
], nerr
->message
);
/* Arguments handed to chown_cgroup_wrapper() through userns_exec_1(). */
struct chown_data {
	const char *cgroup_path;	/* cgroup to chown */
	uid_t origuid;			/* caller's euid, taken before entering the namespace */
};
278 static int do_chown_cgroup(const char *controller
, const char *cgroup_path
,
281 int sv
[2] = {-1, -1}, optval
= 1, ret
= -1;
284 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sv
) < 0) {
285 SYSERROR("Error creating socketpair");
288 if (setsockopt(sv
[1], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
289 SYSERROR("setsockopt failed");
292 if (setsockopt(sv
[0], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
293 SYSERROR("setsockopt failed");
296 if ( cgmanager_chown_scm_sync(NULL
, cgroup_manager
, controller
,
297 cgroup_path
, sv
[1]) != 0) {
299 nerr
= nih_error_get();
300 ERROR("call to cgmanager_chown_scm_sync failed: %s", nerr
->message
);
304 /* now send credentials */
308 FD_SET(sv
[0], &rfds
);
309 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
310 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
313 if (read(sv
[0], &buf
, 1) != 1) {
314 ERROR("Error getting reply from server over socketpair");
317 if (send_creds(sv
[0], getpid(), getuid(), getgid())) {
318 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
322 FD_SET(sv
[0], &rfds
);
323 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
324 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
327 if (read(sv
[0], &buf
, 1) != 1) {
328 ERROR("Error getting reply from server over socketpair");
331 if (send_creds(sv
[0], getpid(), newuid
, 0)) {
332 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
336 FD_SET(sv
[0], &rfds
);
337 if (select(sv
[0]+1, &rfds
, NULL
, NULL
, NULL
) < 0) {
338 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
341 ret
= read(sv
[0], buf
, 1);
345 if (ret
== 1 && *buf
== '1')
350 static int chown_cgroup_wrapper(void *data
)
352 struct chown_data
*arg
= data
;
353 char **slist
= subsystems
;
357 if (setresgid(0,0,0) < 0)
358 SYSERROR("Failed to setgid to 0");
359 if (setresuid(0,0,0) < 0)
360 SYSERROR("Failed to setuid to 0");
361 if (setgroups(0, NULL
) < 0)
362 SYSERROR("Failed to clear groups");
363 cgm_dbus_disconnect();
364 if (!cgm_dbus_connect()) {
365 ERROR("Error connecting to cgroup manager");
368 destuid
= get_ns_uid(arg
->origuid
);
370 if (cgm_supports_multiple_controllers())
371 slist
= subsystems_inone
;
373 for (i
= 0; slist
[i
]; i
++) {
374 if (do_chown_cgroup(slist
[i
], arg
->cgroup_path
, destuid
) < 0) {
375 ERROR("Failed to chown %s:%s to container root",
376 slist
[i
], arg
->cgroup_path
);
382 cgm_dbus_disconnect();
386 /* Internal helper. Must be called with the cgmanager dbus socket open */
387 static bool lxc_cgmanager_chmod(const char *controller
,
388 const char *cgroup_path
, const char *file
, int mode
)
390 if (cgmanager_chmod_sync(NULL
, cgroup_manager
, controller
,
391 cgroup_path
, file
, mode
) != 0) {
393 nerr
= nih_error_get();
394 ERROR("call to cgmanager_chmod_sync failed: %s", nerr
->message
);
401 /* Internal helper. Must be called with the cgmanager dbus socket open */
402 static bool chown_cgroup(const char *cgroup_path
, struct lxc_conf
*conf
)
404 struct chown_data data
;
405 char **slist
= subsystems
;
408 if (lxc_list_empty(&conf
->id_map
))
409 /* If there's no mapping then we don't need to chown */
412 data
.cgroup_path
= cgroup_path
;
413 data
.origuid
= geteuid();
415 /* Unpriv users can't chown it themselves, so chown from
416 * a child namespace mapping both our own and the target uid
418 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
) < 0) {
419 ERROR("Error requesting cgroup chown in new namespace");
424 * Now chmod 775 the directory else the container cannot create cgroups.
425 * This can't be done in the child namespace because it only group-owns
428 if (cgm_supports_multiple_controllers())
429 slist
= subsystems_inone
;
431 for (i
= 0; slist
[i
]; i
++) {
432 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "", 0775))
434 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "tasks", 0775))
436 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "cgroup.procs", 0775))
443 #define CG_REMOVE_RECURSIVE 1
444 /* Internal helper. Must be called with the cgmanager dbus socket open */
445 static void cgm_remove_cgroup(const char *controller
, const char *path
)
448 if ( cgmanager_remove_sync(NULL
, cgroup_manager
, controller
,
449 path
, CG_REMOVE_RECURSIVE
, &existed
) != 0) {
451 nerr
= nih_error_get();
452 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
454 ERROR("Error removing %s:%s", controller
, path
);
457 INFO("cgroup removal attempt: %s:%s did not exist", controller
, path
);
460 static void *cgm_init(const char *name
)
464 if (!cgm_dbus_connect()) {
465 ERROR("Error connecting to cgroup manager");
468 d
= malloc(sizeof(*d
));
470 cgm_dbus_disconnect();
474 memset(d
, 0, sizeof(*d
));
475 d
->name
= strdup(name
);
477 cgm_dbus_disconnect();
481 /* if we are running as root, use system cgroup pattern, otherwise
482 * just create a cgroup under the current one. But also fall back to
483 * that if for some reason reading the configuration fails and no
484 * default value is available
487 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
488 if (!d
->cgroup_pattern
)
489 d
->cgroup_pattern
= "%n";
490 // cgm_create immediately gets called so keep the connection open
498 /* Called after a failed container startup */
499 static void cgm_destroy(void *hdata
)
501 struct cgm_data
*d
= hdata
;
502 char **slist
= subsystems
;
505 if (!d
|| !d
->cgroup_path
)
507 if (!cgm_dbus_connect()) {
508 ERROR("Error connecting to cgroup manager");
512 if (cgm_supports_multiple_controllers())
513 slist
= subsystems_inone
;
514 for (i
= 0; slist
[i
]; i
++)
515 cgm_remove_cgroup(slist
[i
], d
->cgroup_path
);
519 free(d
->cgroup_path
);
521 cgm_dbus_disconnect();
525 * remove all the cgroups created
526 * called internally with dbus connection open
528 static inline void cleanup_cgroups(char *path
)
531 char **slist
= subsystems
;
533 if (cgm_supports_multiple_controllers())
534 slist
= subsystems_inone
;
535 for (i
= 0; slist
[i
]; i
++)
536 cgm_remove_cgroup(slist
[i
], path
);
539 static inline bool cgm_create(void *hdata
)
541 struct cgm_data
*d
= hdata
;
542 char **slist
= subsystems
;
543 int i
, index
=0, baselen
, ret
;
545 char result
[MAXPATHLEN
], *tmp
, *cgroup_path
;
549 // XXX we should send a hint to the cgmanager that when these
550 // cgroups become empty they should be deleted. Requires a cgmanager
553 memset(result
, 0, MAXPATHLEN
);
554 tmp
= lxc_string_replace("%n", d
->name
, d
->cgroup_pattern
);
557 if (strlen(tmp
) >= MAXPATHLEN
) {
562 baselen
= strlen(result
);
568 if (index
== 100) { // turn this into a warn later
569 ERROR("cgroup error? 100 cgroups with this name already running");
573 ret
= snprintf(result
+baselen
, MAXPATHLEN
-baselen
, "-%d", index
);
574 if (ret
< 0 || ret
>= MAXPATHLEN
-baselen
)
579 if (cgm_supports_multiple_controllers())
580 slist
= subsystems_inone
;
582 for (i
= 0; slist
[i
]; i
++) {
583 if (!lxc_cgmanager_create(slist
[i
], tmp
, &existed
)) {
584 ERROR("Error creating cgroup %s:%s", slist
[i
], result
);
585 cleanup_cgroups(tmp
);
592 cgroup_path
= strdup(tmp
);
594 cleanup_cgroups(tmp
);
597 d
->cgroup_path
= cgroup_path
;
598 cgm_dbus_disconnect();
605 cgm_dbus_disconnect();
610 * Use the cgmanager to move a task into a cgroup for a particular
612 * All the subsystems in this hierarchy are co-mounted, so we only
613 * need to transition the task into one of the cgroups
615 * Internal helper, must be called with cgmanager dbus socket open
617 static bool lxc_cgmanager_enter(pid_t pid
, const char *controller
,
618 const char *cgroup_path
, bool abs
)
623 ret
= cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
624 controller
, cgroup_path
, pid
);
626 ret
= cgmanager_move_pid_sync(NULL
, cgroup_manager
,
627 controller
, cgroup_path
, pid
);
630 nerr
= nih_error_get();
631 ERROR("call to cgmanager_move_pid_%ssync failed: %s",
632 abs
? "abs_" : "", nerr
->message
);
639 /* Internal helper, must be called with cgmanager dbus socket open */
640 static bool do_cgm_enter(pid_t pid
, const char *cgroup_path
, bool abs
)
642 char **slist
= subsystems
;
645 if (cgm_supports_multiple_controllers())
646 slist
= subsystems_inone
;
648 for (i
= 0; slist
[i
]; i
++) {
649 if (!lxc_cgmanager_enter(pid
, slist
[i
], cgroup_path
, abs
))
655 static inline bool cgm_enter(void *hdata
, pid_t pid
)
657 struct cgm_data
*d
= hdata
;
660 if (!cgm_dbus_connect()) {
661 ERROR("Error connecting to cgroup manager");
664 if (!d
|| !d
->cgroup_path
)
666 if (do_cgm_enter(pid
, d
->cgroup_path
, false))
669 cgm_dbus_disconnect();
673 static const char *cgm_get_cgroup(void *hdata
, const char *subsystem
)
675 struct cgm_data
*d
= hdata
;
677 if (!d
|| !d
->cgroup_path
)
679 return d
->cgroup_path
;
/*
 * Whether a task's absolute cgroup can be queried from cgmanager.  When
 * the client library lacks the call, it is never supported and the call
 * is stubbed out to fail.
 */
#if HAVE_CGMANAGER_GET_PID_CGROUP_ABS_SYNC
static inline bool abs_cgroup_supported(void) {
	return api_version >= CGM_SUPPORTS_GET_ABS;
}
#else
static inline bool abs_cgroup_supported(void) {
	return false;
}
#define cgmanager_get_pid_cgroup_abs_sync(...) -1
#endif
693 static char *try_get_abs_cgroup(const char *name
, const char *lxcpath
,
694 const char *controller
)
698 if (abs_cgroup_supported()) {
699 /* get the container init pid and ask for its abs cgroup */
700 pid_t pid
= lxc_cmd_get_init_pid(name
, lxcpath
);
703 if (cgmanager_get_pid_cgroup_abs_sync(NULL
, cgroup_manager
,
704 controller
, pid
, &cgroup
) != 0) {
707 nerr
= nih_error_get();
713 /* use the command interface to look for the cgroup */
714 return lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
718 * nrtasks is called by the utmp helper by the container monitor.
719 * cgmanager socket was closed after cgroup setup was complete, so we need
722 * Return -1 on error.
724 static int cgm_get_nrtasks(void *hdata
)
726 struct cgm_data
*d
= hdata
;
730 if (!d
|| !d
->cgroup_path
)
733 if (!cgm_dbus_connect()) {
734 ERROR("Error connecting to cgroup manager");
737 if (cgmanager_get_tasks_sync(NULL
, cgroup_manager
, subsystems
[0],
738 d
->cgroup_path
, &pids
, &pids_len
) != 0) {
740 nerr
= nih_error_get();
741 ERROR("call to cgmanager_get_tasks_sync failed: %s", nerr
->message
);
748 cgm_dbus_disconnect();
/*
 * Release a path returned by try_get_abs_cgroup(): nih_free() when it came
 * from cgmanager, free() when it came from the command interface.
 * NULL is accepted.
 */
static inline void free_abs_cgroup(char *cgroup)
{
	if (!cgroup)
		return;
	if (abs_cgroup_supported())
		nih_free(cgroup);
	else
		free(cgroup);
}
762 static void do_cgm_get(const char *name
, const char *lxcpath
, const char *filename
, int outp
, bool sendvalue
)
764 char *controller
, *key
, *cgroup
= NULL
, *cglast
;
767 nih_local
char *result
= NULL
;
769 controller
= alloca(strlen(filename
)+1);
770 strcpy(controller
, filename
);
771 key
= strchr(controller
, '.');
773 ret
= write(outp
, &len
, sizeof(len
));
774 if (ret
!= sizeof(len
))
775 WARN("Failed to warn cgm_get of error; parent may hang");
780 if (!cgm_dbus_connect()) {
781 ERROR("Error connecting to cgroup manager");
782 ret
= write(outp
, &len
, sizeof(len
));
783 if (ret
!= sizeof(len
))
784 WARN("Failed to warn cgm_get of error; parent may hang");
787 cgroup
= try_get_abs_cgroup(name
, lxcpath
, subsystems
[0]);
789 cgm_dbus_disconnect();
790 ret
= write(outp
, &len
, sizeof(len
));
791 if (ret
!= sizeof(len
))
792 WARN("Failed to warn cgm_get of error; parent may hang");
795 cglast
= strrchr(cgroup
, '/');
797 cgm_dbus_disconnect();
798 free_abs_cgroup(cgroup
);
799 ret
= write(outp
, &len
, sizeof(len
));
800 if (ret
!= sizeof(len
))
801 WARN("Failed to warn cgm_get of error; parent may hang");
805 if (!lxc_cgmanager_enter(getpid(), controller
, cgroup
, abs_cgroup_supported())) {
806 ERROR("Failed to enter container cgroup %s:%s", controller
, cgroup
);
807 ret
= write(outp
, &len
, sizeof(len
));
808 if (ret
!= sizeof(len
))
809 WARN("Failed to warn cgm_get of error; parent may hang");
810 cgm_dbus_disconnect();
811 free_abs_cgroup(cgroup
);
814 if (cgmanager_get_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, &result
) != 0) {
816 nerr
= nih_error_get();
818 free_abs_cgroup(cgroup
);
819 cgm_dbus_disconnect();
820 ret
= write(outp
, &len
, sizeof(len
));
821 if (ret
!= sizeof(len
))
822 WARN("Failed to warn cgm_get of error; parent may hang");
825 free_abs_cgroup(cgroup
);
826 cgm_dbus_disconnect();
827 len
= strlen(result
);
828 ret
= write(outp
, &len
, sizeof(len
));
829 if (ret
!= sizeof(len
)) {
830 WARN("Failed to send length to parent");
833 if (!len
|| !sendvalue
) {
836 ret
= write(outp
, result
, len
);
/*
 * cgm_get is called to get container cgroup settings, not during startup.
 *
 * Forks a child (do_cgm_get) that enters the container's cgroup and
 * reports the value of `filename` over a pipe.
 *
 * Returns -1 on error.  When value is NULL or len is 0, returns the length
 * of the value without copying it.  Otherwise copies at most len-1 bytes
 * into value, NUL-terminates it, appends '\n' when there is room, and
 * returns the resulting string length.
 */
static int cgm_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
{
	pid_t pid;
	int p[2], ret, newlen, readlen;

	if (pipe(p) < 0)
		return -1;
	if ((pid = fork()) < 0) {
		close(p[0]);
		close(p[1]);
		return -1;
	}
	if (!pid) // do_cgm_get exits
		do_cgm_get(name, lxcpath, filename, p[1], len && value);
	close(p[1]);
	ret = read(p[0], &newlen, sizeof(newlen));
	if (ret != sizeof(newlen)) {
		close(p[0]);
		ret = -1;
		goto out;
	}
	if (!len || !value) {
		close(p[0]);
		ret = newlen;
		goto out;
	}
	memset(value, 0, len);
	if (newlen < 0) { // child is reporting an error
		close(p[0]);
		ret = -1;
		goto out;
	}
	if (newlen == 0) { // empty read
		close(p[0]);
		ret = 0;
		goto out;
	}
	/* newlen is positive here, so the casts to size_t are lossless */
	readlen = (size_t)newlen > len ? (int)len : newlen;
	ret = read(p[0], value, readlen);
	close(p[0]);
	if (ret != readlen) {
		ret = -1;
		goto out;
	}
	if ((size_t)newlen >= len) {
		value[len-1] = '\0';
		newlen = len-1;
	} else if ((size_t)newlen+1 < len) {
		// cgmanager doesn't add eol to last entry
		value[newlen++] = '\n';
		value[newlen] = '\0';
	}
	ret = newlen;
out:
	if (wait_for_pid(pid))
		WARN("do_cgm_get exited with error");
	return ret;
}
902 static void do_cgm_set(const char *name
, const char *lxcpath
, const char *filename
, const char *value
, int outp
)
904 char *controller
, *key
, *cgroup
= NULL
;
905 int retval
= 0; // value we are sending to the parent over outp
909 controller
= alloca(strlen(filename
)+1);
910 strcpy(controller
, filename
);
911 key
= strchr(controller
, '.');
913 ret
= write(outp
, &retval
, sizeof(retval
));
914 if (ret
!= sizeof(retval
))
915 WARN("Failed to warn cgm_set of error; parent may hang");
920 if (!cgm_dbus_connect()) {
921 ERROR("Error connecting to cgroup manager");
922 ret
= write(outp
, &retval
, sizeof(retval
));
923 if (ret
!= sizeof(retval
))
924 WARN("Failed to warn cgm_set of error; parent may hang");
927 cgroup
= try_get_abs_cgroup(name
, lxcpath
, subsystems
[0]);
929 cgm_dbus_disconnect();
930 ret
= write(outp
, &retval
, sizeof(retval
));
931 if (ret
!= sizeof(retval
))
932 WARN("Failed to warn cgm_set of error; parent may hang");
935 cglast
= strrchr(cgroup
, '/');
937 cgm_dbus_disconnect();
938 free_abs_cgroup(cgroup
);
939 ret
= write(outp
, &retval
, sizeof(retval
));
940 if (ret
!= sizeof(retval
))
941 WARN("Failed to warn cgm_set of error; parent may hang");
945 if (!lxc_cgmanager_enter(getpid(), controller
, cgroup
, abs_cgroup_supported())) {
946 ERROR("Failed to enter container cgroup %s:%s", controller
, cgroup
);
947 ret
= write(outp
, &retval
, sizeof(retval
));
948 if (ret
!= sizeof(retval
))
949 WARN("Failed to warn cgm_set of error; parent may hang");
950 cgm_dbus_disconnect();
951 free_abs_cgroup(cgroup
);
954 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, value
) != 0) {
956 nerr
= nih_error_get();
957 ERROR("Error setting cgroup value %s for %s:%s", filename
, controller
, cgroup
);
958 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
960 free_abs_cgroup(cgroup
);
961 cgm_dbus_disconnect();
962 ret
= write(outp
, &retval
, sizeof(retval
));
963 if (ret
!= sizeof(retval
))
964 WARN("Failed to warn cgm_set of error; parent may hang");
967 free_abs_cgroup(cgroup
);
968 cgm_dbus_disconnect();
969 /* tell parent that we are done */
971 ret
= write(outp
, &retval
, sizeof(retval
));
972 if (ret
!= sizeof(retval
)) {
/*
 * cgm_set is called to change cgroup settings, not during startup.
 *
 * Forks a child (do_cgm_set) that enters the container's cgroup, applies
 * the setting and reports an int status over a pipe.
 *
 * Returns 0 if the child reported success, -1 otherwise.
 */
static int cgm_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	pid_t pid;
	int p[2], ret, v;

	if (pipe(p) < 0)
		return -1;
	if ((pid = fork()) < 0) {
		close(p[1]);
		close(p[0]);
		return -1;
	}
	if (!pid) // do_cgm_set exits
		do_cgm_set(name, lxcpath, filename, value, p[1]);
	close(p[1]);
	ret = read(p[0], &v, sizeof(v));
	close(p[0]);
	if (wait_for_pid(pid))
		WARN("do_cgm_set exited with error");
	if (ret != sizeof(v) || !v)
		return -1;
	return 0;
}
1003 static void free_subsystems(void)
1007 for (i
= 0; i
< nr_subsystems
; i
++)
1008 free(subsystems
[i
]);
1014 static void cull_user_controllers(void)
1018 for (i
= 0; i
< nr_subsystems
; i
++) {
1019 if (strncmp(subsystems
[i
], "name=", 5) != 0)
1021 for (j
= i
; j
< nr_subsystems
-1; j
++)
1022 subsystems
[j
] = subsystems
[j
+1];
1027 static bool collect_subsytems(void)
1033 if (subsystems
) // already initialized
1036 subsystems_inone
= malloc(2 * sizeof(char *));
1037 if (!subsystems_inone
)
1039 subsystems_inone
[0] = "all";
1040 subsystems_inone
[1] = NULL
;
1042 f
= fopen_cloexec("/proc/self/cgroup", "r");
1044 f
= fopen_cloexec("/proc/1/cgroup", "r");
1048 while (getline(&line
, &sz
, f
) != -1) {
1049 /* file format: hierarchy:subsystems:group,
1050 * with multiple subsystems being ,-separated */
1051 char *slist
, *end
, *p
, *saveptr
= NULL
, **tmp
;
1056 slist
= strchr(line
, ':');
1060 end
= strchr(slist
, ':');
1065 for (p
= strtok_r(slist
, ",", &saveptr
);
1067 p
= strtok_r(NULL
, ",", &saveptr
)) {
1068 tmp
= realloc(subsystems
, (nr_subsystems
+2)*sizeof(char *));
1073 tmp
[nr_subsystems
] = strdup(p
);
1074 tmp
[nr_subsystems
+1] = NULL
;
1075 if (!tmp
[nr_subsystems
])
1083 if (!nr_subsystems
) {
1084 ERROR("No cgroup subsystems found");
1098 * called during cgroup.c:cgroup_ops_init(), at startup. No threads.
1099 * We check whether we can talk to cgmanager, escape to root cgroup if
1100 * we are root, then close the connection.
1102 struct cgroup_ops
*cgm_ops_init(void)
1104 if (!collect_subsytems())
1106 if (!cgm_dbus_connect())
1109 // root; try to escape to root cgroup
1110 if (geteuid() == 0 && !lxc_cgmanager_escape())
1112 cgm_dbus_disconnect();
1114 return &cgmanager_ops
;
1117 cgm_dbus_disconnect();
1123 /* unfreeze is called by the command api after killing a container. */
1124 static bool cgm_unfreeze(void *hdata
)
1126 struct cgm_data
*d
= hdata
;
1129 if (!d
|| !d
->cgroup_path
)
1132 if (!cgm_dbus_connect()) {
1133 ERROR("Error connecting to cgroup manager");
1136 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, "freezer", d
->cgroup_path
,
1137 "freezer.state", "THAWED") != 0) {
1139 nerr
= nih_error_get();
1140 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1142 ERROR("Error unfreezing %s", d
->cgroup_path
);
1145 cgm_dbus_disconnect();
1149 static bool cgm_setup_limits(void *hdata
, struct lxc_list
*cgroup_settings
, bool do_devices
)
1151 struct cgm_data
*d
= hdata
;
1152 struct lxc_list
*iterator
;
1153 struct lxc_cgroup
*cg
;
1156 if (lxc_list_empty(cgroup_settings
))
1159 if (!d
|| !d
->cgroup_path
)
1162 if (!cgm_dbus_connect()) {
1163 ERROR("Error connecting to cgroup manager");
1167 lxc_list_for_each(iterator
, cgroup_settings
) {
1168 char controller
[100], *p
;
1169 cg
= iterator
->elem
;
1170 if (do_devices
!= !strncmp("devices", cg
->subsystem
, 7))
1172 if (strlen(cg
->subsystem
) > 100) // i smell a rat
1174 strcpy(controller
, cg
->subsystem
);
1175 p
= strchr(controller
, '.');
1178 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
,
1179 d
->cgroup_path
, cg
->subsystem
, cg
->value
) != 0) {
1181 nerr
= nih_error_get();
1182 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1184 ERROR("Error setting cgroup %s:%s limit type %s", controller
,
1185 d
->cgroup_path
, cg
->subsystem
);
1189 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1193 INFO("cgroup limits have been setup");
1195 cgm_dbus_disconnect();
1199 static bool cgm_chown(void *hdata
, struct lxc_conf
*conf
)
1201 struct cgm_data
*d
= hdata
;
1203 if (!d
|| !d
->cgroup_path
)
1205 if (!cgm_dbus_connect()) {
1206 ERROR("Error connecting to cgroup manager");
1209 if (!chown_cgroup(d
->cgroup_path
, conf
))
1210 WARN("Failed to chown %s to container root", d
->cgroup_path
);
1211 cgm_dbus_disconnect();
1216 * TODO: this should be re-written to use the get_config_item("lxc.id_map")
1217 * cmd api instead of getting the idmap from c->lxc_conf. The reason is
1218 * that the id_maps may be different if the container was started with a
1219 * -f or -s argument.
1220 * The reason I'm punting on that is because we'll need to parse the
1223 static bool cgm_attach(const char *name
, const char *lxcpath
, pid_t pid
)
1226 char *cgroup
= NULL
;
1228 if (!cgm_dbus_connect()) {
1229 ERROR("Error connecting to cgroup manager");
1232 // cgm_create makes sure that we have the same cgroup name for all
1233 // subsystems, so since this is a slow command over the cmd socket,
1234 // just get the cgroup name for the first one.
1235 cgroup
= try_get_abs_cgroup(name
, lxcpath
, subsystems
[0]);
1237 ERROR("Failed to get cgroup for controller %s", subsystems
[0]);
1238 cgm_dbus_disconnect();
1242 pass
= do_cgm_enter(pid
, cgroup
, abs_cgroup_supported());
1243 cgm_dbus_disconnect();
1245 ERROR("Failed to enter group %s", cgroup
);
1247 free_abs_cgroup(cgroup
);
1251 static bool cgm_bind_dir(const char *root
, const char *dirname
)
1253 nih_local
char *cgpath
= NULL
;
1255 /* /sys should have been mounted by now */
1256 cgpath
= NIH_MUST( nih_strdup(NULL
, root
) );
1257 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/sys/fs/cgroup") );
1259 if (!dir_exists(cgpath
)) {
1260 ERROR("%s does not exist", cgpath
);
1264 /* mount a tmpfs there so we can create subdirs */
1265 if (mount("cgroup", cgpath
, "tmpfs", 0, "size=10000,mode=755")) {
1266 SYSERROR("Failed to mount tmpfs at %s", cgpath
);
1269 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/cgmanager") );
1271 if (mkdir(cgpath
, 0755) < 0) {
1272 SYSERROR("Failed to create %s", cgpath
);
1276 if (mount(dirname
, cgpath
, "none", MS_BIND
, 0)) {
1277 SYSERROR("Failed to bind mount %s to %s", dirname
, cgpath
);
/*
 * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
 * /sys/fs/cgroup/cgmanager/ in the container.
 * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
 * If neither exists, return false.
 *
 * hdata and type are not used.
 */
#define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
#define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
static bool cgm_mount_cgroup(void *hdata, const char *root, int type)
{
	if (dir_exists(CGMANAGER_LOWER_SOCK))
		return cgm_bind_dir(root, CGMANAGER_LOWER_SOCK);
	if (dir_exists(CGMANAGER_UPPER_SOCK))
		return cgm_bind_dir(root, CGMANAGER_UPPER_SOCK);
	// Host doesn't have cgmanager running? Then how did we get here?
	return false;
}
1303 static struct cgroup_ops cgmanager_ops
= {
1305 .destroy
= cgm_destroy
,
1306 .create
= cgm_create
,
1308 .create_legacy
= NULL
,
1309 .get_cgroup
= cgm_get_cgroup
,
1312 .unfreeze
= cgm_unfreeze
,
1313 .setup_limits
= cgm_setup_limits
,
1314 .name
= "cgmanager",
1316 .attach
= cgm_attach
,
1317 .mount_cgroup
= cgm_mount_cgroup
,
1318 .nrtasks
= cgm_get_nrtasks
,