2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/inotify.h>
39 #include <sys/mount.h>
40 #include <netinet/in.h>
55 #define CGM_SUPPORTS_GET_ABS 3
56 #define CGM_SUPPORTS_NAMED 4
57 #define CGM_SUPPORTS_MULT_CONTROLLERS 10
60 lxc_log_define(lxc_cgmanager
, lxc
);
62 #include <nih-dbus/dbus_connection.h>
63 #include <cgmanager/cgmanager-client.h>
64 #include <nih/alloc.h>
65 #include <nih/error.h>
66 #include <nih/string.h>
71 const char *cgroup_pattern
;
74 static pthread_mutex_t cgm_mutex
= PTHREAD_MUTEX_INITIALIZER
;
/*
 * lock_mutex: acquire the mutex @l.
 *
 * A failure of pthread_mutex_lock() is treated as fatal: the error number
 * and its strerror() text are printed to stderr and the process exits with
 * status 1, so callers never have to handle a "lock not taken" state.
 */
static void lock_mutex(pthread_mutex_t *l)
{
	int ret;

	if ((ret = pthread_mutex_lock(l)) != 0) {
		fprintf(stderr, "pthread_mutex_lock returned:%d %s\n", ret, strerror(ret));
		exit(1);
	}
}
/*
 * unlock_mutex: release the mutex @l.
 *
 * Mirrors lock_mutex(): a failure of pthread_mutex_unlock() is reported on
 * stderr (error number plus strerror() text) and terminates the process
 * with status 1.
 */
static void unlock_mutex(pthread_mutex_t *l)
{
	int ret;

	if ((ret = pthread_mutex_unlock(l)) != 0) {
		fprintf(stderr, "pthread_mutex_unlock returned:%d %s\n", ret, strerror(ret));
		exit(1);
	}
}
98 lock_mutex(&cgm_mutex
);
101 void cgm_unlock(void)
103 unlock_mutex(&cgm_mutex
);
106 #ifdef HAVE_PTHREAD_ATFORK
107 __attribute__((constructor
))
108 static void process_lock_setup_atfork(void)
110 pthread_atfork(cgm_lock
, cgm_unlock
, cgm_unlock
);
114 static NihDBusProxy
*cgroup_manager
= NULL
;
115 static int32_t api_version
;
117 static struct cgroup_ops cgmanager_ops
;
118 static int nr_subsystems
;
119 static char **subsystems
, **subsystems_inone
;
120 static bool dbus_threads_initialized
= false;
121 static void cull_user_controllers(void);
123 static void cgm_dbus_disconnect(void)
125 if (cgroup_manager
) {
126 dbus_connection_flush(cgroup_manager
->connection
);
127 dbus_connection_close(cgroup_manager
->connection
);
128 nih_free(cgroup_manager
);
130 cgroup_manager
= NULL
;
134 #define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
135 static bool cgm_dbus_connect(void)
137 DBusError dbus_error
;
138 static DBusConnection
*connection
;
141 if (!dbus_threads_initialized
) {
142 // tell dbus to do struct locking for thread safety
143 dbus_threads_init_default();
144 dbus_threads_initialized
= true;
147 dbus_error_init(&dbus_error
);
149 connection
= dbus_connection_open_private(CGMANAGER_DBUS_SOCK
, &dbus_error
);
151 DEBUG("Failed opening dbus connection: %s: %s",
152 dbus_error
.name
, dbus_error
.message
);
153 dbus_error_free(&dbus_error
);
157 dbus_connection_set_exit_on_disconnect(connection
, FALSE
);
158 dbus_error_free(&dbus_error
);
159 cgroup_manager
= nih_dbus_proxy_new(NULL
, connection
,
161 "/org/linuxcontainers/cgmanager", NULL
, NULL
);
162 dbus_connection_unref(connection
);
163 if (!cgroup_manager
) {
165 nerr
= nih_error_get();
166 ERROR("Error opening cgmanager proxy: %s", nerr
->message
);
168 cgm_dbus_disconnect();
172 // get the api version
173 if (cgmanager_get_api_version_sync(NULL
, cgroup_manager
, &api_version
) != 0) {
175 nerr
= nih_error_get();
176 ERROR("Error cgroup manager api version: %s", nerr
->message
);
178 cgm_dbus_disconnect();
181 if (api_version
< CGM_SUPPORTS_NAMED
)
182 cull_user_controllers();
186 static bool cgm_supports_multiple_controllers
;
188 * if cgm_all_controllers_same is true, then cgm_supports_multiple_controllers
191 static bool cgm_all_controllers_same
;
194 * Check whether we can use "all" when talking to cgmanager.
195 * We check two things:
196 * 1. whether cgmanager is new enough to support this.
197 * 2. whether the task we are interested in is in the same
198 * cgroup for all controllers.
199 * In cgm_init (before an lxc-start) we care about our own
200 * cgroup. In cgm_attach, we care about the target task's
203 static void check_supports_multiple_controllers(pid_t pid
)
206 char *line
= NULL
, *prevpath
= NULL
;
210 cgm_supports_multiple_controllers
= false;
211 cgm_all_controllers_same
= false;
213 if (api_version
< CGM_SUPPORTS_MULT_CONTROLLERS
) {
214 cgm_supports_multiple_controllers
= false;
218 cgm_supports_multiple_controllers
= true;
221 sprintf(path
, "/proc/self/cgroup");
223 sprintf(path
, "/proc/%d/cgroup", pid
);
224 f
= fopen(path
, "r");
228 cgm_all_controllers_same
= true;
230 while (getline(&line
, &sz
, f
) != -1) {
231 /* file format: hierarchy:subsystems:group */
236 colon
= strchr(line
, ':');
239 colon
= strchr(colon
+1, ':');
244 prevpath
= alloca(strlen(colon
)+1);
245 strcpy(prevpath
, colon
);
248 if (strcmp(prevpath
, colon
) != 0) {
249 cgm_all_controllers_same
= false;
258 static int send_creds(int sock
, int rpid
, int ruid
, int rgid
)
260 struct msghdr msg
= { 0 };
262 struct cmsghdr
*cmsg
;
263 struct ucred cred
= {
268 char cmsgbuf
[CMSG_SPACE(sizeof(cred
))];
272 msg
.msg_control
= cmsgbuf
;
273 msg
.msg_controllen
= sizeof(cmsgbuf
);
275 cmsg
= CMSG_FIRSTHDR(&msg
);
276 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
277 cmsg
->cmsg_level
= SOL_SOCKET
;
278 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
279 memcpy(CMSG_DATA(cmsg
), &cred
, sizeof(cred
));
285 iov
.iov_len
= sizeof(buf
);
289 if (sendmsg(sock
, &msg
, 0) < 0)
294 static bool lxc_cgmanager_create(const char *controller
, const char *cgroup_path
, int32_t *existed
)
297 if ( cgmanager_create_sync(NULL
, cgroup_manager
, controller
,
298 cgroup_path
, existed
) != 0) {
300 nerr
= nih_error_get();
301 ERROR("call to cgmanager_create_sync failed: %s", nerr
->message
);
303 ERROR("Failed to create %s:%s", controller
, cgroup_path
);
311 * Escape to the root cgroup if we are root, so that the container will
312 * be in "/lxc/c1" rather than "/user/..../c1"
313 * called internally with connection already open
315 static bool lxc_cgmanager_escape(void)
319 char **slist
= subsystems
;
322 if (cgm_all_controllers_same
)
323 slist
= subsystems_inone
;
325 for (i
= 0; slist
[i
]; i
++) {
326 if (cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
327 slist
[i
], "/", me
) != 0) {
329 nerr
= nih_error_get();
330 ERROR("call to cgmanager_move_pid_abs_sync(%s) failed: %s",
331 slist
[i
], nerr
->message
);
342 const char *cgroup_path
;
346 static int do_chown_cgroup(const char *controller
, const char *cgroup_path
,
349 int sv
[2] = {-1, -1}, optval
= 1, ret
= -1;
353 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sv
) < 0) {
354 SYSERROR("Error creating socketpair");
357 if (setsockopt(sv
[1], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
358 SYSERROR("setsockopt failed");
361 if (setsockopt(sv
[0], SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
362 SYSERROR("setsockopt failed");
365 if ( cgmanager_chown_scm_sync(NULL
, cgroup_manager
, controller
,
366 cgroup_path
, sv
[1]) != 0) {
368 nerr
= nih_error_get();
369 ERROR("call to cgmanager_chown_scm_sync failed: %s", nerr
->message
);
373 /* now send credentials */
378 if (poll(&fds
, 1, -1) <= 0) {
379 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
382 if (read(sv
[0], &buf
, 1) != 1) {
383 ERROR("Error getting reply from server over socketpair");
386 if (send_creds(sv
[0], getpid(), getuid(), getgid())) {
387 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
393 if (poll(&fds
, 1, -1) <= 0) {
394 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
397 if (read(sv
[0], &buf
, 1) != 1) {
398 ERROR("Error getting reply from server over socketpair");
401 if (send_creds(sv
[0], getpid(), newuid
, 0)) {
402 SYSERROR("%s: Error sending pid over SCM_CREDENTIAL", __func__
);
408 if (poll(&fds
, 1, -1) <= 0) {
409 ERROR("Error getting go-ahead from server: %s", strerror(errno
));
412 ret
= read(sv
[0], buf
, 1);
416 if (ret
== 1 && *buf
== '1')
421 static int chown_cgroup_wrapper(void *data
)
423 struct chown_data
*arg
= data
;
424 char **slist
= subsystems
;
428 if (setresgid(0,0,0) < 0)
429 SYSERROR("Failed to setgid to 0");
430 if (setresuid(0,0,0) < 0)
431 SYSERROR("Failed to setuid to 0");
432 if (setgroups(0, NULL
) < 0)
433 SYSERROR("Failed to clear groups");
434 cgm_dbus_disconnect();
435 if (!cgm_dbus_connect()) {
436 ERROR("Error connecting to cgroup manager");
439 destuid
= get_ns_uid(arg
->origuid
);
441 if (cgm_supports_multiple_controllers
)
442 slist
= subsystems_inone
;
444 for (i
= 0; slist
[i
]; i
++) {
445 if (do_chown_cgroup(slist
[i
], arg
->cgroup_path
, destuid
) < 0) {
446 ERROR("Failed to chown %s:%s to container root",
447 slist
[i
], arg
->cgroup_path
);
453 cgm_dbus_disconnect();
457 /* Internal helper. Must be called with the cgmanager dbus socket open */
458 static bool lxc_cgmanager_chmod(const char *controller
,
459 const char *cgroup_path
, const char *file
, int mode
)
461 if (cgmanager_chmod_sync(NULL
, cgroup_manager
, controller
,
462 cgroup_path
, file
, mode
) != 0) {
464 nerr
= nih_error_get();
465 ERROR("call to cgmanager_chmod_sync failed: %s", nerr
->message
);
472 /* Internal helper. Must be called with the cgmanager dbus socket open */
473 static bool chown_cgroup(const char *cgroup_path
, struct lxc_conf
*conf
)
475 struct chown_data data
;
476 char **slist
= subsystems
;
479 if (lxc_list_empty(&conf
->id_map
))
480 /* If there's no mapping then we don't need to chown */
483 data
.cgroup_path
= cgroup_path
;
484 data
.origuid
= geteuid();
486 /* Unpriv users can't chown it themselves, so chown from
487 * a child namespace mapping both our own and the target uid
489 if (userns_exec_1(conf
, chown_cgroup_wrapper
, &data
) < 0) {
490 ERROR("Error requesting cgroup chown in new namespace");
495 * Now chmod 775 the directory else the container cannot create cgroups.
496 * This can't be done in the child namespace because it only group-owns
499 if (cgm_supports_multiple_controllers
)
500 slist
= subsystems_inone
;
502 for (i
= 0; slist
[i
]; i
++) {
503 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "", 0775))
505 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "tasks", 0775))
507 if (!lxc_cgmanager_chmod(slist
[i
], cgroup_path
, "cgroup.procs", 0775))
514 #define CG_REMOVE_RECURSIVE 1
515 /* Internal helper. Must be called with the cgmanager dbus socket open */
516 static void cgm_remove_cgroup(const char *controller
, const char *path
)
519 if ( cgmanager_remove_sync(NULL
, cgroup_manager
, controller
,
520 path
, CG_REMOVE_RECURSIVE
, &existed
) != 0) {
522 nerr
= nih_error_get();
523 ERROR("call to cgmanager_remove_sync failed: %s", nerr
->message
);
525 ERROR("Error removing %s:%s", controller
, path
);
528 INFO("cgroup removal attempt: %s:%s did not exist", controller
, path
);
531 static void *cgm_init(const char *name
)
535 if (!cgm_dbus_connect()) {
536 ERROR("Error connecting to cgroup manager");
540 check_supports_multiple_controllers(-1);
542 d
= malloc(sizeof(*d
));
544 cgm_dbus_disconnect();
548 memset(d
, 0, sizeof(*d
));
549 d
->name
= strdup(name
);
551 cgm_dbus_disconnect();
555 d
->cgroup_pattern
= lxc_global_config_value("lxc.cgroup.pattern");
557 // cgm_create immediately gets called so keep the connection open
565 /* Called after a failed container startup */
566 static void cgm_destroy(void *hdata
)
568 struct cgm_data
*d
= hdata
;
569 char **slist
= subsystems
;
572 if (!d
|| !d
->cgroup_path
)
574 if (!cgm_dbus_connect()) {
575 ERROR("Error connecting to cgroup manager");
579 if (cgm_supports_multiple_controllers
)
580 slist
= subsystems_inone
;
581 for (i
= 0; slist
[i
]; i
++)
582 cgm_remove_cgroup(slist
[i
], d
->cgroup_path
);
585 free(d
->cgroup_path
);
587 cgm_dbus_disconnect();
591 * remove all the cgroups created
592 * called internally with dbus connection open
594 static inline void cleanup_cgroups(char *path
)
597 char **slist
= subsystems
;
599 if (cgm_supports_multiple_controllers
)
600 slist
= subsystems_inone
;
601 for (i
= 0; slist
[i
]; i
++)
602 cgm_remove_cgroup(slist
[i
], path
);
605 static inline bool cgm_create(void *hdata
)
607 struct cgm_data
*d
= hdata
;
608 char **slist
= subsystems
;
609 int i
, index
=0, baselen
, ret
;
611 char result
[MAXPATHLEN
], *tmp
, *cgroup_path
;
615 // XXX we should send a hint to the cgmanager that when these
616 // cgroups become empty they should be deleted. Requires a cgmanager
619 memset(result
, 0, MAXPATHLEN
);
620 tmp
= lxc_string_replace("%n", d
->name
, d
->cgroup_pattern
);
623 if (strlen(tmp
) >= MAXPATHLEN
) {
628 baselen
= strlen(result
);
634 if (index
== 100) { // turn this into a warn later
635 ERROR("cgroup error? 100 cgroups with this name already running");
639 ret
= snprintf(result
+baselen
, MAXPATHLEN
-baselen
, "-%d", index
);
640 if (ret
< 0 || ret
>= MAXPATHLEN
-baselen
)
645 if (cgm_supports_multiple_controllers
)
646 slist
= subsystems_inone
;
648 for (i
= 0; slist
[i
]; i
++) {
649 if (!lxc_cgmanager_create(slist
[i
], tmp
, &existed
)) {
650 ERROR("Error creating cgroup %s:%s", slist
[i
], result
);
651 cleanup_cgroups(tmp
);
658 cgroup_path
= strdup(tmp
);
660 cleanup_cgroups(tmp
);
663 d
->cgroup_path
= cgroup_path
;
664 cgm_dbus_disconnect();
671 cgm_dbus_disconnect();
676 * Use the cgmanager to move a task into a cgroup for a particular
678 * All the subsystems in this hierarchy are co-mounted, so we only
679 * need to transition the task into one of the cgroups
681 * Internal helper, must be called with cgmanager dbus socket open
683 static bool lxc_cgmanager_enter(pid_t pid
, const char *controller
,
684 const char *cgroup_path
, bool abs
)
689 ret
= cgmanager_move_pid_abs_sync(NULL
, cgroup_manager
,
690 controller
, cgroup_path
, pid
);
692 ret
= cgmanager_move_pid_sync(NULL
, cgroup_manager
,
693 controller
, cgroup_path
, pid
);
696 nerr
= nih_error_get();
697 ERROR("call to cgmanager_move_pid_%ssync failed: %s",
698 abs
? "abs_" : "", nerr
->message
);
705 static inline bool cgm_enter(void *hdata
, pid_t pid
)
707 struct cgm_data
*d
= hdata
;
708 char **slist
= subsystems
;
712 if (!d
|| !d
->cgroup_path
)
715 if (!cgm_dbus_connect()) {
716 ERROR("Error connecting to cgroup manager");
720 if (cgm_all_controllers_same
)
721 slist
= subsystems_inone
;
723 for (i
= 0; slist
[i
]; i
++) {
724 if (!lxc_cgmanager_enter(pid
, slist
[i
], d
->cgroup_path
, false))
729 cgm_dbus_disconnect();
733 static const char *cgm_get_cgroup(void *hdata
, const char *subsystem
)
735 struct cgm_data
*d
= hdata
;
737 if (!d
|| !d
->cgroup_path
)
739 return d
->cgroup_path
;
742 static const char *cgm_canonical_path(void *hdata
)
744 struct cgm_data
*d
= hdata
;
746 if (!d
|| !d
->cgroup_path
)
748 return d
->cgroup_path
;
751 #if HAVE_CGMANAGER_GET_PID_CGROUP_ABS_SYNC
752 static inline bool abs_cgroup_supported(void) {
753 return api_version
>= CGM_SUPPORTS_GET_ABS
;
756 static inline bool abs_cgroup_supported(void) {
759 #define cgmanager_get_pid_cgroup_abs_sync(...) -1
762 static char *try_get_abs_cgroup(const char *name
, const char *lxcpath
,
763 const char *controller
)
767 if (abs_cgroup_supported()) {
768 /* get the container init pid and ask for its abs cgroup */
769 pid_t pid
= lxc_cmd_get_init_pid(name
, lxcpath
);
772 if (cgmanager_get_pid_cgroup_abs_sync(NULL
, cgroup_manager
,
773 controller
, pid
, &cgroup
) != 0) {
776 nerr
= nih_error_get();
782 /* use the command interface to look for the cgroup */
783 return lxc_cmd_get_cgroup_path(name
, lxcpath
, controller
);
787 * nrtasks is called by the utmp helper by the container monitor.
788 * cgmanager socket was closed after cgroup setup was complete, so we need
791 * Return -1 on error.
793 static int cgm_get_nrtasks(void *hdata
)
795 struct cgm_data
*d
= hdata
;
799 if (!d
|| !d
->cgroup_path
)
802 if (!cgm_dbus_connect()) {
803 ERROR("Error connecting to cgroup manager");
806 if (cgmanager_get_tasks_sync(NULL
, cgroup_manager
, subsystems
[0],
807 d
->cgroup_path
, &pids
, &pids_len
) != 0) {
809 nerr
= nih_error_get();
810 ERROR("call to cgmanager_get_tasks_sync failed: %s", nerr
->message
);
817 cgm_dbus_disconnect();
821 #if HAVE_CGMANAGER_LIST_CONTROLLERS
822 static bool lxc_list_controllers(char ***list
)
824 if (!cgm_dbus_connect()) {
825 ERROR("Error connecting to cgroup manager");
828 if (cgmanager_list_controllers_sync(NULL
, cgroup_manager
, list
) != 0) {
830 nerr
= nih_error_get();
831 ERROR("call to cgmanager_list_controllers_sync failed: %s", nerr
->message
);
833 cgm_dbus_disconnect();
837 cgm_dbus_disconnect();
841 static bool lxc_list_controllers(char ***list
)
847 static inline void free_abs_cgroup(char *cgroup
)
851 if (abs_cgroup_supported())
857 static void do_cgm_get(const char *name
, const char *lxcpath
, const char *filename
, int outp
, bool sendvalue
)
859 char *controller
, *key
, *cgroup
= NULL
, *cglast
;
862 nih_local
char *result
= NULL
;
864 controller
= alloca(strlen(filename
)+1);
865 strcpy(controller
, filename
);
866 key
= strchr(controller
, '.');
868 ret
= write(outp
, &len
, sizeof(len
));
869 if (ret
!= sizeof(len
))
870 WARN("Failed to warn cgm_get of error; parent may hang");
875 if (!cgm_dbus_connect()) {
876 ERROR("Error connecting to cgroup manager");
877 ret
= write(outp
, &len
, sizeof(len
));
878 if (ret
!= sizeof(len
))
879 WARN("Failed to warn cgm_get of error; parent may hang");
882 cgroup
= try_get_abs_cgroup(name
, lxcpath
, controller
);
884 cgm_dbus_disconnect();
885 ret
= write(outp
, &len
, sizeof(len
));
886 if (ret
!= sizeof(len
))
887 WARN("Failed to warn cgm_get of error; parent may hang");
890 cglast
= strrchr(cgroup
, '/');
892 cgm_dbus_disconnect();
893 free_abs_cgroup(cgroup
);
894 ret
= write(outp
, &len
, sizeof(len
));
895 if (ret
!= sizeof(len
))
896 WARN("Failed to warn cgm_get of error; parent may hang");
900 if (!lxc_cgmanager_enter(getpid(), controller
, cgroup
, abs_cgroup_supported())) {
901 ERROR("Failed to enter container cgroup %s:%s", controller
, cgroup
);
902 ret
= write(outp
, &len
, sizeof(len
));
903 if (ret
!= sizeof(len
))
904 WARN("Failed to warn cgm_get of error; parent may hang");
905 cgm_dbus_disconnect();
906 free_abs_cgroup(cgroup
);
909 if (cgmanager_get_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, &result
) != 0) {
911 nerr
= nih_error_get();
913 free_abs_cgroup(cgroup
);
914 cgm_dbus_disconnect();
915 ret
= write(outp
, &len
, sizeof(len
));
916 if (ret
!= sizeof(len
))
917 WARN("Failed to warn cgm_get of error; parent may hang");
920 free_abs_cgroup(cgroup
);
921 cgm_dbus_disconnect();
922 len
= strlen(result
);
923 ret
= write(outp
, &len
, sizeof(len
));
924 if (ret
!= sizeof(len
)) {
925 WARN("Failed to send length to parent");
928 if (!len
|| !sendvalue
) {
931 ret
= write(outp
, result
, len
);
937 /* cgm_get is called to get container cgroup settings, not during startup */
938 static int cgm_get(const char *filename
, char *value
, size_t len
, const char *name
, const char *lxcpath
)
941 int p
[2], ret
, newlen
, readlen
;
945 if ((pid
= fork()) < 0) {
950 if (!pid
) // do_cgm_get exits
951 do_cgm_get(name
, lxcpath
, filename
, p
[1], len
&& value
);
953 ret
= read(p
[0], &newlen
, sizeof(newlen
));
954 if (ret
!= sizeof(newlen
)) {
959 if (!len
|| !value
) {
964 memset(value
, 0, len
);
965 if (newlen
< 0) { // child is reporting an error
970 if (newlen
== 0) { // empty read
975 readlen
= newlen
> len
? len
: newlen
;
976 ret
= read(p
[0], value
, readlen
);
978 if (ret
!= readlen
) {
985 } else if (newlen
+1 < len
) {
986 // cgmanager doesn't add eol to last entry
987 value
[newlen
++] = '\n';
988 value
[newlen
] = '\0';
992 if (wait_for_pid(pid
))
993 WARN("do_cgm_get exited with error");
997 static void do_cgm_set(const char *name
, const char *lxcpath
, const char *filename
, const char *value
, int outp
)
999 char *controller
, *key
, *cgroup
= NULL
;
1000 int retval
= 0; // value we are sending to the parent over outp
1004 controller
= alloca(strlen(filename
)+1);
1005 strcpy(controller
, filename
);
1006 key
= strchr(controller
, '.');
1008 ret
= write(outp
, &retval
, sizeof(retval
));
1009 if (ret
!= sizeof(retval
))
1010 WARN("Failed to warn cgm_set of error; parent may hang");
1015 if (!cgm_dbus_connect()) {
1016 ERROR("Error connecting to cgroup manager");
1017 ret
= write(outp
, &retval
, sizeof(retval
));
1018 if (ret
!= sizeof(retval
))
1019 WARN("Failed to warn cgm_set of error; parent may hang");
1022 cgroup
= try_get_abs_cgroup(name
, lxcpath
, controller
);
1024 cgm_dbus_disconnect();
1025 ret
= write(outp
, &retval
, sizeof(retval
));
1026 if (ret
!= sizeof(retval
))
1027 WARN("Failed to warn cgm_set of error; parent may hang");
1030 cglast
= strrchr(cgroup
, '/');
1032 cgm_dbus_disconnect();
1033 free_abs_cgroup(cgroup
);
1034 ret
= write(outp
, &retval
, sizeof(retval
));
1035 if (ret
!= sizeof(retval
))
1036 WARN("Failed to warn cgm_set of error; parent may hang");
1040 if (!lxc_cgmanager_enter(getpid(), controller
, cgroup
, abs_cgroup_supported())) {
1041 ERROR("Failed to enter container cgroup %s:%s", controller
, cgroup
);
1042 ret
= write(outp
, &retval
, sizeof(retval
));
1043 if (ret
!= sizeof(retval
))
1044 WARN("Failed to warn cgm_set of error; parent may hang");
1045 cgm_dbus_disconnect();
1046 free_abs_cgroup(cgroup
);
1049 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
, cglast
+1, filename
, value
) != 0) {
1051 nerr
= nih_error_get();
1052 ERROR("Error setting cgroup value %s for %s:%s", filename
, controller
, cgroup
);
1053 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1055 free_abs_cgroup(cgroup
);
1056 cgm_dbus_disconnect();
1057 ret
= write(outp
, &retval
, sizeof(retval
));
1058 if (ret
!= sizeof(retval
))
1059 WARN("Failed to warn cgm_set of error; parent may hang");
1062 free_abs_cgroup(cgroup
);
1063 cgm_dbus_disconnect();
1064 /* tell parent that we are done */
1066 ret
= write(outp
, &retval
, sizeof(retval
));
1067 if (ret
!= sizeof(retval
)) {
1073 /* cgm_set is called to change cgroup settings, not during startup */
1074 static int cgm_set(const char *filename
, const char *value
, const char *name
, const char *lxcpath
)
1081 if ((pid
= fork()) < 0) {
1086 if (!pid
) // do_cgm_set exits
1087 do_cgm_set(name
, lxcpath
, filename
, value
, p
[1]);
1089 ret
= read(p
[0], &v
, sizeof(v
));
1091 if (wait_for_pid(pid
))
1092 WARN("do_cgm_set exited with error");
1093 if (ret
!= sizeof(v
) || !v
)
1098 static void free_subsystems(void)
1102 for (i
= 0; i
< nr_subsystems
; i
++)
1103 free(subsystems
[i
]);
1109 static void cull_user_controllers(void)
1113 for (i
= 0; i
< nr_subsystems
; i
++) {
1114 if (strncmp(subsystems
[i
], "name=", 5) != 0)
1116 for (j
= i
; j
< nr_subsystems
-1; j
++)
1117 subsystems
[j
] = subsystems
[j
+1];
/*
 * in_comma_list: test whether @inword is one complete entry of the
 * comma-separated list @cgroup_use.
 *
 * Entries are compared exactly (same length, same bytes); a prefix of an
 * entry does not match.  An empty @inword matches an empty entry, e.g. the
 * one after a trailing comma.
 *
 * Returns true on a match, false once the list is exhausted.
 */
static bool in_comma_list(const char *inword, const char *cgroup_use)
{
	size_t inlen = strlen(inword);

	for (;;) {
		const char *e = strchr(cgroup_use, ',');
		size_t len = e ? (size_t)(e - cgroup_use) : strlen(cgroup_use);

		if (len == inlen && strncmp(inword, cgroup_use, len) == 0)
			return true;

		/* Last entry examined: stop before doing arithmetic on a
		 * NULL pointer. */
		if (!e)
			return false;

		cgroup_use = e + 1;
	}
}
1138 static bool in_subsystem_list(const char *c
)
1142 for (i
= 0; i
< nr_subsystems
; i
++) {
1143 if (strcmp(c
, subsystems
[i
]) == 0)
1151 * If /etc/lxc/lxc.conf specifies lxc.cgroup.use = "freezer,memory",
1152 * then clear out any other subsystems, and make sure that freezer
1153 * and memory are both enabled
1155 static bool verify_and_prune(const char *cgroup_use
)
1161 for (p
= cgroup_use
; p
&& *p
; p
= e
+ 1) {
1166 if (!in_subsystem_list(p
)) {
1167 ERROR("Controller %s required by lxc.cgroup.use but not available\n", p
);
1177 for (i
= 0; i
< nr_subsystems
;) {
1178 if (in_comma_list(subsystems
[i
], cgroup_use
)) {
1182 free(subsystems
[i
]);
1183 for (j
= i
; j
< nr_subsystems
-1; j
++)
1184 subsystems
[j
] = subsystems
[j
+1];
1185 subsystems
[nr_subsystems
-1] = NULL
;
1192 static bool collect_subsytems(void)
1195 nih_local
char **cgm_subsys_list
= NULL
;
1199 if (subsystems
) // already initialized
1202 subsystems_inone
= malloc(2 * sizeof(char *));
1203 if (!subsystems_inone
)
1205 subsystems_inone
[0] = "all";
1206 subsystems_inone
[1] = NULL
;
1208 if (lxc_list_controllers(&cgm_subsys_list
)) {
1209 while (cgm_subsys_list
[nr_subsystems
]) {
1210 char **tmp
= NIH_MUST( realloc(subsystems
,
1211 (nr_subsystems
+2)*sizeof(char *)) );
1212 tmp
[nr_subsystems
] = NIH_MUST(
1213 strdup(cgm_subsys_list
[nr_subsystems
++]) );
1217 subsystems
[nr_subsystems
] = NULL
;
1221 INFO("cgmanager_list_controllers failed, falling back to /proc/self/cgroups");
1222 f
= fopen_cloexec("/proc/self/cgroup", "r");
1224 f
= fopen_cloexec("/proc/1/cgroup", "r");
1228 while (getline(&line
, &sz
, f
) != -1) {
1229 /* file format: hierarchy:subsystems:group,
1230 * with multiple subsystems being ,-separated */
1231 char *slist
, *end
, *p
, *saveptr
= NULL
, **tmp
;
1236 slist
= strchr(line
, ':');
1240 end
= strchr(slist
, ':');
1245 for (p
= strtok_r(slist
, ",", &saveptr
);
1247 p
= strtok_r(NULL
, ",", &saveptr
)) {
1248 tmp
= realloc(subsystems
, (nr_subsystems
+2)*sizeof(char *));
1253 tmp
[nr_subsystems
] = strdup(p
);
1254 tmp
[nr_subsystems
+1] = NULL
;
1255 if (!tmp
[nr_subsystems
])
1265 if (!nr_subsystems
) {
1266 ERROR("No cgroup subsystems found");
1270 /* make sure that cgroup.use can be and is honored */
1271 const char *cgroup_use
= lxc_global_config_value("lxc.cgroup.use");
1272 if (!cgroup_use
&& errno
!= 0)
1275 if (!verify_and_prune(cgroup_use
))
1277 subsystems_inone
[0] = NIH_MUST( strdup(cgroup_use
) );
1278 cgm_all_controllers_same
= false;
1293 * called during cgroup.c:cgroup_ops_init(), at startup. No threads.
1294 * We check whether we can talk to cgmanager, escape to root cgroup if
1295 * we are root, then close the connection.
1297 struct cgroup_ops
*cgm_ops_init(void)
1299 if (!collect_subsytems())
1301 if (!cgm_dbus_connect())
1304 // root; try to escape to root cgroup
1305 if (geteuid() == 0 && !lxc_cgmanager_escape())
1307 cgm_dbus_disconnect();
1309 return &cgmanager_ops
;
1312 cgm_dbus_disconnect();
1318 /* unfreeze is called by the command api after killing a container. */
1319 static bool cgm_unfreeze(void *hdata
)
1321 struct cgm_data
*d
= hdata
;
1324 if (!d
|| !d
->cgroup_path
)
1327 if (!cgm_dbus_connect()) {
1328 ERROR("Error connecting to cgroup manager");
1331 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, "freezer", d
->cgroup_path
,
1332 "freezer.state", "THAWED") != 0) {
1334 nerr
= nih_error_get();
1335 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1337 ERROR("Error unfreezing %s", d
->cgroup_path
);
1340 cgm_dbus_disconnect();
1344 static bool cgm_setup_limits(void *hdata
, struct lxc_list
*cgroup_settings
, bool do_devices
)
1346 struct cgm_data
*d
= hdata
;
1347 struct lxc_list
*iterator
, *sorted_cgroup_settings
, *next
;
1348 struct lxc_cgroup
*cg
;
1351 if (lxc_list_empty(cgroup_settings
))
1354 if (!d
|| !d
->cgroup_path
)
1357 if (!cgm_dbus_connect()) {
1358 ERROR("Error connecting to cgroup manager");
1362 sorted_cgroup_settings
= sort_cgroup_settings(cgroup_settings
);
1363 if (!sorted_cgroup_settings
) {
1367 lxc_list_for_each(iterator
, sorted_cgroup_settings
) {
1368 char controller
[100], *p
;
1369 cg
= iterator
->elem
;
1370 if (do_devices
!= !strncmp("devices", cg
->subsystem
, 7))
1372 if (strlen(cg
->subsystem
) > 100) // i smell a rat
1374 strcpy(controller
, cg
->subsystem
);
1375 p
= strchr(controller
, '.');
1378 if (cgmanager_set_value_sync(NULL
, cgroup_manager
, controller
,
1379 d
->cgroup_path
, cg
->subsystem
, cg
->value
) != 0) {
1381 nerr
= nih_error_get();
1382 ERROR("call to cgmanager_set_value_sync failed: %s", nerr
->message
);
1384 ERROR("Error setting cgroup %s:%s limit type %s", controller
,
1385 d
->cgroup_path
, cg
->subsystem
);
1389 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1393 INFO("cgroup limits have been setup");
1395 lxc_list_for_each_safe(iterator
, sorted_cgroup_settings
, next
) {
1396 lxc_list_del(iterator
);
1399 free(sorted_cgroup_settings
);
1400 cgm_dbus_disconnect();
1404 static bool cgm_chown(void *hdata
, struct lxc_conf
*conf
)
1406 struct cgm_data
*d
= hdata
;
1408 if (!d
|| !d
->cgroup_path
)
1410 if (!cgm_dbus_connect()) {
1411 ERROR("Error connecting to cgroup manager");
1414 if (!chown_cgroup(d
->cgroup_path
, conf
))
1415 WARN("Failed to chown %s to container root", d
->cgroup_path
);
1416 cgm_dbus_disconnect();
1421 * TODO: this should be re-written to use the get_config_item("lxc.id_map")
1422 * cmd api instead of getting the idmap from c->lxc_conf. The reason is
1423 * that the id_maps may be different if the container was started with a
1424 * -f or -s argument.
1425 * The reason I'm punting on that is because we'll need to parse the
1428 static bool cgm_attach(const char *name
, const char *lxcpath
, pid_t pid
)
1431 char *cgroup
= NULL
;
1432 char **slist
= subsystems
;
1435 if (!cgm_dbus_connect()) {
1436 ERROR("Error connecting to cgroup manager");
1440 for (i
= 0; slist
[i
]; i
++) {
1441 cgroup
= try_get_abs_cgroup(name
, lxcpath
, slist
[i
]);
1443 ERROR("Failed to get cgroup for controller %s", slist
[i
]);
1444 cgm_dbus_disconnect();
1448 if (!lxc_cgmanager_enter(pid
, slist
[i
], cgroup
, abs_cgroup_supported())) {
1454 cgm_dbus_disconnect();
1456 ERROR("Failed to enter group %s", cgroup
);
1458 free_abs_cgroup(cgroup
);
1462 static bool cgm_bind_dir(const char *root
, const char *dirname
)
1464 nih_local
char *cgpath
= NULL
;
1466 /* /sys should have been mounted by now */
1467 cgpath
= NIH_MUST( nih_strdup(NULL
, root
) );
1468 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/sys/fs/cgroup") );
1470 if (!dir_exists(cgpath
)) {
1471 ERROR("%s does not exist", cgpath
);
1475 /* mount a tmpfs there so we can create subdirs */
1476 if (mount("cgroup", cgpath
, "tmpfs", 0, "size=10000,mode=755")) {
1477 SYSERROR("Failed to mount tmpfs at %s", cgpath
);
1480 NIH_MUST( nih_strcat(&cgpath
, NULL
, "/cgmanager") );
1482 if (mkdir(cgpath
, 0755) < 0) {
1483 SYSERROR("Failed to create %s", cgpath
);
1487 if (mount(dirname
, cgpath
, "none", MS_BIND
, 0)) {
1488 SYSERROR("Failed to bind mount %s to %s", dirname
, cgpath
);
1497 * If /sys/fs/cgroup/cgmanager.lower/ exists, bind mount that to
1498 * /sys/fs/cgroup/cgmanager/ in the container.
1499 * Otherwise, if /sys/fs/cgroup/cgmanager exists, bind mount that.
1502 #define CGMANAGER_LOWER_SOCK "/sys/fs/cgroup/cgmanager.lower"
1503 #define CGMANAGER_UPPER_SOCK "/sys/fs/cgroup/cgmanager"
1504 static bool cgm_mount_cgroup(void *hdata
, const char *root
, int type
)
1506 if (dir_exists(CGMANAGER_LOWER_SOCK
))
1507 return cgm_bind_dir(root
, CGMANAGER_LOWER_SOCK
);
1508 if (dir_exists(CGMANAGER_UPPER_SOCK
))
1509 return cgm_bind_dir(root
, CGMANAGER_UPPER_SOCK
);
1510 // Host doesn't have cgmanager running? Then how did we get here?
1514 static struct cgroup_ops cgmanager_ops
= {
1516 .destroy
= cgm_destroy
,
1517 .create
= cgm_create
,
1519 .create_legacy
= NULL
,
1520 .get_cgroup
= cgm_get_cgroup
,
1521 .canonical_path
= cgm_canonical_path
,
1524 .unfreeze
= cgm_unfreeze
,
1525 .setup_limits
= cgm_setup_limits
,
1526 .name
= "cgmanager",
1528 .attach
= cgm_attach
,
1529 .mount_cgroup
= cgm_mount_cgroup
,
1530 .nrtasks
= cgm_get_nrtasks
,
1532 .driver
= CGMANAGER
,