]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgmanager.c
cgmanager: chown cgroups to the container root
[mirror_lxc.git] / src / lxc / cgmanager.c
CommitLineData
d4ef7c50
SH
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
27#include <errno.h>
28#include <unistd.h>
29#include <string.h>
30#include <dirent.h>
31#include <fcntl.h>
32#include <ctype.h>
33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
37#include <sys/mount.h>
38#include <netinet/in.h>
39#include <net/if.h>
40
41#include "error.h"
42#include "config.h"
43#include "commands.h"
44#include "list.h"
45#include "conf.h"
46#include "utils.h"
47#include "bdev.h"
48#include "log.h"
49#include "cgroup.h"
50#include "start.h"
51#include "state.h"
52
53#ifdef HAVE_CGMANAGER
54lxc_log_define(lxc_cgmanager, lxc);
55
56#include <nih-dbus/dbus_connection.h>
57#include <cgmanager-client/cgmanager-client.h>
9daf6f5d 58#include <nih/alloc.h>
d4ef7c50
SH
59NihDBusProxy *cgroup_manager = NULL;
60
61extern struct cgroup_ops *active_cg_ops;
62bool cgmanager_initialized = false;
63bool use_cgmanager = true;
64static struct cgroup_ops cgmanager_ops;
65
66bool lxc_init_cgmanager(void);
67static void cgmanager_disconnected(DBusConnection *connection)
68{
69 WARN("Cgroup manager connection was terminated");
70 cgroup_manager = NULL;
71 cgmanager_initialized = false;
72 if (lxc_init_cgmanager()) {
73 cgmanager_initialized = true;
74 INFO("New cgroup manager connection was opened");
75 }
76}
77
0996e18a
SH
78static int send_creds(int sock, int rpid, int ruid, int rgid)
79{
80 struct msghdr msg = { 0 };
81 struct iovec iov;
82 struct cmsghdr *cmsg;
83 struct ucred cred = {
84 .pid = rpid,
85 .uid = ruid,
86 .gid = rgid,
87 };
88 char cmsgbuf[CMSG_SPACE(sizeof(cred))];
89 char buf[1];
90 buf[0] = 'p';
91
92 msg.msg_control = cmsgbuf;
93 msg.msg_controllen = sizeof(cmsgbuf);
94
95 cmsg = CMSG_FIRSTHDR(&msg);
96 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
97 cmsg->cmsg_level = SOL_SOCKET;
98 cmsg->cmsg_type = SCM_CREDENTIALS;
99 memcpy(CMSG_DATA(cmsg), &cred, sizeof(cred));
100
101 msg.msg_name = NULL;
102 msg.msg_namelen = 0;
103
104 iov.iov_base = buf;
105 iov.iov_len = sizeof(buf);
106 msg.msg_iov = &iov;
107 msg.msg_iovlen = 1;
108
109 if (sendmsg(sock, &msg, 0) < 0) {
110 perror("sendmsg");
111 return -1;
112 }
113 return 0;
114}
115
d4ef7c50
SH
116#define CGMANAGER_DBUS_SOCK "unix:path=/sys/fs/cgroup/cgmanager/sock"
117bool lxc_init_cgmanager(void)
118{
119 DBusError dbus_error;
120 DBusConnection *connection;
121 dbus_error_init(&dbus_error);
122
123 connection = nih_dbus_connect(CGMANAGER_DBUS_SOCK, cgmanager_disconnected);
124 if (!connection) {
125 ERROR("Error opening cgmanager connection at %s", CGMANAGER_DBUS_SOCK);
126 return false;
127 }
128 dbus_connection_set_exit_on_disconnect(connection, FALSE);
129 dbus_error_free(&dbus_error);
130 cgroup_manager = nih_dbus_proxy_new(NULL, connection,
131 NULL /* p2p */,
132 "/org/linuxcontainers/cgmanager", NULL, NULL);
133 dbus_connection_unref(connection);
134 if (!cgroup_manager) {
135 return false;
136 }
137 active_cg_ops = &cgmanager_ops;
138 return true;
139}
140
141/*
142 * Use the cgmanager to move a task into a cgroup for a particular
143 * hierarchy.
144 * All the subsystems in this hierarchy are co-mounted, so we only
145 * need to transition the task into one of the cgroups
146 */
147static bool lxc_cgmanager_enter(pid_t pid, char *controller, char *cgroup_path)
148{
149 return cgmanager_move_pid_sync(NULL, cgroup_manager, controller,
150 cgroup_path, pid) == 0;
151}
152
153static bool lxc_cgmanager_create(const char *controller, const char *cgroup_path, int32_t *existed)
154{
155 if ( cgmanager_create_sync(NULL, cgroup_manager, controller,
156 cgroup_path, existed) != 0) {
157 ERROR("Failed to create %s:%s", controller, cgroup_path);
158 return false;
159 }
160
d4ef7c50
SH
161 return true;
162}
163
0996e18a
SH
/* Argument bundle handed to chown_cgroup_wrapper() through a void pointer. */
struct chown_data {
	const char *controller;		/* controller whose cgroup is chowned */
	const char *cgroup_path;	/* cgroup path under that controller */
};
168
169static int do_chown_cgroup(const char *controller, const char *cgroup_path)
170{
171 int sv[2] = {-1, -1}, optval = 1;
172 char buf[1];
173
174 if (setgid(0) < 0)
175 WARN("Failed to setgid to 0");
176 if (setuid(0) < 0)
177 WARN("Failed to setuid to 0");
178
179 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) {
180 SYSERROR("Error creating socketpair");
181 return -1;
182 }
183 if (setsockopt(sv[1], SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
184 SYSERROR("setsockopt failed");
185 return -1;
186 }
187 if (setsockopt(sv[0], SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
188 SYSERROR("setsockopt failed");
189 return -1;
190 }
191 if ( cgmanager_chown_scm_sync(NULL, cgroup_manager, controller,
192 cgroup_path, sv[1]) != 0) {
193 ERROR("call to cgmanager_chown_scm_sync failed");
194 return -1;
195 }
196 /* now send credentials */
197
198 fd_set rfds;
199 FD_ZERO(&rfds);
200 FD_SET(sv[0], &rfds);
201 if (select(sv[0]+1, &rfds, NULL, NULL, NULL) < 0) {
202 ERROR("Error getting go-ahead from server: %s", strerror(errno));
203 return -1;
204 }
205 if (read(sv[0], &buf, 1) != 1) {
206 ERROR("Error getting reply from server over socketpair");
207 return -1;
208 }
209 if (send_creds(sv[0], getpid(), getuid(), getgid())) {
210 ERROR("Error sending pid over SCM_CREDENTIAL");
211 return -1;
212 }
213 FD_ZERO(&rfds);
214 FD_SET(sv[0], &rfds);
215 if (select(sv[0]+1, &rfds, NULL, NULL, NULL) < 0) {
216 ERROR("Error getting go-ahead from server: %s", strerror(errno));
217 return -1;
218 }
219 if (read(sv[0], &buf, 1) != 1) {
220 ERROR("Error getting reply from server over socketpair");
221 return -1;
222 }
223 if (send_creds(sv[0], getpid(), 0, 0)) {
224 ERROR("Error sending pid over SCM_CREDENTIAL");
225 return -1;
226 }
227 FD_ZERO(&rfds);
228 FD_SET(sv[0], &rfds);
229 if (select(sv[0]+1, &rfds, NULL, NULL, NULL) < 0) {
230 ERROR("Error getting go-ahead from server: %s", strerror(errno));
231 return -1;
232 }
233 int ret = read(sv[0], buf, 1);
234 close(sv[0]);
235 close(sv[1]);
236 if (ret == 1 && *buf == '1')
237 return 0;
238 return -1;
239}
240
241static int chown_cgroup_wrapper(void *data)
242{
243 struct chown_data *arg = data;
244 return do_chown_cgroup(arg->controller, arg->cgroup_path);
245}
246
247static bool chown_cgroup(const char *controller, const char *cgroup_path,
248 struct lxc_conf *conf)
249{
250 pid_t pid;
251 struct chown_data data;
252 data.controller = controller;
253 data.cgroup_path = cgroup_path;
254
255 if (lxc_list_empty(&conf->id_map)) {
256 if (do_chown_cgroup(controller, cgroup_path) < 0)
257 return false;
258 return true;
259 }
260
261 if ((pid = fork()) < 0) {
262 SYSERROR("fork");
263 return false;
264 }
265 if (pid > 0) {
266 if (wait_for_pid(pid)) {
267 ERROR("Error chowning cgroup");
268 return false;
269 }
270 return true;
271 }
272 if (userns_exec_1(conf, chown_cgroup_wrapper, &data) < 0)
273 exit(1);
274 exit(0);
275}
d4ef7c50
SH
276
/* Per-handler cgmanager state, stored in handler->cgroup_info->data. */
struct cgm_data {
	int nr_subsystems;	/* number of entries in subsystems[] */
	char **subsystems;	/* heap-allocated array of heap-allocated names */
	char *cgroup_path;	/* heap-allocated path, NULL until cgm_create() succeeds */
};
282
6873d6f0
SH
283#define CG_REMOVE_RECURSIVE 1
284void cgmanager_remove_cgroup(const char *controller, const char *path)
d4ef7c50 285{
6873d6f0
SH
286 int existed;
287 if ( cgmanager_remove_sync(NULL, cgroup_manager, controller,
288 path, CG_REMOVE_RECURSIVE, &existed) != 0)
289 ERROR("Error removing %s:%s", controller, path);
ecc357ca 290 if (existed == -1)
38799354 291 INFO("cgroup removal attempt: %s:%s did not exist", controller, path);
d4ef7c50
SH
292}
293
294static void cgm_destroy(struct lxc_handler *handler)
295{
296 struct cgm_data *d = handler->cgroup_info->data;
297 int i;
298
299 if (!d)
300 return;
301 for (i=0; i<d->nr_subsystems; i++) {
302 if (d->cgroup_path)
303 cgmanager_remove_cgroup(d->subsystems[i], d->cgroup_path);
304 free(d->subsystems[i]);
305 }
306 free(d->subsystems);
307 free(d->cgroup_path);
308 free(d);
309 handler->cgroup_info->data = NULL;
310}
311
312/*
313 * remove all the cgroups created
314 */
315static inline void cleanup_cgroups(struct cgm_data *d, char *path)
316{
317 int i;
318 for (i = 0; i < d->nr_subsystems; i++) {
319 cgmanager_remove_cgroup(d->subsystems[i], path);
320 }
321}
322
323static inline bool cgm_create(struct lxc_handler *handler)
324{
325 int i, index=0, baselen, ret;
326 int32_t existed;
327 char result[MAXPATHLEN], *tmp;
328 struct cgm_data *d = handler->cgroup_info->data;
329
330// XXX we should send a hint to the cgmanager that when these
331// cgroups become empty they should be deleted. Requires a cgmanager
332// extension
333
334 memset(result, 0, MAXPATHLEN);
335 tmp = lxc_string_replace("%n", handler->name, handler->cgroup_info->cgroup_pattern);
336 if (!tmp)
337 return false;
338 if (strlen(tmp) > MAXPATHLEN)
339 return false;
340 strcpy(result, tmp);
341 baselen = strlen(result);
342 free(tmp);
343 tmp = result;
344 while (*tmp == '/')
345 tmp++;
346again:
347 if (index == 100) { // turn this into a warn later
348 ERROR("cgroup error? 100 cgroups with this name already running");
349 return false;
350 }
351 if (index) {
352 ret = snprintf(result+baselen, MAXPATHLEN-baselen, "-%d", index);
353 if (ret < 0 || ret >= MAXPATHLEN-baselen)
354 return false;
355 }
356 existed = 0;
357 for (i = 0; i < d->nr_subsystems; i++) {
358 if (!lxc_cgmanager_create(d->subsystems[i], tmp, &existed)) {
359 ERROR("Error creating cgroup %s:%s", d->subsystems[i], result);
360 cleanup_cgroups(d, tmp);
361 return false;
362 }
ecc357ca 363 if (existed == 1)
d4ef7c50
SH
364 goto next;
365 }
366 // success
367 d->cgroup_path = strdup(tmp);
368 if (!d->cgroup_path) {
369 cleanup_cgroups(d, tmp);
370 return false;
371 }
372 return true;
373next:
374 cleanup_cgroups(d, tmp);
375 index++;
376 goto again;
377}
378
379static inline bool cgm_enter(struct lxc_handler *handler)
380{
381 struct cgm_data *d = handler->cgroup_info->data;
382 int i;
383
384 for (i = 0; i < d->nr_subsystems; i++) {
385 if (!lxc_cgmanager_enter(handler->pid, d->subsystems[i], d->cgroup_path))
386 return false;
387 }
388 return true;
389}
390
391static char *cgm_get_cgroup(struct lxc_handler *handler, const char *subsystem)
392{
393 struct cgm_data *d = handler->cgroup_info->data;
394 return d->cgroup_path;
395}
396
397int cgm_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath)
398{
399 char *result, *controller, *key, *cgroup;
87b86add 400 size_t newlen;
d4ef7c50
SH
401
402 controller = alloca(strlen(filename)+1);
c0e0d2b2 403 strcpy(controller, filename);
d4ef7c50
SH
404 key = strchr(controller, '.');
405 if (!key)
c0e0d2b2 406 return -1;
d4ef7c50 407 *key = '\0';
d4ef7c50
SH
408
409 /* use the command interface to look for the cgroup */
410 cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
c0e0d2b2
SH
411 if (!cgroup)
412 return -1;
413 if (cgmanager_get_value_sync(NULL, cgroup_manager, controller, cgroup, filename, &result) != 0) {
d4ef7c50
SH
414 ERROR("Error getting value for %s from cgmanager for cgroup %s (%s:%s)",
415 filename, cgroup, lxcpath, name);
416 free(cgroup);
c0e0d2b2 417 return -1;
d4ef7c50
SH
418 }
419 free(cgroup);
87b86add 420 newlen = strlen(result);
f6639e3b
SH
421 if (!value) {
422 // user queries the size
423 nih_free(result);
424 return newlen+1;
425 }
426
d4ef7c50 427 strncpy(value, result, len);
87b86add 428 if (newlen >= len) {
d4ef7c50 429 value[len-1] = '\0';
87b86add
SH
430 newlen = len-1;
431 } else if (newlen+1 < len) {
432 // cgmanager doesn't add eol to last entry
433 value[newlen++] = '\n';
434 value[newlen] = '\0';
435 }
9daf6f5d 436 nih_free(result);
87b86add 437 return newlen;
d4ef7c50
SH
438}
439
9daf6f5d
SH
440static int cgm_do_set(const char *controller, const char *file,
441 const char *cgroup, const char *value)
442{
443 int ret;
444 ret = cgmanager_set_value_sync(NULL, cgroup_manager, controller,
445 cgroup, file, value);
446 if (ret != 0)
447 ERROR("Error setting cgroup %s limit %s", file, cgroup);
448 return ret;
449}
450
d4ef7c50
SH
/*
 * Write @value to cgroup file @filename (e.g. "cpuset.cpus") for
 * container @name under @lxcpath.
 *
 * The controller is the part of @filename before the first '.'; the
 * container's cgroup is looked up through the command interface.
 *
 * Returns the result of cgm_do_set(), or -1 if @filename has no '.' or
 * the cgroup cannot be determined.
 */
int cgm_set(const char *filename, const char *value, const char *name, const char *lxcpath)
{
	char *ctrl, *dot, *cgpath;
	int rc;

	/* scratch copy of filename, cut at the first '.' */
	ctrl = alloca(strlen(filename)+1);
	strcpy(ctrl, filename);
	dot = strchr(ctrl, '.');
	if (dot == NULL)
		return -1;
	*dot = '\0';

	/* use the command interface to look for the cgroup */
	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, ctrl);
	if (cgpath == NULL) {
		ERROR("Failed to get cgroup for controller %s for %s:%s",
			ctrl, lxcpath, name);
		return -1;
	}

	rc = cgm_do_set(ctrl, filename, cgpath, value);
	free(cgpath);
	return rc;
}
474
475/*
476 * TODO really this should be done once for global data, not once
477 * per container
478 */
479static inline bool cgm_init(struct lxc_handler *handler)
480{
481 struct cgm_data *d = malloc(sizeof(*d));
482 char *line = NULL, *tab1;
483 size_t sz = 0, i;
484 FILE *f;
485
486 if (!d)
487 return false;
488 d->nr_subsystems = 0;
489 d->subsystems = NULL;
490 f = fopen_cloexec("/proc/cgroups", "r");
491 if (!f) {
492 free(d);
493 return false;
494 }
495 while (getline(&line, &sz, f) != -1) {
496 char **tmp;
497 if (line[0] == '#')
498 continue;
499 if (!line[0])
500 continue;
501 tab1 = strchr(line, '\t');
502 if (!tab1)
503 continue;
504 *tab1 = '\0';
505 tmp = realloc(d->subsystems, (d->nr_subsystems+1)*sizeof(char *));
506 if (!tmp) {
507 goto out_free;
508 }
509 d->subsystems = tmp;
510 d->subsystems[d->nr_subsystems] = strdup(line);
511 if (!d->subsystems[d->nr_subsystems])
512 goto out_free;
513 d->nr_subsystems++;
514 }
515 fclose(f);
516
517 d->cgroup_path = NULL;
518 handler->cgroup_info->data = d;
519 return true;
520
521out_free:
522 for (i=0; i<d->nr_subsystems; i++)
523 free(d->subsystems[i]);
524 free(d->subsystems);
525 free(d);
526 return false;
527}
528
0086f499
SH
529static int cgm_unfreeze_fromhandler(struct lxc_handler *handler)
530{
531 struct cgm_data *d = handler->cgroup_info->data;
532
533 if (cgmanager_set_value_sync(NULL, cgroup_manager, "freezer", d->cgroup_path,
534 "freezer.state", "THAWED") != 0) {
535 ERROR("Error unfreezing %s", d->cgroup_path);
c0e0d2b2 536 return -1;
0086f499 537 }
c0e0d2b2 538 return 0;
0086f499
SH
539}
540
9daf6f5d
SH
541static bool setup_limits(struct lxc_handler *h, bool do_devices)
542{
543 struct lxc_list *iterator;
544 struct lxc_cgroup *cg;
545 bool ret = false;
546 struct lxc_list *cgroup_settings = &h->conf->cgroup;
547 struct cgm_data *d = h->cgroup_info->data;
548
549 if (lxc_list_empty(cgroup_settings))
d966dd14 550 return true;
9daf6f5d
SH
551
552 lxc_list_for_each(iterator, cgroup_settings) {
553 char controller[100], *p;
554 cg = iterator->elem;
555 if (do_devices != !strncmp("devices", cg->subsystem, 7))
556 continue;
557 if (strlen(cg->subsystem) > 100) // i smell a rat
558 goto out;
559 strcpy(controller, cg->subsystem);
560 p = strchr(controller, '.');
561 if (p)
562 *p = '\0';
563 if (cgm_do_set(controller, cg->subsystem, d->cgroup_path
564 , cg->value) < 0) {
565 ERROR("Error setting %s to %s for %s\n",
566 cg->subsystem, cg->value, h->name);
567 goto out;
568 }
569
570 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
571 }
572
573 ret = true;
574 INFO("cgroup limits have been setup");
575out:
576 return ret;
577}
578
/*
 * cgroup_ops entry point for applying limits; forwards to setup_limits()
 * and returns its result unchanged.
 */
static bool cgm_setup_limits(struct lxc_handler *handler, bool with_devices)
{
	bool applied = setup_limits(handler, with_devices);

	return applied;
}
583
0996e18a
SH
584static bool cgm_chown(struct lxc_handler *handler)
585{
586 struct cgm_data *d = handler->cgroup_info->data;
587 int i;
588
589 for (i = 0; i < d->nr_subsystems; i++) {
590 if (!chown_cgroup(d->subsystems[i], d->cgroup_path, handler->conf))
591 WARN("Failed to chown %s:%s to container root",
592 d->subsystems[i], d->cgroup_path);
593 }
594 return true;
595}
596
d4ef7c50
SH
597static struct cgroup_ops cgmanager_ops = {
598 .destroy = cgm_destroy,
599 .init = cgm_init,
600 .create = cgm_create,
601 .enter = cgm_enter,
602 .create_legacy = NULL,
603 .get_cgroup = cgm_get_cgroup,
604 .get = cgm_get,
605 .set = cgm_set,
0086f499 606 .unfreeze_fromhandler = cgm_unfreeze_fromhandler,
9daf6f5d 607 .setup_limits = cgm_setup_limits,
0996e18a
SH
608 .name = "cgmanager",
609 .chown = cgm_chown,
d4ef7c50
SH
610};
611#endif