]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
detect whether cgmanager_list_controllers is available
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
e29fe1dd
TA
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23#define _GNU_SOURCE
24#include <assert.h>
25#include <linux/limits.h>
26#include <sched.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/wait.h>
33#include <unistd.h>
34
35#include "config.h"
36
37#include "bdev.h"
38#include "cgroup.h"
39#include "conf.h"
40#include "criu.h"
41#include "log.h"
42#include "lxc.h"
43#include "lxclock.h"
44#include "network.h"
45#include "utils.h"
46
47lxc_log_define(lxc_criu, lxc);
48
49void exec_criu(struct criu_opts *opts)
50{
51 char **argv, log[PATH_MAX];
dd62857a 52 int static_args = 20, argc = 0, i, ret;
e29fe1dd
TA
53 int netnr = 0;
54 struct lxc_list *it;
55
56 char buf[4096];
57 FILE *mnts = NULL;
58
59 /* The command line always looks like:
60 * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
61 * --manage-cgroups action-script foo.sh -D $(directory) \
62 * -o $(directory)/$(action).log --ext-mount-map auto
63 * --enable-external-sharing --enable-external-masters
dd62857a 64 * --enable-fs hugetlbfs
e29fe1dd
TA
65 * +1 for final NULL */
66
67 if (strcmp(opts->action, "dump") == 0) {
68 /* -t pid */
69 static_args += 2;
70
71 /* --leave-running */
72 if (!opts->stop)
73 static_args++;
74 } else if (strcmp(opts->action, "restore") == 0) {
75 /* --root $(lxc_mount_point) --restore-detached
76 * --restore-sibling --pidfile $foo --cgroup-root $foo */
77 static_args += 8;
78 } else {
79 return;
80 }
81
82 if (opts->verbose)
83 static_args++;
84
85 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
86 if (ret < 0 || ret >= PATH_MAX) {
87 ERROR("logfile name too long\n");
88 return;
89 }
90
91 argv = malloc(static_args * sizeof(*argv));
92 if (!argv)
93 return;
94
95 memset(argv, 0, static_args * sizeof(*argv));
96
97#define DECLARE_ARG(arg) \
98 do { \
99 if (arg == NULL) { \
100 ERROR("Got NULL argument for criu"); \
101 goto err; \
102 } \
103 argv[argc++] = strdup(arg); \
104 if (!argv[argc-1]) \
105 goto err; \
106 } while (0)
107
108 argv[argc++] = on_path("criu", NULL);
109 if (!argv[argc-1]) {
110 ERROR("Couldn't find criu binary\n");
111 goto err;
112 }
113
114 DECLARE_ARG(opts->action);
115 DECLARE_ARG("--tcp-established");
116 DECLARE_ARG("--file-locks");
117 DECLARE_ARG("--link-remap");
118 DECLARE_ARG("--force-irmap");
119 DECLARE_ARG("--manage-cgroups");
120 DECLARE_ARG("--ext-mount-map");
121 DECLARE_ARG("auto");
122 DECLARE_ARG("--enable-external-sharing");
123 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
124 DECLARE_ARG("--enable-fs");
125 DECLARE_ARG("hugetlbfs");
e29fe1dd
TA
126 DECLARE_ARG("-D");
127 DECLARE_ARG(opts->directory);
128 DECLARE_ARG("-o");
129 DECLARE_ARG(log);
130
131 if (opts->verbose)
132 DECLARE_ARG("-vvvvvv");
133
134 if (strcmp(opts->action, "dump") == 0) {
135 char pid[32];
136
137 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
138 goto err;
139
140 DECLARE_ARG("-t");
141 DECLARE_ARG(pid);
142 if (!opts->stop)
143 DECLARE_ARG("--leave-running");
144 } else if (strcmp(opts->action, "restore") == 0) {
145 void *m;
146 int additional;
147
148 DECLARE_ARG("--root");
149 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
150 DECLARE_ARG("--restore-detached");
151 DECLARE_ARG("--restore-sibling");
152 DECLARE_ARG("--pidfile");
153 DECLARE_ARG(opts->pidfile);
154 DECLARE_ARG("--cgroup-root");
155 DECLARE_ARG(opts->cgroup_path);
156
157 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
158
159 m = realloc(argv, (argc + additional + 1) * sizeof(*argv)); \
160 if (!m) \
161 goto err; \
162 argv = m;
163
164 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
165 char eth[128], *veth;
166 struct lxc_netdev *n = it->elem;
167
168 if (n->name) {
169 if (strlen(n->name) >= sizeof(eth))
170 goto err;
171 strncpy(eth, n->name, sizeof(eth));
172 } else
173 sprintf(eth, "eth%d", netnr);
174
175 veth = n->priv.veth_attr.pair;
176
177 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
178 if (ret < 0 || ret >= sizeof(buf))
179 goto err;
180
181 DECLARE_ARG("--veth-pair");
182 DECLARE_ARG(buf);
183 }
184
185 }
186
187 argv[argc] = NULL;
188
189#undef DECLARE_ARG
190 execv(argv[0], argv);
191err:
192 if (mnts)
193 fclose(mnts);
194 for (i = 0; argv[i]; i++)
195 free(argv[i]);
196 free(argv);
197}
198
8ba5ced7
TA
199/*
200 * Check to see if the criu version is recent enough for all the features we
201 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
202 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
203 * things potentially before a version is released with a particular feature.
204 *
205 * The intent is that when criu development slows down, we can drop this, but
206 * for now we shouldn't attempt to c/r with versions that we know won't work.
207 */
208static bool criu_version_ok()
209{
210 int pipes[2];
211 pid_t pid;
212
213 if (pipe(pipes) < 0) {
214 SYSERROR("pipe() failed");
215 return false;
216 }
217
218 pid = fork();
219 if (pid < 0) {
220 SYSERROR("fork() failed");
221 return false;
222 }
223
224 if (pid == 0) {
225 char *args[] = { "criu", "--version", NULL };
226 close(pipes[0]);
227
228 close(STDERR_FILENO);
229 if (dup2(pipes[1], STDOUT_FILENO) < 0)
230 exit(1);
231
232 execv("/usr/local/sbin/criu", args);
233 exit(1);
234 } else {
235 FILE *f;
236 char version[1024];
237 int patch;
238
239 close(pipes[1]);
240 if (wait_for_pid(pid) < 0) {
241 close(pipes[0]);
4eae4051 242 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
243 return false;
244 }
245
246 f = fdopen(pipes[0], "r");
247 if (!f) {
248 close(pipes[0]);
249 return false;
250 }
251
252 if (fscanf(f, "Version: %1024[^\n]s", version) != 1)
253 goto version_error;
254
255 if (fgetc(f) != '\n')
256 goto version_error;
257
258 if (strcmp(version, CRIU_VERSION) >= 0)
259 goto version_match;
260
261 if (fscanf(f, "GitID: v%1024[^-]s", version) != 1)
262 goto version_error;
263
264 if (fgetc(f) != '-')
265 goto version_error;
266
267 if (fscanf(f, "%d", &patch) != 1)
268 goto version_error;
269
270 if (strcmp(version, CRIU_GITID_VERSION) < 0)
271 goto version_error;
272
273 if (patch < CRIU_GITID_PATCHLEVEL)
274 goto version_error;
275
276version_match:
277 close(pipes[0]);
278 return true;
279
280version_error:
281 close(pipes[0]);
282 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
283 return false;
284 }
285}
286
e29fe1dd
TA
287/* Check and make sure the container has a configuration that we know CRIU can
288 * dump. */
289bool criu_ok(struct lxc_container *c)
290{
291 struct lxc_list *it;
292 bool found_deny_rule = false;
293
8ba5ced7
TA
294 if (!criu_version_ok())
295 return false;
296
e29fe1dd
TA
297 if (geteuid()) {
298 ERROR("Must be root to checkpoint\n");
299 return false;
300 }
301
302 /* We only know how to restore containers with veth networks. */
303 lxc_list_for_each(it, &c->lxc_conf->network) {
304 struct lxc_netdev *n = it->elem;
305 if (n->type != LXC_NET_VETH && n->type != LXC_NET_NONE) {
306 ERROR("Found network that is not VETH or NONE\n");
307 return false;
308 }
309 }
310
311 // These requirements come from http://criu.org/LXC
312 if (c->lxc_conf->console.path &&
313 strcmp(c->lxc_conf->console.path, "none") != 0) {
314 ERROR("lxc.console must be none\n");
315 return false;
316 }
317
318 if (c->lxc_conf->tty != 0) {
319 ERROR("lxc.tty must be 0\n");
320 return false;
321 }
322
323 lxc_list_for_each(it, &c->lxc_conf->cgroup) {
324 struct lxc_cgroup *cg = it->elem;
325 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
326 strcmp(cg->value, "c 5:1 rwm") == 0) {
327
328 found_deny_rule = true;
329 break;
330 }
331 }
332
333 if (!found_deny_rule) {
334 ERROR("couldn't find devices.deny = c 5:1 rwm");
335 return false;
336 }
337
338 return true;
339}
340
341bool dump_net_info(struct lxc_container *c, char *directory)
342{
343 int netnr;
344 struct lxc_list *it;
345
346 netnr = 0;
347 lxc_list_for_each(it, &c->lxc_conf->network) {
348 char *veth = NULL, *bridge = NULL, veth_path[PATH_MAX], eth[128];
349 struct lxc_netdev *n = it->elem;
350 bool has_error = true;
351 int pret;
352
353 pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.veth.pair", netnr);
354 if (pret < 0 || pret >= PATH_MAX)
355 goto out;
356
357 veth = c->get_running_config_item(c, veth_path);
358 if (!veth) {
359 /* criu_ok() checks that all interfaces are
360 * LXC_NET{VETH,NONE}, and VETHs should have this
361 * config */
362 assert(n->type == LXC_NET_NONE);
363 break;
364 }
365
366 bridge = c->get_running_config_item(c, veth_path);
367 if (!bridge)
368 goto out;
369
370 pret = snprintf(veth_path, PATH_MAX, "%s/veth%d", directory, netnr);
371 if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, veth) < 0)
372 goto out;
373
374 if (n->name) {
375 if (strlen(n->name) >= 128)
376 goto out;
377 strncpy(eth, n->name, 128);
378 } else
379 sprintf(eth, "eth%d", netnr);
380
381 has_error = false;
382out:
383 free(veth);
384 free(bridge);
385 if (has_error)
386 return false;
387 }
388
389 return true;
390}
391
392static bool restore_net_info(struct lxc_container *c)
393{
394 struct lxc_list *it;
395 bool has_error = true;
396
397 if (container_mem_lock(c))
398 return false;
399
400 lxc_list_for_each(it, &c->lxc_conf->network) {
401 struct lxc_netdev *netdev = it->elem;
402 char template[IFNAMSIZ];
403 snprintf(template, sizeof(template), "vethXXXXXX");
404
405 if (!netdev->priv.veth_attr.pair)
406 netdev->priv.veth_attr.pair = lxc_mkifname(template);
407
408 if (!netdev->priv.veth_attr.pair)
409 goto out_unlock;
410 }
411
412 has_error = false;
413
414out_unlock:
415 container_mem_unlock(c);
416 return !has_error;
417}
418
419void do_restore(struct lxc_container *c, int pipe, char *directory, bool verbose)
420{
421 pid_t pid;
422 char pidfile[L_tmpnam];
423 struct lxc_handler *handler;
424 int status;
425
426 if (!tmpnam(pidfile))
427 goto out;
428
429 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
430 if (!handler)
431 goto out;
432
433 if (!cgroup_init(handler)) {
434 ERROR("failed initing cgroups");
435 goto out_fini_handler;
436 }
437
438 if (!cgroup_create(handler)) {
439 ERROR("failed creating groups");
440 goto out_fini_handler;
441 }
442
443 if (!restore_net_info(c)) {
444 ERROR("failed restoring network info");
445 goto out_fini_handler;
446 }
447
448 resolve_clone_flags(handler);
449
450 pid = fork();
451 if (pid < 0)
452 goto out_fini_handler;
453
454 if (pid == 0) {
455 struct criu_opts os;
456 struct lxc_rootfs *rootfs;
457
458 close(pipe);
459 pipe = -1;
460
461 if (unshare(CLONE_NEWNS))
462 goto out_fini_handler;
463
464 /* CRIU needs the lxc root bind mounted so that it is the root of some
465 * mount. */
466 rootfs = &c->lxc_conf->rootfs;
467
468 if (rootfs_is_blockdev(c->lxc_conf)) {
469 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
470 goto out_fini_handler;
471 } else {
472 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
473 goto out_fini_handler;
474
475 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
476 SYSERROR("remount / to private failed");
477 goto out_fini_handler;
478 }
479
480 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
481 rmdir(rootfs->mount);
482 goto out_fini_handler;
483 }
484 }
485
486 os.action = "restore";
487 os.directory = directory;
488 os.c = c;
489 os.pidfile = pidfile;
490 os.verbose = verbose;
491 os.cgroup_path = cgroup_canonical_path(handler);
492
493 /* exec_criu() returning is an error */
494 exec_criu(&os);
495 umount(rootfs->mount);
496 rmdir(rootfs->mount);
497 goto out_fini_handler;
498 } else {
499 int ret;
500 char title[2048];
501
502 pid_t w = waitpid(pid, &status, 0);
503 if (w == -1) {
504 SYSERROR("waitpid");
505 goto out_fini_handler;
506 }
507
508 ret = write(pipe, &status, sizeof(status));
509 close(pipe);
510 pipe = -1;
511
512 if (sizeof(status) != ret) {
513 SYSERROR("failed to write all of status");
514 goto out_fini_handler;
515 }
516
517 if (WIFEXITED(status)) {
518 if (WEXITSTATUS(status)) {
519 goto out_fini_handler;
520 } else {
521 int ret;
522 FILE *f = fopen(pidfile, "r");
523 if (!f) {
524 SYSERROR("couldn't read restore's init pidfile %s\n", pidfile);
525 goto out_fini_handler;
526 }
527
528 ret = fscanf(f, "%d", (int*) &handler->pid);
529 fclose(f);
59c2d406
TA
530 if (unlink(pidfile) < 0 && errno != ENOENT)
531 SYSERROR("unlinking pidfile failed");
532
e29fe1dd
TA
533 if (ret != 1) {
534 ERROR("reading restore pid failed");
535 goto out_fini_handler;
536 }
537
538 if (lxc_set_state(c->name, handler, RUNNING))
539 goto out_fini_handler;
540 }
541 } else {
542 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
543 goto out_fini_handler;
544 }
545
546 /*
547 * See comment in lxcapi_start; we don't care if these
548 * fail because it's just a beauty thing. We just
549 * assign the return here to silence potential.
550 */
551 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
552 ret = setproctitle(title);
553
554 ret = lxc_poll(c->name, handler);
555 if (ret)
556 lxc_abort(c->name, handler);
557 lxc_fini(c->name, handler);
558 exit(ret);
559 }
560
561out_fini_handler:
562 lxc_fini(c->name, handler);
59c2d406
TA
563 if (unlink(pidfile) < 0 && errno != ENOENT)
564 SYSERROR("unlinking pidfile failed");
e29fe1dd
TA
565
566out:
567 if (pipe >= 0) {
568 status = 1;
569 if (write(pipe, &status, sizeof(status)) != sizeof(status)) {
570 SYSERROR("writing status failed");
571 }
572 close(pipe);
573 }
574
575 exit(1);
576}