]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
c/r: rearrange things to pass struct migrate_opts all the way down
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
e29fe1dd
TA
/*
 * lxc: linux Container library
 *
 * Copyright © 2014-2015 Canonical Ltd.
 *
 * Authors:
 * Tycho Andersen <tycho.andersen@canonical.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23#define _GNU_SOURCE
24#include <assert.h>
25#include <linux/limits.h>
26#include <sched.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/wait.h>
33#include <unistd.h>
34
35#include "config.h"
36
4ec31c52 37#include "bdev/bdev.h"
e29fe1dd
TA
38#include "cgroup.h"
39#include "conf.h"
dc259399 40#include "commands.h"
e29fe1dd
TA
41#include "criu.h"
42#include "log.h"
43#include "lxc.h"
44#include "lxclock.h"
45#include "network.h"
46#include "utils.h"
47
/*
 * Minimum criu release accepted by criu_version_ok(), compared against the
 * "Version:" line printed by `criu --version`.
 */
#define CRIU_VERSION		"2.0"

/*
 * Fallback for criu builds whose "Version:" is too old: the "GitID:" line
 * must name at least this tag and at least this patch level after it.
 */
#define CRIU_GITID_VERSION	"2.0"
#define CRIU_GITID_PATCHLEVEL	0

lxc_log_define(lxc_criu, lxc);
54
/* Everything exec_criu() needs in order to build one criu command line. */
struct criu_opts {
	/* The type of criu invocation: "dump", "pre-dump" or "restore" */
	char *action;

	/* the user-provided migrate options relevant to this action */
	struct migrate_opts *user;

	/* The container to dump or restore */
	struct lxc_container *c;

	/* dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]".
	 * An empty string means no console is passed to criu.
	 */
	char tty_id[32];

	/* restore: the file to write the init process' pid into */
	char *pidfile;

	/* restore: the value handed to criu as --cgroup-root */
	const char *cgroup_path;

	/* restore: the console fd handed to criu via --inherit-fd; a negative
	 * value means there is no console on this host.
	 */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console = "none".
	 */
	char *console_name;
};
79
4b54788e
TA
80static int load_tty_major_minor(char *directory, char *output, int len)
81{
82 FILE *f;
83 char path[PATH_MAX];
84 int ret;
85
86 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
87 if (ret < 0 || ret >= sizeof(path)) {
88 ERROR("snprintf'd too many chacters: %d", ret);
89 return -1;
90 }
91
92 f = fopen(path, "r");
93 if (!f) {
94 /* This means we're coming from a liblxc which didn't export
95 * the tty info. In this case they had to have lxc.console =
96 * none, so there's no problem restoring.
97 */
98 if (errno == ENOENT)
99 return 0;
100
101 SYSERROR("couldn't open %s", path);
102 return -1;
103 }
104
105 if (!fgets(output, len, f)) {
106 fclose(f);
107 SYSERROR("couldn't read %s", path);
108 return -1;
109 }
110
111 fclose(f);
112 return 0;
113}
114
9451eeff 115static void exec_criu(struct criu_opts *opts)
e29fe1dd
TA
116{
117 char **argv, log[PATH_MAX];
4b54788e 118 int static_args = 24, argc = 0, i, ret;
e29fe1dd
TA
119 int netnr = 0;
120 struct lxc_list *it;
121
a17fa3c0
NE
122 char buf[4096], tty_info[32];
123 size_t pos;
e9195050
TA
124 /* If we are currently in a cgroup /foo/bar, and the container is in a
125 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
126 * container has an open fd that points to one of the cgroup files
127 * (systemd always opens its "root" cgroup). So, let's escape to the
128 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
129 * see all cgroups.
130 */
7103fe6f 131 if (!cgroup_escape()) {
e9195050
TA
132 ERROR("failed to escape cgroups");
133 return;
134 }
135
e29fe1dd
TA
136 /* The command line always looks like:
137 * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
138 * --manage-cgroups action-script foo.sh -D $(directory) \
139 * -o $(directory)/$(action).log --ext-mount-map auto
140 * --enable-external-sharing --enable-external-masters
4b54788e 141 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
e29fe1dd
TA
142 * +1 for final NULL */
143
aef3d51e 144 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399
TA
145 /* -t pid --freeze-cgroup /lxc/ct */
146 static_args += 4;
e29fe1dd 147
aef3d51e 148 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
b2c3710f 149 if (opts->user->predump_dir)
aef3d51e
TA
150 static_args += 2;
151
74eb576c 152 /* --page-server --address <address> --port <port> */
b2c3710f 153 if (opts->user->pageserver_address && opts->user->pageserver_port)
74eb576c
NE
154 static_args += 5;
155
aef3d51e 156 /* --leave-running (only for final dump) */
b2c3710f 157 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd 158 static_args++;
4b54788e
TA
159
160 /* --external tty[88,4] */
161 if (opts->tty_id[0])
162 static_args += 2;
e29fe1dd
TA
163 } else if (strcmp(opts->action, "restore") == 0) {
164 /* --root $(lxc_mount_point) --restore-detached
13389b29
TA
165 * --restore-sibling --pidfile $foo --cgroup-root $foo
166 * --lsm-profile apparmor:whatever
167 */
168 static_args += 10;
4b54788e
TA
169
170 tty_info[0] = 0;
b2c3710f 171 if (load_tty_major_minor(opts->user->directory, tty_info, sizeof(tty_info)))
4b54788e
TA
172 return;
173
174 /* --inherit-fd fd[%d]:tty[%s] */
175 if (tty_info[0])
176 static_args += 2;
e29fe1dd
TA
177 } else {
178 return;
179 }
180
b2c3710f 181 if (opts->user->verbose)
e29fe1dd
TA
182 static_args++;
183
b2c3710f 184 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
e29fe1dd
TA
185 if (ret < 0 || ret >= PATH_MAX) {
186 ERROR("logfile name too long\n");
187 return;
188 }
189
190 argv = malloc(static_args * sizeof(*argv));
191 if (!argv)
192 return;
193
194 memset(argv, 0, static_args * sizeof(*argv));
195
196#define DECLARE_ARG(arg) \
197 do { \
198 if (arg == NULL) { \
199 ERROR("Got NULL argument for criu"); \
200 goto err; \
201 } \
202 argv[argc++] = strdup(arg); \
203 if (!argv[argc-1]) \
204 goto err; \
205 } while (0)
206
207 argv[argc++] = on_path("criu", NULL);
208 if (!argv[argc-1]) {
209 ERROR("Couldn't find criu binary\n");
210 goto err;
211 }
212
213 DECLARE_ARG(opts->action);
214 DECLARE_ARG("--tcp-established");
215 DECLARE_ARG("--file-locks");
216 DECLARE_ARG("--link-remap");
217 DECLARE_ARG("--force-irmap");
218 DECLARE_ARG("--manage-cgroups");
219 DECLARE_ARG("--ext-mount-map");
220 DECLARE_ARG("auto");
221 DECLARE_ARG("--enable-external-sharing");
222 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
223 DECLARE_ARG("--enable-fs");
224 DECLARE_ARG("hugetlbfs");
5b454329
TA
225 DECLARE_ARG("--enable-fs");
226 DECLARE_ARG("tracefs");
e29fe1dd 227 DECLARE_ARG("-D");
b2c3710f 228 DECLARE_ARG(opts->user->directory);
e29fe1dd
TA
229 DECLARE_ARG("-o");
230 DECLARE_ARG(log);
231
b2c3710f 232 if (opts->user->verbose)
e29fe1dd
TA
233 DECLARE_ARG("-vvvvvv");
234
aef3d51e 235 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399 236 char pid[32], *freezer_relative;
e29fe1dd
TA
237
238 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
239 goto err;
240
241 DECLARE_ARG("-t");
242 DECLARE_ARG(pid);
dc259399
TA
243
244 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
245 opts->c->config_path,
246 "freezer");
247 if (!freezer_relative) {
248 ERROR("failed getting freezer path");
249 goto err;
250 }
251
252 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
253 if (ret < 0 || ret >= sizeof(log))
254 goto err;
255
256 DECLARE_ARG("--freeze-cgroup");
257 DECLARE_ARG(log);
258
4b54788e 259 if (opts->tty_id[0]) {
36d2096c
TA
260 DECLARE_ARG("--ext-mount-map");
261 DECLARE_ARG("/dev/console:console");
262
4b54788e
TA
263 DECLARE_ARG("--external");
264 DECLARE_ARG(opts->tty_id);
265 }
266
b2c3710f 267 if (opts->user->predump_dir) {
aef3d51e 268 DECLARE_ARG("--prev-images-dir");
b2c3710f 269 DECLARE_ARG(opts->user->predump_dir);
74eb576c 270 }
4c0c0319 271
b2c3710f 272 if (opts->user->pageserver_address && opts->user->pageserver_port) {
74eb576c
NE
273 DECLARE_ARG("--page-server");
274 DECLARE_ARG("--address");
b2c3710f 275 DECLARE_ARG(opts->user->pageserver_address);
74eb576c 276 DECLARE_ARG("--port");
b2c3710f 277 DECLARE_ARG(opts->user->pageserver_port);
74eb576c 278 }
aef3d51e
TA
279
280 /* only for final dump */
b2c3710f 281 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd
TA
282 DECLARE_ARG("--leave-running");
283 } else if (strcmp(opts->action, "restore") == 0) {
284 void *m;
285 int additional;
13389b29 286 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
e29fe1dd
TA
287
288 DECLARE_ARG("--root");
289 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
290 DECLARE_ARG("--restore-detached");
291 DECLARE_ARG("--restore-sibling");
292 DECLARE_ARG("--pidfile");
293 DECLARE_ARG(opts->pidfile);
294 DECLARE_ARG("--cgroup-root");
295 DECLARE_ARG(opts->cgroup_path);
296
4b54788e 297 if (tty_info[0]) {
97e4f1a9
TA
298 if (opts->console_fd < 0) {
299 ERROR("lxc.console configured on source host but not target");
300 goto err;
301 }
302
4b54788e
TA
303 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, tty_info);
304 if (ret < 0 || ret >= sizeof(buf))
305 goto err;
306
307 DECLARE_ARG("--inherit-fd");
308 DECLARE_ARG(buf);
309 }
310 if (opts->console_name) {
311 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
312 SYSERROR("sprintf'd too many bytes");
313 }
314 DECLARE_ARG("--ext-mount-map");
315 DECLARE_ARG(buf);
316 }
317
13389b29
TA
318 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
319
320 if (lxc_conf->lsm_aa_profile)
321 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
322 else
323 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
324
325 if (ret < 0 || ret >= sizeof(buf))
326 goto err;
327
328 DECLARE_ARG("--lsm-profile");
329 DECLARE_ARG(buf);
330 }
331
e29fe1dd
TA
332 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
333
fa071249
TA
334 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
335 if (!m)
336 goto err;
e29fe1dd
TA
337 argv = m;
338
339 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
340 char eth[128], *veth;
341 struct lxc_netdev *n = it->elem;
342
65b20221
TA
343 if (n->type != LXC_NET_VETH)
344 continue;
345
e29fe1dd
TA
346 if (n->name) {
347 if (strlen(n->name) >= sizeof(eth))
348 goto err;
349 strncpy(eth, n->name, sizeof(eth));
350 } else
351 sprintf(eth, "eth%d", netnr);
352
353 veth = n->priv.veth_attr.pair;
354
c1fd648d
TA
355 if (n->link)
356 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
357 else
358 ret = snprintf(buf, sizeof(buf), "%s=%s", eth, veth);
e29fe1dd
TA
359 if (ret < 0 || ret >= sizeof(buf))
360 goto err;
361
362 DECLARE_ARG("--veth-pair");
363 DECLARE_ARG(buf);
364 }
365
366 }
367
368 argv[argc] = NULL;
369
cf4b07a5 370 buf[0] = 0;
a17fa3c0 371 pos = 0;
72a30576 372
cf4b07a5 373 for (i = 0; argv[i]; i++) {
72a30576
NE
374 ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]);
375 if (ret < 0 || ret >= sizeof(buf) - pos)
376 goto err;
377 else
378 pos += ret;
cf4b07a5
TA
379 }
380
381 INFO("execing: %s", buf);
382
e29fe1dd
TA
383#undef DECLARE_ARG
384 execv(argv[0], argv);
385err:
e29fe1dd
TA
386 for (i = 0; argv[i]; i++)
387 free(argv[i]);
388 free(argv);
389}
390
8ba5ced7
TA
391/*
392 * Check to see if the criu version is recent enough for all the features we
393 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
394 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
395 * things potentially before a version is released with a particular feature.
396 *
397 * The intent is that when criu development slows down, we can drop this, but
398 * for now we shouldn't attempt to c/r with versions that we know won't work.
399 */
400static bool criu_version_ok()
401{
402 int pipes[2];
403 pid_t pid;
404
405 if (pipe(pipes) < 0) {
406 SYSERROR("pipe() failed");
407 return false;
408 }
409
410 pid = fork();
411 if (pid < 0) {
412 SYSERROR("fork() failed");
413 return false;
414 }
415
416 if (pid == 0) {
417 char *args[] = { "criu", "--version", NULL };
755fa453 418 char *path;
8ba5ced7
TA
419 close(pipes[0]);
420
421 close(STDERR_FILENO);
422 if (dup2(pipes[1], STDOUT_FILENO) < 0)
423 exit(1);
424
755fa453 425 path = on_path("criu", NULL);
d9b32b09
SH
426 if (!path)
427 exit(1);
428
755fa453 429 execv(path, args);
8ba5ced7
TA
430 exit(1);
431 } else {
432 FILE *f;
433 char version[1024];
434 int patch;
435
436 close(pipes[1]);
437 if (wait_for_pid(pid) < 0) {
438 close(pipes[0]);
4eae4051 439 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
440 return false;
441 }
442
443 f = fdopen(pipes[0], "r");
444 if (!f) {
445 close(pipes[0]);
446 return false;
447 }
448
a90277df 449 if (fscanf(f, "Version: %1023[^\n]s", version) != 1)
8ba5ced7
TA
450 goto version_error;
451
452 if (fgetc(f) != '\n')
453 goto version_error;
454
455 if (strcmp(version, CRIU_VERSION) >= 0)
456 goto version_match;
457
a90277df 458 if (fscanf(f, "GitID: v%1023[^-]s", version) != 1)
8ba5ced7
TA
459 goto version_error;
460
461 if (fgetc(f) != '-')
462 goto version_error;
463
464 if (fscanf(f, "%d", &patch) != 1)
465 goto version_error;
466
467 if (strcmp(version, CRIU_GITID_VERSION) < 0)
468 goto version_error;
469
470 if (patch < CRIU_GITID_PATCHLEVEL)
471 goto version_error;
472
473version_match:
3158ab5b 474 fclose(f);
8ba5ced7
TA
475 return true;
476
477version_error:
3158ab5b 478 fclose(f);
8ba5ced7
TA
479 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
480 return false;
481 }
482}
483
e29fe1dd
TA
484/* Check and make sure the container has a configuration that we know CRIU can
485 * dump. */
73d46752 486static bool criu_ok(struct lxc_container *c)
e29fe1dd
TA
487{
488 struct lxc_list *it;
e29fe1dd 489
8ba5ced7
TA
490 if (!criu_version_ok())
491 return false;
492
e29fe1dd
TA
493 if (geteuid()) {
494 ERROR("Must be root to checkpoint\n");
495 return false;
496 }
497
498 /* We only know how to restore containers with veth networks. */
499 lxc_list_for_each(it, &c->lxc_conf->network) {
500 struct lxc_netdev *n = it->elem;
65b20221
TA
501 switch(n->type) {
502 case LXC_NET_VETH:
503 case LXC_NET_NONE:
504 case LXC_NET_EMPTY:
505 break;
506 default:
e29fe1dd
TA
507 ERROR("Found network that is not VETH or NONE\n");
508 return false;
509 }
510 }
511
e29fe1dd
TA
512 return true;
513}
514
e29fe1dd
TA
515static bool restore_net_info(struct lxc_container *c)
516{
517 struct lxc_list *it;
518 bool has_error = true;
519
520 if (container_mem_lock(c))
521 return false;
522
523 lxc_list_for_each(it, &c->lxc_conf->network) {
524 struct lxc_netdev *netdev = it->elem;
525 char template[IFNAMSIZ];
65b20221
TA
526
527 if (netdev->type != LXC_NET_VETH)
528 continue;
529
e29fe1dd
TA
530 snprintf(template, sizeof(template), "vethXXXXXX");
531
532 if (!netdev->priv.veth_attr.pair)
533 netdev->priv.veth_attr.pair = lxc_mkifname(template);
534
535 if (!netdev->priv.veth_attr.pair)
536 goto out_unlock;
537 }
538
539 has_error = false;
540
541out_unlock:
542 container_mem_unlock(c);
543 return !has_error;
544}
545
aef3d51e
TA
546// do_restore never returns, the calling process is used as the
547// monitor process. do_restore calls exit() if it fails.
b2c3710f 548void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts)
e29fe1dd
TA
549{
550 pid_t pid;
551 char pidfile[L_tmpnam];
552 struct lxc_handler *handler;
3d9a5c85 553 int status, pipes[2] = {-1, -1};
e29fe1dd
TA
554
555 if (!tmpnam(pidfile))
556 goto out;
557
558 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
559 if (!handler)
560 goto out;
561
562 if (!cgroup_init(handler)) {
563 ERROR("failed initing cgroups");
564 goto out_fini_handler;
565 }
566
567 if (!cgroup_create(handler)) {
568 ERROR("failed creating groups");
569 goto out_fini_handler;
570 }
571
572 if (!restore_net_info(c)) {
573 ERROR("failed restoring network info");
574 goto out_fini_handler;
575 }
576
577 resolve_clone_flags(handler);
578
3d9a5c85
TA
579 if (pipe(pipes) < 0) {
580 SYSERROR("pipe() failed");
581 goto out_fini_handler;
582 }
583
e29fe1dd
TA
584 pid = fork();
585 if (pid < 0)
586 goto out_fini_handler;
587
588 if (pid == 0) {
589 struct criu_opts os;
590 struct lxc_rootfs *rootfs;
4b54788e 591 int flags;
e29fe1dd 592
3d9a5c85
TA
593 close(status_pipe);
594 status_pipe = -1;
595
596 close(pipes[0]);
597 pipes[0] = -1;
598 if (dup2(pipes[1], STDERR_FILENO) < 0) {
599 SYSERROR("dup2 failed");
600 goto out_fini_handler;
601 }
602
603 if (dup2(pipes[1], STDOUT_FILENO) < 0) {
604 SYSERROR("dup2 failed");
605 goto out_fini_handler;
606 }
e29fe1dd
TA
607
608 if (unshare(CLONE_NEWNS))
609 goto out_fini_handler;
610
611 /* CRIU needs the lxc root bind mounted so that it is the root of some
612 * mount. */
613 rootfs = &c->lxc_conf->rootfs;
614
615 if (rootfs_is_blockdev(c->lxc_conf)) {
616 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
617 goto out_fini_handler;
618 } else {
619 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
620 goto out_fini_handler;
621
622 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
623 SYSERROR("remount / to private failed");
624 goto out_fini_handler;
625 }
626
627 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
628 rmdir(rootfs->mount);
629 goto out_fini_handler;
630 }
631 }
632
633 os.action = "restore";
b2c3710f 634 os.user = opts;
e29fe1dd
TA
635 os.c = c;
636 os.pidfile = pidfile;
e29fe1dd 637 os.cgroup_path = cgroup_canonical_path(handler);
4b54788e
TA
638 os.console_fd = c->lxc_conf->console.slave;
639
97e4f1a9
TA
640 if (os.console_fd >= 0) {
641 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
642 * via --inherit-fd, so we don't want it to close.
643 */
644 flags = fcntl(os.console_fd, F_GETFD);
645 if (flags < 0) {
646 SYSERROR("F_GETFD failed: %d", os.console_fd);
647 goto out_fini_handler;
648 }
4b54788e 649
97e4f1a9 650 flags &= ~FD_CLOEXEC;
4b54788e 651
97e4f1a9
TA
652 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
653 SYSERROR("F_SETFD failed");
654 goto out_fini_handler;
655 }
4b54788e
TA
656 }
657 os.console_name = c->lxc_conf->console.name;
e29fe1dd
TA
658
659 /* exec_criu() returning is an error */
7103fe6f 660 exec_criu(&os);
e29fe1dd
TA
661 umount(rootfs->mount);
662 rmdir(rootfs->mount);
663 goto out_fini_handler;
664 } else {
665 int ret;
666 char title[2048];
667
3d9a5c85
TA
668 close(pipes[1]);
669 pipes[1] = -1;
670
e29fe1dd
TA
671 pid_t w = waitpid(pid, &status, 0);
672 if (w == -1) {
673 SYSERROR("waitpid");
674 goto out_fini_handler;
675 }
676
3d9a5c85
TA
677 ret = write(status_pipe, &status, sizeof(status));
678 close(status_pipe);
679 status_pipe = -1;
e29fe1dd
TA
680
681 if (sizeof(status) != ret) {
682 SYSERROR("failed to write all of status");
683 goto out_fini_handler;
684 }
685
686 if (WIFEXITED(status)) {
687 if (WEXITSTATUS(status)) {
3d9a5c85
TA
688 char buf[4096];
689 int n;
690
691 n = read(pipes[0], buf, sizeof(buf));
692 if (n < 0) {
693 SYSERROR("failed reading from criu stderr");
694 goto out_fini_handler;
695 }
696
697 buf[n] = 0;
698
699 ERROR("criu process exited %d, output:\n%s\n", WEXITSTATUS(status), buf);
e29fe1dd
TA
700 goto out_fini_handler;
701 } else {
702 int ret;
703 FILE *f = fopen(pidfile, "r");
704 if (!f) {
705 SYSERROR("couldn't read restore's init pidfile %s\n", pidfile);
706 goto out_fini_handler;
707 }
708
709 ret = fscanf(f, "%d", (int*) &handler->pid);
710 fclose(f);
59c2d406
TA
711 if (unlink(pidfile) < 0 && errno != ENOENT)
712 SYSERROR("unlinking pidfile failed");
713
e29fe1dd
TA
714 if (ret != 1) {
715 ERROR("reading restore pid failed");
716 goto out_fini_handler;
717 }
718
f8a41688
TA
719 if (lxc_set_state(c->name, handler, RUNNING)) {
720 ERROR("error setting running state after restore");
e29fe1dd 721 goto out_fini_handler;
f8a41688 722 }
e29fe1dd
TA
723 }
724 } else {
725 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
726 goto out_fini_handler;
727 }
728
3d9a5c85
TA
729 close(pipes[0]);
730
e29fe1dd
TA
731 /*
732 * See comment in lxcapi_start; we don't care if these
733 * fail because it's just a beauty thing. We just
734 * assign the return here to silence potential.
735 */
736 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
737 ret = setproctitle(title);
738
739 ret = lxc_poll(c->name, handler);
740 if (ret)
741 lxc_abort(c->name, handler);
742 lxc_fini(c->name, handler);
743 exit(ret);
744 }
745
746out_fini_handler:
3d9a5c85
TA
747 if (pipes[0] >= 0)
748 close(pipes[0]);
749 if (pipes[1] >= 0)
750 close(pipes[1]);
751
e29fe1dd 752 lxc_fini(c->name, handler);
59c2d406
TA
753 if (unlink(pidfile) < 0 && errno != ENOENT)
754 SYSERROR("unlinking pidfile failed");
e29fe1dd
TA
755
756out:
3d9a5c85 757 if (status_pipe >= 0) {
e29fe1dd 758 status = 1;
3d9a5c85 759 if (write(status_pipe, &status, sizeof(status)) != sizeof(status)) {
e29fe1dd
TA
760 SYSERROR("writing status failed");
761 }
3d9a5c85 762 close(status_pipe);
e29fe1dd
TA
763 }
764
765 exit(1);
766}
aef3d51e 767
4b54788e
TA
768static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
769{
770 FILE *f;
771 char path[PATH_MAX];
772 int ret;
773 struct stat sb;
774
775 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
776 tty_id[0] = 0;
777 return 0;
778 }
779
780 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
781 if (ret < 0 || ret >= sizeof(path)) {
782 ERROR("snprintf'd too many chacters: %d", ret);
783 return -1;
784 }
785
786 ret = stat(path, &sb);
787 if (ret < 0) {
788 SYSERROR("stat of %s failed", path);
789 return -1;
790 }
791
792 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
793 if (ret < 0 || ret >= sizeof(path)) {
794 ERROR("snprintf'd too many characters: %d", ret);
795 return -1;
796 }
797
f03280a7
TA
798 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
799 (long long unsigned) sb.st_rdev,
800 (long long unsigned) sb.st_dev);
4b54788e
TA
801 if (ret < 0 || ret >= sizeof(path)) {
802 ERROR("snprintf'd too many characters: %d", ret);
803 return -1;
804 }
805
806 f = fopen(path, "w");
807 if (!f) {
808 SYSERROR("failed to open %s", path);
809 return -1;
810 }
811
812 ret = fprintf(f, "%s", tty_id);
813 fclose(f);
814 if (ret < 0)
815 SYSERROR("failed to write to %s", path);
816 return ret;
817}
818
aef3d51e 819/* do one of either predump or a regular dump */
b2c3710f 820static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
aef3d51e
TA
821{
822 pid_t pid;
823
824 if (!criu_ok(c))
825 return false;
826
b2c3710f 827 if (mkdir_p(opts->directory, 0700) < 0)
aef3d51e
TA
828 return false;
829
830 pid = fork();
831 if (pid < 0) {
832 SYSERROR("fork failed");
833 return false;
834 }
835
836 if (pid == 0) {
837 struct criu_opts os;
838
839 os.action = mode;
b2c3710f 840 os.user = opts;
aef3d51e 841 os.c = c;
4b54788e 842 os.console_name = c->lxc_conf->console.path;
74eb576c 843
b2c3710f 844 if (save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id)) < 0)
4b54788e 845 exit(1);
aef3d51e
TA
846
847 /* exec_criu() returning is an error */
7103fe6f 848 exec_criu(&os);
aef3d51e
TA
849 exit(1);
850 } else {
851 int status;
852 pid_t w = waitpid(pid, &status, 0);
853 if (w == -1) {
854 SYSERROR("waitpid");
855 return false;
856 }
857
858 if (WIFEXITED(status)) {
859 if (WEXITSTATUS(status)) {
860 ERROR("dump failed with %d\n", WEXITSTATUS(status));
861 return false;
862 }
863
864 return true;
865 } else if (WIFSIGNALED(status)) {
866 ERROR("dump signaled with %d\n", WTERMSIG(status));
867 return false;
868 } else {
869 ERROR("unknown dump exit %d\n", status);
870 return false;
871 }
872 }
873}
874
/* Run a criu "pre-dump" of @c using the user's migrate options. */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped = do_dump(c, "pre-dump", opts);

	return dumped;
}
879
b2c3710f 880bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
aef3d51e
TA
881{
882 char path[PATH_MAX];
883 int ret;
884
b2c3710f 885 ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
aef3d51e
TA
886 if (ret < 0 || ret >= sizeof(path))
887 return false;
888
889 if (access(path, F_OK) == 0) {
890 ERROR("please use a fresh directory for the dump directory\n");
891 return false;
892 }
893
b2c3710f 894 return do_dump(c, "dump", opts);
aef3d51e
TA
895}
896
/*
 * Restore container @c from a criu dump.
 *
 * The restore runs in a forked child via do_restore(), which reports criu's
 * wait status back over a pipe. Returns true when that status shows a clean
 * exit, false otherwise.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	int status_fds[2];
	int status, nread;
	pid_t pid;

	if (!criu_ok(c))
		return false;

	if (geteuid()) {
		ERROR("Must be root to restore\n");
		return false;
	}

	if (pipe(status_fds)) {
		ERROR("failed to create pipe");
		return false;
	}

	pid = fork();
	if (pid < 0) {
		close(status_fds[0]);
		close(status_fds[1]);
		return false;
	}

	if (pid == 0) {
		close(status_fds[0]);
		/* do_restore() does not come back */
		do_restore(c, status_fds[1], opts);
	}

	close(status_fds[1]);

	nread = read(status_fds[0], &status, sizeof(status));
	close(status_fds[0]);

	if (nread != sizeof(status)) {
		ERROR("reading status from pipe failed");
	} else if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		/* criu exited cleanly. There is nothing to wait for, since
		 * the child becomes the monitor process.
		 */
		return true;
	}

	/* The criu process was killed or exited nonzero (or we never got
	 * its status), so the restore process died: wait() for it.
	 */
	if (wait_for_pid(pid))
		ERROR("restore process died");
	return false;
}