]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/criu.c
Merge pull request #1075 from tych0/criu-action-script
[mirror_lxc.git] / src / lxc / criu.c
1 /*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 #define _GNU_SOURCE
24 #include <assert.h>
25 #include <linux/limits.h>
26 #include <sched.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mount.h>
31 #include <sys/types.h>
32 #include <sys/wait.h>
33 #include <unistd.h>
34
35 #include "config.h"
36
37 #include "bdev/bdev.h"
38 #include "cgroup.h"
39 #include "conf.h"
40 #include "commands.h"
41 #include "criu.h"
42 #include "log.h"
43 #include "lxc.h"
44 #include "lxclock.h"
45 #include "network.h"
46 #include "utils.h"
47
48 #define CRIU_VERSION "2.0"
49
50 #define CRIU_GITID_VERSION "2.0"
51 #define CRIU_GITID_PATCHLEVEL 0
52
53 lxc_log_define(lxc_criu, lxc);
54
/* Everything exec_criu() needs to assemble the command line of one criu run. */
struct criu_opts {
	/* The type of criu invocation; exec_criu() accepts "dump", "pre-dump"
	 * and "restore" and returns early for anything else.
	 */
	char *action;

	/* the user-provided migrate options relevant to this action */
	struct migrate_opts *user;

	/* The container to dump or restore */
	struct lxc_container *c;

	/* dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]".
	 * An empty string means no console arguments are passed to criu.
	 */
	char tty_id[32];

	/* restore: the file to write the init process' pid into */
	char *pidfile;
	/* restore: passed to criu as the --cgroup-root argument */
	const char *cgroup_path;
	/* restore: fd handed to criu via --inherit-fd when the dump recorded a
	 * console tty; a negative value is treated as "no console here".
	 */
	int console_fd;
	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console = "none".
	 */
	char *console_name;
};
79
80 static int load_tty_major_minor(char *directory, char *output, int len)
81 {
82 FILE *f;
83 char path[PATH_MAX];
84 int ret;
85
86 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
87 if (ret < 0 || ret >= sizeof(path)) {
88 ERROR("snprintf'd too many chacters: %d", ret);
89 return -1;
90 }
91
92 f = fopen(path, "r");
93 if (!f) {
94 /* This means we're coming from a liblxc which didn't export
95 * the tty info. In this case they had to have lxc.console =
96 * none, so there's no problem restoring.
97 */
98 if (errno == ENOENT)
99 return 0;
100
101 SYSERROR("couldn't open %s", path);
102 return -1;
103 }
104
105 if (!fgets(output, len, f)) {
106 fclose(f);
107 SYSERROR("couldn't read %s", path);
108 return -1;
109 }
110
111 fclose(f);
112 return 0;
113 }
114
115 static void exec_criu(struct criu_opts *opts)
116 {
117 char **argv, log[PATH_MAX];
118 int static_args = 23, argc = 0, i, ret;
119 int netnr = 0;
120 struct lxc_list *it;
121
122 char buf[4096], tty_info[32];
123 size_t pos;
124 /* If we are currently in a cgroup /foo/bar, and the container is in a
125 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
126 * container has an open fd that points to one of the cgroup files
127 * (systemd always opens its "root" cgroup). So, let's escape to the
128 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
129 * see all cgroups.
130 */
131 if (!cgroup_escape()) {
132 ERROR("failed to escape cgroups");
133 return;
134 }
135
136 /* The command line always looks like:
137 * criu $(action) --tcp-established --file-locks --link-remap \
138 * --manage-cgroups=full action-script foo.sh -D $(directory) \
139 * -o $(directory)/$(action).log --ext-mount-map auto
140 * --enable-external-sharing --enable-external-masters
141 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
142 * +1 for final NULL */
143
144 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
145 /* -t pid --freeze-cgroup /lxc/ct */
146 static_args += 4;
147
148 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
149 if (opts->user->predump_dir)
150 static_args += 2;
151
152 /* --page-server --address <address> --port <port> */
153 if (opts->user->pageserver_address && opts->user->pageserver_port)
154 static_args += 5;
155
156 /* --leave-running (only for final dump) */
157 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
158 static_args++;
159
160 /* --external tty[88,4] */
161 if (opts->tty_id[0])
162 static_args += 2;
163
164 /* --force-irmap */
165 if (!opts->user->preserves_inodes)
166 static_args++;
167 } else if (strcmp(opts->action, "restore") == 0) {
168 /* --root $(lxc_mount_point) --restore-detached
169 * --restore-sibling --pidfile $foo --cgroup-root $foo
170 * --lsm-profile apparmor:whatever
171 */
172 static_args += 10;
173
174 tty_info[0] = 0;
175 if (load_tty_major_minor(opts->user->directory, tty_info, sizeof(tty_info)))
176 return;
177
178 /* --inherit-fd fd[%d]:tty[%s] */
179 if (tty_info[0])
180 static_args += 2;
181 } else {
182 return;
183 }
184
185 if (opts->user->verbose)
186 static_args++;
187
188 if (opts->user->action_script)
189 static_args += 2;
190
191 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
192 if (ret < 0 || ret >= PATH_MAX) {
193 ERROR("logfile name too long\n");
194 return;
195 }
196
197 argv = malloc(static_args * sizeof(*argv));
198 if (!argv)
199 return;
200
201 memset(argv, 0, static_args * sizeof(*argv));
202
203 #define DECLARE_ARG(arg) \
204 do { \
205 if (arg == NULL) { \
206 ERROR("Got NULL argument for criu"); \
207 goto err; \
208 } \
209 argv[argc++] = strdup(arg); \
210 if (!argv[argc-1]) \
211 goto err; \
212 } while (0)
213
214 argv[argc++] = on_path("criu", NULL);
215 if (!argv[argc-1]) {
216 ERROR("Couldn't find criu binary\n");
217 goto err;
218 }
219
220 DECLARE_ARG(opts->action);
221 DECLARE_ARG("--tcp-established");
222 DECLARE_ARG("--file-locks");
223 DECLARE_ARG("--link-remap");
224 DECLARE_ARG("--manage-cgroups=full");
225 DECLARE_ARG("--ext-mount-map");
226 DECLARE_ARG("auto");
227 DECLARE_ARG("--enable-external-sharing");
228 DECLARE_ARG("--enable-external-masters");
229 DECLARE_ARG("--enable-fs");
230 DECLARE_ARG("hugetlbfs");
231 DECLARE_ARG("--enable-fs");
232 DECLARE_ARG("tracefs");
233 DECLARE_ARG("-D");
234 DECLARE_ARG(opts->user->directory);
235 DECLARE_ARG("-o");
236 DECLARE_ARG(log);
237
238 if (opts->user->verbose)
239 DECLARE_ARG("-vvvvvv");
240
241 if (opts->user->action_script) {
242 DECLARE_ARG("--action-script");
243 DECLARE_ARG(opts->user->action_script);
244 }
245
246 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
247 char pid[32], *freezer_relative;
248
249 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
250 goto err;
251
252 DECLARE_ARG("-t");
253 DECLARE_ARG(pid);
254
255 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
256 opts->c->config_path,
257 "freezer");
258 if (!freezer_relative) {
259 ERROR("failed getting freezer path");
260 goto err;
261 }
262
263 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
264 if (ret < 0 || ret >= sizeof(log))
265 goto err;
266
267 DECLARE_ARG("--freeze-cgroup");
268 DECLARE_ARG(log);
269
270 if (opts->tty_id[0]) {
271 DECLARE_ARG("--ext-mount-map");
272 DECLARE_ARG("/dev/console:console");
273
274 DECLARE_ARG("--external");
275 DECLARE_ARG(opts->tty_id);
276 }
277
278 if (opts->user->predump_dir) {
279 DECLARE_ARG("--prev-images-dir");
280 DECLARE_ARG(opts->user->predump_dir);
281 }
282
283 if (opts->user->pageserver_address && opts->user->pageserver_port) {
284 DECLARE_ARG("--page-server");
285 DECLARE_ARG("--address");
286 DECLARE_ARG(opts->user->pageserver_address);
287 DECLARE_ARG("--port");
288 DECLARE_ARG(opts->user->pageserver_port);
289 }
290
291 if (!opts->user->preserves_inodes)
292 DECLARE_ARG("--force-irmap");
293
294 /* only for final dump */
295 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
296 DECLARE_ARG("--leave-running");
297 } else if (strcmp(opts->action, "restore") == 0) {
298 void *m;
299 int additional;
300 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
301
302 DECLARE_ARG("--root");
303 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
304 DECLARE_ARG("--restore-detached");
305 DECLARE_ARG("--restore-sibling");
306 DECLARE_ARG("--pidfile");
307 DECLARE_ARG(opts->pidfile);
308 DECLARE_ARG("--cgroup-root");
309 DECLARE_ARG(opts->cgroup_path);
310
311 if (tty_info[0]) {
312 if (opts->console_fd < 0) {
313 ERROR("lxc.console configured on source host but not target");
314 goto err;
315 }
316
317 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, tty_info);
318 if (ret < 0 || ret >= sizeof(buf))
319 goto err;
320
321 DECLARE_ARG("--inherit-fd");
322 DECLARE_ARG(buf);
323 }
324 if (opts->console_name) {
325 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
326 SYSERROR("sprintf'd too many bytes");
327 }
328 DECLARE_ARG("--ext-mount-map");
329 DECLARE_ARG(buf);
330 }
331
332 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
333
334 if (lxc_conf->lsm_aa_profile)
335 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
336 else
337 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
338
339 if (ret < 0 || ret >= sizeof(buf))
340 goto err;
341
342 DECLARE_ARG("--lsm-profile");
343 DECLARE_ARG(buf);
344 }
345
346 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
347
348 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
349 if (!m)
350 goto err;
351 argv = m;
352
353 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
354 char eth[128], *veth;
355 struct lxc_netdev *n = it->elem;
356
357 if (n->type != LXC_NET_VETH)
358 continue;
359
360 if (n->name) {
361 if (strlen(n->name) >= sizeof(eth))
362 goto err;
363 strncpy(eth, n->name, sizeof(eth));
364 } else
365 sprintf(eth, "eth%d", netnr);
366
367 veth = n->priv.veth_attr.pair;
368
369 if (n->link)
370 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
371 else
372 ret = snprintf(buf, sizeof(buf), "%s=%s", eth, veth);
373 if (ret < 0 || ret >= sizeof(buf))
374 goto err;
375
376 DECLARE_ARG("--veth-pair");
377 DECLARE_ARG(buf);
378 }
379
380 }
381
382 argv[argc] = NULL;
383
384 buf[0] = 0;
385 pos = 0;
386
387 for (i = 0; argv[i]; i++) {
388 ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]);
389 if (ret < 0 || ret >= sizeof(buf) - pos)
390 goto err;
391 else
392 pos += ret;
393 }
394
395 INFO("execing: %s", buf);
396
397 #undef DECLARE_ARG
398 execv(argv[0], argv);
399 err:
400 for (i = 0; argv[i]; i++)
401 free(argv[i]);
402 free(argv);
403 }
404
405 /*
406 * Check to see if the criu version is recent enough for all the features we
407 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
408 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
409 * things potentially before a version is released with a particular feature.
410 *
411 * The intent is that when criu development slows down, we can drop this, but
412 * for now we shouldn't attempt to c/r with versions that we know won't work.
413 */
414 static bool criu_version_ok()
415 {
416 int pipes[2];
417 pid_t pid;
418
419 if (pipe(pipes) < 0) {
420 SYSERROR("pipe() failed");
421 return false;
422 }
423
424 pid = fork();
425 if (pid < 0) {
426 SYSERROR("fork() failed");
427 return false;
428 }
429
430 if (pid == 0) {
431 char *args[] = { "criu", "--version", NULL };
432 char *path;
433 close(pipes[0]);
434
435 close(STDERR_FILENO);
436 if (dup2(pipes[1], STDOUT_FILENO) < 0)
437 exit(1);
438
439 path = on_path("criu", NULL);
440 if (!path)
441 exit(1);
442
443 execv(path, args);
444 exit(1);
445 } else {
446 FILE *f;
447 char version[1024];
448 int patch;
449
450 close(pipes[1]);
451 if (wait_for_pid(pid) < 0) {
452 close(pipes[0]);
453 SYSERROR("execing criu failed, is it installed?");
454 return false;
455 }
456
457 f = fdopen(pipes[0], "r");
458 if (!f) {
459 close(pipes[0]);
460 return false;
461 }
462
463 if (fscanf(f, "Version: %1023[^\n]s", version) != 1)
464 goto version_error;
465
466 if (fgetc(f) != '\n')
467 goto version_error;
468
469 if (strcmp(version, CRIU_VERSION) >= 0)
470 goto version_match;
471
472 if (fscanf(f, "GitID: v%1023[^-]s", version) != 1)
473 goto version_error;
474
475 if (fgetc(f) != '-')
476 goto version_error;
477
478 if (fscanf(f, "%d", &patch) != 1)
479 goto version_error;
480
481 if (strcmp(version, CRIU_GITID_VERSION) < 0)
482 goto version_error;
483
484 if (patch < CRIU_GITID_PATCHLEVEL)
485 goto version_error;
486
487 version_match:
488 fclose(f);
489 return true;
490
491 version_error:
492 fclose(f);
493 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
494 return false;
495 }
496 }
497
498 /* Check and make sure the container has a configuration that we know CRIU can
499 * dump. */
500 static bool criu_ok(struct lxc_container *c)
501 {
502 struct lxc_list *it;
503
504 if (!criu_version_ok())
505 return false;
506
507 if (geteuid()) {
508 ERROR("Must be root to checkpoint\n");
509 return false;
510 }
511
512 /* We only know how to restore containers with veth networks. */
513 lxc_list_for_each(it, &c->lxc_conf->network) {
514 struct lxc_netdev *n = it->elem;
515 switch(n->type) {
516 case LXC_NET_VETH:
517 case LXC_NET_NONE:
518 case LXC_NET_EMPTY:
519 break;
520 default:
521 ERROR("Found network that is not VETH or NONE\n");
522 return false;
523 }
524 }
525
526 return true;
527 }
528
529 static bool restore_net_info(struct lxc_container *c)
530 {
531 struct lxc_list *it;
532 bool has_error = true;
533
534 if (container_mem_lock(c))
535 return false;
536
537 lxc_list_for_each(it, &c->lxc_conf->network) {
538 struct lxc_netdev *netdev = it->elem;
539 char template[IFNAMSIZ];
540
541 if (netdev->type != LXC_NET_VETH)
542 continue;
543
544 snprintf(template, sizeof(template), "vethXXXXXX");
545
546 if (!netdev->priv.veth_attr.pair)
547 netdev->priv.veth_attr.pair = lxc_mkifname(template);
548
549 if (!netdev->priv.veth_attr.pair)
550 goto out_unlock;
551 }
552
553 has_error = false;
554
555 out_unlock:
556 container_mem_unlock(c);
557 return !has_error;
558 }
559
560 // do_restore never returns, the calling process is used as the
561 // monitor process. do_restore calls exit() if it fails.
562 void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts)
563 {
564 pid_t pid;
565 char pidfile[L_tmpnam];
566 struct lxc_handler *handler;
567 int status, pipes[2] = {-1, -1};
568
569 if (!tmpnam(pidfile))
570 goto out;
571
572 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
573 if (!handler)
574 goto out;
575
576 if (!cgroup_init(handler)) {
577 ERROR("failed initing cgroups");
578 goto out_fini_handler;
579 }
580
581 if (!cgroup_create(handler)) {
582 ERROR("failed creating groups");
583 goto out_fini_handler;
584 }
585
586 if (!restore_net_info(c)) {
587 ERROR("failed restoring network info");
588 goto out_fini_handler;
589 }
590
591 resolve_clone_flags(handler);
592
593 if (pipe(pipes) < 0) {
594 SYSERROR("pipe() failed");
595 goto out_fini_handler;
596 }
597
598 pid = fork();
599 if (pid < 0)
600 goto out_fini_handler;
601
602 if (pid == 0) {
603 struct criu_opts os;
604 struct lxc_rootfs *rootfs;
605 int flags;
606
607 close(status_pipe);
608 status_pipe = -1;
609
610 close(pipes[0]);
611 pipes[0] = -1;
612 if (dup2(pipes[1], STDERR_FILENO) < 0) {
613 SYSERROR("dup2 failed");
614 goto out_fini_handler;
615 }
616
617 if (dup2(pipes[1], STDOUT_FILENO) < 0) {
618 SYSERROR("dup2 failed");
619 goto out_fini_handler;
620 }
621
622 if (unshare(CLONE_NEWNS))
623 goto out_fini_handler;
624
625 /* CRIU needs the lxc root bind mounted so that it is the root of some
626 * mount. */
627 rootfs = &c->lxc_conf->rootfs;
628
629 if (rootfs_is_blockdev(c->lxc_conf)) {
630 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
631 goto out_fini_handler;
632 } else {
633 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
634 goto out_fini_handler;
635
636 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
637 SYSERROR("remount / to private failed");
638 goto out_fini_handler;
639 }
640
641 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
642 rmdir(rootfs->mount);
643 goto out_fini_handler;
644 }
645 }
646
647 os.action = "restore";
648 os.user = opts;
649 os.c = c;
650 os.pidfile = pidfile;
651 os.cgroup_path = cgroup_canonical_path(handler);
652 os.console_fd = c->lxc_conf->console.slave;
653
654 if (os.console_fd >= 0) {
655 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
656 * via --inherit-fd, so we don't want it to close.
657 */
658 flags = fcntl(os.console_fd, F_GETFD);
659 if (flags < 0) {
660 SYSERROR("F_GETFD failed: %d", os.console_fd);
661 goto out_fini_handler;
662 }
663
664 flags &= ~FD_CLOEXEC;
665
666 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
667 SYSERROR("F_SETFD failed");
668 goto out_fini_handler;
669 }
670 }
671 os.console_name = c->lxc_conf->console.name;
672
673 /* exec_criu() returning is an error */
674 exec_criu(&os);
675 umount(rootfs->mount);
676 rmdir(rootfs->mount);
677 goto out_fini_handler;
678 } else {
679 int ret;
680 char title[2048];
681
682 close(pipes[1]);
683 pipes[1] = -1;
684
685 pid_t w = waitpid(pid, &status, 0);
686 if (w == -1) {
687 SYSERROR("waitpid");
688 goto out_fini_handler;
689 }
690
691 ret = write(status_pipe, &status, sizeof(status));
692 close(status_pipe);
693 status_pipe = -1;
694
695 if (sizeof(status) != ret) {
696 SYSERROR("failed to write all of status");
697 goto out_fini_handler;
698 }
699
700 if (WIFEXITED(status)) {
701 if (WEXITSTATUS(status)) {
702 char buf[4096];
703 int n;
704
705 n = read(pipes[0], buf, sizeof(buf));
706 if (n < 0) {
707 SYSERROR("failed reading from criu stderr");
708 goto out_fini_handler;
709 }
710
711 buf[n] = 0;
712
713 ERROR("criu process exited %d, output:\n%s\n", WEXITSTATUS(status), buf);
714 goto out_fini_handler;
715 } else {
716 int ret;
717 FILE *f = fopen(pidfile, "r");
718 if (!f) {
719 SYSERROR("couldn't read restore's init pidfile %s\n", pidfile);
720 goto out_fini_handler;
721 }
722
723 ret = fscanf(f, "%d", (int*) &handler->pid);
724 fclose(f);
725 if (unlink(pidfile) < 0 && errno != ENOENT)
726 SYSERROR("unlinking pidfile failed");
727
728 if (ret != 1) {
729 ERROR("reading restore pid failed");
730 goto out_fini_handler;
731 }
732
733 if (lxc_set_state(c->name, handler, RUNNING)) {
734 ERROR("error setting running state after restore");
735 goto out_fini_handler;
736 }
737 }
738 } else {
739 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
740 goto out_fini_handler;
741 }
742
743 close(pipes[0]);
744
745 /*
746 * See comment in lxcapi_start; we don't care if these
747 * fail because it's just a beauty thing. We just
748 * assign the return here to silence potential.
749 */
750 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
751 ret = setproctitle(title);
752
753 ret = lxc_poll(c->name, handler);
754 if (ret)
755 lxc_abort(c->name, handler);
756 lxc_fini(c->name, handler);
757 exit(ret);
758 }
759
760 out_fini_handler:
761 if (pipes[0] >= 0)
762 close(pipes[0]);
763 if (pipes[1] >= 0)
764 close(pipes[1]);
765
766 lxc_fini(c->name, handler);
767 if (unlink(pidfile) < 0 && errno != ENOENT)
768 SYSERROR("unlinking pidfile failed");
769
770 out:
771 if (status_pipe >= 0) {
772 status = 1;
773 if (write(status_pipe, &status, sizeof(status)) != sizeof(status)) {
774 SYSERROR("writing status failed");
775 }
776 close(status_pipe);
777 }
778
779 exit(1);
780 }
781
782 static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
783 {
784 FILE *f;
785 char path[PATH_MAX];
786 int ret;
787 struct stat sb;
788
789 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
790 tty_id[0] = 0;
791 return 0;
792 }
793
794 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
795 if (ret < 0 || ret >= sizeof(path)) {
796 ERROR("snprintf'd too many chacters: %d", ret);
797 return -1;
798 }
799
800 ret = stat(path, &sb);
801 if (ret < 0) {
802 SYSERROR("stat of %s failed", path);
803 return -1;
804 }
805
806 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
807 if (ret < 0 || ret >= sizeof(path)) {
808 ERROR("snprintf'd too many characters: %d", ret);
809 return -1;
810 }
811
812 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
813 (long long unsigned) sb.st_rdev,
814 (long long unsigned) sb.st_dev);
815 if (ret < 0 || ret >= sizeof(path)) {
816 ERROR("snprintf'd too many characters: %d", ret);
817 return -1;
818 }
819
820 f = fopen(path, "w");
821 if (!f) {
822 SYSERROR("failed to open %s", path);
823 return -1;
824 }
825
826 ret = fprintf(f, "%s", tty_id);
827 fclose(f);
828 if (ret < 0)
829 SYSERROR("failed to write to %s", path);
830 return ret;
831 }
832
833 /* do one of either predump or a regular dump */
834 static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
835 {
836 pid_t pid;
837
838 if (!criu_ok(c))
839 return false;
840
841 if (mkdir_p(opts->directory, 0700) < 0)
842 return false;
843
844 pid = fork();
845 if (pid < 0) {
846 SYSERROR("fork failed");
847 return false;
848 }
849
850 if (pid == 0) {
851 struct criu_opts os;
852
853 os.action = mode;
854 os.user = opts;
855 os.c = c;
856 os.console_name = c->lxc_conf->console.path;
857
858 if (save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id)) < 0)
859 exit(1);
860
861 /* exec_criu() returning is an error */
862 exec_criu(&os);
863 exit(1);
864 } else {
865 int status;
866 pid_t w = waitpid(pid, &status, 0);
867 if (w == -1) {
868 SYSERROR("waitpid");
869 return false;
870 }
871
872 if (WIFEXITED(status)) {
873 if (WEXITSTATUS(status)) {
874 ERROR("dump failed with %d\n", WEXITSTATUS(status));
875 return false;
876 }
877
878 return true;
879 } else if (WIFSIGNALED(status)) {
880 ERROR("dump signaled with %d\n", WTERMSIG(status));
881 return false;
882 } else {
883 ERROR("unknown dump exit %d\n", status);
884 return false;
885 }
886 }
887 }
888
/* Run a criu "pre-dump" of container c; returns whatever do_dump() reports. */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped = do_dump(c, "pre-dump", opts);

	return dumped;
}
893
894 bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
895 {
896 char path[PATH_MAX];
897 int ret;
898
899 ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
900 if (ret < 0 || ret >= sizeof(path))
901 return false;
902
903 if (access(path, F_OK) == 0) {
904 ERROR("please use a fresh directory for the dump directory\n");
905 return false;
906 }
907
908 return do_dump(c, "dump", opts);
909 }
910
/*
 * Restore container c from the images described by opts.
 *
 * The work happens in a forked child running do_restore(), which reports
 * criu's wait status back over a pipe. Returns true when that status says
 * criu exited with 0; in that case the child is not waited for because it
 * lives on as the monitor process. Returns false otherwise.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	pid_t child;
	int criu_status, got;
	int fds[2];

	if (!criu_ok(c))
		return false;

	if (geteuid() != 0) {
		ERROR("Must be root to restore\n");
		return false;
	}

	if (pipe(fds) != 0) {
		ERROR("failed to create pipe");
		return false;
	}

	child = fork();
	if (child < 0) {
		close(fds[0]);
		close(fds[1]);
		return false;
	}

	if (child == 0) {
		close(fds[0]);
		/* this never returns */
		do_restore(c, fds[1], opts);
	}

	close(fds[1]);

	got = read(fds[0], &criu_status, sizeof(criu_status));
	close(fds[0]);

	if (sizeof(criu_status) != got)
		ERROR("reading status from pipe failed");
	else if (WIFEXITED(criu_status) && WEXITSTATUS(criu_status) == 0)
		return true;

	/* If the criu process was killed or exited nonzero, wait() for the
	 * handler, since the restore process died.
	 */
	if (wait_for_pid(child))
		ERROR("restore process died");
	return false;
}