/*
 * Source: git.proxmox.com, mirror_lxc.git, blob src/lxc/criu.c
 * Revision title: "c/r: don't fail if there is no console_fd on restore"
 */
/*
 * lxc: linux Container library
 *
 * Copyright © 2014-2015 Canonical Ltd.
 *
 * Authors:
 * Tycho Andersen <tycho.andersen@canonical.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #define _GNU_SOURCE
24 #include <assert.h>
25 #include <linux/limits.h>
26 #include <sched.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mount.h>
31 #include <sys/types.h>
32 #include <sys/wait.h>
33 #include <unistd.h>
34
35 #include "config.h"
36
37 #include "bdev/bdev.h"
38 #include "cgroup.h"
39 #include "conf.h"
40 #include "commands.h"
41 #include "criu.h"
42 #include "log.h"
43 #include "lxc.h"
44 #include "lxclock.h"
45 #include "network.h"
46 #include "utils.h"
47
/* Lowest released criu version accepted by criu_version_ok(). */
#define CRIU_VERSION "2.0"

/*
 * Alternative accepted for criu builds made from git: the GitID tag and the
 * patch count that follows it.
 */
#define CRIU_GITID_VERSION "2.0"
#define CRIU_GITID_PATCHLEVEL 0

lxc_log_define(lxc_criu, lxc);
54
/* Everything exec_criu() needs in order to build one criu command line. */
struct criu_opts {
	/* criu sub-command: exec_criu() accepts "dump", "pre-dump" and "restore" */
	char *action;

	/* images directory, passed to criu with -D */
	char *directory;

	/* the container being checkpointed or restored */
	struct lxc_container *c;

	/* when set, criu is run with -vvvvvv */
	bool verbose;

	/* (pre-)dump: directory holding the previous dump's images, or NULL */
	char *predump_dir;

	/* dump: when false, criu is asked to --leave-running */
	bool stop;

	/* dump: criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]";
	 * an empty string means there is no console to describe
	 */
	char tty_id[32];

	/* restore: file into which the restored init's pid is written */
	char *pidfile;

	/* restore: value handed to --cgroup-root */
	const char *cgroup_path;

	/* restore: console fd passed through --inherit-fd, negative if none */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. The result
	 * of `--ext-mount-map auto` is not used for it because the pts device
	 * may have a different path (e.g. a different pty number) on the
	 * target host. NULL if lxc.console = "none".
	 */
	char *console_name;
};
86
87 static int load_tty_major_minor(char *directory, char *output, int len)
88 {
89 FILE *f;
90 char path[PATH_MAX];
91 int ret;
92
93 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
94 if (ret < 0 || ret >= sizeof(path)) {
95 ERROR("snprintf'd too many chacters: %d", ret);
96 return -1;
97 }
98
99 f = fopen(path, "r");
100 if (!f) {
101 /* This means we're coming from a liblxc which didn't export
102 * the tty info. In this case they had to have lxc.console =
103 * none, so there's no problem restoring.
104 */
105 if (errno == ENOENT)
106 return 0;
107
108 SYSERROR("couldn't open %s", path);
109 return -1;
110 }
111
112 if (!fgets(output, len, f)) {
113 fclose(f);
114 SYSERROR("couldn't read %s", path);
115 return -1;
116 }
117
118 fclose(f);
119 return 0;
120 }
121
122 static void exec_criu(struct criu_opts *opts)
123 {
124 char **argv, log[PATH_MAX];
125 int static_args = 24, argc = 0, i, ret;
126 int netnr = 0;
127 struct lxc_list *it;
128
129 char buf[4096], *pos, tty_info[32];
130
131 /* If we are currently in a cgroup /foo/bar, and the container is in a
132 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
133 * container has an open fd that points to one of the cgroup files
134 * (systemd always opens its "root" cgroup). So, let's escape to the
135 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
136 * see all cgroups.
137 */
138 if (!cgroup_escape()) {
139 ERROR("failed to escape cgroups");
140 return;
141 }
142
143 /* The command line always looks like:
144 * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
145 * --manage-cgroups action-script foo.sh -D $(directory) \
146 * -o $(directory)/$(action).log --ext-mount-map auto
147 * --enable-external-sharing --enable-external-masters
148 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
149 * +1 for final NULL */
150
151 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
152 /* -t pid --freeze-cgroup /lxc/ct */
153 static_args += 4;
154
155 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
156 if (opts->predump_dir)
157 static_args += 2;
158
159 /* --leave-running (only for final dump) */
160 if (strcmp(opts->action, "dump") == 0 && !opts->stop)
161 static_args++;
162
163 /* --external tty[88,4] */
164 if (opts->tty_id[0])
165 static_args += 2;
166 } else if (strcmp(opts->action, "restore") == 0) {
167 /* --root $(lxc_mount_point) --restore-detached
168 * --restore-sibling --pidfile $foo --cgroup-root $foo
169 * --lsm-profile apparmor:whatever
170 */
171 static_args += 10;
172
173 tty_info[0] = 0;
174 if (load_tty_major_minor(opts->directory, tty_info, sizeof(tty_info)))
175 return;
176
177 /* --inherit-fd fd[%d]:tty[%s] */
178 if (tty_info[0])
179 static_args += 2;
180 } else {
181 return;
182 }
183
184 if (opts->verbose)
185 static_args++;
186
187 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
188 if (ret < 0 || ret >= PATH_MAX) {
189 ERROR("logfile name too long\n");
190 return;
191 }
192
193 argv = malloc(static_args * sizeof(*argv));
194 if (!argv)
195 return;
196
197 memset(argv, 0, static_args * sizeof(*argv));
198
199 #define DECLARE_ARG(arg) \
200 do { \
201 if (arg == NULL) { \
202 ERROR("Got NULL argument for criu"); \
203 goto err; \
204 } \
205 argv[argc++] = strdup(arg); \
206 if (!argv[argc-1]) \
207 goto err; \
208 } while (0)
209
210 argv[argc++] = on_path("criu", NULL);
211 if (!argv[argc-1]) {
212 ERROR("Couldn't find criu binary\n");
213 goto err;
214 }
215
216 DECLARE_ARG(opts->action);
217 DECLARE_ARG("--tcp-established");
218 DECLARE_ARG("--file-locks");
219 DECLARE_ARG("--link-remap");
220 DECLARE_ARG("--force-irmap");
221 DECLARE_ARG("--manage-cgroups");
222 DECLARE_ARG("--ext-mount-map");
223 DECLARE_ARG("auto");
224 DECLARE_ARG("--enable-external-sharing");
225 DECLARE_ARG("--enable-external-masters");
226 DECLARE_ARG("--enable-fs");
227 DECLARE_ARG("hugetlbfs");
228 DECLARE_ARG("--enable-fs");
229 DECLARE_ARG("tracefs");
230 DECLARE_ARG("-D");
231 DECLARE_ARG(opts->directory);
232 DECLARE_ARG("-o");
233 DECLARE_ARG(log);
234
235 if (opts->verbose)
236 DECLARE_ARG("-vvvvvv");
237
238 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
239 char pid[32], *freezer_relative;
240
241 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
242 goto err;
243
244 DECLARE_ARG("-t");
245 DECLARE_ARG(pid);
246
247 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
248 opts->c->config_path,
249 "freezer");
250 if (!freezer_relative) {
251 ERROR("failed getting freezer path");
252 goto err;
253 }
254
255 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
256 if (ret < 0 || ret >= sizeof(log))
257 goto err;
258
259 DECLARE_ARG("--freeze-cgroup");
260 DECLARE_ARG(log);
261
262 if (opts->tty_id[0]) {
263 DECLARE_ARG("--ext-mount-map");
264 DECLARE_ARG("/dev/console:console");
265
266 DECLARE_ARG("--external");
267 DECLARE_ARG(opts->tty_id);
268 }
269
270 if (opts->predump_dir) {
271 DECLARE_ARG("--prev-images-dir");
272 DECLARE_ARG(opts->predump_dir);
273 }
274
275 /* only for final dump */
276 if (strcmp(opts->action, "dump") == 0 && !opts->stop)
277 DECLARE_ARG("--leave-running");
278 } else if (strcmp(opts->action, "restore") == 0) {
279 void *m;
280 int additional;
281 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
282
283 DECLARE_ARG("--root");
284 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
285 DECLARE_ARG("--restore-detached");
286 DECLARE_ARG("--restore-sibling");
287 DECLARE_ARG("--pidfile");
288 DECLARE_ARG(opts->pidfile);
289 DECLARE_ARG("--cgroup-root");
290 DECLARE_ARG(opts->cgroup_path);
291
292 if (tty_info[0]) {
293 if (opts->console_fd < 0) {
294 ERROR("lxc.console configured on source host but not target");
295 goto err;
296 }
297
298 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, tty_info);
299 if (ret < 0 || ret >= sizeof(buf))
300 goto err;
301
302 DECLARE_ARG("--inherit-fd");
303 DECLARE_ARG(buf);
304 }
305 if (opts->console_name) {
306 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
307 SYSERROR("sprintf'd too many bytes");
308 }
309 DECLARE_ARG("--ext-mount-map");
310 DECLARE_ARG(buf);
311 }
312
313 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
314
315 if (lxc_conf->lsm_aa_profile)
316 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
317 else
318 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
319
320 if (ret < 0 || ret >= sizeof(buf))
321 goto err;
322
323 DECLARE_ARG("--lsm-profile");
324 DECLARE_ARG(buf);
325 }
326
327 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
328
329 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
330 if (!m)
331 goto err;
332 argv = m;
333
334 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
335 char eth[128], *veth;
336 struct lxc_netdev *n = it->elem;
337
338 if (n->type != LXC_NET_VETH)
339 continue;
340
341 if (n->name) {
342 if (strlen(n->name) >= sizeof(eth))
343 goto err;
344 strncpy(eth, n->name, sizeof(eth));
345 } else
346 sprintf(eth, "eth%d", netnr);
347
348 veth = n->priv.veth_attr.pair;
349
350 if (n->link)
351 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
352 else
353 ret = snprintf(buf, sizeof(buf), "%s=%s", eth, veth);
354 if (ret < 0 || ret >= sizeof(buf))
355 goto err;
356
357 DECLARE_ARG("--veth-pair");
358 DECLARE_ARG(buf);
359 }
360
361 }
362
363 argv[argc] = NULL;
364
365 buf[0] = 0;
366 pos = buf;
367 for (i = 0; argv[i]; i++) {
368 pos = strncat(buf, argv[i], buf + sizeof(buf) - pos);
369 pos = strncat(buf, " ", buf + sizeof(buf) - pos);
370 }
371
372 INFO("execing: %s", buf);
373
374 #undef DECLARE_ARG
375 execv(argv[0], argv);
376 err:
377 for (i = 0; argv[i]; i++)
378 free(argv[i]);
379 free(argv);
380 }
381
382 /*
383 * Check to see if the criu version is recent enough for all the features we
384 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
385 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
386 * things potentially before a version is released with a particular feature.
387 *
388 * The intent is that when criu development slows down, we can drop this, but
389 * for now we shouldn't attempt to c/r with versions that we know won't work.
390 */
391 static bool criu_version_ok()
392 {
393 int pipes[2];
394 pid_t pid;
395
396 if (pipe(pipes) < 0) {
397 SYSERROR("pipe() failed");
398 return false;
399 }
400
401 pid = fork();
402 if (pid < 0) {
403 SYSERROR("fork() failed");
404 return false;
405 }
406
407 if (pid == 0) {
408 char *args[] = { "criu", "--version", NULL };
409 char *path;
410 close(pipes[0]);
411
412 close(STDERR_FILENO);
413 if (dup2(pipes[1], STDOUT_FILENO) < 0)
414 exit(1);
415
416 path = on_path("criu", NULL);
417 if (!path)
418 exit(1);
419
420 execv(path, args);
421 exit(1);
422 } else {
423 FILE *f;
424 char version[1024];
425 int patch;
426
427 close(pipes[1]);
428 if (wait_for_pid(pid) < 0) {
429 close(pipes[0]);
430 SYSERROR("execing criu failed, is it installed?");
431 return false;
432 }
433
434 f = fdopen(pipes[0], "r");
435 if (!f) {
436 close(pipes[0]);
437 return false;
438 }
439
440 if (fscanf(f, "Version: %1023[^\n]s", version) != 1)
441 goto version_error;
442
443 if (fgetc(f) != '\n')
444 goto version_error;
445
446 if (strcmp(version, CRIU_VERSION) >= 0)
447 goto version_match;
448
449 if (fscanf(f, "GitID: v%1023[^-]s", version) != 1)
450 goto version_error;
451
452 if (fgetc(f) != '-')
453 goto version_error;
454
455 if (fscanf(f, "%d", &patch) != 1)
456 goto version_error;
457
458 if (strcmp(version, CRIU_GITID_VERSION) < 0)
459 goto version_error;
460
461 if (patch < CRIU_GITID_PATCHLEVEL)
462 goto version_error;
463
464 version_match:
465 fclose(f);
466 return true;
467
468 version_error:
469 fclose(f);
470 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
471 return false;
472 }
473 }
474
475 /* Check and make sure the container has a configuration that we know CRIU can
476 * dump. */
477 static bool criu_ok(struct lxc_container *c)
478 {
479 struct lxc_list *it;
480
481 if (!criu_version_ok())
482 return false;
483
484 if (geteuid()) {
485 ERROR("Must be root to checkpoint\n");
486 return false;
487 }
488
489 /* We only know how to restore containers with veth networks. */
490 lxc_list_for_each(it, &c->lxc_conf->network) {
491 struct lxc_netdev *n = it->elem;
492 switch(n->type) {
493 case LXC_NET_VETH:
494 case LXC_NET_NONE:
495 case LXC_NET_EMPTY:
496 break;
497 default:
498 ERROR("Found network that is not VETH or NONE\n");
499 return false;
500 }
501 }
502
503 return true;
504 }
505
506 static bool restore_net_info(struct lxc_container *c)
507 {
508 struct lxc_list *it;
509 bool has_error = true;
510
511 if (container_mem_lock(c))
512 return false;
513
514 lxc_list_for_each(it, &c->lxc_conf->network) {
515 struct lxc_netdev *netdev = it->elem;
516 char template[IFNAMSIZ];
517
518 if (netdev->type != LXC_NET_VETH)
519 continue;
520
521 snprintf(template, sizeof(template), "vethXXXXXX");
522
523 if (!netdev->priv.veth_attr.pair)
524 netdev->priv.veth_attr.pair = lxc_mkifname(template);
525
526 if (!netdev->priv.veth_attr.pair)
527 goto out_unlock;
528 }
529
530 has_error = false;
531
532 out_unlock:
533 container_mem_unlock(c);
534 return !has_error;
535 }
536
537 // do_restore never returns, the calling process is used as the
538 // monitor process. do_restore calls exit() if it fails.
539 void do_restore(struct lxc_container *c, int status_pipe, char *directory, bool verbose)
540 {
541 pid_t pid;
542 char pidfile[L_tmpnam];
543 struct lxc_handler *handler;
544 int status, pipes[2] = {-1, -1};
545
546 if (!tmpnam(pidfile))
547 goto out;
548
549 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
550 if (!handler)
551 goto out;
552
553 if (!cgroup_init(handler)) {
554 ERROR("failed initing cgroups");
555 goto out_fini_handler;
556 }
557
558 if (!cgroup_create(handler)) {
559 ERROR("failed creating groups");
560 goto out_fini_handler;
561 }
562
563 if (!restore_net_info(c)) {
564 ERROR("failed restoring network info");
565 goto out_fini_handler;
566 }
567
568 resolve_clone_flags(handler);
569
570 if (pipe(pipes) < 0) {
571 SYSERROR("pipe() failed");
572 goto out_fini_handler;
573 }
574
575 pid = fork();
576 if (pid < 0)
577 goto out_fini_handler;
578
579 if (pid == 0) {
580 struct criu_opts os;
581 struct lxc_rootfs *rootfs;
582 int flags;
583
584 close(status_pipe);
585 status_pipe = -1;
586
587 close(pipes[0]);
588 pipes[0] = -1;
589 if (dup2(pipes[1], STDERR_FILENO) < 0) {
590 SYSERROR("dup2 failed");
591 goto out_fini_handler;
592 }
593
594 if (dup2(pipes[1], STDOUT_FILENO) < 0) {
595 SYSERROR("dup2 failed");
596 goto out_fini_handler;
597 }
598
599 if (unshare(CLONE_NEWNS))
600 goto out_fini_handler;
601
602 /* CRIU needs the lxc root bind mounted so that it is the root of some
603 * mount. */
604 rootfs = &c->lxc_conf->rootfs;
605
606 if (rootfs_is_blockdev(c->lxc_conf)) {
607 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
608 goto out_fini_handler;
609 } else {
610 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
611 goto out_fini_handler;
612
613 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
614 SYSERROR("remount / to private failed");
615 goto out_fini_handler;
616 }
617
618 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
619 rmdir(rootfs->mount);
620 goto out_fini_handler;
621 }
622 }
623
624 os.action = "restore";
625 os.directory = directory;
626 os.c = c;
627 os.pidfile = pidfile;
628 os.verbose = verbose;
629 os.cgroup_path = cgroup_canonical_path(handler);
630 os.console_fd = c->lxc_conf->console.slave;
631
632 if (os.console_fd >= 0) {
633 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
634 * via --inherit-fd, so we don't want it to close.
635 */
636 flags = fcntl(os.console_fd, F_GETFD);
637 if (flags < 0) {
638 SYSERROR("F_GETFD failed: %d", os.console_fd);
639 goto out_fini_handler;
640 }
641
642 flags &= ~FD_CLOEXEC;
643
644 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
645 SYSERROR("F_SETFD failed");
646 goto out_fini_handler;
647 }
648 }
649 os.console_name = c->lxc_conf->console.name;
650
651 /* exec_criu() returning is an error */
652 exec_criu(&os);
653 umount(rootfs->mount);
654 rmdir(rootfs->mount);
655 goto out_fini_handler;
656 } else {
657 int ret;
658 char title[2048];
659
660 close(pipes[1]);
661 pipes[1] = -1;
662
663 pid_t w = waitpid(pid, &status, 0);
664 if (w == -1) {
665 SYSERROR("waitpid");
666 goto out_fini_handler;
667 }
668
669 ret = write(status_pipe, &status, sizeof(status));
670 close(status_pipe);
671 status_pipe = -1;
672
673 if (sizeof(status) != ret) {
674 SYSERROR("failed to write all of status");
675 goto out_fini_handler;
676 }
677
678 if (WIFEXITED(status)) {
679 if (WEXITSTATUS(status)) {
680 char buf[4096];
681 int n;
682
683 n = read(pipes[0], buf, sizeof(buf));
684 if (n < 0) {
685 SYSERROR("failed reading from criu stderr");
686 goto out_fini_handler;
687 }
688
689 buf[n] = 0;
690
691 ERROR("criu process exited %d, output:\n%s\n", WEXITSTATUS(status), buf);
692 goto out_fini_handler;
693 } else {
694 int ret;
695 FILE *f = fopen(pidfile, "r");
696 if (!f) {
697 SYSERROR("couldn't read restore's init pidfile %s\n", pidfile);
698 goto out_fini_handler;
699 }
700
701 ret = fscanf(f, "%d", (int*) &handler->pid);
702 fclose(f);
703 if (unlink(pidfile) < 0 && errno != ENOENT)
704 SYSERROR("unlinking pidfile failed");
705
706 if (ret != 1) {
707 ERROR("reading restore pid failed");
708 goto out_fini_handler;
709 }
710
711 if (lxc_set_state(c->name, handler, RUNNING)) {
712 ERROR("error setting running state after restore");
713 goto out_fini_handler;
714 }
715 }
716 } else {
717 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
718 goto out_fini_handler;
719 }
720
721 close(pipes[0]);
722
723 /*
724 * See comment in lxcapi_start; we don't care if these
725 * fail because it's just a beauty thing. We just
726 * assign the return here to silence potential.
727 */
728 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
729 ret = setproctitle(title);
730
731 ret = lxc_poll(c->name, handler);
732 if (ret)
733 lxc_abort(c->name, handler);
734 lxc_fini(c->name, handler);
735 exit(ret);
736 }
737
738 out_fini_handler:
739 if (pipes[0] >= 0)
740 close(pipes[0]);
741 if (pipes[1] >= 0)
742 close(pipes[1]);
743
744 lxc_fini(c->name, handler);
745 if (unlink(pidfile) < 0 && errno != ENOENT)
746 SYSERROR("unlinking pidfile failed");
747
748 out:
749 if (status_pipe >= 0) {
750 status = 1;
751 if (write(status_pipe, &status, sizeof(status)) != sizeof(status)) {
752 SYSERROR("writing status failed");
753 }
754 close(status_pipe);
755 }
756
757 exit(1);
758 }
759
760 static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
761 {
762 FILE *f;
763 char path[PATH_MAX];
764 int ret;
765 struct stat sb;
766
767 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
768 tty_id[0] = 0;
769 return 0;
770 }
771
772 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
773 if (ret < 0 || ret >= sizeof(path)) {
774 ERROR("snprintf'd too many chacters: %d", ret);
775 return -1;
776 }
777
778 ret = stat(path, &sb);
779 if (ret < 0) {
780 SYSERROR("stat of %s failed", path);
781 return -1;
782 }
783
784 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
785 if (ret < 0 || ret >= sizeof(path)) {
786 ERROR("snprintf'd too many characters: %d", ret);
787 return -1;
788 }
789
790 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
791 (long long unsigned) sb.st_rdev,
792 (long long unsigned) sb.st_dev);
793 if (ret < 0 || ret >= sizeof(path)) {
794 ERROR("snprintf'd too many characters: %d", ret);
795 return -1;
796 }
797
798 f = fopen(path, "w");
799 if (!f) {
800 SYSERROR("failed to open %s", path);
801 return -1;
802 }
803
804 ret = fprintf(f, "%s", tty_id);
805 fclose(f);
806 if (ret < 0)
807 SYSERROR("failed to write to %s", path);
808 return ret;
809 }
810
811 /* do one of either predump or a regular dump */
812 static bool do_dump(struct lxc_container *c, char *mode, char *directory,
813 bool stop, bool verbose, char *predump_dir)
814 {
815 pid_t pid;
816
817 if (!criu_ok(c))
818 return false;
819
820 if (mkdir_p(directory, 0700) < 0)
821 return false;
822
823 pid = fork();
824 if (pid < 0) {
825 SYSERROR("fork failed");
826 return false;
827 }
828
829 if (pid == 0) {
830 struct criu_opts os;
831
832 os.action = mode;
833 os.directory = directory;
834 os.c = c;
835 os.stop = stop;
836 os.verbose = verbose;
837 os.predump_dir = predump_dir;
838 os.console_name = c->lxc_conf->console.path;
839
840 if (save_tty_major_minor(directory, c, os.tty_id, sizeof(os.tty_id)) < 0)
841 exit(1);
842
843 /* exec_criu() returning is an error */
844 exec_criu(&os);
845 exit(1);
846 } else {
847 int status;
848 pid_t w = waitpid(pid, &status, 0);
849 if (w == -1) {
850 SYSERROR("waitpid");
851 return false;
852 }
853
854 if (WIFEXITED(status)) {
855 if (WEXITSTATUS(status)) {
856 ERROR("dump failed with %d\n", WEXITSTATUS(status));
857 return false;
858 }
859
860 return true;
861 } else if (WIFSIGNALED(status)) {
862 ERROR("dump signaled with %d\n", WTERMSIG(status));
863 return false;
864 } else {
865 ERROR("unknown dump exit %d\n", status);
866 return false;
867 }
868 }
869 }
870
/*
 * Run a criu "pre-dump" of container @c into @directory.
 *
 * @predump_dir may name the images of an earlier pre-dump, or be NULL.
 * Returns what do_dump() returns.
 */
bool pre_dump(struct lxc_container *c, char *directory, bool verbose, char *predump_dir)
{
	/* stop is only looked at for a final "dump" */
	const bool stop = false;

	return do_dump(c, "pre-dump", directory, stop, verbose, predump_dir);
}
875
876 bool dump(struct lxc_container *c, char *directory, bool stop, bool verbose, char *predump_dir)
877 {
878 char path[PATH_MAX];
879 int ret;
880
881 ret = snprintf(path, sizeof(path), "%s/inventory.img", directory);
882 if (ret < 0 || ret >= sizeof(path))
883 return false;
884
885 if (access(path, F_OK) == 0) {
886 ERROR("please use a fresh directory for the dump directory\n");
887 return false;
888 }
889
890 return do_dump(c, "dump", directory, stop, verbose, predump_dir);
891 }
892
/*
 * Restore container @c from the criu images in @directory.
 *
 * Forks a child that runs do_restore() and waits for the single status int
 * that child sends over a pipe. Returns true when that status says criu
 * exited with 0; the child then lives on as the container's monitor and is
 * not waited for. In every other case the child is reaped and false is
 * returned.
 */
bool restore(struct lxc_container *c, char *directory, bool verbose)
{
	pid_t child;
	int status, nread;
	int fds[2];

	if (!criu_ok(c))
		return false;

	if (geteuid()) {
		ERROR("Must be root to restore\n");
		return false;
	}

	if (pipe(fds)) {
		ERROR("failed to create pipe");
		return false;
	}

	child = fork();
	if (child < 0) {
		close(fds[0]);
		close(fds[1]);
		return false;
	}

	if (child == 0) {
		close(fds[0]);
		// this never returns
		do_restore(c, fds[1], directory, verbose);
	}

	close(fds[1]);

	nread = read(fds[0], &status, sizeof(status));
	close(fds[0]);

	// A clean criu exit means the child is now the monitor process, so
	// there is nothing to wait for. A short read, a killed criu or a
	// nonzero exit all mean the restore process died and must be reaped.
	if (nread != sizeof(status))
		ERROR("reading status from pipe failed");
	else if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return true;

	if (wait_for_pid(child))
		ERROR("restore process died");
	return false;
}
943 ERROR("restore process died");
944 return false;
945 }