]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/criu.c
remove extra 'ret'
[mirror_lxc.git] / src / lxc / criu.c
1 /*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 #define _GNU_SOURCE
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <linux/limits.h>
27 #include <sched.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <sys/mount.h>
32 #include <sys/types.h>
33 #include <sys/wait.h>
34 #include <unistd.h>
35
36 #include "config.h"
37
38 #include "bdev.h"
39 #include "cgroup.h"
40 #include "conf.h"
41 #include "commands.h"
42 #include "criu.h"
43 #include "log.h"
44 #include "lxc.h"
45 #include "lxclock.h"
46 #include "network.h"
47 #include "utils.h"
48
49 #define CRIU_VERSION "2.0"
50
51 #define CRIU_GITID_VERSION "2.0"
52 #define CRIU_GITID_PATCHLEVEL 0
53
54 #define CRIU_IN_FLIGHT_SUPPORT "2.4"
55
56 lxc_log_define(lxc_criu, lxc);
57
/* Everything exec_criu() needs to assemble a single criu command line. */
struct criu_opts {
	/* The type of criu invocation, one of "pre-dump", "dump" or "restore" */
	char *action;

	/* the user-provided migrate options relevant to this action */
	struct migrate_opts *user;

	/* The container to dump */
	struct lxc_container *c;

	/* dump: filled in by save_tty_major_minor(); an empty string means
	 * there is no console to hand to criu via --external.
	 */
	char tty_id[32]; /* the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]" */

	/* restore: the cgroup path passed to criu as --cgroup-root */
	const char *cgroup_path;
	/* restore: the console fd passed to criu via --inherit-fd; a negative
	 * value means no console is configured on this host.
	 */
	int console_fd;
	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console = "none".
	 */
	char *console_name;

	/* The detected version of criu */
	char *criu_version;
};
84
85 static int load_tty_major_minor(char *directory, char *output, int len)
86 {
87 FILE *f;
88 char path[PATH_MAX];
89 int ret;
90
91 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
92 if (ret < 0 || ret >= sizeof(path)) {
93 ERROR("snprintf'd too many chacters: %d", ret);
94 return -1;
95 }
96
97 f = fopen(path, "r");
98 if (!f) {
99 /* This means we're coming from a liblxc which didn't export
100 * the tty info. In this case they had to have lxc.console =
101 * none, so there's no problem restoring.
102 */
103 if (errno == ENOENT)
104 return 0;
105
106 SYSERROR("couldn't open %s", path);
107 return -1;
108 }
109
110 if (!fgets(output, len, f)) {
111 fclose(f);
112 SYSERROR("couldn't read %s", path);
113 return -1;
114 }
115
116 fclose(f);
117 return 0;
118 }
119
120 static void exec_criu(struct criu_opts *opts)
121 {
122 char **argv, log[PATH_MAX];
123 int static_args = 23, argc = 0, i, ret;
124 int netnr = 0;
125 struct lxc_list *it;
126
127 char buf[4096], tty_info[32];
128 size_t pos;
129 /* If we are currently in a cgroup /foo/bar, and the container is in a
130 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
131 * container has an open fd that points to one of the cgroup files
132 * (systemd always opens its "root" cgroup). So, let's escape to the
133 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
134 * see all cgroups.
135 */
136 if (!cgroup_escape()) {
137 ERROR("failed to escape cgroups");
138 return;
139 }
140
141 /* The command line always looks like:
142 * criu $(action) --tcp-established --file-locks --link-remap \
143 * --manage-cgroups=full action-script foo.sh -D $(directory) \
144 * -o $(directory)/$(action).log --ext-mount-map auto
145 * --enable-external-sharing --enable-external-masters
146 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
147 * +1 for final NULL */
148
149 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
150 /* -t pid --freeze-cgroup /lxc/ct */
151 static_args += 4;
152
153 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
154 if (opts->user->predump_dir)
155 static_args += 2;
156
157 /* --page-server --address <address> --port <port> */
158 if (opts->user->pageserver_address && opts->user->pageserver_port)
159 static_args += 5;
160
161 /* --leave-running (only for final dump) */
162 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
163 static_args++;
164
165 /* --external tty[88,4] */
166 if (opts->tty_id[0])
167 static_args += 2;
168
169 /* --force-irmap */
170 if (!opts->user->preserves_inodes)
171 static_args++;
172
173 /* --ghost-limit 1024 */
174 if (opts->user->ghost_limit)
175 static_args += 2;
176 } else if (strcmp(opts->action, "restore") == 0) {
177 /* --root $(lxc_mount_point) --restore-detached
178 * --restore-sibling --cgroup-root $foo
179 * --lsm-profile apparmor:whatever
180 */
181 static_args += 8;
182
183 tty_info[0] = 0;
184 if (load_tty_major_minor(opts->user->directory, tty_info, sizeof(tty_info)))
185 return;
186
187 /* --inherit-fd fd[%d]:tty[%s] */
188 if (tty_info[0])
189 static_args += 2;
190 } else {
191 return;
192 }
193
194 if (opts->user->verbose)
195 static_args++;
196
197 if (opts->user->action_script)
198 static_args += 2;
199
200 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
201 if (ret < 0 || ret >= PATH_MAX) {
202 ERROR("logfile name too long\n");
203 return;
204 }
205
206 argv = malloc(static_args * sizeof(*argv));
207 if (!argv)
208 return;
209
210 memset(argv, 0, static_args * sizeof(*argv));
211
212 #define DECLARE_ARG(arg) \
213 do { \
214 if (arg == NULL) { \
215 ERROR("Got NULL argument for criu"); \
216 goto err; \
217 } \
218 argv[argc++] = strdup(arg); \
219 if (!argv[argc-1]) \
220 goto err; \
221 } while (0)
222
223 argv[argc++] = on_path("criu", NULL);
224 if (!argv[argc-1]) {
225 ERROR("Couldn't find criu binary\n");
226 goto err;
227 }
228
229 DECLARE_ARG(opts->action);
230 DECLARE_ARG("--tcp-established");
231 DECLARE_ARG("--file-locks");
232 DECLARE_ARG("--link-remap");
233 DECLARE_ARG("--manage-cgroups=full");
234 DECLARE_ARG("--ext-mount-map");
235 DECLARE_ARG("auto");
236 DECLARE_ARG("--enable-external-sharing");
237 DECLARE_ARG("--enable-external-masters");
238 DECLARE_ARG("--enable-fs");
239 DECLARE_ARG("hugetlbfs");
240 DECLARE_ARG("--enable-fs");
241 DECLARE_ARG("tracefs");
242 DECLARE_ARG("-D");
243 DECLARE_ARG(opts->user->directory);
244 DECLARE_ARG("-o");
245 DECLARE_ARG(log);
246
247 if (opts->user->verbose)
248 DECLARE_ARG("-vvvvvv");
249
250 if (opts->user->action_script) {
251 DECLARE_ARG("--action-script");
252 DECLARE_ARG(opts->user->action_script);
253 }
254
255 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
256 char pid[32], *freezer_relative;
257
258 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
259 goto err;
260
261 DECLARE_ARG("-t");
262 DECLARE_ARG(pid);
263
264 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
265 opts->c->config_path,
266 "freezer");
267 if (!freezer_relative) {
268 ERROR("failed getting freezer path");
269 goto err;
270 }
271
272 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
273 if (ret < 0 || ret >= sizeof(log))
274 goto err;
275
276 if (!opts->user->disable_skip_in_flight &&
277 strcmp(opts->criu_version, CRIU_IN_FLIGHT_SUPPORT) >= 0)
278 DECLARE_ARG("--skip-in-flight");
279
280 DECLARE_ARG("--freeze-cgroup");
281 DECLARE_ARG(log);
282
283 if (opts->tty_id[0]) {
284 DECLARE_ARG("--ext-mount-map");
285 DECLARE_ARG("/dev/console:console");
286
287 DECLARE_ARG("--external");
288 DECLARE_ARG(opts->tty_id);
289 }
290
291 if (opts->user->predump_dir) {
292 DECLARE_ARG("--prev-images-dir");
293 DECLARE_ARG(opts->user->predump_dir);
294 }
295
296 if (opts->user->pageserver_address && opts->user->pageserver_port) {
297 DECLARE_ARG("--page-server");
298 DECLARE_ARG("--address");
299 DECLARE_ARG(opts->user->pageserver_address);
300 DECLARE_ARG("--port");
301 DECLARE_ARG(opts->user->pageserver_port);
302 }
303
304 if (!opts->user->preserves_inodes)
305 DECLARE_ARG("--force-irmap");
306
307 if (opts->user->ghost_limit) {
308 char ghost_limit[32];
309
310 ret = sprintf(ghost_limit, "%"PRIu64, opts->user->ghost_limit);
311 if (ret < 0 || ret >= sizeof(ghost_limit)) {
312 ERROR("failed to print ghost limit %"PRIu64, opts->user->ghost_limit);
313 goto err;
314 }
315
316 DECLARE_ARG("--ghost-limit");
317 DECLARE_ARG(ghost_limit);
318 }
319
320 /* only for final dump */
321 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
322 DECLARE_ARG("--leave-running");
323 } else if (strcmp(opts->action, "restore") == 0) {
324 void *m;
325 int additional;
326 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
327
328 DECLARE_ARG("--root");
329 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
330 DECLARE_ARG("--restore-detached");
331 DECLARE_ARG("--restore-sibling");
332 DECLARE_ARG("--cgroup-root");
333 DECLARE_ARG(opts->cgroup_path);
334
335 if (tty_info[0]) {
336 if (opts->console_fd < 0) {
337 ERROR("lxc.console configured on source host but not target");
338 goto err;
339 }
340
341 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, tty_info);
342 if (ret < 0 || ret >= sizeof(buf))
343 goto err;
344
345 DECLARE_ARG("--inherit-fd");
346 DECLARE_ARG(buf);
347 }
348 if (opts->console_name) {
349 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
350 SYSERROR("sprintf'd too many bytes");
351 }
352 DECLARE_ARG("--ext-mount-map");
353 DECLARE_ARG(buf);
354 }
355
356 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
357
358 if (lxc_conf->lsm_aa_profile)
359 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
360 else
361 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
362
363 if (ret < 0 || ret >= sizeof(buf))
364 goto err;
365
366 DECLARE_ARG("--lsm-profile");
367 DECLARE_ARG(buf);
368 }
369
370 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
371
372 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
373 if (!m)
374 goto err;
375 argv = m;
376
377 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
378 char eth[128], *veth;
379 struct lxc_netdev *n = it->elem;
380
381 if (n->type != LXC_NET_VETH)
382 continue;
383
384 if (n->name) {
385 if (strlen(n->name) >= sizeof(eth))
386 goto err;
387 strncpy(eth, n->name, sizeof(eth));
388 } else
389 sprintf(eth, "eth%d", netnr);
390
391 veth = n->priv.veth_attr.pair;
392
393 if (n->link)
394 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
395 else
396 ret = snprintf(buf, sizeof(buf), "%s=%s", eth, veth);
397 if (ret < 0 || ret >= sizeof(buf))
398 goto err;
399
400 DECLARE_ARG("--veth-pair");
401 DECLARE_ARG(buf);
402 }
403
404 }
405
406 argv[argc] = NULL;
407
408 buf[0] = 0;
409 pos = 0;
410
411 for (i = 0; argv[i]; i++) {
412 ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]);
413 if (ret < 0 || ret >= sizeof(buf) - pos)
414 goto err;
415 else
416 pos += ret;
417 }
418
419 INFO("execing: %s", buf);
420
421 #undef DECLARE_ARG
422 execv(argv[0], argv);
423 err:
424 for (i = 0; argv[i]; i++)
425 free(argv[i]);
426 free(argv);
427 }
428
429 /*
430 * Check to see if the criu version is recent enough for all the features we
431 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
432 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
433 * things potentially before a version is released with a particular feature.
434 *
435 * The intent is that when criu development slows down, we can drop this, but
436 * for now we shouldn't attempt to c/r with versions that we know won't work.
437 *
438 * Note: If version != NULL criu_version() stores the detected criu version in
439 * version. Allocates memory for version which must be freed by caller.
440 */
441 static bool criu_version_ok(char **version)
442 {
443 int pipes[2];
444 pid_t pid;
445
446 if (pipe(pipes) < 0) {
447 SYSERROR("pipe() failed");
448 return false;
449 }
450
451 pid = fork();
452 if (pid < 0) {
453 SYSERROR("fork() failed");
454 return false;
455 }
456
457 if (pid == 0) {
458 char *args[] = { "criu", "--version", NULL };
459 char *path;
460 close(pipes[0]);
461
462 close(STDERR_FILENO);
463 if (dup2(pipes[1], STDOUT_FILENO) < 0)
464 exit(1);
465
466 path = on_path("criu", NULL);
467 if (!path)
468 exit(1);
469
470 execv(path, args);
471 exit(1);
472 } else {
473 FILE *f;
474 char *tmp;
475 int patch;
476
477 close(pipes[1]);
478 if (wait_for_pid(pid) < 0) {
479 close(pipes[0]);
480 SYSERROR("execing criu failed, is it installed?");
481 return false;
482 }
483
484 f = fdopen(pipes[0], "r");
485 if (!f) {
486 close(pipes[0]);
487 return false;
488 }
489
490 tmp = malloc(1024);
491 if (!tmp) {
492 fclose(f);
493 return false;
494 }
495
496 if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1)
497 goto version_error;
498
499 if (fgetc(f) != '\n')
500 goto version_error;
501
502 if (strcmp(tmp, CRIU_VERSION) >= 0)
503 goto version_match;
504
505 if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1)
506 goto version_error;
507
508 if (fgetc(f) != '-')
509 goto version_error;
510
511 if (fscanf(f, "%d", &patch) != 1)
512 goto version_error;
513
514 if (strcmp(tmp, CRIU_GITID_VERSION) < 0)
515 goto version_error;
516
517 if (patch < CRIU_GITID_PATCHLEVEL)
518 goto version_error;
519
520 version_match:
521 fclose(f);
522 if (!version)
523 free(tmp);
524 else
525 *version = tmp;
526 return true;
527
528 version_error:
529 fclose(f);
530 free(tmp);
531 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
532 return false;
533 }
534 }
535
536 /* Check and make sure the container has a configuration that we know CRIU can
537 * dump. */
538 static bool criu_ok(struct lxc_container *c, char **criu_version)
539 {
540 struct lxc_list *it;
541
542 if (!criu_version_ok(criu_version))
543 return false;
544
545 if (geteuid()) {
546 ERROR("Must be root to checkpoint\n");
547 return false;
548 }
549
550 /* We only know how to restore containers with veth networks. */
551 lxc_list_for_each(it, &c->lxc_conf->network) {
552 struct lxc_netdev *n = it->elem;
553 switch(n->type) {
554 case LXC_NET_VETH:
555 case LXC_NET_NONE:
556 case LXC_NET_EMPTY:
557 break;
558 default:
559 ERROR("Found network that is not VETH or NONE\n");
560 return false;
561 }
562 }
563
564 return true;
565 }
566
567 static bool restore_net_info(struct lxc_container *c)
568 {
569 struct lxc_list *it;
570 bool has_error = true;
571
572 if (container_mem_lock(c))
573 return false;
574
575 lxc_list_for_each(it, &c->lxc_conf->network) {
576 struct lxc_netdev *netdev = it->elem;
577 char template[IFNAMSIZ];
578
579 if (netdev->type != LXC_NET_VETH)
580 continue;
581
582 snprintf(template, sizeof(template), "vethXXXXXX");
583
584 if (!netdev->priv.veth_attr.pair)
585 netdev->priv.veth_attr.pair = lxc_mkifname(template);
586
587 if (!netdev->priv.veth_attr.pair)
588 goto out_unlock;
589 }
590
591 has_error = false;
592
593 out_unlock:
594 container_mem_unlock(c);
595 return !has_error;
596 }
597
598 // do_restore never returns, the calling process is used as the
599 // monitor process. do_restore calls exit() if it fails.
600 static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version)
601 {
602 pid_t pid;
603 struct lxc_handler *handler;
604 int status;
605 int pipes[2] = {-1, -1};
606
607 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
608 if (!handler)
609 goto out;
610
611 if (!cgroup_init(handler)) {
612 ERROR("failed initing cgroups");
613 goto out_fini_handler;
614 }
615
616 if (!cgroup_create(handler)) {
617 ERROR("failed creating groups");
618 goto out_fini_handler;
619 }
620
621 if (!restore_net_info(c)) {
622 ERROR("failed restoring network info");
623 goto out_fini_handler;
624 }
625
626 resolve_clone_flags(handler);
627
628 if (pipe(pipes) < 0) {
629 SYSERROR("pipe() failed");
630 goto out_fini_handler;
631 }
632
633 pid = fork();
634 if (pid < 0)
635 goto out_fini_handler;
636
637 if (pid == 0) {
638 struct criu_opts os;
639 struct lxc_rootfs *rootfs;
640 int flags;
641
642 close(status_pipe);
643 status_pipe = -1;
644
645 close(pipes[0]);
646 pipes[0] = -1;
647 if (dup2(pipes[1], STDERR_FILENO) < 0) {
648 SYSERROR("dup2 failed");
649 goto out_fini_handler;
650 }
651
652 if (dup2(pipes[1], STDOUT_FILENO) < 0) {
653 SYSERROR("dup2 failed");
654 goto out_fini_handler;
655 }
656
657 if (unshare(CLONE_NEWNS))
658 goto out_fini_handler;
659
660 /* CRIU needs the lxc root bind mounted so that it is the root of some
661 * mount. */
662 rootfs = &c->lxc_conf->rootfs;
663
664 if (rootfs_is_blockdev(c->lxc_conf)) {
665 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
666 goto out_fini_handler;
667 } else {
668 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
669 goto out_fini_handler;
670
671 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
672 SYSERROR("remount / to private failed");
673 goto out_fini_handler;
674 }
675
676 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
677 rmdir(rootfs->mount);
678 goto out_fini_handler;
679 }
680 }
681
682 os.action = "restore";
683 os.user = opts;
684 os.c = c;
685 os.cgroup_path = cgroup_canonical_path(handler);
686 os.console_fd = c->lxc_conf->console.slave;
687 os.criu_version = criu_version;
688
689 if (os.console_fd >= 0) {
690 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
691 * via --inherit-fd, so we don't want it to close.
692 */
693 flags = fcntl(os.console_fd, F_GETFD);
694 if (flags < 0) {
695 SYSERROR("F_GETFD failed: %d", os.console_fd);
696 goto out_fini_handler;
697 }
698
699 flags &= ~FD_CLOEXEC;
700
701 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
702 SYSERROR("F_SETFD failed");
703 goto out_fini_handler;
704 }
705 }
706 os.console_name = c->lxc_conf->console.name;
707
708 /* exec_criu() returning is an error */
709 exec_criu(&os);
710 umount(rootfs->mount);
711 rmdir(rootfs->mount);
712 goto out_fini_handler;
713 } else {
714 int ret;
715 char title[2048];
716
717 close(pipes[1]);
718 pipes[1] = -1;
719
720 pid_t w = waitpid(pid, &status, 0);
721 if (w == -1) {
722 SYSERROR("waitpid");
723 goto out_fini_handler;
724 }
725
726 ret = write(status_pipe, &status, sizeof(status));
727 close(status_pipe);
728 status_pipe = -1;
729
730 if (sizeof(status) != ret) {
731 SYSERROR("failed to write all of status");
732 goto out_fini_handler;
733 }
734
735 if (WIFEXITED(status)) {
736 char buf[4096];
737
738 if (WEXITSTATUS(status)) {
739 int n;
740
741 n = read(pipes[0], buf, sizeof(buf));
742 if (n < 0) {
743 SYSERROR("failed reading from criu stderr");
744 goto out_fini_handler;
745 }
746
747 buf[n] = 0;
748
749 ERROR("criu process exited %d, output:\n%s\n", WEXITSTATUS(status), buf);
750 goto out_fini_handler;
751 } else {
752 ret = snprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
753 if (ret < 0 || ret >= sizeof(buf)) {
754 ERROR("snprintf'd too many characters: %d", ret);
755 goto out_fini_handler;
756 }
757
758 FILE *f = fopen(buf, "r");
759 if (!f) {
760 SYSERROR("couldn't read restore's children file %s\n", buf);
761 goto out_fini_handler;
762 }
763
764 ret = fscanf(f, "%d", (int*) &handler->pid);
765 fclose(f);
766 if (ret != 1) {
767 ERROR("reading restore pid failed");
768 goto out_fini_handler;
769 }
770
771 if (lxc_set_state(c->name, handler, RUNNING)) {
772 ERROR("error setting running state after restore");
773 goto out_fini_handler;
774 }
775 }
776 } else {
777 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
778 goto out_fini_handler;
779 }
780
781 close(pipes[0]);
782
783 /*
784 * See comment in lxcapi_start; we don't care if these
785 * fail because it's just a beauty thing. We just
786 * assign the return here to silence potential.
787 */
788 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
789 ret = setproctitle(title);
790
791 ret = lxc_poll(c->name, handler);
792 if (ret)
793 lxc_abort(c->name, handler);
794 lxc_fini(c->name, handler);
795 exit(ret);
796 }
797
798 out_fini_handler:
799 if (pipes[0] >= 0)
800 close(pipes[0]);
801 if (pipes[1] >= 0)
802 close(pipes[1]);
803
804 lxc_fini(c->name, handler);
805
806 out:
807 if (status_pipe >= 0) {
808 status = 1;
809 if (write(status_pipe, &status, sizeof(status)) != sizeof(status)) {
810 SYSERROR("writing status failed");
811 }
812 close(status_pipe);
813 }
814
815 exit(1);
816 }
817
818 static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
819 {
820 FILE *f;
821 char path[PATH_MAX];
822 int ret;
823 struct stat sb;
824
825 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
826 tty_id[0] = 0;
827 return 0;
828 }
829
830 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
831 if (ret < 0 || ret >= sizeof(path)) {
832 ERROR("snprintf'd too many chacters: %d", ret);
833 return -1;
834 }
835
836 ret = stat(path, &sb);
837 if (ret < 0) {
838 SYSERROR("stat of %s failed", path);
839 return -1;
840 }
841
842 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
843 if (ret < 0 || ret >= sizeof(path)) {
844 ERROR("snprintf'd too many characters: %d", ret);
845 return -1;
846 }
847
848 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
849 (long long unsigned) sb.st_rdev,
850 (long long unsigned) sb.st_dev);
851 if (ret < 0 || ret >= sizeof(path)) {
852 ERROR("snprintf'd too many characters: %d", ret);
853 return -1;
854 }
855
856 f = fopen(path, "w");
857 if (!f) {
858 SYSERROR("failed to open %s", path);
859 return -1;
860 }
861
862 ret = fprintf(f, "%s", tty_id);
863 fclose(f);
864 if (ret < 0)
865 SYSERROR("failed to write to %s", path);
866 return ret;
867 }
868
869 /* do one of either predump or a regular dump */
870 static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
871 {
872 pid_t pid;
873 char *criu_version = NULL;
874
875 if (!criu_ok(c, &criu_version))
876 return false;
877
878 if (mkdir_p(opts->directory, 0700) < 0)
879 return false;
880
881 pid = fork();
882 if (pid < 0) {
883 SYSERROR("fork failed");
884 return false;
885 }
886
887 if (pid == 0) {
888 struct criu_opts os;
889
890 os.action = mode;
891 os.user = opts;
892 os.c = c;
893 os.console_name = c->lxc_conf->console.path;
894 os.criu_version = criu_version;
895
896 if (save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id)) < 0)
897 exit(1);
898
899 /* exec_criu() returning is an error */
900 exec_criu(&os);
901 exit(1);
902 } else {
903 int status;
904 pid_t w = waitpid(pid, &status, 0);
905 if (w == -1) {
906 SYSERROR("waitpid");
907 return false;
908 }
909
910 if (WIFEXITED(status)) {
911 if (WEXITSTATUS(status)) {
912 ERROR("dump failed with %d\n", WEXITSTATUS(status));
913 return false;
914 }
915
916 return true;
917 } else if (WIFSIGNALED(status)) {
918 ERROR("dump signaled with %d\n", WTERMSIG(status));
919 return false;
920 } else {
921 ERROR("unknown dump exit %d\n", status);
922 return false;
923 }
924 }
925 }
926
/* Run criu's "pre-dump" action for container c with the given options. */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped = do_dump(c, "pre-dump", opts);

	return dumped;
}
931
932 bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
933 {
934 char path[PATH_MAX];
935 int ret;
936
937 ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
938 if (ret < 0 || ret >= sizeof(path))
939 return false;
940
941 if (access(path, F_OK) == 0) {
942 ERROR("please use a fresh directory for the dump directory\n");
943 return false;
944 }
945
946 return do_dump(c, "dump", opts);
947 }
948
/*
 * Restore container c from the checkpoint described by opts.
 *
 * The work happens in a forked child running do_restore(), which never
 * returns. That child reports criu's wait status (or 1 on an early failure)
 * through a pipe. Returns true when the reported status is a normal exit
 * with code 0; in that case the child is not waited for because it keeps
 * running as the container's monitor. In every other case the child is
 * reaped and false is returned.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	pid_t pid;
	int status, nread;
	int pipefd[2];
	bool ok = false;
	char *criu_version = NULL;

	/* criu_ok() also rejects non-root callers, so no separate euid check
	 * is needed here.
	 */
	if (!criu_ok(c, &criu_version))
		goto out;

	if (pipe(pipefd)) {
		ERROR("failed to create pipe");
		goto out;
	}

	pid = fork();
	if (pid < 0) {
		SYSERROR("fork failed");
		close(pipefd[0]);
		close(pipefd[1]);
		goto out;
	}

	if (pid == 0) {
		close(pipefd[0]);
		// this never returns
		do_restore(c, pipefd[1], opts, criu_version);
	}

	close(pipefd[1]);

	nread = read(pipefd[0], &status, sizeof(status));
	close(pipefd[0]);
	if (nread != (int)sizeof(status)) {
		ERROR("reading status from pipe failed");
		goto err_wait;
	}

	// If the criu process was killed or exited nonzero, wait() for the
	// handler, since the restore process died. Otherwise, we don't need to
	// wait, since the child becomes the monitor process.
	if (!WIFEXITED(status) || WEXITSTATUS(status))
		goto err_wait;

	ok = true;
	goto out;

err_wait:
	if (wait_for_pid(pid))
		ERROR("restore process died");

out:
	/* The child has its own copy; this releases the parent's. */
	free(criu_version);
	return ok;
}