]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
spelling: otherwise
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
e29fe1dd
TA
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
d38dd64a
CB
23
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
9b945f13 27#include <inttypes.h>
e29fe1dd
TA
28#include <linux/limits.h>
29#include <sched.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <sys/mount.h>
34#include <sys/types.h>
35#include <sys/wait.h>
36#include <unistd.h>
37
e29fe1dd 38#include "cgroup.h"
dc259399 39#include "commands.h"
d38dd64a
CB
40#include "conf.h"
41#include "config.h"
e29fe1dd
TA
42#include "criu.h"
43#include "log.h"
44#include "lxc.h"
45#include "lxclock.h"
46#include "network.h"
28d832c4 47#include "storage.h"
e8f764b6 48#include "syscall_wrappers.h"
e29fe1dd
TA
49#include "utils.h"
50
5f4e44a2
TA
51#if IS_BIONIC
52#include <../include/lxcmntent.h>
53#else
54#include <mntent.h>
55#endif
56
9de31d5a
CB
57#ifndef HAVE_STRLCPY
58#include "include/strlcpy.h"
59#endif
60
c33b0338 61#define CRIU_VERSION "2.0"
73d46752
TA
62
63#define CRIU_GITID_VERSION "2.0"
64#define CRIU_GITID_PATCHLEVEL 0
65
f1954503 66#define CRIU_IN_FLIGHT_SUPPORT "2.4"
46c8ffd5 67#define CRIU_EXTERNAL_NOT_VETH "2.8"
f1954503 68
ac2cecc4 69lxc_log_define(criu, lxc);
e29fe1dd 70
/* Everything exec_criu() needs to build one criu command line. */
struct criu_opts {
	/* the thing to hook to stdout and stderr for logging */
	int pipefd;

	/* The type of criu invocation, one of "dump", "pre-dump" or "restore" */
	char *action;

	/* the user-provided migrate options relevant to this action */
	struct migrate_opts *user;

	/* The container to dump or restore */
	struct lxc_container *c;

	/* dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]".
	 * An empty string means no --external tty argument is passed.
	 */
	char tty_id[32];

	/* restore: the handler set up by the restoring process */
	struct lxc_handler *handler;

	/* restore: the fd passed to criu via --inherit-fd for the console;
	 * negative if there is none.
	 */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console.path = "none".
	 */
	char *console_name;

	/* The detected version of criu */
	char *criu_version;
};
100
4b54788e
TA
/* Read the first line of $(directory)/tty.info into output.
 *
 * At most len - 1 characters are stored (fgets() semantics, so a trailing
 * newline in the file is kept). Returns 0 on success and also when tty.info
 * does not exist, in which case output is left untouched; returns -1 on any
 * other failure.
 */
static int load_tty_major_minor(char *directory, char *output, int len)
{
	FILE *f;
	char path[PATH_MAX];
	int ret;

	ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("snprintf'd too many characters: %d", ret);
		return -1;
	}

	f = fopen(path, "r");
	if (!f) {
		/* This means we're coming from a liblxc which didn't export
		 * the tty info. In this case they had to have
		 * lxc.console.path = none, so there's no problem restoring.
		 */
		if (errno == ENOENT)
			return 0;

		SYSERROR("couldn't open %s", path);
		return -1;
	}

	if (!fgets(output, len, f)) {
		/* Log first: fclose() may overwrite errno. */
		SYSERROR("couldn't read %s", path);
		fclose(f);
		return -1;
	}

	fclose(f);
	return 0;
}
135
74ad3607
FB
/* Compare two dotted version strings of the form "major[.minor[.patch]]".
 *
 * Components that are not present count as 0, so "2.8" equals "2.8.0".
 * Anything following the last parsed component is ignored.
 *
 * Returns 1 if v1 is newer than v2, 0 if they are equal and -1 if v1 is
 * older than v2. -1 is also returned when either string does not start
 * with a number.
 */
static int cmp_version(const char *v1, const char *v2)
{
	int ret, i;
	int oct_v1[3] = { 0, 0, 0 }, oct_v2[3] = { 0, 0, 0 };

	ret = sscanf(v1, "%d.%d.%d", &oct_v1[0], &oct_v1[1], &oct_v1[2]);
	if (ret < 1)
		return -1;

	ret = sscanf(v2, "%d.%d.%d", &oct_v2[0], &oct_v2[1], &oct_v2[2]);
	if (ret < 1)
		return -1;

	/* The most significant differing component decides. */
	for (i = 0; i < 3; i++) {
		if (oct_v1[i] > oct_v2[i])
			return 1;

		if (oct_v1[i] < oct_v2[i])
			return -1;
	}

	return 0;
}
176
e20f46f8
AR
177static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
178 struct criu_opts *opts)
e29fe1dd
TA
179{
180 char **argv, log[PATH_MAX];
19d1509c 181 int static_args = 23, argc = 0, i, ret;
e29fe1dd
TA
182 int netnr = 0;
183 struct lxc_list *it;
5f4e44a2
TA
184 FILE *mnts;
185 struct mntent mntent;
e29fe1dd 186
0e4be3cf 187 char buf[4096], ttys[32];
a17fa3c0 188 size_t pos;
5af85cb1 189
e9195050
TA
190 /* If we are currently in a cgroup /foo/bar, and the container is in a
191 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
192 * container has an open fd that points to one of the cgroup files
193 * (systemd always opens its "root" cgroup). So, let's escape to the
194 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
195 * see all cgroups.
196 */
e20f46f8 197 if (!cgroup_ops->escape(cgroup_ops, conf)) {
e9195050
TA
198 ERROR("failed to escape cgroups");
199 return;
200 }
201
e29fe1dd 202 /* The command line always looks like:
19d1509c 203 * criu $(action) --tcp-established --file-locks --link-remap \
5f178bc9 204 * --manage-cgroups=full --action-script foo.sh -D $(directory) \
e29fe1dd
TA
205 * -o $(directory)/$(action).log --ext-mount-map auto
206 * --enable-external-sharing --enable-external-masters
4b54788e 207 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
e29fe1dd
TA
208 * +1 for final NULL */
209
aef3d51e 210 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399
TA
211 /* -t pid --freeze-cgroup /lxc/ct */
212 static_args += 4;
e29fe1dd 213
aef3d51e 214 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
b2c3710f 215 if (opts->user->predump_dir)
aef3d51e
TA
216 static_args += 2;
217
74eb576c 218 /* --page-server --address <address> --port <port> */
b2c3710f 219 if (opts->user->pageserver_address && opts->user->pageserver_port)
74eb576c
NE
220 static_args += 5;
221
aef3d51e 222 /* --leave-running (only for final dump) */
b2c3710f 223 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd 224 static_args++;
4b54788e
TA
225
226 /* --external tty[88,4] */
227 if (opts->tty_id[0])
228 static_args += 2;
19d1509c
TA
229
230 /* --force-irmap */
231 if (!opts->user->preserves_inodes)
232 static_args++;
b2b7b0d2
TA
233
234 /* --ghost-limit 1024 */
235 if (opts->user->ghost_limit)
236 static_args += 2;
e29fe1dd
TA
237 } else if (strcmp(opts->action, "restore") == 0) {
238 /* --root $(lxc_mount_point) --restore-detached
0ab5703f 239 * --restore-sibling
13389b29
TA
240 * --lsm-profile apparmor:whatever
241 */
0ab5703f 242 static_args += 6;
4b54788e 243
0e4be3cf
CB
244 ttys[0] = 0;
245 if (load_tty_major_minor(opts->user->directory, ttys, sizeof(ttys)))
4b54788e
TA
246 return;
247
248 /* --inherit-fd fd[%d]:tty[%s] */
0e4be3cf 249 if (ttys[0])
4b54788e 250 static_args += 2;
e29fe1dd
TA
251 } else {
252 return;
253 }
254
2202afc9
CB
255 if (cgroup_ops->num_hierarchies(cgroup_ops) > 0)
256 static_args += 2 * cgroup_ops->num_hierarchies(cgroup_ops);
0ab5703f 257
b2c3710f 258 if (opts->user->verbose)
e29fe1dd
TA
259 static_args++;
260
b9ee6643
TA
261 if (opts->user->action_script)
262 static_args += 2;
263
5f4e44a2
TA
264 static_args += 2 * lxc_list_len(&opts->c->lxc_conf->mount_list);
265
b2c3710f 266 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
e29fe1dd 267 if (ret < 0 || ret >= PATH_MAX) {
9f1f54b0 268 ERROR("logfile name too long");
e29fe1dd
TA
269 return;
270 }
271
272 argv = malloc(static_args * sizeof(*argv));
273 if (!argv)
274 return;
275
276 memset(argv, 0, static_args * sizeof(*argv));
277
278#define DECLARE_ARG(arg) \
279 do { \
280 if (arg == NULL) { \
281 ERROR("Got NULL argument for criu"); \
282 goto err; \
283 } \
284 argv[argc++] = strdup(arg); \
285 if (!argv[argc-1]) \
286 goto err; \
287 } while (0)
288
289 argv[argc++] = on_path("criu", NULL);
290 if (!argv[argc-1]) {
9f1f54b0 291 ERROR("Couldn't find criu binary");
e29fe1dd
TA
292 goto err;
293 }
294
295 DECLARE_ARG(opts->action);
296 DECLARE_ARG("--tcp-established");
297 DECLARE_ARG("--file-locks");
298 DECLARE_ARG("--link-remap");
0a5fc6df 299 DECLARE_ARG("--manage-cgroups=full");
e29fe1dd
TA
300 DECLARE_ARG("--ext-mount-map");
301 DECLARE_ARG("auto");
302 DECLARE_ARG("--enable-external-sharing");
303 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
304 DECLARE_ARG("--enable-fs");
305 DECLARE_ARG("hugetlbfs");
5b454329
TA
306 DECLARE_ARG("--enable-fs");
307 DECLARE_ARG("tracefs");
e29fe1dd 308 DECLARE_ARG("-D");
b2c3710f 309 DECLARE_ARG(opts->user->directory);
e29fe1dd
TA
310 DECLARE_ARG("-o");
311 DECLARE_ARG(log);
312
2202afc9 313 for (i = 0; i < cgroup_ops->num_hierarchies(cgroup_ops); i++) {
0ab5703f 314 char **controllers = NULL, *fullname;
31b204e4 315 char *path, *tmp;
0ab5703f 316
2202afc9 317 if (!cgroup_ops->get_hierarchies(cgroup_ops, i, &controllers)) {
0ab5703f
TA
318 ERROR("failed to get hierarchy %d", i);
319 goto err;
320 }
321
322 /* if we are in a dump, we have to ask the monitor process what
323 * the right cgroup is. if this is a restore, we can just use
324 * the handler the restore task created.
325 */
326 if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
327 path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
328 if (!path) {
329 ERROR("failed to get cgroup path for %s", controllers[0]);
330 goto err;
331 }
332 } else {
333 const char *p;
334
2202afc9 335 p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
0ab5703f
TA
336 if (!p) {
337 ERROR("failed to get cgroup path for %s", controllers[0]);
338 goto err;
339 }
340
341 path = strdup(p);
342 if (!path) {
343 ERROR("strdup failed");
344 goto err;
345 }
346 }
347
31b204e4
CB
348 tmp = lxc_deslashify(path);
349 if (!tmp) {
350 ERROR("Failed to remove extraneous slashes from \"%s\"",
351 path);
0ab5703f
TA
352 free(path);
353 goto err;
354 }
31b204e4
CB
355 free(path);
356 path = tmp;
0ab5703f
TA
357
358 fullname = lxc_string_join(",", (const char **) controllers, false);
359 if (!fullname) {
360 ERROR("failed to join controllers");
361 free(path);
362 goto err;
363 }
364
365 ret = sprintf(buf, "%s:%s", fullname, path);
366 free(path);
367 free(fullname);
368 if (ret < 0 || ret >= sizeof(buf)) {
369 ERROR("sprintf of cgroup root arg failed");
370 goto err;
371 }
372
373 DECLARE_ARG("--cgroup-root");
374 DECLARE_ARG(buf);
375 }
376
b2c3710f 377 if (opts->user->verbose)
e29fe1dd
TA
378 DECLARE_ARG("-vvvvvv");
379
b9ee6643
TA
380 if (opts->user->action_script) {
381 DECLARE_ARG("--action-script");
382 DECLARE_ARG(opts->user->action_script);
383 }
384
1800f924
WB
385 mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list,
386 opts->c->lxc_conf->lsm_aa_allow_nesting);
5f4e44a2
TA
387 if (!mnts)
388 goto err;
389
390 while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
19d2422b 391 char *fmt, *key, *val, *mntdata;
5f4e44a2 392 char arg[2 * PATH_MAX + 2];
19d2422b
TA
393 unsigned long flags;
394
395 if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0)
396 goto err;
397
398 free(mntdata);
399
400 /* only add --ext-mount-map for actual bind mounts */
401 if (!(flags & MS_BIND))
402 continue;
5f4e44a2
TA
403
404 if (strcmp(opts->action, "dump") == 0) {
405 fmt = "/%s:%s";
406 key = mntent.mnt_dir;
407 val = mntent.mnt_dir;
408 } else {
409 fmt = "%s:%s";
410 key = mntent.mnt_dir;
411 val = mntent.mnt_fsname;
412 }
413
414 ret = snprintf(arg, sizeof(arg), fmt, key, val);
415 if (ret < 0 || ret >= sizeof(arg)) {
416 fclose(mnts);
417 ERROR("snprintf failed");
418 goto err;
419 }
420
421 DECLARE_ARG("--ext-mount-map");
422 DECLARE_ARG(arg);
423 }
424 fclose(mnts);
425
aef3d51e 426 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399 427 char pid[32], *freezer_relative;
e29fe1dd
TA
428
429 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
430 goto err;
431
432 DECLARE_ARG("-t");
433 DECLARE_ARG(pid);
dc259399
TA
434
435 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
436 opts->c->config_path,
437 "freezer");
438 if (!freezer_relative) {
439 ERROR("failed getting freezer path");
440 goto err;
441 }
442
443 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
444 if (ret < 0 || ret >= sizeof(log))
445 goto err;
446
f1954503
AR
447 if (!opts->user->disable_skip_in_flight &&
448 strcmp(opts->criu_version, CRIU_IN_FLIGHT_SUPPORT) >= 0)
449 DECLARE_ARG("--skip-in-flight");
450
dc259399
TA
451 DECLARE_ARG("--freeze-cgroup");
452 DECLARE_ARG(log);
453
4b54788e 454 if (opts->tty_id[0]) {
36d2096c
TA
455 DECLARE_ARG("--ext-mount-map");
456 DECLARE_ARG("/dev/console:console");
457
4b54788e
TA
458 DECLARE_ARG("--external");
459 DECLARE_ARG(opts->tty_id);
460 }
461
b2c3710f 462 if (opts->user->predump_dir) {
aef3d51e 463 DECLARE_ARG("--prev-images-dir");
b2c3710f 464 DECLARE_ARG(opts->user->predump_dir);
9f99a33f 465 DECLARE_ARG("--track-mem");
74eb576c 466 }
4c0c0319 467
b2c3710f 468 if (opts->user->pageserver_address && opts->user->pageserver_port) {
74eb576c
NE
469 DECLARE_ARG("--page-server");
470 DECLARE_ARG("--address");
b2c3710f 471 DECLARE_ARG(opts->user->pageserver_address);
74eb576c 472 DECLARE_ARG("--port");
b2c3710f 473 DECLARE_ARG(opts->user->pageserver_port);
74eb576c 474 }
aef3d51e 475
19d1509c
TA
476 if (!opts->user->preserves_inodes)
477 DECLARE_ARG("--force-irmap");
478
b2b7b0d2
TA
479 if (opts->user->ghost_limit) {
480 char ghost_limit[32];
481
9b945f13 482 ret = sprintf(ghost_limit, "%"PRIu64, opts->user->ghost_limit);
b2b7b0d2 483 if (ret < 0 || ret >= sizeof(ghost_limit)) {
9b945f13 484 ERROR("failed to print ghost limit %"PRIu64, opts->user->ghost_limit);
b2b7b0d2
TA
485 goto err;
486 }
487
488 DECLARE_ARG("--ghost-limit");
489 DECLARE_ARG(ghost_limit);
490 }
491
aef3d51e 492 /* only for final dump */
b2c3710f 493 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd
TA
494 DECLARE_ARG("--leave-running");
495 } else if (strcmp(opts->action, "restore") == 0) {
496 void *m;
497 int additional;
13389b29 498 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
e29fe1dd
TA
499
500 DECLARE_ARG("--root");
501 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
502 DECLARE_ARG("--restore-detached");
503 DECLARE_ARG("--restore-sibling");
e29fe1dd 504
0e4be3cf 505 if (ttys[0]) {
97e4f1a9 506 if (opts->console_fd < 0) {
3aed4934 507 ERROR("lxc.console.path configured on source host but not target");
97e4f1a9
TA
508 goto err;
509 }
510
0e4be3cf 511 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, ttys);
4b54788e
TA
512 if (ret < 0 || ret >= sizeof(buf))
513 goto err;
514
515 DECLARE_ARG("--inherit-fd");
516 DECLARE_ARG(buf);
517 }
518 if (opts->console_name) {
519 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
520 SYSERROR("sprintf'd too many bytes");
521 }
522 DECLARE_ARG("--ext-mount-map");
523 DECLARE_ARG(buf);
524 }
525
13389b29
TA
526 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
527
528 if (lxc_conf->lsm_aa_profile)
529 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
530 else
531 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
532
533 if (ret < 0 || ret >= sizeof(buf))
534 goto err;
535
536 DECLARE_ARG("--lsm-profile");
537 DECLARE_ARG(buf);
538 }
539
e29fe1dd
TA
540 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
541
fa071249
TA
542 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
543 if (!m)
544 goto err;
e29fe1dd
TA
545 argv = m;
546
547 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
9de31d5a 548 size_t retlen;
e29fe1dd 549 char eth[128], *veth;
46c8ffd5 550 char *fmt;
e29fe1dd 551 struct lxc_netdev *n = it->elem;
46c8ffd5
AR
552 bool external_not_veth;
553
74ad3607 554 if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
46c8ffd5
AR
555 /* Since criu version 2.8 the usage of --veth-pair
556 * has been deprecated:
557 * git tag --contains f2037e6d3445fc400
558 * v2.8 */
559 external_not_veth = true;
560 } else {
561 external_not_veth = false;
562 }
e29fe1dd 563
42277b1c 564 if (n->name[0] != '\0') {
9de31d5a
CB
565 retlen = strlcpy(eth, n->name, sizeof(eth));
566 if (retlen >= sizeof(eth))
e29fe1dd 567 goto err;
796a109d
TA
568 } else {
569 ret = snprintf(eth, sizeof(eth), "eth%d", netnr);
570 if (ret < 0 || ret >= sizeof(eth))
571 goto err;
572 }
e29fe1dd 573
e2697330
TA
574 switch (n->type) {
575 case LXC_NET_VETH:
576 veth = n->priv.veth_attr.pair;
ea7f6b29
CB
577 if (veth[0] == '\0')
578 veth = n->priv.veth_attr.veth1;
e29fe1dd 579
de4855a8 580 if (n->link[0] != '\0') {
46c8ffd5
AR
581 if (external_not_veth)
582 fmt = "veth[%s]:%s@%s";
583 else
584 fmt = "%s=%s@%s";
585
586 ret = snprintf(buf, sizeof(buf), fmt, eth, veth, n->link);
587 } else {
588 if (external_not_veth)
589 fmt = "veth[%s]:%s";
590 else
591 fmt = "%s=%s";
592
593 ret = snprintf(buf, sizeof(buf), fmt, eth, veth);
594 }
e2697330
TA
595 if (ret < 0 || ret >= sizeof(buf))
596 goto err;
597 break;
598 case LXC_NET_MACVLAN:
de4855a8 599 if (n->link[0] == '\0') {
9f1f54b0 600 ERROR("no host interface for macvlan %s", n->name);
e2697330
TA
601 goto err;
602 }
603
604 ret = snprintf(buf, sizeof(buf), "macvlan[%s]:%s", eth, n->link);
605 if (ret < 0 || ret >= sizeof(buf))
606 goto err;
607 break;
608 case LXC_NET_NONE:
609 case LXC_NET_EMPTY:
610 break;
611 default:
612 /* we have screened for this earlier... */
9f1f54b0 613 ERROR("unexpected network type %d", n->type);
e29fe1dd 614 goto err;
e2697330 615 }
e29fe1dd 616
46c8ffd5
AR
617 if (external_not_veth)
618 DECLARE_ARG("--external");
619 else
620 DECLARE_ARG("--veth-pair");
e29fe1dd 621 DECLARE_ARG(buf);
2f3fbc6b 622 netnr++;
e29fe1dd
TA
623 }
624
625 }
626
627 argv[argc] = NULL;
628
cf4b07a5 629 buf[0] = 0;
a17fa3c0 630 pos = 0;
72a30576 631
cf4b07a5 632 for (i = 0; argv[i]; i++) {
72a30576
NE
633 ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]);
634 if (ret < 0 || ret >= sizeof(buf) - pos)
635 goto err;
636 else
637 pos += ret;
cf4b07a5
TA
638 }
639
640 INFO("execing: %s", buf);
641
5af85cb1
TA
642 /* before criu inits its log, it sometimes prints things to stdout/err;
643 * let's be sure we capture that.
644 */
645 if (dup2(opts->pipefd, STDOUT_FILENO) < 0) {
646 SYSERROR("dup2 stdout failed");
647 goto err;
648 }
649
650 if (dup2(opts->pipefd, STDERR_FILENO) < 0) {
651 SYSERROR("dup2 stderr failed");
652 goto err;
653 }
654
655 close(opts->pipefd);
656
e29fe1dd
TA
657#undef DECLARE_ARG
658 execv(argv[0], argv);
659err:
e29fe1dd
TA
660 for (i = 0; argv[i]; i++)
661 free(argv[i]);
662 free(argv);
663}
664
b5b12b9e
AR
665/*
666 * Function to check if the checks activated in 'features_to_check' are
667 * available with the current architecture/kernel/criu combination.
668 *
669 * Parameter features_to_check is a bit mask of all features that should be
670 * checked (see feature check defines in lxc/lxccontainer.h).
671 *
672 * If the return value is true, all requested features are supported. If
673 * the return value is false the features_to_check parameter is updated
674 * to reflect which features are available. '0' means no feature but
675 * also that something went totally wrong.
676 *
677 * Some of the code flow of criu_version_ok() is duplicated and maybe it
678 * is a good candidate for refactoring.
679 */
680bool __criu_check_feature(uint64_t *features_to_check)
681{
682 pid_t pid;
683 uint64_t current_bit = 0;
684 int ret;
fca23691 685 uint64_t features = *features_to_check;
b5b12b9e
AR
686 /* Feature checking is currently always like
687 * criu check --feature <feature-name>
688 */
689 char *args[] = { "criu", "check", "--feature", NULL, NULL };
690
691 if ((features & ~FEATURE_MEM_TRACK & ~FEATURE_LAZY_PAGES) != 0) {
692 /* There are feature bits activated we do not understand.
693 * Refusing to answer at all */
694 *features_to_check = 0;
695 return false;
696 }
697
6d61f17d 698 while (current_bit < (sizeof(uint64_t) * 8 - 1)) {
b5b12b9e
AR
699 /* only test requested features */
700 if (!(features & (1ULL << current_bit))) {
701 /* skip this */
702 current_bit++;
703 continue;
704 }
705
706 pid = fork();
707 if (pid < 0) {
708 SYSERROR("fork() failed");
709 *features_to_check = 0;
710 return false;
711 }
712
713 if (pid == 0) {
714 if ((1ULL << current_bit) == FEATURE_MEM_TRACK)
715 /* This is needed for pre-dump support, which
716 * enables pre-copy migration. */
717 args[3] = "mem_dirty_track";
718 else if ((1ULL << current_bit) == FEATURE_LAZY_PAGES)
719 /* CRIU has two checks for userfaultfd support.
720 *
721 * The simpler check is only for 'uffd'. If the
722 * kernel supports userfaultfd without noncoop
723 * then only process can be lazily restored
724 * which do not fork. With 'uffd-noncoop'
725 * it is also possible to lazily restore processes
726 * which do fork. For a container runtime like
727 * LXC checking only for 'uffd' makes not much sense. */
728 args[3] = "uffd-noncoop";
729 else
4f43526d 730 _exit(EXIT_FAILURE);
b5b12b9e
AR
731
732 null_stdfds();
733
734 execvp("criu", args);
735 SYSERROR("Failed to exec \"criu\"");
4f43526d 736 _exit(EXIT_FAILURE);
b5b12b9e
AR
737 }
738
739 ret = wait_for_pid(pid);
740
741 if (ret == -1) {
742 /* It is not known why CRIU failed. Either
743 * CRIU is not available, the feature check
744 * does not exist or the feature is not
745 * supported. */
746 INFO("feature not supported");
747 /* Clear not supported feature bit */
748 features &= ~(1ULL << current_bit);
749 }
750
751 current_bit++;
752 /* no more checks requested; exit check loop */
753 if (!(features & ~((1ULL << current_bit)-1)))
754 break;
755 }
756 if (features != *features_to_check) {
757 *features_to_check = features;
758 return false;
759 }
760 return true;
761}
762
8ba5ced7
TA
763/*
764 * Check to see if the criu version is recent enough for all the features we
765 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
766 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
767 * things potentially before a version is released with a particular feature.
768 *
769 * The intent is that when criu development slows down, we can drop this, but
770 * for now we shouldn't attempt to c/r with versions that we know won't work.
5407e2ab
CB
771 *
772 * Note: If version != NULL criu_version() stores the detected criu version in
773 * version. Allocates memory for version which must be freed by caller.
8ba5ced7 774 */
5407e2ab 775static bool criu_version_ok(char **version)
8ba5ced7
TA
776{
777 int pipes[2];
778 pid_t pid;
779
780 if (pipe(pipes) < 0) {
781 SYSERROR("pipe() failed");
782 return false;
783 }
784
785 pid = fork();
786 if (pid < 0) {
787 SYSERROR("fork() failed");
788 return false;
789 }
790
791 if (pid == 0) {
792 char *args[] = { "criu", "--version", NULL };
755fa453 793 char *path;
8ba5ced7
TA
794 close(pipes[0]);
795
796 close(STDERR_FILENO);
797 if (dup2(pipes[1], STDOUT_FILENO) < 0)
665bb114 798 _exit(EXIT_FAILURE);
8ba5ced7 799
755fa453 800 path = on_path("criu", NULL);
d9b32b09 801 if (!path)
665bb114 802 _exit(EXIT_FAILURE);
d9b32b09 803
755fa453 804 execv(path, args);
665bb114 805 _exit(EXIT_FAILURE);
8ba5ced7
TA
806 } else {
807 FILE *f;
5407e2ab 808 char *tmp;
8ba5ced7
TA
809 int patch;
810
811 close(pipes[1]);
812 if (wait_for_pid(pid) < 0) {
813 close(pipes[0]);
4eae4051 814 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
815 return false;
816 }
817
818 f = fdopen(pipes[0], "r");
819 if (!f) {
820 close(pipes[0]);
821 return false;
822 }
823
5407e2ab
CB
824 tmp = malloc(1024);
825 if (!tmp) {
826 fclose(f);
827 return false;
828 }
829
830 if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1)
8ba5ced7
TA
831 goto version_error;
832
833 if (fgetc(f) != '\n')
834 goto version_error;
835
5407e2ab 836 if (strcmp(tmp, CRIU_VERSION) >= 0)
8ba5ced7
TA
837 goto version_match;
838
5407e2ab 839 if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1)
8ba5ced7
TA
840 goto version_error;
841
842 if (fgetc(f) != '-')
843 goto version_error;
844
845 if (fscanf(f, "%d", &patch) != 1)
846 goto version_error;
847
5407e2ab 848 if (strcmp(tmp, CRIU_GITID_VERSION) < 0)
8ba5ced7
TA
849 goto version_error;
850
851 if (patch < CRIU_GITID_PATCHLEVEL)
852 goto version_error;
853
854version_match:
3158ab5b 855 fclose(f);
5407e2ab
CB
856 if (!version)
857 free(tmp);
858 else
859 *version = tmp;
8ba5ced7
TA
860 return true;
861
862version_error:
3158ab5b 863 fclose(f);
5407e2ab 864 free(tmp);
9f1f54b0 865 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore");
8ba5ced7
TA
866 return false;
867 }
868}
869
e29fe1dd
TA
870/* Check and make sure the container has a configuration that we know CRIU can
871 * dump. */
f1954503 872static bool criu_ok(struct lxc_container *c, char **criu_version)
e29fe1dd
TA
873{
874 struct lxc_list *it;
e29fe1dd
TA
875
876 if (geteuid()) {
9f1f54b0 877 ERROR("Must be root to checkpoint");
e29fe1dd
TA
878 return false;
879 }
880
7177e6b1
DJ
881 if (!criu_version_ok(criu_version))
882 return false;
883
e29fe1dd
TA
884 /* We only know how to restore containers with veth networks. */
885 lxc_list_for_each(it, &c->lxc_conf->network) {
886 struct lxc_netdev *n = it->elem;
65b20221
TA
887 switch(n->type) {
888 case LXC_NET_VETH:
889 case LXC_NET_NONE:
890 case LXC_NET_EMPTY:
e2697330 891 case LXC_NET_MACVLAN:
65b20221
TA
892 break;
893 default:
9f1f54b0 894 ERROR("Found un-dumpable network: %s (%s)", lxc_net_type_to_str(n->type), n->name);
7177e6b1
DJ
895 if (criu_version) {
896 free(*criu_version);
897 *criu_version = NULL;
898 }
e29fe1dd
TA
899 return false;
900 }
901 }
902
e29fe1dd
TA
903 return true;
904}
905
e29fe1dd
TA
906static bool restore_net_info(struct lxc_container *c)
907{
7eab8fc6 908 int ret;
e29fe1dd
TA
909 struct lxc_list *it;
910 bool has_error = true;
911
912 if (container_mem_lock(c))
913 return false;
914
915 lxc_list_for_each(it, &c->lxc_conf->network) {
916 struct lxc_netdev *netdev = it->elem;
917 char template[IFNAMSIZ];
65b20221
TA
918
919 if (netdev->type != LXC_NET_VETH)
920 continue;
921
7eab8fc6
CB
922 ret = snprintf(template, sizeof(template), "vethXXXXXX");
923 if (ret < 0 || ret >= sizeof(template))
924 goto out_unlock;
e29fe1dd 925
de4855a8
CB
926 if (netdev->priv.veth_attr.pair[0] == '\0' &&
927 netdev->priv.veth_attr.veth1[0] == '\0') {
966e9f1f 928 if (!lxc_mkifname(template))
de4855a8
CB
929 goto out_unlock;
930
cbb9c7c7 931 (void)strlcpy(netdev->priv.veth_attr.veth1, template, IFNAMSIZ);
de4855a8 932 }
e29fe1dd
TA
933 }
934
935 has_error = false;
936
937out_unlock:
938 container_mem_unlock(c);
939 return !has_error;
940}
941
1a0e70ac 942/* do_restore never returns, the calling process is used as the monitor process.
5a24adb8 943 * do_restore calls _exit() if it fails.
1a0e70ac 944 */
c33b0338 945static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version)
e29fe1dd 946{
5af9369b 947 int fd, ret;
e29fe1dd 948 pid_t pid;
e29fe1dd 949 struct lxc_handler *handler;
113ebd57 950 int status = 0;
9b1e2e6e 951 int pipes[2] = {-1, -1};
2202afc9 952 struct cgroup_ops *cgroup_ops;
e29fe1dd 953
a7fb6043 954 /* Try to detach from the current controlling tty if it exists.
69e3b3be 955 * Otherwise, lxc_init (via lxc_console) will attach the container's
a7fb6043
TA
956 * console output to the current tty, which is probably not what any
957 * library user wants, and if they do, they can just manually configure
958 * it :)
959 */
960 fd = open("/dev/tty", O_RDWR);
961 if (fd >= 0) {
962 if (ioctl(fd, TIOCNOTTY, NULL) < 0)
963 SYSERROR("couldn't detach from tty");
964 close(fd);
965 }
966
5e5576a4 967 handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false);
e29fe1dd
TA
968 if (!handler)
969 goto out;
970
aa460476
CB
971 if (lxc_init(c->name, handler) < 0)
972 goto out;
973
5a087e05 974 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 975 if (!cgroup_ops)
e29fe1dd 976 goto out_fini_handler;
2202afc9 977 handler->cgroup_ops = cgroup_ops;
e29fe1dd 978
e8b181f5 979 if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
e29fe1dd
TA
980 ERROR("failed creating groups");
981 goto out_fini_handler;
982 }
983
984 if (!restore_net_info(c)) {
985 ERROR("failed restoring network info");
986 goto out_fini_handler;
987 }
988
5af9369b
CB
989 ret = resolve_clone_flags(handler);
990 if (ret < 0) {
6d1400b5 991 SYSERROR("Unsupported clone flag specified");
5af9369b
CB
992 goto out_fini_handler;
993 }
e29fe1dd 994
de31cb57 995 if (pipe2(pipes, O_CLOEXEC) < 0) {
3d9a5c85
TA
996 SYSERROR("pipe() failed");
997 goto out_fini_handler;
998 }
999
e29fe1dd
TA
1000 pid = fork();
1001 if (pid < 0)
1002 goto out_fini_handler;
1003
1004 if (pid == 0) {
1005 struct criu_opts os;
1006 struct lxc_rootfs *rootfs;
4b54788e 1007 int flags;
e29fe1dd 1008
3d9a5c85
TA
1009 close(status_pipe);
1010 status_pipe = -1;
1011
1012 close(pipes[0]);
1013 pipes[0] = -1;
e29fe1dd
TA
1014
1015 if (unshare(CLONE_NEWNS))
1016 goto out_fini_handler;
1017
1018 /* CRIU needs the lxc root bind mounted so that it is the root of some
1019 * mount. */
1020 rootfs = &c->lxc_conf->rootfs;
1021
1022 if (rootfs_is_blockdev(c->lxc_conf)) {
8ce1abc2
CB
1023 if (lxc_setup_rootfs_prepare_root(c->lxc_conf, c->name,
1024 c->config_path) < 0)
e29fe1dd
TA
1025 goto out_fini_handler;
1026 } else {
1027 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
1028 goto out_fini_handler;
1029
1030 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
1031 SYSERROR("remount / to private failed");
1032 goto out_fini_handler;
1033 }
1034
1035 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
1036 rmdir(rootfs->mount);
1037 goto out_fini_handler;
1038 }
1039 }
1040
5af85cb1 1041 os.pipefd = pipes[1];
e29fe1dd 1042 os.action = "restore";
b2c3710f 1043 os.user = opts;
e29fe1dd 1044 os.c = c;
4b54788e 1045 os.console_fd = c->lxc_conf->console.slave;
f1954503 1046 os.criu_version = criu_version;
0ab5703f 1047 os.handler = handler;
4b54788e 1048
97e4f1a9
TA
1049 if (os.console_fd >= 0) {
1050 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
1051 * via --inherit-fd, so we don't want it to close.
1052 */
1053 flags = fcntl(os.console_fd, F_GETFD);
1054 if (flags < 0) {
1055 SYSERROR("F_GETFD failed: %d", os.console_fd);
1056 goto out_fini_handler;
1057 }
4b54788e 1058
97e4f1a9 1059 flags &= ~FD_CLOEXEC;
4b54788e 1060
97e4f1a9
TA
1061 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
1062 SYSERROR("F_SETFD failed");
1063 goto out_fini_handler;
1064 }
4b54788e
TA
1065 }
1066 os.console_name = c->lxc_conf->console.name;
e29fe1dd
TA
1067
1068 /* exec_criu() returning is an error */
e20f46f8 1069 exec_criu(cgroup_ops, c->lxc_conf, &os);
e29fe1dd
TA
1070 umount(rootfs->mount);
1071 rmdir(rootfs->mount);
1072 goto out_fini_handler;
1073 } else {
1074 int ret;
1075 char title[2048];
1076
3d9a5c85
TA
1077 close(pipes[1]);
1078 pipes[1] = -1;
1079
e29fe1dd
TA
1080 pid_t w = waitpid(pid, &status, 0);
1081 if (w == -1) {
1082 SYSERROR("waitpid");
1083 goto out_fini_handler;
1084 }
1085
e29fe1dd 1086 if (WIFEXITED(status)) {
75d219f0
TA
1087 char buf[4096];
1088
e29fe1dd 1089 if (WEXITSTATUS(status)) {
3d9a5c85
TA
1090 int n;
1091
668ba602 1092 n = lxc_read_nointr(pipes[0], buf, sizeof(buf));
3d9a5c85
TA
1093 if (n < 0) {
1094 SYSERROR("failed reading from criu stderr");
1095 goto out_fini_handler;
1096 }
1097
2735dfae
TA
1098 if (n == sizeof(buf))
1099 n--;
3d9a5c85
TA
1100 buf[n] = 0;
1101
9f1f54b0 1102 ERROR("criu process exited %d, output:\n%s", WEXITSTATUS(status), buf);
e29fe1dd
TA
1103 goto out_fini_handler;
1104 } else {
3eba9b49 1105 ret = snprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
75d219f0
TA
1106 if (ret < 0 || ret >= sizeof(buf)) {
1107 ERROR("snprintf'd too many characters: %d", ret);
1108 goto out_fini_handler;
1109 }
1110
1111 FILE *f = fopen(buf, "r");
e29fe1dd 1112 if (!f) {
9f1f54b0 1113 SYSERROR("couldn't read restore's children file %s", buf);
e29fe1dd
TA
1114 goto out_fini_handler;
1115 }
1116
1117 ret = fscanf(f, "%d", (int*) &handler->pid);
1118 fclose(f);
1119 if (ret != 1) {
1120 ERROR("reading restore pid failed");
1121 goto out_fini_handler;
1122 }
1123
f8a41688
TA
1124 if (lxc_set_state(c->name, handler, RUNNING)) {
1125 ERROR("error setting running state after restore");
e29fe1dd 1126 goto out_fini_handler;
f8a41688 1127 }
e29fe1dd
TA
1128 }
1129 } else {
9f1f54b0 1130 ERROR("CRIU was killed with signal %d", WTERMSIG(status));
e29fe1dd
TA
1131 goto out_fini_handler;
1132 }
1133
3d9a5c85
TA
1134 close(pipes[0]);
1135
614be9bc 1136 ret = lxc_write_nointr(status_pipe, &status, sizeof(status));
f3886023
TA
1137 close(status_pipe);
1138 status_pipe = -1;
1139
1140 if (sizeof(status) != ret) {
1141 SYSERROR("failed to write all of status");
1142 goto out_fini_handler;
1143 }
1144
e29fe1dd
TA
1145 /*
1146 * See comment in lxcapi_start; we don't care if these
1147 * fail because it's just a beauty thing. We just
1148 * assign the return here to silence potential.
1149 */
1150 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
223e30c1
CB
1151 if (ret < 0 || (size_t)ret >= sizeof(title))
1152 INFO("Setting truncated process name");
1153
e29fe1dd 1154 ret = setproctitle(title);
223e30c1
CB
1155 if (ret < 0)
1156 INFO("Failed to set process name");
e29fe1dd
TA
1157
1158 ret = lxc_poll(c->name, handler);
1159 if (ret)
1160 lxc_abort(c->name, handler);
1161 lxc_fini(c->name, handler);
5a24adb8 1162 _exit(ret);
e29fe1dd
TA
1163 }
1164
1165out_fini_handler:
3d9a5c85
TA
1166 if (pipes[0] >= 0)
1167 close(pipes[0]);
1168 if (pipes[1] >= 0)
1169 close(pipes[1]);
1170
e29fe1dd
TA
1171 lxc_fini(c->name, handler);
1172
1173out:
3d9a5c85 1174 if (status_pipe >= 0) {
f3886023
TA
1175 /* ensure getting here was a failure, e.g. if we failed to
1176 * parse the child pid or something, even after a successful
1177 * restore
1178 */
1179 if (!status)
1180 status = 1;
113ebd57 1181
614be9bc 1182 if (lxc_write_nointr(status_pipe, &status, sizeof(status)) != sizeof(status))
e29fe1dd 1183 SYSERROR("writing status failed");
3d9a5c85 1184 close(status_pipe);
e29fe1dd
TA
1185 }
1186
5a24adb8 1187 _exit(EXIT_FAILURE);
e29fe1dd 1188}
aef3d51e 1189
4b54788e
TA
1190static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
1191{
1192 FILE *f;
1193 char path[PATH_MAX];
1194 int ret;
1195 struct stat sb;
1196
1197 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
1198 tty_id[0] = 0;
1199 return 0;
1200 }
1201
1202 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
1203 if (ret < 0 || ret >= sizeof(path)) {
f510330c 1204 ERROR("snprintf'd too many characters: %d", ret);
4b54788e
TA
1205 return -1;
1206 }
1207
1208 ret = stat(path, &sb);
1209 if (ret < 0) {
1210 SYSERROR("stat of %s failed", path);
1211 return -1;
1212 }
1213
1214 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
1215 if (ret < 0 || ret >= sizeof(path)) {
1216 ERROR("snprintf'd too many characters: %d", ret);
1217 return -1;
1218 }
1219
f03280a7
TA
1220 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
1221 (long long unsigned) sb.st_rdev,
1222 (long long unsigned) sb.st_dev);
4b54788e
TA
1223 if (ret < 0 || ret >= sizeof(path)) {
1224 ERROR("snprintf'd too many characters: %d", ret);
1225 return -1;
1226 }
1227
1228 f = fopen(path, "w");
1229 if (!f) {
1230 SYSERROR("failed to open %s", path);
1231 return -1;
1232 }
1233
1234 ret = fprintf(f, "%s", tty_id);
1235 fclose(f);
1236 if (ret < 0)
1237 SYSERROR("failed to write to %s", path);
1238 return ret;
1239}
1240
aef3d51e 1241/* do one of either predump or a regular dump */
b2c3710f 1242static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
aef3d51e 1243{
0e4adc1a 1244 int ret;
aef3d51e 1245 pid_t pid;
5af85cb1 1246 int criuout[2];
0e4adc1a 1247 char *criu_version = NULL;
aef3d51e 1248
f1954503 1249 if (!criu_ok(c, &criu_version))
aef3d51e
TA
1250 return false;
1251
0e4adc1a
CB
1252 ret = pipe(criuout);
1253 if (ret < 0) {
5af85cb1 1254 SYSERROR("pipe() failed");
7177e6b1 1255 free(criu_version);
aef3d51e 1256 return false;
5af85cb1
TA
1257 }
1258
1259 if (mkdir_p(opts->directory, 0700) < 0)
1260 goto fail;
aef3d51e
TA
1261
1262 pid = fork();
1263 if (pid < 0) {
1264 SYSERROR("fork failed");
5af85cb1 1265 goto fail;
aef3d51e
TA
1266 }
1267
1268 if (pid == 0) {
1269 struct criu_opts os;
2202afc9 1270 struct cgroup_ops *cgroup_ops;
0ab5703f 1271
5af85cb1
TA
1272 close(criuout[0]);
1273
5a087e05 1274 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 1275 if (!cgroup_ops) {
0ab5703f 1276 ERROR("failed to cgroup_init()");
7211378b 1277 _exit(EXIT_FAILURE);
2202afc9 1278 return -1;
0ab5703f 1279 }
aef3d51e 1280
5af85cb1 1281 os.pipefd = criuout[1];
aef3d51e 1282 os.action = mode;
b2c3710f 1283 os.user = opts;
aef3d51e 1284 os.c = c;
4b54788e 1285 os.console_name = c->lxc_conf->console.path;
f1954503 1286 os.criu_version = criu_version;
e20f46f8 1287 os.handler = NULL;
74eb576c 1288
0e4adc1a
CB
1289 ret = save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id));
1290 if (ret < 0) {
1291 free(criu_version);
7211378b 1292 _exit(EXIT_FAILURE);
0e4adc1a 1293 }
aef3d51e
TA
1294
1295 /* exec_criu() returning is an error */
e20f46f8 1296 exec_criu(cgroup_ops, c->lxc_conf, &os);
0e4adc1a 1297 free(criu_version);
7211378b 1298 _exit(EXIT_FAILURE);
aef3d51e
TA
1299 } else {
1300 int status;
5af85cb1
TA
1301 ssize_t n;
1302 char buf[4096];
1303 bool ret;
1304
1305 close(criuout[1]);
1306
aef3d51e
TA
1307 pid_t w = waitpid(pid, &status, 0);
1308 if (w == -1) {
1309 SYSERROR("waitpid");
5af85cb1 1310 close(criuout[0]);
7177e6b1 1311 free(criu_version);
aef3d51e
TA
1312 return false;
1313 }
1314
668ba602 1315 n = lxc_read_nointr(criuout[0], buf, sizeof(buf));
5af85cb1
TA
1316 close(criuout[0]);
1317 if (n < 0) {
1318 SYSERROR("read");
1319 n = 0;
1320 }
40229e95 1321
1322 if (n == sizeof(buf))
1323 buf[n-1] = 0;
1324 else
1325 buf[n] = 0;
5af85cb1 1326
aef3d51e
TA
1327 if (WIFEXITED(status)) {
1328 if (WEXITSTATUS(status)) {
9f1f54b0 1329 ERROR("dump failed with %d", WEXITSTATUS(status));
5af85cb1
TA
1330 ret = false;
1331 } else {
1332 ret = true;
aef3d51e 1333 }
aef3d51e 1334 } else if (WIFSIGNALED(status)) {
9f1f54b0 1335 ERROR("dump signaled with %d", WTERMSIG(status));
5af85cb1 1336 ret = false;
aef3d51e 1337 } else {
9f1f54b0 1338 ERROR("unknown dump exit %d", status);
5af85cb1 1339 ret = false;
aef3d51e 1340 }
5af85cb1
TA
1341
1342 if (!ret)
1343 ERROR("criu output: %s", buf);
7177e6b1
DJ
1344
1345 free(criu_version);
5af85cb1 1346 return ret;
aef3d51e 1347 }
5af85cb1
TA
1348fail:
1349 close(criuout[0]);
1350 close(criuout[1]);
1351 rmdir(opts->directory);
0e4adc1a 1352 free(criu_version);
5af85cb1 1353 return false;
aef3d51e
TA
1354}
1355
/*
 * Run do_dump() for container c in "pre-dump" mode.
 *
 * Returns whatever do_dump() returns.
 */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped;

	dumped = do_dump(c, "pre-dump", opts);
	return dumped;
}
1360
b2c3710f 1361bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
aef3d51e
TA
1362{
1363 char path[PATH_MAX];
1364 int ret;
1365
b2c3710f 1366 ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
aef3d51e
TA
1367 if (ret < 0 || ret >= sizeof(path))
1368 return false;
1369
1370 if (access(path, F_OK) == 0) {
9f1f54b0 1371 ERROR("please use a fresh directory for the dump directory");
aef3d51e
TA
1372 return false;
1373 }
1374
b2c3710f 1375 return do_dump(c, "dump", opts);
aef3d51e
TA
1376}
1377
/*
 * Restore container c using the given migration options.
 *
 * Requires an effective uid of 0. Forks a child that runs do_restore() with
 * the write end of a pipe; the parent reads one int-sized wait()-style status
 * word from the read end and interprets it with WIFEXITED()/WEXITSTATUS().
 *
 * Returns true if a full status word was read and it reports a normal exit
 * with code 0. In every other case the child is waited for (when it was
 * forked) and false is returned.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	pid_t pid;
	int status;
	ssize_t nread;
	int pipefd[2];
	char *criu_version = NULL;

	if (geteuid()) {
		ERROR("Must be root to restore");
		return false;
	}

	if (pipe(pipefd)) {
		SYSERROR("failed to create pipe");
		return false;
	}

	if (!criu_ok(c, &criu_version)) {
		close(pipefd[0]);
		close(pipefd[1]);
		return false;
	}

	pid = fork();
	if (pid < 0) {
		/* log before close() so errno still belongs to fork() */
		SYSERROR("fork failed");
		close(pipefd[0]);
		close(pipefd[1]);
		free(criu_version);
		return false;
	}

	if (pid == 0) {
		close(pipefd[0]);
		/* this never returns */
		do_restore(c, pipefd[1], opts, criu_version);
		/* defensive: the child must never continue into the parent's path */
		_exit(EXIT_FAILURE);
	}

	close(pipefd[1]);
	free(criu_version);

	nread = lxc_read_nointr(pipefd[0], &status, sizeof(status));
	close(pipefd[0]);
	if (nread != (ssize_t)sizeof(status)) {
		ERROR("reading status from pipe failed");
		goto err_wait;
	}

	/* If the criu process was killed or exited nonzero, wait() for the
	 * handler, since the restore process died. Otherwise, we don't need to
	 * wait, since the child becomes the monitor process.
	 */
	if (!WIFEXITED(status) || WEXITSTATUS(status))
		goto err_wait;
	return true;

err_wait:
	if (wait_for_pid(pid))
		ERROR("restore process died");
	return false;
}