]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
Merge pull request #3235 from xinhua9569/master
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
d38dd64a
CB
2
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
9b945f13 6#include <inttypes.h>
e29fe1dd
TA
7#include <linux/limits.h>
8#include <sched.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/mount.h>
13#include <sys/types.h>
14#include <sys/wait.h>
15#include <unistd.h>
16
e29fe1dd 17#include "cgroup.h"
dc259399 18#include "commands.h"
d38dd64a
CB
19#include "conf.h"
20#include "config.h"
e29fe1dd
TA
21#include "criu.h"
22#include "log.h"
23#include "lxc.h"
24#include "lxclock.h"
25#include "network.h"
28d832c4 26#include "storage.h"
e8f764b6 27#include "syscall_wrappers.h"
e29fe1dd
TA
28#include "utils.h"
29
5f4e44a2
TA
30#if IS_BIONIC
31#include <../include/lxcmntent.h>
32#else
33#include <mntent.h>
34#endif
35
9de31d5a
CB
36#ifndef HAVE_STRLCPY
37#include "include/strlcpy.h"
38#endif
39
c33b0338 40#define CRIU_VERSION "2.0"
73d46752
TA
41
42#define CRIU_GITID_VERSION "2.0"
43#define CRIU_GITID_PATCHLEVEL 0
44
f1954503 45#define CRIU_IN_FLIGHT_SUPPORT "2.4"
46c8ffd5 46#define CRIU_EXTERNAL_NOT_VETH "2.8"
f1954503 47
ac2cecc4 48lxc_log_define(criu, lxc);
e29fe1dd 49
/* Everything exec_criu() needs in order to build and run one criu command. */
struct criu_opts {
	/* the thing to hook to stdout and stderr for logging */
	int pipefd;

	/* The type of criu invocation, one of "dump", "pre-dump" or "restore" */
	char *action;

	/* the user-provided migrate options relevant to this action */
	struct migrate_opts *user;

	/* The container being checkpointed or restored */
	struct lxc_container *c;

	/* dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]";
	 * an empty string means no tty arguments are passed to criu.
	 */
	char tty_id[32];

	/* restore: the handler set up by the restoring (monitor) process */
	struct lxc_handler *handler;

	/* restore: the console fd handed to criu via --inherit-fd; a negative
	 * value means there is none.
	 */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console.path = "none".
	 */
	char *console_name;

	/* The detected version of criu */
	char *criu_version;
};
79
4b54788e
TA
80static int load_tty_major_minor(char *directory, char *output, int len)
81{
82 FILE *f;
83 char path[PATH_MAX];
84 int ret;
85
86 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
87 if (ret < 0 || ret >= sizeof(path)) {
f510330c 88 ERROR("snprintf'd too many characters: %d", ret);
4b54788e
TA
89 return -1;
90 }
91
92 f = fopen(path, "r");
93 if (!f) {
94 /* This means we're coming from a liblxc which didn't export
3aed4934
CB
95 * the tty info. In this case they had to have lxc.console.path
96 * = * none, so there's no problem restoring.
4b54788e
TA
97 */
98 if (errno == ENOENT)
99 return 0;
100
101 SYSERROR("couldn't open %s", path);
102 return -1;
103 }
104
105 if (!fgets(output, len, f)) {
106 fclose(f);
107 SYSERROR("couldn't read %s", path);
108 return -1;
109 }
110
111 fclose(f);
112 return 0;
113}
114
74ad3607
FB
/*
 * Compare two dotted version strings of the form "major[.minor[.patch]]".
 *
 * Components that are absent from a string count as 0, so "2.8" and "2.8.0"
 * compare equal. Anything following the parsed components is ignored.
 *
 * Returns 1 if v1 is newer than v2, 0 if both denote the same version and -1
 * if v1 is older than v2. -1 is also returned when either argument is NULL or
 * does not start with a number, so callers testing for ">= 0" treat an
 * unparsable version as too old.
 */
static int cmp_version(const char *v1, const char *v2)
{
	int oct_v1[3] = {0, 0, 0}, oct_v2[3] = {0, 0, 0};
	int i;

	if (!v1 || !v2)
		return -1;

	if (sscanf(v1, "%d.%d.%d", &oct_v1[0], &oct_v1[1], &oct_v1[2]) < 1)
		return -1;

	if (sscanf(v2, "%d.%d.%d", &oct_v2[0], &oct_v2[1], &oct_v2[2]) < 1)
		return -1;

	/* The first differing component, most significant first, decides. */
	for (i = 0; i < 3; i++) {
		if (oct_v1[i] > oct_v2[i])
			return 1;

		if (oct_v1[i] < oct_v2[i])
			return -1;
	}

	return 0;
}
155
e20f46f8
AR
156static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
157 struct criu_opts *opts)
e29fe1dd
TA
158{
159 char **argv, log[PATH_MAX];
19d1509c 160 int static_args = 23, argc = 0, i, ret;
e29fe1dd
TA
161 int netnr = 0;
162 struct lxc_list *it;
5f4e44a2
TA
163 FILE *mnts;
164 struct mntent mntent;
e29fe1dd 165
0e4be3cf 166 char buf[4096], ttys[32];
a17fa3c0 167 size_t pos;
5af85cb1 168
e9195050
TA
169 /* If we are currently in a cgroup /foo/bar, and the container is in a
170 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
171 * container has an open fd that points to one of the cgroup files
172 * (systemd always opens its "root" cgroup). So, let's escape to the
173 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
174 * see all cgroups.
175 */
e20f46f8 176 if (!cgroup_ops->escape(cgroup_ops, conf)) {
e9195050
TA
177 ERROR("failed to escape cgroups");
178 return;
179 }
180
e29fe1dd 181 /* The command line always looks like:
19d1509c 182 * criu $(action) --tcp-established --file-locks --link-remap \
5f178bc9 183 * --manage-cgroups=full --action-script foo.sh -D $(directory) \
e29fe1dd
TA
184 * -o $(directory)/$(action).log --ext-mount-map auto
185 * --enable-external-sharing --enable-external-masters
4b54788e 186 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
e29fe1dd
TA
187 * +1 for final NULL */
188
aef3d51e 189 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399
TA
190 /* -t pid --freeze-cgroup /lxc/ct */
191 static_args += 4;
e29fe1dd 192
aef3d51e 193 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
b2c3710f 194 if (opts->user->predump_dir)
aef3d51e
TA
195 static_args += 2;
196
74eb576c 197 /* --page-server --address <address> --port <port> */
b2c3710f 198 if (opts->user->pageserver_address && opts->user->pageserver_port)
74eb576c
NE
199 static_args += 5;
200
aef3d51e 201 /* --leave-running (only for final dump) */
b2c3710f 202 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd 203 static_args++;
4b54788e
TA
204
205 /* --external tty[88,4] */
206 if (opts->tty_id[0])
207 static_args += 2;
19d1509c
TA
208
209 /* --force-irmap */
210 if (!opts->user->preserves_inodes)
211 static_args++;
b2b7b0d2
TA
212
213 /* --ghost-limit 1024 */
214 if (opts->user->ghost_limit)
215 static_args += 2;
e29fe1dd
TA
216 } else if (strcmp(opts->action, "restore") == 0) {
217 /* --root $(lxc_mount_point) --restore-detached
0ab5703f 218 * --restore-sibling
13389b29
TA
219 * --lsm-profile apparmor:whatever
220 */
0ab5703f 221 static_args += 6;
4b54788e 222
0e4be3cf
CB
223 ttys[0] = 0;
224 if (load_tty_major_minor(opts->user->directory, ttys, sizeof(ttys)))
4b54788e
TA
225 return;
226
227 /* --inherit-fd fd[%d]:tty[%s] */
0e4be3cf 228 if (ttys[0])
4b54788e 229 static_args += 2;
e29fe1dd
TA
230 } else {
231 return;
232 }
233
2202afc9
CB
234 if (cgroup_ops->num_hierarchies(cgroup_ops) > 0)
235 static_args += 2 * cgroup_ops->num_hierarchies(cgroup_ops);
0ab5703f 236
b2c3710f 237 if (opts->user->verbose)
e29fe1dd
TA
238 static_args++;
239
b9ee6643
TA
240 if (opts->user->action_script)
241 static_args += 2;
242
5f4e44a2
TA
243 static_args += 2 * lxc_list_len(&opts->c->lxc_conf->mount_list);
244
b2c3710f 245 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
e29fe1dd 246 if (ret < 0 || ret >= PATH_MAX) {
9f1f54b0 247 ERROR("logfile name too long");
e29fe1dd
TA
248 return;
249 }
250
251 argv = malloc(static_args * sizeof(*argv));
252 if (!argv)
253 return;
254
255 memset(argv, 0, static_args * sizeof(*argv));
256
257#define DECLARE_ARG(arg) \
258 do { \
259 if (arg == NULL) { \
260 ERROR("Got NULL argument for criu"); \
261 goto err; \
262 } \
263 argv[argc++] = strdup(arg); \
264 if (!argv[argc-1]) \
265 goto err; \
266 } while (0)
267
268 argv[argc++] = on_path("criu", NULL);
269 if (!argv[argc-1]) {
9f1f54b0 270 ERROR("Couldn't find criu binary");
e29fe1dd
TA
271 goto err;
272 }
273
274 DECLARE_ARG(opts->action);
275 DECLARE_ARG("--tcp-established");
276 DECLARE_ARG("--file-locks");
277 DECLARE_ARG("--link-remap");
0a5fc6df 278 DECLARE_ARG("--manage-cgroups=full");
e29fe1dd
TA
279 DECLARE_ARG("--ext-mount-map");
280 DECLARE_ARG("auto");
281 DECLARE_ARG("--enable-external-sharing");
282 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
283 DECLARE_ARG("--enable-fs");
284 DECLARE_ARG("hugetlbfs");
5b454329
TA
285 DECLARE_ARG("--enable-fs");
286 DECLARE_ARG("tracefs");
e29fe1dd 287 DECLARE_ARG("-D");
b2c3710f 288 DECLARE_ARG(opts->user->directory);
e29fe1dd
TA
289 DECLARE_ARG("-o");
290 DECLARE_ARG(log);
291
2202afc9 292 for (i = 0; i < cgroup_ops->num_hierarchies(cgroup_ops); i++) {
0ab5703f 293 char **controllers = NULL, *fullname;
31b204e4 294 char *path, *tmp;
0ab5703f 295
2202afc9 296 if (!cgroup_ops->get_hierarchies(cgroup_ops, i, &controllers)) {
0ab5703f
TA
297 ERROR("failed to get hierarchy %d", i);
298 goto err;
299 }
300
301 /* if we are in a dump, we have to ask the monitor process what
302 * the right cgroup is. if this is a restore, we can just use
303 * the handler the restore task created.
304 */
305 if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
306 path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
307 if (!path) {
308 ERROR("failed to get cgroup path for %s", controllers[0]);
309 goto err;
310 }
311 } else {
312 const char *p;
313
2202afc9 314 p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
0ab5703f
TA
315 if (!p) {
316 ERROR("failed to get cgroup path for %s", controllers[0]);
317 goto err;
318 }
319
320 path = strdup(p);
321 if (!path) {
322 ERROR("strdup failed");
323 goto err;
324 }
325 }
326
31b204e4
CB
327 tmp = lxc_deslashify(path);
328 if (!tmp) {
329 ERROR("Failed to remove extraneous slashes from \"%s\"",
330 path);
0ab5703f
TA
331 free(path);
332 goto err;
333 }
31b204e4
CB
334 free(path);
335 path = tmp;
0ab5703f
TA
336
337 fullname = lxc_string_join(",", (const char **) controllers, false);
338 if (!fullname) {
339 ERROR("failed to join controllers");
340 free(path);
341 goto err;
342 }
343
344 ret = sprintf(buf, "%s:%s", fullname, path);
345 free(path);
346 free(fullname);
347 if (ret < 0 || ret >= sizeof(buf)) {
348 ERROR("sprintf of cgroup root arg failed");
349 goto err;
350 }
351
352 DECLARE_ARG("--cgroup-root");
353 DECLARE_ARG(buf);
354 }
355
b2c3710f 356 if (opts->user->verbose)
582cb478 357 DECLARE_ARG("-v4");
e29fe1dd 358
b9ee6643
TA
359 if (opts->user->action_script) {
360 DECLARE_ARG("--action-script");
361 DECLARE_ARG(opts->user->action_script);
362 }
363
1800f924
WB
364 mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list,
365 opts->c->lxc_conf->lsm_aa_allow_nesting);
5f4e44a2
TA
366 if (!mnts)
367 goto err;
368
369 while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
d07545c7 370 char *mntdata;
5f4e44a2 371 char arg[2 * PATH_MAX + 2];
19d2422b
TA
372 unsigned long flags;
373
374 if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0)
375 goto err;
376
377 free(mntdata);
378
379 /* only add --ext-mount-map for actual bind mounts */
380 if (!(flags & MS_BIND))
381 continue;
5f4e44a2 382
d07545c7
CB
383 if (strcmp(opts->action, "dump") == 0)
384 ret = snprintf(arg, sizeof(arg), "/%s:%s",
385 mntent.mnt_dir, mntent.mnt_dir);
386 else
387 ret = snprintf(arg, sizeof(arg), "%s:%s",
388 mntent.mnt_dir, mntent.mnt_fsname);
5f4e44a2
TA
389 if (ret < 0 || ret >= sizeof(arg)) {
390 fclose(mnts);
391 ERROR("snprintf failed");
392 goto err;
393 }
394
395 DECLARE_ARG("--ext-mount-map");
396 DECLARE_ARG(arg);
397 }
398 fclose(mnts);
399
aef3d51e 400 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399 401 char pid[32], *freezer_relative;
e29fe1dd
TA
402
403 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
404 goto err;
405
406 DECLARE_ARG("-t");
407 DECLARE_ARG(pid);
dc259399
TA
408
409 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
410 opts->c->config_path,
411 "freezer");
412 if (!freezer_relative) {
413 ERROR("failed getting freezer path");
414 goto err;
415 }
416
417 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
418 if (ret < 0 || ret >= sizeof(log))
419 goto err;
420
f1954503
AR
421 if (!opts->user->disable_skip_in_flight &&
422 strcmp(opts->criu_version, CRIU_IN_FLIGHT_SUPPORT) >= 0)
423 DECLARE_ARG("--skip-in-flight");
424
dc259399
TA
425 DECLARE_ARG("--freeze-cgroup");
426 DECLARE_ARG(log);
427
4b54788e 428 if (opts->tty_id[0]) {
36d2096c
TA
429 DECLARE_ARG("--ext-mount-map");
430 DECLARE_ARG("/dev/console:console");
431
4b54788e
TA
432 DECLARE_ARG("--external");
433 DECLARE_ARG(opts->tty_id);
434 }
435
b2c3710f 436 if (opts->user->predump_dir) {
aef3d51e 437 DECLARE_ARG("--prev-images-dir");
b2c3710f 438 DECLARE_ARG(opts->user->predump_dir);
9f99a33f 439 DECLARE_ARG("--track-mem");
74eb576c 440 }
4c0c0319 441
b2c3710f 442 if (opts->user->pageserver_address && opts->user->pageserver_port) {
74eb576c
NE
443 DECLARE_ARG("--page-server");
444 DECLARE_ARG("--address");
b2c3710f 445 DECLARE_ARG(opts->user->pageserver_address);
74eb576c 446 DECLARE_ARG("--port");
b2c3710f 447 DECLARE_ARG(opts->user->pageserver_port);
74eb576c 448 }
aef3d51e 449
19d1509c
TA
450 if (!opts->user->preserves_inodes)
451 DECLARE_ARG("--force-irmap");
452
b2b7b0d2
TA
453 if (opts->user->ghost_limit) {
454 char ghost_limit[32];
455
9b945f13 456 ret = sprintf(ghost_limit, "%"PRIu64, opts->user->ghost_limit);
b2b7b0d2 457 if (ret < 0 || ret >= sizeof(ghost_limit)) {
9b945f13 458 ERROR("failed to print ghost limit %"PRIu64, opts->user->ghost_limit);
b2b7b0d2
TA
459 goto err;
460 }
461
462 DECLARE_ARG("--ghost-limit");
463 DECLARE_ARG(ghost_limit);
464 }
465
aef3d51e 466 /* only for final dump */
b2c3710f 467 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd
TA
468 DECLARE_ARG("--leave-running");
469 } else if (strcmp(opts->action, "restore") == 0) {
470 void *m;
471 int additional;
13389b29 472 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
e29fe1dd
TA
473
474 DECLARE_ARG("--root");
475 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
476 DECLARE_ARG("--restore-detached");
477 DECLARE_ARG("--restore-sibling");
e29fe1dd 478
0e4be3cf 479 if (ttys[0]) {
97e4f1a9 480 if (opts->console_fd < 0) {
3aed4934 481 ERROR("lxc.console.path configured on source host but not target");
97e4f1a9
TA
482 goto err;
483 }
484
0e4be3cf 485 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, ttys);
4b54788e
TA
486 if (ret < 0 || ret >= sizeof(buf))
487 goto err;
488
489 DECLARE_ARG("--inherit-fd");
490 DECLARE_ARG(buf);
491 }
492 if (opts->console_name) {
493 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
494 SYSERROR("sprintf'd too many bytes");
495 }
496 DECLARE_ARG("--ext-mount-map");
497 DECLARE_ARG(buf);
498 }
499
13389b29
TA
500 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
501
502 if (lxc_conf->lsm_aa_profile)
503 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
504 else
505 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
506
507 if (ret < 0 || ret >= sizeof(buf))
508 goto err;
509
510 DECLARE_ARG("--lsm-profile");
511 DECLARE_ARG(buf);
512 }
513
e29fe1dd
TA
514 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
515
fa071249
TA
516 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
517 if (!m)
518 goto err;
e29fe1dd
TA
519 argv = m;
520
521 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
9de31d5a 522 size_t retlen;
e29fe1dd
TA
523 char eth[128], *veth;
524 struct lxc_netdev *n = it->elem;
46c8ffd5
AR
525 bool external_not_veth;
526
74ad3607 527 if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
46c8ffd5
AR
528 /* Since criu version 2.8 the usage of --veth-pair
529 * has been deprecated:
530 * git tag --contains f2037e6d3445fc400
531 * v2.8 */
532 external_not_veth = true;
533 } else {
534 external_not_veth = false;
535 }
e29fe1dd 536
42277b1c 537 if (n->name[0] != '\0') {
9de31d5a
CB
538 retlen = strlcpy(eth, n->name, sizeof(eth));
539 if (retlen >= sizeof(eth))
e29fe1dd 540 goto err;
796a109d
TA
541 } else {
542 ret = snprintf(eth, sizeof(eth), "eth%d", netnr);
543 if (ret < 0 || ret >= sizeof(eth))
544 goto err;
545 }
e29fe1dd 546
e2697330
TA
547 switch (n->type) {
548 case LXC_NET_VETH:
549 veth = n->priv.veth_attr.pair;
ea7f6b29
CB
550 if (veth[0] == '\0')
551 veth = n->priv.veth_attr.veth1;
e29fe1dd 552
de4855a8 553 if (n->link[0] != '\0') {
46c8ffd5 554 if (external_not_veth)
d07545c7
CB
555 ret = snprintf(buf, sizeof(buf),
556 "veth[%s]:%s@%s",
557 eth, veth,
558 n->link);
46c8ffd5 559 else
d07545c7
CB
560 ret = snprintf(buf, sizeof(buf),
561 "%s=%s@%s", eth,
562 veth, n->link);
46c8ffd5
AR
563 } else {
564 if (external_not_veth)
d07545c7
CB
565 ret = snprintf(buf, sizeof(buf),
566 "veth[%s]:%s",
567 eth, veth);
46c8ffd5 568 else
d07545c7
CB
569 ret = snprintf(buf, sizeof(buf),
570 "%s=%s", eth,
571 veth);
46c8ffd5 572 }
e2697330
TA
573 if (ret < 0 || ret >= sizeof(buf))
574 goto err;
575 break;
576 case LXC_NET_MACVLAN:
de4855a8 577 if (n->link[0] == '\0') {
9f1f54b0 578 ERROR("no host interface for macvlan %s", n->name);
e2697330
TA
579 goto err;
580 }
581
582 ret = snprintf(buf, sizeof(buf), "macvlan[%s]:%s", eth, n->link);
583 if (ret < 0 || ret >= sizeof(buf))
584 goto err;
585 break;
586 case LXC_NET_NONE:
587 case LXC_NET_EMPTY:
588 break;
589 default:
590 /* we have screened for this earlier... */
9f1f54b0 591 ERROR("unexpected network type %d", n->type);
e29fe1dd 592 goto err;
e2697330 593 }
e29fe1dd 594
46c8ffd5
AR
595 if (external_not_veth)
596 DECLARE_ARG("--external");
597 else
598 DECLARE_ARG("--veth-pair");
e29fe1dd 599 DECLARE_ARG(buf);
2f3fbc6b 600 netnr++;
e29fe1dd
TA
601 }
602
603 }
604
605 argv[argc] = NULL;
606
cf4b07a5 607 buf[0] = 0;
a17fa3c0 608 pos = 0;
72a30576 609
cf4b07a5 610 for (i = 0; argv[i]; i++) {
72a30576
NE
611 ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]);
612 if (ret < 0 || ret >= sizeof(buf) - pos)
613 goto err;
614 else
615 pos += ret;
cf4b07a5
TA
616 }
617
618 INFO("execing: %s", buf);
619
5af85cb1
TA
620 /* before criu inits its log, it sometimes prints things to stdout/err;
621 * let's be sure we capture that.
622 */
623 if (dup2(opts->pipefd, STDOUT_FILENO) < 0) {
624 SYSERROR("dup2 stdout failed");
625 goto err;
626 }
627
628 if (dup2(opts->pipefd, STDERR_FILENO) < 0) {
629 SYSERROR("dup2 stderr failed");
630 goto err;
631 }
632
633 close(opts->pipefd);
634
e29fe1dd
TA
635#undef DECLARE_ARG
636 execv(argv[0], argv);
637err:
e29fe1dd
TA
638 for (i = 0; argv[i]; i++)
639 free(argv[i]);
640 free(argv);
641}
642
b5b12b9e
AR
643/*
644 * Function to check if the checks activated in 'features_to_check' are
645 * available with the current architecture/kernel/criu combination.
646 *
647 * Parameter features_to_check is a bit mask of all features that should be
648 * checked (see feature check defines in lxc/lxccontainer.h).
649 *
650 * If the return value is true, all requested features are supported. If
651 * the return value is false the features_to_check parameter is updated
652 * to reflect which features are available. '0' means no feature but
653 * also that something went totally wrong.
654 *
655 * Some of the code flow of criu_version_ok() is duplicated and maybe it
656 * is a good candidate for refactoring.
657 */
658bool __criu_check_feature(uint64_t *features_to_check)
659{
660 pid_t pid;
661 uint64_t current_bit = 0;
662 int ret;
fca23691 663 uint64_t features = *features_to_check;
b5b12b9e
AR
664 /* Feature checking is currently always like
665 * criu check --feature <feature-name>
666 */
667 char *args[] = { "criu", "check", "--feature", NULL, NULL };
668
669 if ((features & ~FEATURE_MEM_TRACK & ~FEATURE_LAZY_PAGES) != 0) {
670 /* There are feature bits activated we do not understand.
671 * Refusing to answer at all */
672 *features_to_check = 0;
673 return false;
674 }
675
6d61f17d 676 while (current_bit < (sizeof(uint64_t) * 8 - 1)) {
b5b12b9e
AR
677 /* only test requested features */
678 if (!(features & (1ULL << current_bit))) {
679 /* skip this */
680 current_bit++;
681 continue;
682 }
683
684 pid = fork();
685 if (pid < 0) {
686 SYSERROR("fork() failed");
687 *features_to_check = 0;
688 return false;
689 }
690
691 if (pid == 0) {
692 if ((1ULL << current_bit) == FEATURE_MEM_TRACK)
693 /* This is needed for pre-dump support, which
694 * enables pre-copy migration. */
695 args[3] = "mem_dirty_track";
696 else if ((1ULL << current_bit) == FEATURE_LAZY_PAGES)
697 /* CRIU has two checks for userfaultfd support.
698 *
699 * The simpler check is only for 'uffd'. If the
700 * kernel supports userfaultfd without noncoop
701 * then only process can be lazily restored
702 * which do not fork. With 'uffd-noncoop'
703 * it is also possible to lazily restore processes
704 * which do fork. For a container runtime like
705 * LXC checking only for 'uffd' makes not much sense. */
706 args[3] = "uffd-noncoop";
707 else
4f43526d 708 _exit(EXIT_FAILURE);
b5b12b9e
AR
709
710 null_stdfds();
711
712 execvp("criu", args);
713 SYSERROR("Failed to exec \"criu\"");
4f43526d 714 _exit(EXIT_FAILURE);
b5b12b9e
AR
715 }
716
717 ret = wait_for_pid(pid);
718
719 if (ret == -1) {
720 /* It is not known why CRIU failed. Either
721 * CRIU is not available, the feature check
722 * does not exist or the feature is not
723 * supported. */
724 INFO("feature not supported");
725 /* Clear not supported feature bit */
726 features &= ~(1ULL << current_bit);
727 }
728
729 current_bit++;
730 /* no more checks requested; exit check loop */
731 if (!(features & ~((1ULL << current_bit)-1)))
732 break;
733 }
734 if (features != *features_to_check) {
735 *features_to_check = features;
736 return false;
737 }
738 return true;
739}
740
8ba5ced7
TA
741/*
742 * Check to see if the criu version is recent enough for all the features we
743 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
744 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
745 * things potentially before a version is released with a particular feature.
746 *
747 * The intent is that when criu development slows down, we can drop this, but
748 * for now we shouldn't attempt to c/r with versions that we know won't work.
5407e2ab
CB
749 *
750 * Note: If version != NULL criu_version() stores the detected criu version in
751 * version. Allocates memory for version which must be freed by caller.
8ba5ced7 752 */
5407e2ab 753static bool criu_version_ok(char **version)
8ba5ced7
TA
754{
755 int pipes[2];
756 pid_t pid;
757
758 if (pipe(pipes) < 0) {
759 SYSERROR("pipe() failed");
760 return false;
761 }
762
763 pid = fork();
764 if (pid < 0) {
765 SYSERROR("fork() failed");
766 return false;
767 }
768
769 if (pid == 0) {
770 char *args[] = { "criu", "--version", NULL };
755fa453 771 char *path;
8ba5ced7
TA
772 close(pipes[0]);
773
774 close(STDERR_FILENO);
775 if (dup2(pipes[1], STDOUT_FILENO) < 0)
665bb114 776 _exit(EXIT_FAILURE);
8ba5ced7 777
755fa453 778 path = on_path("criu", NULL);
d9b32b09 779 if (!path)
665bb114 780 _exit(EXIT_FAILURE);
d9b32b09 781
755fa453 782 execv(path, args);
665bb114 783 _exit(EXIT_FAILURE);
8ba5ced7
TA
784 } else {
785 FILE *f;
5407e2ab 786 char *tmp;
8ba5ced7
TA
787 int patch;
788
789 close(pipes[1]);
790 if (wait_for_pid(pid) < 0) {
791 close(pipes[0]);
4eae4051 792 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
793 return false;
794 }
795
796 f = fdopen(pipes[0], "r");
797 if (!f) {
798 close(pipes[0]);
799 return false;
800 }
801
5407e2ab
CB
802 tmp = malloc(1024);
803 if (!tmp) {
804 fclose(f);
805 return false;
806 }
807
808 if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1)
8ba5ced7
TA
809 goto version_error;
810
811 if (fgetc(f) != '\n')
812 goto version_error;
813
5407e2ab 814 if (strcmp(tmp, CRIU_VERSION) >= 0)
8ba5ced7
TA
815 goto version_match;
816
5407e2ab 817 if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1)
8ba5ced7
TA
818 goto version_error;
819
820 if (fgetc(f) != '-')
821 goto version_error;
822
823 if (fscanf(f, "%d", &patch) != 1)
824 goto version_error;
825
5407e2ab 826 if (strcmp(tmp, CRIU_GITID_VERSION) < 0)
8ba5ced7
TA
827 goto version_error;
828
829 if (patch < CRIU_GITID_PATCHLEVEL)
830 goto version_error;
831
832version_match:
3158ab5b 833 fclose(f);
5407e2ab
CB
834 if (!version)
835 free(tmp);
836 else
837 *version = tmp;
8ba5ced7
TA
838 return true;
839
840version_error:
3158ab5b 841 fclose(f);
5407e2ab 842 free(tmp);
9f1f54b0 843 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore");
8ba5ced7
TA
844 return false;
845 }
846}
847
e29fe1dd
TA
848/* Check and make sure the container has a configuration that we know CRIU can
849 * dump. */
f1954503 850static bool criu_ok(struct lxc_container *c, char **criu_version)
e29fe1dd
TA
851{
852 struct lxc_list *it;
e29fe1dd
TA
853
854 if (geteuid()) {
9f1f54b0 855 ERROR("Must be root to checkpoint");
e29fe1dd
TA
856 return false;
857 }
858
7177e6b1
DJ
859 if (!criu_version_ok(criu_version))
860 return false;
861
e29fe1dd
TA
862 /* We only know how to restore containers with veth networks. */
863 lxc_list_for_each(it, &c->lxc_conf->network) {
864 struct lxc_netdev *n = it->elem;
65b20221
TA
865 switch(n->type) {
866 case LXC_NET_VETH:
867 case LXC_NET_NONE:
868 case LXC_NET_EMPTY:
e2697330 869 case LXC_NET_MACVLAN:
65b20221
TA
870 break;
871 default:
9f1f54b0 872 ERROR("Found un-dumpable network: %s (%s)", lxc_net_type_to_str(n->type), n->name);
7177e6b1
DJ
873 if (criu_version) {
874 free(*criu_version);
875 *criu_version = NULL;
876 }
e29fe1dd
TA
877 return false;
878 }
879 }
880
e29fe1dd
TA
881 return true;
882}
883
e29fe1dd
TA
884static bool restore_net_info(struct lxc_container *c)
885{
7eab8fc6 886 int ret;
e29fe1dd
TA
887 struct lxc_list *it;
888 bool has_error = true;
889
890 if (container_mem_lock(c))
891 return false;
892
893 lxc_list_for_each(it, &c->lxc_conf->network) {
894 struct lxc_netdev *netdev = it->elem;
895 char template[IFNAMSIZ];
65b20221
TA
896
897 if (netdev->type != LXC_NET_VETH)
898 continue;
899
7eab8fc6
CB
900 ret = snprintf(template, sizeof(template), "vethXXXXXX");
901 if (ret < 0 || ret >= sizeof(template))
902 goto out_unlock;
e29fe1dd 903
de4855a8
CB
904 if (netdev->priv.veth_attr.pair[0] == '\0' &&
905 netdev->priv.veth_attr.veth1[0] == '\0') {
966e9f1f 906 if (!lxc_mkifname(template))
de4855a8
CB
907 goto out_unlock;
908
cbb9c7c7 909 (void)strlcpy(netdev->priv.veth_attr.veth1, template, IFNAMSIZ);
de4855a8 910 }
e29fe1dd
TA
911 }
912
913 has_error = false;
914
915out_unlock:
916 container_mem_unlock(c);
917 return !has_error;
918}
919
1a0e70ac 920/* do_restore never returns, the calling process is used as the monitor process.
5a24adb8 921 * do_restore calls _exit() if it fails.
1a0e70ac 922 */
c33b0338 923static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version)
e29fe1dd 924{
5af9369b 925 int fd, ret;
e29fe1dd 926 pid_t pid;
e29fe1dd 927 struct lxc_handler *handler;
113ebd57 928 int status = 0;
9b1e2e6e 929 int pipes[2] = {-1, -1};
2202afc9 930 struct cgroup_ops *cgroup_ops;
e29fe1dd 931
a7fb6043 932 /* Try to detach from the current controlling tty if it exists.
69e3b3be 933 * Otherwise, lxc_init (via lxc_console) will attach the container's
a7fb6043
TA
934 * console output to the current tty, which is probably not what any
935 * library user wants, and if they do, they can just manually configure
936 * it :)
937 */
938 fd = open("/dev/tty", O_RDWR);
939 if (fd >= 0) {
940 if (ioctl(fd, TIOCNOTTY, NULL) < 0)
941 SYSERROR("couldn't detach from tty");
942 close(fd);
943 }
944
5e5576a4 945 handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false);
e29fe1dd
TA
946 if (!handler)
947 goto out;
948
aa460476
CB
949 if (lxc_init(c->name, handler) < 0)
950 goto out;
951
5a087e05 952 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 953 if (!cgroup_ops)
e29fe1dd 954 goto out_fini_handler;
2202afc9 955 handler->cgroup_ops = cgroup_ops;
e29fe1dd 956
e8b181f5 957 if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
e29fe1dd
TA
958 ERROR("failed creating groups");
959 goto out_fini_handler;
960 }
961
962 if (!restore_net_info(c)) {
963 ERROR("failed restoring network info");
964 goto out_fini_handler;
965 }
966
5af9369b
CB
967 ret = resolve_clone_flags(handler);
968 if (ret < 0) {
6d1400b5 969 SYSERROR("Unsupported clone flag specified");
5af9369b
CB
970 goto out_fini_handler;
971 }
e29fe1dd 972
de31cb57 973 if (pipe2(pipes, O_CLOEXEC) < 0) {
3d9a5c85
TA
974 SYSERROR("pipe() failed");
975 goto out_fini_handler;
976 }
977
e29fe1dd
TA
978 pid = fork();
979 if (pid < 0)
980 goto out_fini_handler;
981
982 if (pid == 0) {
983 struct criu_opts os;
984 struct lxc_rootfs *rootfs;
4b54788e 985 int flags;
e29fe1dd 986
3d9a5c85
TA
987 close(status_pipe);
988 status_pipe = -1;
989
990 close(pipes[0]);
991 pipes[0] = -1;
e29fe1dd
TA
992
993 if (unshare(CLONE_NEWNS))
994 goto out_fini_handler;
995
996 /* CRIU needs the lxc root bind mounted so that it is the root of some
997 * mount. */
998 rootfs = &c->lxc_conf->rootfs;
999
1000 if (rootfs_is_blockdev(c->lxc_conf)) {
8ce1abc2
CB
1001 if (lxc_setup_rootfs_prepare_root(c->lxc_conf, c->name,
1002 c->config_path) < 0)
e29fe1dd
TA
1003 goto out_fini_handler;
1004 } else {
1005 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
1006 goto out_fini_handler;
1007
1008 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
1009 SYSERROR("remount / to private failed");
1010 goto out_fini_handler;
1011 }
1012
1013 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
1014 rmdir(rootfs->mount);
1015 goto out_fini_handler;
1016 }
1017 }
1018
5af85cb1 1019 os.pipefd = pipes[1];
e29fe1dd 1020 os.action = "restore";
b2c3710f 1021 os.user = opts;
e29fe1dd 1022 os.c = c;
4b54788e 1023 os.console_fd = c->lxc_conf->console.slave;
f1954503 1024 os.criu_version = criu_version;
0ab5703f 1025 os.handler = handler;
4b54788e 1026
97e4f1a9
TA
1027 if (os.console_fd >= 0) {
1028 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
1029 * via --inherit-fd, so we don't want it to close.
1030 */
1031 flags = fcntl(os.console_fd, F_GETFD);
1032 if (flags < 0) {
1033 SYSERROR("F_GETFD failed: %d", os.console_fd);
1034 goto out_fini_handler;
1035 }
4b54788e 1036
97e4f1a9 1037 flags &= ~FD_CLOEXEC;
4b54788e 1038
97e4f1a9
TA
1039 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
1040 SYSERROR("F_SETFD failed");
1041 goto out_fini_handler;
1042 }
4b54788e
TA
1043 }
1044 os.console_name = c->lxc_conf->console.name;
e29fe1dd
TA
1045
1046 /* exec_criu() returning is an error */
e20f46f8 1047 exec_criu(cgroup_ops, c->lxc_conf, &os);
e29fe1dd
TA
1048 umount(rootfs->mount);
1049 rmdir(rootfs->mount);
1050 goto out_fini_handler;
1051 } else {
e29fe1dd
TA
1052 char title[2048];
1053
3d9a5c85
TA
1054 close(pipes[1]);
1055 pipes[1] = -1;
1056
e29fe1dd
TA
1057 pid_t w = waitpid(pid, &status, 0);
1058 if (w == -1) {
1059 SYSERROR("waitpid");
1060 goto out_fini_handler;
1061 }
1062
e29fe1dd 1063 if (WIFEXITED(status)) {
75d219f0
TA
1064 char buf[4096];
1065
e29fe1dd 1066 if (WEXITSTATUS(status)) {
3d9a5c85
TA
1067 int n;
1068
668ba602 1069 n = lxc_read_nointr(pipes[0], buf, sizeof(buf));
3d9a5c85
TA
1070 if (n < 0) {
1071 SYSERROR("failed reading from criu stderr");
1072 goto out_fini_handler;
1073 }
1074
2735dfae
TA
1075 if (n == sizeof(buf))
1076 n--;
3d9a5c85
TA
1077 buf[n] = 0;
1078
9f1f54b0 1079 ERROR("criu process exited %d, output:\n%s", WEXITSTATUS(status), buf);
e29fe1dd
TA
1080 goto out_fini_handler;
1081 } else {
3eba9b49 1082 ret = snprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
75d219f0
TA
1083 if (ret < 0 || ret >= sizeof(buf)) {
1084 ERROR("snprintf'd too many characters: %d", ret);
1085 goto out_fini_handler;
1086 }
1087
1088 FILE *f = fopen(buf, "r");
e29fe1dd 1089 if (!f) {
9f1f54b0 1090 SYSERROR("couldn't read restore's children file %s", buf);
e29fe1dd
TA
1091 goto out_fini_handler;
1092 }
1093
1094 ret = fscanf(f, "%d", (int*) &handler->pid);
1095 fclose(f);
1096 if (ret != 1) {
1097 ERROR("reading restore pid failed");
1098 goto out_fini_handler;
1099 }
1100
f8a41688
TA
1101 if (lxc_set_state(c->name, handler, RUNNING)) {
1102 ERROR("error setting running state after restore");
e29fe1dd 1103 goto out_fini_handler;
f8a41688 1104 }
e29fe1dd
TA
1105 }
1106 } else {
9f1f54b0 1107 ERROR("CRIU was killed with signal %d", WTERMSIG(status));
e29fe1dd
TA
1108 goto out_fini_handler;
1109 }
1110
3d9a5c85
TA
1111 close(pipes[0]);
1112
614be9bc 1113 ret = lxc_write_nointr(status_pipe, &status, sizeof(status));
f3886023
TA
1114 close(status_pipe);
1115 status_pipe = -1;
1116
1117 if (sizeof(status) != ret) {
1118 SYSERROR("failed to write all of status");
1119 goto out_fini_handler;
1120 }
1121
e29fe1dd
TA
1122 /*
1123 * See comment in lxcapi_start; we don't care if these
1124 * fail because it's just a beauty thing. We just
1125 * assign the return here to silence potential.
1126 */
1127 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
223e30c1
CB
1128 if (ret < 0 || (size_t)ret >= sizeof(title))
1129 INFO("Setting truncated process name");
1130
e29fe1dd 1131 ret = setproctitle(title);
223e30c1
CB
1132 if (ret < 0)
1133 INFO("Failed to set process name");
e29fe1dd
TA
1134
1135 ret = lxc_poll(c->name, handler);
1136 if (ret)
1137 lxc_abort(c->name, handler);
1138 lxc_fini(c->name, handler);
5a24adb8 1139 _exit(ret);
e29fe1dd
TA
1140 }
1141
1142out_fini_handler:
3d9a5c85
TA
1143 if (pipes[0] >= 0)
1144 close(pipes[0]);
1145 if (pipes[1] >= 0)
1146 close(pipes[1]);
1147
e29fe1dd
TA
1148 lxc_fini(c->name, handler);
1149
1150out:
3d9a5c85 1151 if (status_pipe >= 0) {
f3886023
TA
1152 /* ensure getting here was a failure, e.g. if we failed to
1153 * parse the child pid or something, even after a successful
1154 * restore
1155 */
1156 if (!status)
1157 status = 1;
113ebd57 1158
614be9bc 1159 if (lxc_write_nointr(status_pipe, &status, sizeof(status)) != sizeof(status))
e29fe1dd 1160 SYSERROR("writing status failed");
3d9a5c85 1161 close(status_pipe);
e29fe1dd
TA
1162 }
1163
5a24adb8 1164 _exit(EXIT_FAILURE);
e29fe1dd 1165}
aef3d51e 1166
4b54788e
TA
1167static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
1168{
1169 FILE *f;
1170 char path[PATH_MAX];
1171 int ret;
1172 struct stat sb;
1173
1174 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
1175 tty_id[0] = 0;
1176 return 0;
1177 }
1178
1179 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
1180 if (ret < 0 || ret >= sizeof(path)) {
f510330c 1181 ERROR("snprintf'd too many characters: %d", ret);
4b54788e
TA
1182 return -1;
1183 }
1184
1185 ret = stat(path, &sb);
1186 if (ret < 0) {
1187 SYSERROR("stat of %s failed", path);
1188 return -1;
1189 }
1190
1191 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
1192 if (ret < 0 || ret >= sizeof(path)) {
1193 ERROR("snprintf'd too many characters: %d", ret);
1194 return -1;
1195 }
1196
f03280a7
TA
1197 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
1198 (long long unsigned) sb.st_rdev,
1199 (long long unsigned) sb.st_dev);
4b54788e
TA
1200 if (ret < 0 || ret >= sizeof(path)) {
1201 ERROR("snprintf'd too many characters: %d", ret);
1202 return -1;
1203 }
1204
1205 f = fopen(path, "w");
1206 if (!f) {
1207 SYSERROR("failed to open %s", path);
1208 return -1;
1209 }
1210
1211 ret = fprintf(f, "%s", tty_id);
1212 fclose(f);
1213 if (ret < 0)
1214 SYSERROR("failed to write to %s", path);
1215 return ret;
1216}
1217
aef3d51e 1218/* do one of either predump or a regular dump */
b2c3710f 1219static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
aef3d51e 1220{
0e4adc1a 1221 int ret;
aef3d51e 1222 pid_t pid;
5af85cb1 1223 int criuout[2];
0e4adc1a 1224 char *criu_version = NULL;
aef3d51e 1225
f1954503 1226 if (!criu_ok(c, &criu_version))
aef3d51e
TA
1227 return false;
1228
0e4adc1a
CB
1229 ret = pipe(criuout);
1230 if (ret < 0) {
5af85cb1 1231 SYSERROR("pipe() failed");
7177e6b1 1232 free(criu_version);
aef3d51e 1233 return false;
5af85cb1
TA
1234 }
1235
1236 if (mkdir_p(opts->directory, 0700) < 0)
1237 goto fail;
aef3d51e
TA
1238
1239 pid = fork();
1240 if (pid < 0) {
1241 SYSERROR("fork failed");
5af85cb1 1242 goto fail;
aef3d51e
TA
1243 }
1244
1245 if (pid == 0) {
1246 struct criu_opts os;
2202afc9 1247 struct cgroup_ops *cgroup_ops;
0ab5703f 1248
5af85cb1
TA
1249 close(criuout[0]);
1250
5a087e05 1251 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 1252 if (!cgroup_ops) {
0ab5703f 1253 ERROR("failed to cgroup_init()");
7211378b 1254 _exit(EXIT_FAILURE);
0ab5703f 1255 }
aef3d51e 1256
5af85cb1 1257 os.pipefd = criuout[1];
aef3d51e 1258 os.action = mode;
b2c3710f 1259 os.user = opts;
aef3d51e 1260 os.c = c;
4b54788e 1261 os.console_name = c->lxc_conf->console.path;
f1954503 1262 os.criu_version = criu_version;
e20f46f8 1263 os.handler = NULL;
74eb576c 1264
0e4adc1a
CB
1265 ret = save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id));
1266 if (ret < 0) {
1267 free(criu_version);
7211378b 1268 _exit(EXIT_FAILURE);
0e4adc1a 1269 }
aef3d51e
TA
1270
1271 /* exec_criu() returning is an error */
e20f46f8 1272 exec_criu(cgroup_ops, c->lxc_conf, &os);
0e4adc1a 1273 free(criu_version);
7211378b 1274 _exit(EXIT_FAILURE);
aef3d51e
TA
1275 } else {
1276 int status;
5af85cb1
TA
1277 ssize_t n;
1278 char buf[4096];
5af85cb1
TA
1279
1280 close(criuout[1]);
1281
aef3d51e
TA
1282 pid_t w = waitpid(pid, &status, 0);
1283 if (w == -1) {
1284 SYSERROR("waitpid");
5af85cb1 1285 close(criuout[0]);
7177e6b1 1286 free(criu_version);
aef3d51e
TA
1287 return false;
1288 }
1289
668ba602 1290 n = lxc_read_nointr(criuout[0], buf, sizeof(buf));
5af85cb1
TA
1291 close(criuout[0]);
1292 if (n < 0) {
1293 SYSERROR("read");
1294 n = 0;
1295 }
40229e95 1296
1297 if (n == sizeof(buf))
1298 buf[n-1] = 0;
1299 else
1300 buf[n] = 0;
5af85cb1 1301
aef3d51e
TA
1302 if (WIFEXITED(status)) {
1303 if (WEXITSTATUS(status)) {
9f1f54b0 1304 ERROR("dump failed with %d", WEXITSTATUS(status));
5af85cb1
TA
1305 ret = false;
1306 } else {
1307 ret = true;
aef3d51e 1308 }
aef3d51e 1309 } else if (WIFSIGNALED(status)) {
9f1f54b0 1310 ERROR("dump signaled with %d", WTERMSIG(status));
5af85cb1 1311 ret = false;
aef3d51e 1312 } else {
9f1f54b0 1313 ERROR("unknown dump exit %d", status);
5af85cb1 1314 ret = false;
aef3d51e 1315 }
5af85cb1
TA
1316
1317 if (!ret)
1318 ERROR("criu output: %s", buf);
7177e6b1
DJ
1319
1320 free(criu_version);
5af85cb1 1321 return ret;
aef3d51e 1322 }
5af85cb1
TA
1323fail:
1324 close(criuout[0]);
1325 close(criuout[1]);
1326 rmdir(opts->directory);
0e4adc1a 1327 free(criu_version);
5af85cb1 1328 return false;
aef3d51e
TA
1329}
1330
/* Run do_dump() for @c with the "pre-dump" action and return its result. */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	char *action = "pre-dump";

	return do_dump(c, action, opts);
}
1335
b2c3710f 1336bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
aef3d51e
TA
1337{
1338 char path[PATH_MAX];
1339 int ret;
1340
b2c3710f 1341 ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
aef3d51e
TA
1342 if (ret < 0 || ret >= sizeof(path))
1343 return false;
1344
1345 if (access(path, F_OK) == 0) {
9f1f54b0 1346 ERROR("please use a fresh directory for the dump directory");
aef3d51e
TA
1347 return false;
1348 }
1349
b2c3710f 1350 return do_dump(c, "dump", opts);
aef3d51e
TA
1351}
1352
/*
 * Restore container @c using the user-provided migrate options @opts.
 *
 * Requires an effective uid of 0. Forks a child that runs do_restore() and
 * reports an int status back over a pipe. Returns true when that status
 * describes a normal exit with code 0; otherwise the child is waited for and
 * false is returned.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	int status_fds[2];
	int status, nread;
	pid_t child;
	char *criu_version = NULL;

	if (geteuid() != 0) {
		ERROR("Must be root to restore");
		return false;
	}

	if (pipe(status_fds) != 0) {
		ERROR("failed to create pipe");
		return false;
	}

	if (!criu_ok(c, &criu_version)) {
		close(status_fds[0]);
		close(status_fds[1]);
		return false;
	}

	child = fork();
	if (child < 0) {
		close(status_fds[0]);
		close(status_fds[1]);
		free(criu_version);
		return false;
	}

	if (child == 0) {
		close(status_fds[0]);
		/* this never returns */
		do_restore(c, status_fds[1], opts, criu_version);
	}

	/* parent: keep only the read end and drop its copy of the version */
	close(status_fds[1]);
	free(criu_version);

	nread = lxc_read_nointr(status_fds[0], &status, sizeof(status));
	close(status_fds[0]);

	if ((size_t)nread != sizeof(status)) {
		ERROR("reading status from pipe failed");
	} else if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		/* The child becomes the monitor process, so there is nothing
		 * to wait for on success.
		 */
		return true;
	}

	/* The criu process was killed or exited nonzero (or the status could
	 * not be read): wait() for the handler, since the restore process
	 * died.
	 */
	if (wait_for_pid(child))
		ERROR("restore process died");

	return false;
}