]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
criu: Remove unnecessary return after _exit()
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
e29fe1dd
TA
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
d38dd64a
CB
23
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
9b945f13 27#include <inttypes.h>
e29fe1dd
TA
28#include <linux/limits.h>
29#include <sched.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <sys/mount.h>
34#include <sys/types.h>
35#include <sys/wait.h>
36#include <unistd.h>
37
e29fe1dd 38#include "cgroup.h"
dc259399 39#include "commands.h"
d38dd64a
CB
40#include "conf.h"
41#include "config.h"
e29fe1dd
TA
42#include "criu.h"
43#include "log.h"
44#include "lxc.h"
45#include "lxclock.h"
46#include "network.h"
28d832c4 47#include "storage.h"
e8f764b6 48#include "syscall_wrappers.h"
e29fe1dd
TA
49#include "utils.h"
50
5f4e44a2
TA
51#if IS_BIONIC
52#include <../include/lxcmntent.h>
53#else
54#include <mntent.h>
55#endif
56
9de31d5a
CB
57#ifndef HAVE_STRLCPY
58#include "include/strlcpy.h"
59#endif
60
/* Oldest released criu version accepted by criu_version_ok(). */
#define CRIU_VERSION "2.0"

/*
 * Alternative acceptance criterion for criu builds made from git: the GitID
 * base version and the patch level reported by `criu --version`.
 */
#define CRIU_GITID_VERSION "2.0"
#define CRIU_GITID_PATCHLEVEL 0

/* First criu version for which --skip-in-flight is passed on dump. */
#define CRIU_IN_FLIGHT_SUPPORT "2.4"

/* From this criu version on, --external is used instead of --veth-pair. */
#define CRIU_EXTERNAL_NOT_VETH "2.8"
f1954503 68
ac2cecc4 69lxc_log_define(criu, lxc);
e29fe1dd 70
/* Everything exec_criu() needs in order to build and run a criu command. */
struct criu_opts {
	/* File descriptor that criu's stdout and stderr are redirected to. */
	int pipefd;

	/* The criu action to run: "dump", "pre-dump" or "restore". */
	char *action;

	/* User-supplied migrate options that apply to this action. */
	struct migrate_opts *user;

	/* The container being checkpointed or restored. */
	struct lxc_container *c;

	/*
	 * dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]".
	 * An empty string means no --external tty argument is passed.
	 */
	char tty_id[32];

	/* restore: the handler created by the restoring task. */
	struct lxc_handler *handler;

	/* restore: console fd handed to criu via --inherit-fd. */
	int console_fd;

	/*
	 * The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console.path = "none".
	 */
	char *console_name;

	/* The detected version of criu. */
	char *criu_version;
};
100
4b54788e
TA
101static int load_tty_major_minor(char *directory, char *output, int len)
102{
103 FILE *f;
104 char path[PATH_MAX];
105 int ret;
106
107 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
108 if (ret < 0 || ret >= sizeof(path)) {
f510330c 109 ERROR("snprintf'd too many characters: %d", ret);
4b54788e
TA
110 return -1;
111 }
112
113 f = fopen(path, "r");
114 if (!f) {
115 /* This means we're coming from a liblxc which didn't export
3aed4934
CB
116 * the tty info. In this case they had to have lxc.console.path
117 * = * none, so there's no problem restoring.
4b54788e
TA
118 */
119 if (errno == ENOENT)
120 return 0;
121
122 SYSERROR("couldn't open %s", path);
123 return -1;
124 }
125
126 if (!fgets(output, len, f)) {
127 fclose(f);
128 SYSERROR("couldn't read %s", path);
129 return -1;
130 }
131
132 fclose(f);
133 return 0;
134}
135
74ad3607
FB
/*
 * Compare two dotted version strings of the form "major[.minor[.patch]]".
 *
 * Returns 1 if v1 is newer than v2, 0 if both are equal and -1 if v1 is older
 * than v2. -1 is also returned when either string does not start with a
 * number. A component that is absent from a string counts as -1, so "2.8"
 * sorts before "2.8.0".
 */
static int cmp_version(const char *v1, const char *v2)
{
	int lhs[3] = { -1, -1, -1 };
	int rhs[3] = { -1, -1, -1 };
	size_t idx;

	if (sscanf(v1, "%d.%d.%d", &lhs[0], &lhs[1], &lhs[2]) < 1)
		return -1;

	if (sscanf(v2, "%d.%d.%d", &rhs[0], &rhs[1], &rhs[2]) < 1)
		return -1;

	/* The first differing component, most significant first, decides. */
	for (idx = 0; idx < sizeof(lhs) / sizeof(lhs[0]); idx++) {
		if (lhs[idx] > rhs[idx])
			return 1;

		if (lhs[idx] < rhs[idx])
			return -1;
	}

	return 0;
}
176
e20f46f8
AR
177static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
178 struct criu_opts *opts)
e29fe1dd
TA
179{
180 char **argv, log[PATH_MAX];
19d1509c 181 int static_args = 23, argc = 0, i, ret;
e29fe1dd
TA
182 int netnr = 0;
183 struct lxc_list *it;
5f4e44a2
TA
184 FILE *mnts;
185 struct mntent mntent;
e29fe1dd 186
0e4be3cf 187 char buf[4096], ttys[32];
a17fa3c0 188 size_t pos;
5af85cb1 189
e9195050
TA
190 /* If we are currently in a cgroup /foo/bar, and the container is in a
191 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
192 * container has an open fd that points to one of the cgroup files
193 * (systemd always opens its "root" cgroup). So, let's escape to the
194 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
195 * see all cgroups.
196 */
e20f46f8 197 if (!cgroup_ops->escape(cgroup_ops, conf)) {
e9195050
TA
198 ERROR("failed to escape cgroups");
199 return;
200 }
201
e29fe1dd 202 /* The command line always looks like:
19d1509c 203 * criu $(action) --tcp-established --file-locks --link-remap \
5f178bc9 204 * --manage-cgroups=full --action-script foo.sh -D $(directory) \
e29fe1dd
TA
205 * -o $(directory)/$(action).log --ext-mount-map auto
206 * --enable-external-sharing --enable-external-masters
4b54788e 207 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
e29fe1dd
TA
208 * +1 for final NULL */
209
aef3d51e 210 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399
TA
211 /* -t pid --freeze-cgroup /lxc/ct */
212 static_args += 4;
e29fe1dd 213
aef3d51e 214 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
b2c3710f 215 if (opts->user->predump_dir)
aef3d51e
TA
216 static_args += 2;
217
74eb576c 218 /* --page-server --address <address> --port <port> */
b2c3710f 219 if (opts->user->pageserver_address && opts->user->pageserver_port)
74eb576c
NE
220 static_args += 5;
221
aef3d51e 222 /* --leave-running (only for final dump) */
b2c3710f 223 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd 224 static_args++;
4b54788e
TA
225
226 /* --external tty[88,4] */
227 if (opts->tty_id[0])
228 static_args += 2;
19d1509c
TA
229
230 /* --force-irmap */
231 if (!opts->user->preserves_inodes)
232 static_args++;
b2b7b0d2
TA
233
234 /* --ghost-limit 1024 */
235 if (opts->user->ghost_limit)
236 static_args += 2;
e29fe1dd
TA
237 } else if (strcmp(opts->action, "restore") == 0) {
238 /* --root $(lxc_mount_point) --restore-detached
0ab5703f 239 * --restore-sibling
13389b29
TA
240 * --lsm-profile apparmor:whatever
241 */
0ab5703f 242 static_args += 6;
4b54788e 243
0e4be3cf
CB
244 ttys[0] = 0;
245 if (load_tty_major_minor(opts->user->directory, ttys, sizeof(ttys)))
4b54788e
TA
246 return;
247
248 /* --inherit-fd fd[%d]:tty[%s] */
0e4be3cf 249 if (ttys[0])
4b54788e 250 static_args += 2;
e29fe1dd
TA
251 } else {
252 return;
253 }
254
2202afc9
CB
255 if (cgroup_ops->num_hierarchies(cgroup_ops) > 0)
256 static_args += 2 * cgroup_ops->num_hierarchies(cgroup_ops);
0ab5703f 257
b2c3710f 258 if (opts->user->verbose)
e29fe1dd
TA
259 static_args++;
260
b9ee6643
TA
261 if (opts->user->action_script)
262 static_args += 2;
263
5f4e44a2
TA
264 static_args += 2 * lxc_list_len(&opts->c->lxc_conf->mount_list);
265
b2c3710f 266 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
e29fe1dd 267 if (ret < 0 || ret >= PATH_MAX) {
9f1f54b0 268 ERROR("logfile name too long");
e29fe1dd
TA
269 return;
270 }
271
272 argv = malloc(static_args * sizeof(*argv));
273 if (!argv)
274 return;
275
276 memset(argv, 0, static_args * sizeof(*argv));
277
278#define DECLARE_ARG(arg) \
279 do { \
280 if (arg == NULL) { \
281 ERROR("Got NULL argument for criu"); \
282 goto err; \
283 } \
284 argv[argc++] = strdup(arg); \
285 if (!argv[argc-1]) \
286 goto err; \
287 } while (0)
288
289 argv[argc++] = on_path("criu", NULL);
290 if (!argv[argc-1]) {
9f1f54b0 291 ERROR("Couldn't find criu binary");
e29fe1dd
TA
292 goto err;
293 }
294
295 DECLARE_ARG(opts->action);
296 DECLARE_ARG("--tcp-established");
297 DECLARE_ARG("--file-locks");
298 DECLARE_ARG("--link-remap");
0a5fc6df 299 DECLARE_ARG("--manage-cgroups=full");
e29fe1dd
TA
300 DECLARE_ARG("--ext-mount-map");
301 DECLARE_ARG("auto");
302 DECLARE_ARG("--enable-external-sharing");
303 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
304 DECLARE_ARG("--enable-fs");
305 DECLARE_ARG("hugetlbfs");
5b454329
TA
306 DECLARE_ARG("--enable-fs");
307 DECLARE_ARG("tracefs");
e29fe1dd 308 DECLARE_ARG("-D");
b2c3710f 309 DECLARE_ARG(opts->user->directory);
e29fe1dd
TA
310 DECLARE_ARG("-o");
311 DECLARE_ARG(log);
312
2202afc9 313 for (i = 0; i < cgroup_ops->num_hierarchies(cgroup_ops); i++) {
0ab5703f 314 char **controllers = NULL, *fullname;
31b204e4 315 char *path, *tmp;
0ab5703f 316
2202afc9 317 if (!cgroup_ops->get_hierarchies(cgroup_ops, i, &controllers)) {
0ab5703f
TA
318 ERROR("failed to get hierarchy %d", i);
319 goto err;
320 }
321
322 /* if we are in a dump, we have to ask the monitor process what
323 * the right cgroup is. if this is a restore, we can just use
324 * the handler the restore task created.
325 */
326 if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
327 path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
328 if (!path) {
329 ERROR("failed to get cgroup path for %s", controllers[0]);
330 goto err;
331 }
332 } else {
333 const char *p;
334
2202afc9 335 p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
0ab5703f
TA
336 if (!p) {
337 ERROR("failed to get cgroup path for %s", controllers[0]);
338 goto err;
339 }
340
341 path = strdup(p);
342 if (!path) {
343 ERROR("strdup failed");
344 goto err;
345 }
346 }
347
31b204e4
CB
348 tmp = lxc_deslashify(path);
349 if (!tmp) {
350 ERROR("Failed to remove extraneous slashes from \"%s\"",
351 path);
0ab5703f
TA
352 free(path);
353 goto err;
354 }
31b204e4
CB
355 free(path);
356 path = tmp;
0ab5703f
TA
357
358 fullname = lxc_string_join(",", (const char **) controllers, false);
359 if (!fullname) {
360 ERROR("failed to join controllers");
361 free(path);
362 goto err;
363 }
364
365 ret = sprintf(buf, "%s:%s", fullname, path);
366 free(path);
367 free(fullname);
368 if (ret < 0 || ret >= sizeof(buf)) {
369 ERROR("sprintf of cgroup root arg failed");
370 goto err;
371 }
372
373 DECLARE_ARG("--cgroup-root");
374 DECLARE_ARG(buf);
375 }
376
b2c3710f 377 if (opts->user->verbose)
582cb478 378 DECLARE_ARG("-v4");
e29fe1dd 379
b9ee6643
TA
380 if (opts->user->action_script) {
381 DECLARE_ARG("--action-script");
382 DECLARE_ARG(opts->user->action_script);
383 }
384
1800f924
WB
385 mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list,
386 opts->c->lxc_conf->lsm_aa_allow_nesting);
5f4e44a2
TA
387 if (!mnts)
388 goto err;
389
390 while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
d07545c7 391 char *mntdata;
5f4e44a2 392 char arg[2 * PATH_MAX + 2];
19d2422b
TA
393 unsigned long flags;
394
395 if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0)
396 goto err;
397
398 free(mntdata);
399
400 /* only add --ext-mount-map for actual bind mounts */
401 if (!(flags & MS_BIND))
402 continue;
5f4e44a2 403
d07545c7
CB
404 if (strcmp(opts->action, "dump") == 0)
405 ret = snprintf(arg, sizeof(arg), "/%s:%s",
406 mntent.mnt_dir, mntent.mnt_dir);
407 else
408 ret = snprintf(arg, sizeof(arg), "%s:%s",
409 mntent.mnt_dir, mntent.mnt_fsname);
5f4e44a2
TA
410 if (ret < 0 || ret >= sizeof(arg)) {
411 fclose(mnts);
412 ERROR("snprintf failed");
413 goto err;
414 }
415
416 DECLARE_ARG("--ext-mount-map");
417 DECLARE_ARG(arg);
418 }
419 fclose(mnts);
420
aef3d51e 421 if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
dc259399 422 char pid[32], *freezer_relative;
e29fe1dd
TA
423
424 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
425 goto err;
426
427 DECLARE_ARG("-t");
428 DECLARE_ARG(pid);
dc259399
TA
429
430 freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name,
431 opts->c->config_path,
432 "freezer");
433 if (!freezer_relative) {
434 ERROR("failed getting freezer path");
435 goto err;
436 }
437
438 ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
439 if (ret < 0 || ret >= sizeof(log))
440 goto err;
441
f1954503
AR
442 if (!opts->user->disable_skip_in_flight &&
443 strcmp(opts->criu_version, CRIU_IN_FLIGHT_SUPPORT) >= 0)
444 DECLARE_ARG("--skip-in-flight");
445
dc259399
TA
446 DECLARE_ARG("--freeze-cgroup");
447 DECLARE_ARG(log);
448
4b54788e 449 if (opts->tty_id[0]) {
36d2096c
TA
450 DECLARE_ARG("--ext-mount-map");
451 DECLARE_ARG("/dev/console:console");
452
4b54788e
TA
453 DECLARE_ARG("--external");
454 DECLARE_ARG(opts->tty_id);
455 }
456
b2c3710f 457 if (opts->user->predump_dir) {
aef3d51e 458 DECLARE_ARG("--prev-images-dir");
b2c3710f 459 DECLARE_ARG(opts->user->predump_dir);
9f99a33f 460 DECLARE_ARG("--track-mem");
74eb576c 461 }
4c0c0319 462
b2c3710f 463 if (opts->user->pageserver_address && opts->user->pageserver_port) {
74eb576c
NE
464 DECLARE_ARG("--page-server");
465 DECLARE_ARG("--address");
b2c3710f 466 DECLARE_ARG(opts->user->pageserver_address);
74eb576c 467 DECLARE_ARG("--port");
b2c3710f 468 DECLARE_ARG(opts->user->pageserver_port);
74eb576c 469 }
aef3d51e 470
19d1509c
TA
471 if (!opts->user->preserves_inodes)
472 DECLARE_ARG("--force-irmap");
473
b2b7b0d2
TA
474 if (opts->user->ghost_limit) {
475 char ghost_limit[32];
476
9b945f13 477 ret = sprintf(ghost_limit, "%"PRIu64, opts->user->ghost_limit);
b2b7b0d2 478 if (ret < 0 || ret >= sizeof(ghost_limit)) {
9b945f13 479 ERROR("failed to print ghost limit %"PRIu64, opts->user->ghost_limit);
b2b7b0d2
TA
480 goto err;
481 }
482
483 DECLARE_ARG("--ghost-limit");
484 DECLARE_ARG(ghost_limit);
485 }
486
aef3d51e 487 /* only for final dump */
b2c3710f 488 if (strcmp(opts->action, "dump") == 0 && !opts->user->stop)
e29fe1dd
TA
489 DECLARE_ARG("--leave-running");
490 } else if (strcmp(opts->action, "restore") == 0) {
491 void *m;
492 int additional;
13389b29 493 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
e29fe1dd
TA
494
495 DECLARE_ARG("--root");
496 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
497 DECLARE_ARG("--restore-detached");
498 DECLARE_ARG("--restore-sibling");
e29fe1dd 499
0e4be3cf 500 if (ttys[0]) {
97e4f1a9 501 if (opts->console_fd < 0) {
3aed4934 502 ERROR("lxc.console.path configured on source host but not target");
97e4f1a9
TA
503 goto err;
504 }
505
0e4be3cf 506 ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, ttys);
4b54788e
TA
507 if (ret < 0 || ret >= sizeof(buf))
508 goto err;
509
510 DECLARE_ARG("--inherit-fd");
511 DECLARE_ARG(buf);
512 }
513 if (opts->console_name) {
514 if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) {
515 SYSERROR("sprintf'd too many bytes");
516 }
517 DECLARE_ARG("--ext-mount-map");
518 DECLARE_ARG(buf);
519 }
520
13389b29
TA
521 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
522
523 if (lxc_conf->lsm_aa_profile)
524 ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
525 else
526 ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
527
528 if (ret < 0 || ret >= sizeof(buf))
529 goto err;
530
531 DECLARE_ARG("--lsm-profile");
532 DECLARE_ARG(buf);
533 }
534
e29fe1dd
TA
535 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
536
fa071249
TA
537 m = realloc(argv, (argc + additional + 1) * sizeof(*argv));
538 if (!m)
539 goto err;
e29fe1dd
TA
540 argv = m;
541
542 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
9de31d5a 543 size_t retlen;
e29fe1dd
TA
544 char eth[128], *veth;
545 struct lxc_netdev *n = it->elem;
46c8ffd5
AR
546 bool external_not_veth;
547
74ad3607 548 if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
46c8ffd5
AR
549 /* Since criu version 2.8 the usage of --veth-pair
550 * has been deprecated:
551 * git tag --contains f2037e6d3445fc400
552 * v2.8 */
553 external_not_veth = true;
554 } else {
555 external_not_veth = false;
556 }
e29fe1dd 557
42277b1c 558 if (n->name[0] != '\0') {
9de31d5a
CB
559 retlen = strlcpy(eth, n->name, sizeof(eth));
560 if (retlen >= sizeof(eth))
e29fe1dd 561 goto err;
796a109d
TA
562 } else {
563 ret = snprintf(eth, sizeof(eth), "eth%d", netnr);
564 if (ret < 0 || ret >= sizeof(eth))
565 goto err;
566 }
e29fe1dd 567
e2697330
TA
568 switch (n->type) {
569 case LXC_NET_VETH:
570 veth = n->priv.veth_attr.pair;
ea7f6b29
CB
571 if (veth[0] == '\0')
572 veth = n->priv.veth_attr.veth1;
e29fe1dd 573
de4855a8 574 if (n->link[0] != '\0') {
46c8ffd5 575 if (external_not_veth)
d07545c7
CB
576 ret = snprintf(buf, sizeof(buf),
577 "veth[%s]:%s@%s",
578 eth, veth,
579 n->link);
46c8ffd5 580 else
d07545c7
CB
581 ret = snprintf(buf, sizeof(buf),
582 "%s=%s@%s", eth,
583 veth, n->link);
46c8ffd5
AR
584 } else {
585 if (external_not_veth)
d07545c7
CB
586 ret = snprintf(buf, sizeof(buf),
587 "veth[%s]:%s",
588 eth, veth);
46c8ffd5 589 else
d07545c7
CB
590 ret = snprintf(buf, sizeof(buf),
591 "%s=%s", eth,
592 veth);
46c8ffd5 593 }
e2697330
TA
594 if (ret < 0 || ret >= sizeof(buf))
595 goto err;
596 break;
597 case LXC_NET_MACVLAN:
de4855a8 598 if (n->link[0] == '\0') {
9f1f54b0 599 ERROR("no host interface for macvlan %s", n->name);
e2697330
TA
600 goto err;
601 }
602
603 ret = snprintf(buf, sizeof(buf), "macvlan[%s]:%s", eth, n->link);
604 if (ret < 0 || ret >= sizeof(buf))
605 goto err;
606 break;
607 case LXC_NET_NONE:
608 case LXC_NET_EMPTY:
609 break;
610 default:
611 /* we have screened for this earlier... */
9f1f54b0 612 ERROR("unexpected network type %d", n->type);
e29fe1dd 613 goto err;
e2697330 614 }
e29fe1dd 615
46c8ffd5
AR
616 if (external_not_veth)
617 DECLARE_ARG("--external");
618 else
619 DECLARE_ARG("--veth-pair");
e29fe1dd 620 DECLARE_ARG(buf);
2f3fbc6b 621 netnr++;
e29fe1dd
TA
622 }
623
624 }
625
626 argv[argc] = NULL;
627
cf4b07a5 628 buf[0] = 0;
a17fa3c0 629 pos = 0;
72a30576 630
cf4b07a5 631 for (i = 0; argv[i]; i++) {
72a30576
NE
632 ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]);
633 if (ret < 0 || ret >= sizeof(buf) - pos)
634 goto err;
635 else
636 pos += ret;
cf4b07a5
TA
637 }
638
639 INFO("execing: %s", buf);
640
5af85cb1
TA
641 /* before criu inits its log, it sometimes prints things to stdout/err;
642 * let's be sure we capture that.
643 */
644 if (dup2(opts->pipefd, STDOUT_FILENO) < 0) {
645 SYSERROR("dup2 stdout failed");
646 goto err;
647 }
648
649 if (dup2(opts->pipefd, STDERR_FILENO) < 0) {
650 SYSERROR("dup2 stderr failed");
651 goto err;
652 }
653
654 close(opts->pipefd);
655
e29fe1dd
TA
656#undef DECLARE_ARG
657 execv(argv[0], argv);
658err:
e29fe1dd
TA
659 for (i = 0; argv[i]; i++)
660 free(argv[i]);
661 free(argv);
662}
663
b5b12b9e
AR
664/*
665 * Function to check if the checks activated in 'features_to_check' are
666 * available with the current architecture/kernel/criu combination.
667 *
668 * Parameter features_to_check is a bit mask of all features that should be
669 * checked (see feature check defines in lxc/lxccontainer.h).
670 *
671 * If the return value is true, all requested features are supported. If
672 * the return value is false the features_to_check parameter is updated
673 * to reflect which features are available. '0' means no feature but
674 * also that something went totally wrong.
675 *
676 * Some of the code flow of criu_version_ok() is duplicated and maybe it
677 * is a good candidate for refactoring.
678 */
679bool __criu_check_feature(uint64_t *features_to_check)
680{
681 pid_t pid;
682 uint64_t current_bit = 0;
683 int ret;
fca23691 684 uint64_t features = *features_to_check;
b5b12b9e
AR
685 /* Feature checking is currently always like
686 * criu check --feature <feature-name>
687 */
688 char *args[] = { "criu", "check", "--feature", NULL, NULL };
689
690 if ((features & ~FEATURE_MEM_TRACK & ~FEATURE_LAZY_PAGES) != 0) {
691 /* There are feature bits activated we do not understand.
692 * Refusing to answer at all */
693 *features_to_check = 0;
694 return false;
695 }
696
6d61f17d 697 while (current_bit < (sizeof(uint64_t) * 8 - 1)) {
b5b12b9e
AR
698 /* only test requested features */
699 if (!(features & (1ULL << current_bit))) {
700 /* skip this */
701 current_bit++;
702 continue;
703 }
704
705 pid = fork();
706 if (pid < 0) {
707 SYSERROR("fork() failed");
708 *features_to_check = 0;
709 return false;
710 }
711
712 if (pid == 0) {
713 if ((1ULL << current_bit) == FEATURE_MEM_TRACK)
714 /* This is needed for pre-dump support, which
715 * enables pre-copy migration. */
716 args[3] = "mem_dirty_track";
717 else if ((1ULL << current_bit) == FEATURE_LAZY_PAGES)
718 /* CRIU has two checks for userfaultfd support.
719 *
720 * The simpler check is only for 'uffd'. If the
721 * kernel supports userfaultfd without noncoop
722 * then only process can be lazily restored
723 * which do not fork. With 'uffd-noncoop'
724 * it is also possible to lazily restore processes
725 * which do fork. For a container runtime like
726 * LXC checking only for 'uffd' makes not much sense. */
727 args[3] = "uffd-noncoop";
728 else
4f43526d 729 _exit(EXIT_FAILURE);
b5b12b9e
AR
730
731 null_stdfds();
732
733 execvp("criu", args);
734 SYSERROR("Failed to exec \"criu\"");
4f43526d 735 _exit(EXIT_FAILURE);
b5b12b9e
AR
736 }
737
738 ret = wait_for_pid(pid);
739
740 if (ret == -1) {
741 /* It is not known why CRIU failed. Either
742 * CRIU is not available, the feature check
743 * does not exist or the feature is not
744 * supported. */
745 INFO("feature not supported");
746 /* Clear not supported feature bit */
747 features &= ~(1ULL << current_bit);
748 }
749
750 current_bit++;
751 /* no more checks requested; exit check loop */
752 if (!(features & ~((1ULL << current_bit)-1)))
753 break;
754 }
755 if (features != *features_to_check) {
756 *features_to_check = features;
757 return false;
758 }
759 return true;
760}
761
8ba5ced7
TA
762/*
763 * Check to see if the criu version is recent enough for all the features we
764 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
765 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
766 * things potentially before a version is released with a particular feature.
767 *
768 * The intent is that when criu development slows down, we can drop this, but
769 * for now we shouldn't attempt to c/r with versions that we know won't work.
5407e2ab
CB
770 *
771 * Note: If version != NULL criu_version() stores the detected criu version in
772 * version. Allocates memory for version which must be freed by caller.
8ba5ced7 773 */
5407e2ab 774static bool criu_version_ok(char **version)
8ba5ced7
TA
775{
776 int pipes[2];
777 pid_t pid;
778
779 if (pipe(pipes) < 0) {
780 SYSERROR("pipe() failed");
781 return false;
782 }
783
784 pid = fork();
785 if (pid < 0) {
786 SYSERROR("fork() failed");
787 return false;
788 }
789
790 if (pid == 0) {
791 char *args[] = { "criu", "--version", NULL };
755fa453 792 char *path;
8ba5ced7
TA
793 close(pipes[0]);
794
795 close(STDERR_FILENO);
796 if (dup2(pipes[1], STDOUT_FILENO) < 0)
665bb114 797 _exit(EXIT_FAILURE);
8ba5ced7 798
755fa453 799 path = on_path("criu", NULL);
d9b32b09 800 if (!path)
665bb114 801 _exit(EXIT_FAILURE);
d9b32b09 802
755fa453 803 execv(path, args);
665bb114 804 _exit(EXIT_FAILURE);
8ba5ced7
TA
805 } else {
806 FILE *f;
5407e2ab 807 char *tmp;
8ba5ced7
TA
808 int patch;
809
810 close(pipes[1]);
811 if (wait_for_pid(pid) < 0) {
812 close(pipes[0]);
4eae4051 813 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
814 return false;
815 }
816
817 f = fdopen(pipes[0], "r");
818 if (!f) {
819 close(pipes[0]);
820 return false;
821 }
822
5407e2ab
CB
823 tmp = malloc(1024);
824 if (!tmp) {
825 fclose(f);
826 return false;
827 }
828
829 if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1)
8ba5ced7
TA
830 goto version_error;
831
832 if (fgetc(f) != '\n')
833 goto version_error;
834
5407e2ab 835 if (strcmp(tmp, CRIU_VERSION) >= 0)
8ba5ced7
TA
836 goto version_match;
837
5407e2ab 838 if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1)
8ba5ced7
TA
839 goto version_error;
840
841 if (fgetc(f) != '-')
842 goto version_error;
843
844 if (fscanf(f, "%d", &patch) != 1)
845 goto version_error;
846
5407e2ab 847 if (strcmp(tmp, CRIU_GITID_VERSION) < 0)
8ba5ced7
TA
848 goto version_error;
849
850 if (patch < CRIU_GITID_PATCHLEVEL)
851 goto version_error;
852
853version_match:
3158ab5b 854 fclose(f);
5407e2ab
CB
855 if (!version)
856 free(tmp);
857 else
858 *version = tmp;
8ba5ced7
TA
859 return true;
860
861version_error:
3158ab5b 862 fclose(f);
5407e2ab 863 free(tmp);
9f1f54b0 864 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore");
8ba5ced7
TA
865 return false;
866 }
867}
868
e29fe1dd
TA
869/* Check and make sure the container has a configuration that we know CRIU can
870 * dump. */
f1954503 871static bool criu_ok(struct lxc_container *c, char **criu_version)
e29fe1dd
TA
872{
873 struct lxc_list *it;
e29fe1dd
TA
874
875 if (geteuid()) {
9f1f54b0 876 ERROR("Must be root to checkpoint");
e29fe1dd
TA
877 return false;
878 }
879
7177e6b1
DJ
880 if (!criu_version_ok(criu_version))
881 return false;
882
e29fe1dd
TA
883 /* We only know how to restore containers with veth networks. */
884 lxc_list_for_each(it, &c->lxc_conf->network) {
885 struct lxc_netdev *n = it->elem;
65b20221
TA
886 switch(n->type) {
887 case LXC_NET_VETH:
888 case LXC_NET_NONE:
889 case LXC_NET_EMPTY:
e2697330 890 case LXC_NET_MACVLAN:
65b20221
TA
891 break;
892 default:
9f1f54b0 893 ERROR("Found un-dumpable network: %s (%s)", lxc_net_type_to_str(n->type), n->name);
7177e6b1
DJ
894 if (criu_version) {
895 free(*criu_version);
896 *criu_version = NULL;
897 }
e29fe1dd
TA
898 return false;
899 }
900 }
901
e29fe1dd
TA
902 return true;
903}
904
e29fe1dd
TA
905static bool restore_net_info(struct lxc_container *c)
906{
7eab8fc6 907 int ret;
e29fe1dd
TA
908 struct lxc_list *it;
909 bool has_error = true;
910
911 if (container_mem_lock(c))
912 return false;
913
914 lxc_list_for_each(it, &c->lxc_conf->network) {
915 struct lxc_netdev *netdev = it->elem;
916 char template[IFNAMSIZ];
65b20221
TA
917
918 if (netdev->type != LXC_NET_VETH)
919 continue;
920
7eab8fc6
CB
921 ret = snprintf(template, sizeof(template), "vethXXXXXX");
922 if (ret < 0 || ret >= sizeof(template))
923 goto out_unlock;
e29fe1dd 924
de4855a8
CB
925 if (netdev->priv.veth_attr.pair[0] == '\0' &&
926 netdev->priv.veth_attr.veth1[0] == '\0') {
966e9f1f 927 if (!lxc_mkifname(template))
de4855a8
CB
928 goto out_unlock;
929
cbb9c7c7 930 (void)strlcpy(netdev->priv.veth_attr.veth1, template, IFNAMSIZ);
de4855a8 931 }
e29fe1dd
TA
932 }
933
934 has_error = false;
935
936out_unlock:
937 container_mem_unlock(c);
938 return !has_error;
939}
940
1a0e70ac 941/* do_restore never returns, the calling process is used as the monitor process.
5a24adb8 942 * do_restore calls _exit() if it fails.
1a0e70ac 943 */
c33b0338 944static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version)
e29fe1dd 945{
5af9369b 946 int fd, ret;
e29fe1dd 947 pid_t pid;
e29fe1dd 948 struct lxc_handler *handler;
113ebd57 949 int status = 0;
9b1e2e6e 950 int pipes[2] = {-1, -1};
2202afc9 951 struct cgroup_ops *cgroup_ops;
e29fe1dd 952
a7fb6043 953 /* Try to detach from the current controlling tty if it exists.
69e3b3be 954 * Otherwise, lxc_init (via lxc_console) will attach the container's
a7fb6043
TA
955 * console output to the current tty, which is probably not what any
956 * library user wants, and if they do, they can just manually configure
957 * it :)
958 */
959 fd = open("/dev/tty", O_RDWR);
960 if (fd >= 0) {
961 if (ioctl(fd, TIOCNOTTY, NULL) < 0)
962 SYSERROR("couldn't detach from tty");
963 close(fd);
964 }
965
5e5576a4 966 handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false);
e29fe1dd
TA
967 if (!handler)
968 goto out;
969
aa460476
CB
970 if (lxc_init(c->name, handler) < 0)
971 goto out;
972
5a087e05 973 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 974 if (!cgroup_ops)
e29fe1dd 975 goto out_fini_handler;
2202afc9 976 handler->cgroup_ops = cgroup_ops;
e29fe1dd 977
e8b181f5 978 if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
e29fe1dd
TA
979 ERROR("failed creating groups");
980 goto out_fini_handler;
981 }
982
983 if (!restore_net_info(c)) {
984 ERROR("failed restoring network info");
985 goto out_fini_handler;
986 }
987
5af9369b
CB
988 ret = resolve_clone_flags(handler);
989 if (ret < 0) {
6d1400b5 990 SYSERROR("Unsupported clone flag specified");
5af9369b
CB
991 goto out_fini_handler;
992 }
e29fe1dd 993
de31cb57 994 if (pipe2(pipes, O_CLOEXEC) < 0) {
3d9a5c85
TA
995 SYSERROR("pipe() failed");
996 goto out_fini_handler;
997 }
998
e29fe1dd
TA
999 pid = fork();
1000 if (pid < 0)
1001 goto out_fini_handler;
1002
1003 if (pid == 0) {
1004 struct criu_opts os;
1005 struct lxc_rootfs *rootfs;
4b54788e 1006 int flags;
e29fe1dd 1007
3d9a5c85
TA
1008 close(status_pipe);
1009 status_pipe = -1;
1010
1011 close(pipes[0]);
1012 pipes[0] = -1;
e29fe1dd
TA
1013
1014 if (unshare(CLONE_NEWNS))
1015 goto out_fini_handler;
1016
1017 /* CRIU needs the lxc root bind mounted so that it is the root of some
1018 * mount. */
1019 rootfs = &c->lxc_conf->rootfs;
1020
1021 if (rootfs_is_blockdev(c->lxc_conf)) {
8ce1abc2
CB
1022 if (lxc_setup_rootfs_prepare_root(c->lxc_conf, c->name,
1023 c->config_path) < 0)
e29fe1dd
TA
1024 goto out_fini_handler;
1025 } else {
1026 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
1027 goto out_fini_handler;
1028
1029 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
1030 SYSERROR("remount / to private failed");
1031 goto out_fini_handler;
1032 }
1033
1034 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
1035 rmdir(rootfs->mount);
1036 goto out_fini_handler;
1037 }
1038 }
1039
5af85cb1 1040 os.pipefd = pipes[1];
e29fe1dd 1041 os.action = "restore";
b2c3710f 1042 os.user = opts;
e29fe1dd 1043 os.c = c;
4b54788e 1044 os.console_fd = c->lxc_conf->console.slave;
f1954503 1045 os.criu_version = criu_version;
0ab5703f 1046 os.handler = handler;
4b54788e 1047
97e4f1a9
TA
1048 if (os.console_fd >= 0) {
1049 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
1050 * via --inherit-fd, so we don't want it to close.
1051 */
1052 flags = fcntl(os.console_fd, F_GETFD);
1053 if (flags < 0) {
1054 SYSERROR("F_GETFD failed: %d", os.console_fd);
1055 goto out_fini_handler;
1056 }
4b54788e 1057
97e4f1a9 1058 flags &= ~FD_CLOEXEC;
4b54788e 1059
97e4f1a9
TA
1060 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
1061 SYSERROR("F_SETFD failed");
1062 goto out_fini_handler;
1063 }
4b54788e
TA
1064 }
1065 os.console_name = c->lxc_conf->console.name;
e29fe1dd
TA
1066
1067 /* exec_criu() returning is an error */
e20f46f8 1068 exec_criu(cgroup_ops, c->lxc_conf, &os);
e29fe1dd
TA
1069 umount(rootfs->mount);
1070 rmdir(rootfs->mount);
1071 goto out_fini_handler;
1072 } else {
e29fe1dd
TA
1073 char title[2048];
1074
3d9a5c85
TA
1075 close(pipes[1]);
1076 pipes[1] = -1;
1077
e29fe1dd
TA
1078 pid_t w = waitpid(pid, &status, 0);
1079 if (w == -1) {
1080 SYSERROR("waitpid");
1081 goto out_fini_handler;
1082 }
1083
e29fe1dd 1084 if (WIFEXITED(status)) {
75d219f0
TA
1085 char buf[4096];
1086
e29fe1dd 1087 if (WEXITSTATUS(status)) {
3d9a5c85
TA
1088 int n;
1089
668ba602 1090 n = lxc_read_nointr(pipes[0], buf, sizeof(buf));
3d9a5c85
TA
1091 if (n < 0) {
1092 SYSERROR("failed reading from criu stderr");
1093 goto out_fini_handler;
1094 }
1095
2735dfae
TA
1096 if (n == sizeof(buf))
1097 n--;
3d9a5c85
TA
1098 buf[n] = 0;
1099
9f1f54b0 1100 ERROR("criu process exited %d, output:\n%s", WEXITSTATUS(status), buf);
e29fe1dd
TA
1101 goto out_fini_handler;
1102 } else {
3eba9b49 1103 ret = snprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
75d219f0
TA
1104 if (ret < 0 || ret >= sizeof(buf)) {
1105 ERROR("snprintf'd too many characters: %d", ret);
1106 goto out_fini_handler;
1107 }
1108
1109 FILE *f = fopen(buf, "r");
e29fe1dd 1110 if (!f) {
9f1f54b0 1111 SYSERROR("couldn't read restore's children file %s", buf);
e29fe1dd
TA
1112 goto out_fini_handler;
1113 }
1114
1115 ret = fscanf(f, "%d", (int*) &handler->pid);
1116 fclose(f);
1117 if (ret != 1) {
1118 ERROR("reading restore pid failed");
1119 goto out_fini_handler;
1120 }
1121
f8a41688
TA
1122 if (lxc_set_state(c->name, handler, RUNNING)) {
1123 ERROR("error setting running state after restore");
e29fe1dd 1124 goto out_fini_handler;
f8a41688 1125 }
e29fe1dd
TA
1126 }
1127 } else {
9f1f54b0 1128 ERROR("CRIU was killed with signal %d", WTERMSIG(status));
e29fe1dd
TA
1129 goto out_fini_handler;
1130 }
1131
3d9a5c85
TA
1132 close(pipes[0]);
1133
614be9bc 1134 ret = lxc_write_nointr(status_pipe, &status, sizeof(status));
f3886023
TA
1135 close(status_pipe);
1136 status_pipe = -1;
1137
1138 if (sizeof(status) != ret) {
1139 SYSERROR("failed to write all of status");
1140 goto out_fini_handler;
1141 }
1142
e29fe1dd
TA
1143 /*
1144 * See comment in lxcapi_start; we don't care if these
1145 * fail because it's just a beauty thing. We just
1146 * assign the return here to silence potential.
1147 */
1148 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
223e30c1
CB
1149 if (ret < 0 || (size_t)ret >= sizeof(title))
1150 INFO("Setting truncated process name");
1151
e29fe1dd 1152 ret = setproctitle(title);
223e30c1
CB
1153 if (ret < 0)
1154 INFO("Failed to set process name");
e29fe1dd
TA
1155
1156 ret = lxc_poll(c->name, handler);
1157 if (ret)
1158 lxc_abort(c->name, handler);
1159 lxc_fini(c->name, handler);
5a24adb8 1160 _exit(ret);
e29fe1dd
TA
1161 }
1162
1163out_fini_handler:
3d9a5c85
TA
1164 if (pipes[0] >= 0)
1165 close(pipes[0]);
1166 if (pipes[1] >= 0)
1167 close(pipes[1]);
1168
e29fe1dd
TA
1169 lxc_fini(c->name, handler);
1170
1171out:
3d9a5c85 1172 if (status_pipe >= 0) {
f3886023
TA
1173 /* ensure getting here was a failure, e.g. if we failed to
1174 * parse the child pid or something, even after a successful
1175 * restore
1176 */
1177 if (!status)
1178 status = 1;
113ebd57 1179
614be9bc 1180 if (lxc_write_nointr(status_pipe, &status, sizeof(status)) != sizeof(status))
e29fe1dd 1181 SYSERROR("writing status failed");
3d9a5c85 1182 close(status_pipe);
e29fe1dd
TA
1183 }
1184
5a24adb8 1185 _exit(EXIT_FAILURE);
e29fe1dd 1186}
aef3d51e 1187
4b54788e
TA
1188static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
1189{
1190 FILE *f;
1191 char path[PATH_MAX];
1192 int ret;
1193 struct stat sb;
1194
1195 if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) {
1196 tty_id[0] = 0;
1197 return 0;
1198 }
1199
1200 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
1201 if (ret < 0 || ret >= sizeof(path)) {
f510330c 1202 ERROR("snprintf'd too many characters: %d", ret);
4b54788e
TA
1203 return -1;
1204 }
1205
1206 ret = stat(path, &sb);
1207 if (ret < 0) {
1208 SYSERROR("stat of %s failed", path);
1209 return -1;
1210 }
1211
1212 ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
1213 if (ret < 0 || ret >= sizeof(path)) {
1214 ERROR("snprintf'd too many characters: %d", ret);
1215 return -1;
1216 }
1217
f03280a7
TA
1218 ret = snprintf(tty_id, len, "tty[%llx:%llx]",
1219 (long long unsigned) sb.st_rdev,
1220 (long long unsigned) sb.st_dev);
4b54788e
TA
1221 if (ret < 0 || ret >= sizeof(path)) {
1222 ERROR("snprintf'd too many characters: %d", ret);
1223 return -1;
1224 }
1225
1226 f = fopen(path, "w");
1227 if (!f) {
1228 SYSERROR("failed to open %s", path);
1229 return -1;
1230 }
1231
1232 ret = fprintf(f, "%s", tty_id);
1233 fclose(f);
1234 if (ret < 0)
1235 SYSERROR("failed to write to %s", path);
1236 return ret;
1237}
1238
aef3d51e 1239/* do one of either predump or a regular dump */
b2c3710f 1240static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
aef3d51e 1241{
0e4adc1a 1242 int ret;
aef3d51e 1243 pid_t pid;
5af85cb1 1244 int criuout[2];
0e4adc1a 1245 char *criu_version = NULL;
aef3d51e 1246
f1954503 1247 if (!criu_ok(c, &criu_version))
aef3d51e
TA
1248 return false;
1249
0e4adc1a
CB
1250 ret = pipe(criuout);
1251 if (ret < 0) {
5af85cb1 1252 SYSERROR("pipe() failed");
7177e6b1 1253 free(criu_version);
aef3d51e 1254 return false;
5af85cb1
TA
1255 }
1256
1257 if (mkdir_p(opts->directory, 0700) < 0)
1258 goto fail;
aef3d51e
TA
1259
1260 pid = fork();
1261 if (pid < 0) {
1262 SYSERROR("fork failed");
5af85cb1 1263 goto fail;
aef3d51e
TA
1264 }
1265
1266 if (pid == 0) {
1267 struct criu_opts os;
2202afc9 1268 struct cgroup_ops *cgroup_ops;
0ab5703f 1269
5af85cb1
TA
1270 close(criuout[0]);
1271
5a087e05 1272 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 1273 if (!cgroup_ops) {
0ab5703f 1274 ERROR("failed to cgroup_init()");
7211378b 1275 _exit(EXIT_FAILURE);
0ab5703f 1276 }
aef3d51e 1277
5af85cb1 1278 os.pipefd = criuout[1];
aef3d51e 1279 os.action = mode;
b2c3710f 1280 os.user = opts;
aef3d51e 1281 os.c = c;
4b54788e 1282 os.console_name = c->lxc_conf->console.path;
f1954503 1283 os.criu_version = criu_version;
e20f46f8 1284 os.handler = NULL;
74eb576c 1285
0e4adc1a
CB
1286 ret = save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id));
1287 if (ret < 0) {
1288 free(criu_version);
7211378b 1289 _exit(EXIT_FAILURE);
0e4adc1a 1290 }
aef3d51e
TA
1291
1292 /* exec_criu() returning is an error */
e20f46f8 1293 exec_criu(cgroup_ops, c->lxc_conf, &os);
0e4adc1a 1294 free(criu_version);
7211378b 1295 _exit(EXIT_FAILURE);
aef3d51e
TA
1296 } else {
1297 int status;
5af85cb1
TA
1298 ssize_t n;
1299 char buf[4096];
5af85cb1
TA
1300
1301 close(criuout[1]);
1302
aef3d51e
TA
1303 pid_t w = waitpid(pid, &status, 0);
1304 if (w == -1) {
1305 SYSERROR("waitpid");
5af85cb1 1306 close(criuout[0]);
7177e6b1 1307 free(criu_version);
aef3d51e
TA
1308 return false;
1309 }
1310
668ba602 1311 n = lxc_read_nointr(criuout[0], buf, sizeof(buf));
5af85cb1
TA
1312 close(criuout[0]);
1313 if (n < 0) {
1314 SYSERROR("read");
1315 n = 0;
1316 }
40229e95 1317
1318 if (n == sizeof(buf))
1319 buf[n-1] = 0;
1320 else
1321 buf[n] = 0;
5af85cb1 1322
aef3d51e
TA
1323 if (WIFEXITED(status)) {
1324 if (WEXITSTATUS(status)) {
9f1f54b0 1325 ERROR("dump failed with %d", WEXITSTATUS(status));
5af85cb1
TA
1326 ret = false;
1327 } else {
1328 ret = true;
aef3d51e 1329 }
aef3d51e 1330 } else if (WIFSIGNALED(status)) {
9f1f54b0 1331 ERROR("dump signaled with %d", WTERMSIG(status));
5af85cb1 1332 ret = false;
aef3d51e 1333 } else {
9f1f54b0 1334 ERROR("unknown dump exit %d", status);
5af85cb1 1335 ret = false;
aef3d51e 1336 }
5af85cb1
TA
1337
1338 if (!ret)
1339 ERROR("criu output: %s", buf);
7177e6b1
DJ
1340
1341 free(criu_version);
5af85cb1 1342 return ret;
aef3d51e 1343 }
5af85cb1
TA
1344fail:
1345 close(criuout[0]);
1346 close(criuout[1]);
1347 rmdir(opts->directory);
0e4adc1a 1348 free(criu_version);
5af85cb1 1349 return false;
aef3d51e
TA
1350}
1351
/* Perform a CRIU "pre-dump" of container c using the given migrate options.
 * Returns whatever do_dump() reports for that mode.
 */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped;

	dumped = do_dump(c, "pre-dump", opts);
	return dumped;
}
1356
b2c3710f 1357bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
aef3d51e
TA
1358{
1359 char path[PATH_MAX];
1360 int ret;
1361
b2c3710f 1362 ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
aef3d51e
TA
1363 if (ret < 0 || ret >= sizeof(path))
1364 return false;
1365
1366 if (access(path, F_OK) == 0) {
9f1f54b0 1367 ERROR("please use a fresh directory for the dump directory");
aef3d51e
TA
1368 return false;
1369 }
1370
b2c3710f 1371 return do_dump(c, "dump", opts);
aef3d51e
TA
1372}
1373
/* Restore container c from a CRIU dump described by opts.
 *
 * The caller must be root. A child is forked which runs do_restore() and
 * reports a wait()-style status word back through a pipe; this function
 * reads that status and translates it into a boolean.
 *
 * Returns true when the reported status indicates a normal exit with code 0,
 * false on any setup failure, short read, or unsuccessful status.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	pid_t pid;
	int status;
	ssize_t nread;
	int pipefd[2];
	char *criu_version = NULL;

	if (geteuid()) {
		ERROR("Must be root to restore");
		return false;
	}

	if (pipe(pipefd)) {
		/* SYSERROR so that the reason (errno) ends up in the log. */
		SYSERROR("failed to create pipe");
		return false;
	}

	if (!criu_ok(c, &criu_version)) {
		close(pipefd[0]);
		close(pipefd[1]);
		return false;
	}

	pid = fork();
	if (pid < 0) {
		/* Log first: the close() calls below may overwrite errno. */
		SYSERROR("fork failed");
		close(pipefd[0]);
		close(pipefd[1]);
		free(criu_version);
		return false;
	}

	if (pid == 0) {
		close(pipefd[0]);
		/* this never returns */
		do_restore(c, pipefd[1], opts, criu_version);
	}

	/* Parent: keep only the read end and drop our copy of the version. */
	close(pipefd[1]);
	free(criu_version);

	nread = lxc_read_nointr(pipefd[0], &status, sizeof(status));
	close(pipefd[0]);
	if (nread != (ssize_t)sizeof(status)) {
		ERROR("reading status from pipe failed");
		goto err_wait;
	}

	/* If the criu process was killed or exited nonzero, wait() for the
	 * handler, since the restore process died. Otherwise, we don't need to
	 * wait, since the child becomes the monitor process.
	 */
	if (!WIFEXITED(status) || WEXITSTATUS(status))
		goto err_wait;
	return true;

err_wait:
	if (wait_for_pid(pid))
		ERROR("restore process died");
	return false;
}