/*
 * Extracted from: git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
 * Commit subject: rename functions which clash with libsystemd's
 * Path: [mirror_lxc.git] / src / lxc / criu.c
 * (Blame view columns were: Commit, Line, Data.)
 */
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
d38dd64a 2
1160ce89
CB
3#include "config.h"
4
9b945f13 5#include <inttypes.h>
e29fe1dd
TA
6#include <linux/limits.h>
7#include <sched.h>
8#include <stdio.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/mount.h>
12#include <sys/types.h>
13#include <sys/wait.h>
14#include <unistd.h>
15
cdb4f412 16#include "attach_options.h"
5eac34d5 17
e29fe1dd 18#include "cgroup.h"
dc259399 19#include "commands.h"
d38dd64a 20#include "conf.h"
e29fe1dd
TA
21#include "criu.h"
22#include "log.h"
23#include "lxc.h"
24#include "lxclock.h"
59d8a539 25#include "memory_utils.h"
e29fe1dd 26#include "network.h"
28d832c4 27#include "storage.h"
e8f764b6 28#include "syscall_wrappers.h"
e29fe1dd
TA
29#include "utils.h"
30
5f4e44a2 31#if IS_BIONIC
58db1a61 32#include "lxcmntent.h"
5f4e44a2
TA
33#else
34#include <mntent.h>
35#endif
36
db4af8c5 37#if !HAVE_STRLCPY
58db1a61 38#include "strlcpy.h"
9de31d5a
CB
39#endif
40
c33b0338 41#define CRIU_VERSION "2.0"
73d46752
TA
42
43#define CRIU_GITID_VERSION "2.0"
44#define CRIU_GITID_PATCHLEVEL 0
45
f1954503 46#define CRIU_IN_FLIGHT_SUPPORT "2.4"
46c8ffd5 47#define CRIU_EXTERNAL_NOT_VETH "2.8"
0109a13d 48#define CRIU_EXTERNAL_NETDEV "3.15"
f1954503 49
ac2cecc4 50lxc_log_define(criu, lxc);
e29fe1dd 51
/* Everything exec_criu() needs to know to build one criu invocation. */
struct criu_opts {
	/* the thing to hook to stdout and stderr for logging */
	int pipefd;

	/* The type of criu invocation: "dump", "pre-dump" or "restore" */
	char *action;

	/* the user-provided migrate options relevant to this action */
	struct migrate_opts *user;

	/* The container to dump */
	struct lxc_container *c;

	/* dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]";
	 * empty string if there is none.
	 */
	char tty_id[32];

	/* restore: the handler belonging to this invocation.
	 * NOTE(review): presumably the handler created by the restore task;
	 * it is not read by exec_criu() - confirm against the restore path.
	 */
	struct lxc_handler *handler;

	/* restore: fd handed to criu via "--inherit-fd fd[<console_fd>]:<tty>";
	 * a negative value means no console is configured on this host.
	 */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pty
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console.path = "none".
	 */
	char *console_name;

	/* The detected version of criu */
	char *criu_version;
};
81
4b54788e
TA
82static int load_tty_major_minor(char *directory, char *output, int len)
83{
4b54788e 84 char path[PATH_MAX];
c3e48967 85 ssize_t ret;
4b54788e 86
8eaa5ae3
CB
87 ret = strnprintf(path, sizeof(path), "%s/tty.info", directory);
88 if (ret < 0)
c3e48967 89 return ret_errno(EIO);
4b54788e 90
c3e48967
CB
91 ret = lxc_read_from_file(path, output, len);
92 if (ret < 0) {
93 /*
94 * This means we're coming from a liblxc which didn't export
3aed4934
CB
95 * the tty info. In this case they had to have lxc.console.path
96 * = * none, so there's no problem restoring.
4b54788e
TA
97 */
98 if (errno == ENOENT)
99 return 0;
100
c3e48967 101 return log_error_errno(-errno, errno, "Failed to open \"%s\"", path);
4b54788e
TA
102 }
103
4b54788e
TA
104 return 0;
105}
/*
 * Compare two dotted version strings of the form "major[.minor[.patch]]".
 *
 * Components that are not present count as 0, so "2.8" equals "2.8.0".
 * Anything following the parsed components (e.g. a "-rc1" suffix) is ignored.
 *
 * Returns 1 if v1 > v2, 0 if they are equal and -1 if v1 < v2. -1 is also
 * returned if either string is NULL or does not start with a number.
 */
static int cmp_version(const char *v1, const char *v2)
{
	int oct_v1[3] = { 0, 0, 0 };
	int oct_v2[3] = { 0, 0, 0 };

	if (!v1 || !v2)
		return -1;

	/* At least the major number must be parsable. */
	if (sscanf(v1, "%d.%d.%d", &oct_v1[0], &oct_v1[1], &oct_v1[2]) < 1)
		return -1;

	if (sscanf(v2, "%d.%d.%d", &oct_v2[0], &oct_v2[1], &oct_v2[2]) < 1)
		return -1;

	/* Major, then minor, then patch: the first difference decides. */
	for (size_t i = 0; i < 3; i++) {
		if (oct_v1[i] > oct_v2[i])
			return 1;

		if (oct_v1[i] < oct_v2[i])
			return -1;
	}

	return 0;
}
/*
 * Argument vector handed to execv() by exec_criu().
 *
 * argc counts the entries of argv that are in use; argv is a flexible array
 * member, so the structure must be allocated with room for the entries.
 */
struct criu_exec_args {
	int argc;
	char *argv[];
};
152
153static void put_criu_exec_args(struct criu_exec_args *args)
e29fe1dd 154{
59d8a539
CB
155 if (args) {
156 for (int i = 0; i < args->argc; i++)
157 free_disarm(args->argv[i]);
158 free_disarm(args);
159 }
160}
161
162define_cleanup_function(struct criu_exec_args *, put_criu_exec_args);
163
164static int exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf,
165 struct criu_opts *opts)
166{
167 call_cleaner(put_criu_exec_args) struct criu_exec_args *args = NULL;
6eff3c0d 168 __do_fclose FILE *f_mnt = NULL;
59d8a539
CB
169 char log[PATH_MAX];
170 int static_args = 23, ret;
e29fe1dd 171 int netnr = 0;
5f4e44a2 172 struct mntent mntent;
d696c45e
CB
173 struct lxc_netdev *netdev;
174 struct string_entry *strentry;
e29fe1dd 175
0e4be3cf 176 char buf[4096], ttys[32];
5af85cb1 177
e9195050
TA
178 /* If we are currently in a cgroup /foo/bar, and the container is in a
179 * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
180 * container has an open fd that points to one of the cgroup files
181 * (systemd always opens its "root" cgroup). So, let's escape to the
182 * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
183 * see all cgroups.
184 */
59d8a539
CB
185 if (!cgroup_ops->criu_escape(cgroup_ops, conf))
186 return log_error_errno(-ENOENT, ENOENT, "Failed to escape to root cgroup");
e9195050 187
e29fe1dd 188 /* The command line always looks like:
19d1509c 189 * criu $(action) --tcp-established --file-locks --link-remap \
5f178bc9 190 * --manage-cgroups=full --action-script foo.sh -D $(directory) \
e29fe1dd
TA
191 * -o $(directory)/$(action).log --ext-mount-map auto
192 * --enable-external-sharing --enable-external-masters
4b54788e 193 * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
e29fe1dd
TA
194 * +1 for final NULL */
195
066af2cb 196 if (strequal(opts->action, "dump") || strequal(opts->action, "pre-dump")) {
dc259399
TA
197 /* -t pid --freeze-cgroup /lxc/ct */
198 static_args += 4;
e29fe1dd 199
aef3d51e 200 /* --prev-images-dir <path-to-directory-A-relative-to-B> */
b2c3710f 201 if (opts->user->predump_dir)
aef3d51e
TA
202 static_args += 2;
203
74eb576c 204 /* --page-server --address <address> --port <port> */
b2c3710f 205 if (opts->user->pageserver_address && opts->user->pageserver_port)
74eb576c
NE
206 static_args += 5;
207
aef3d51e 208 /* --leave-running (only for final dump) */
066af2cb 209 if (strequal(opts->action, "dump") && !opts->user->stop)
e29fe1dd 210 static_args++;
4b54788e
TA
211
212 /* --external tty[88,4] */
213 if (opts->tty_id[0])
214 static_args += 2;
19d1509c
TA
215
216 /* --force-irmap */
217 if (!opts->user->preserves_inodes)
218 static_args++;
b2b7b0d2
TA
219
220 /* --ghost-limit 1024 */
221 if (opts->user->ghost_limit)
222 static_args += 2;
066af2cb 223 } else if (strequal(opts->action, "restore")) {
e29fe1dd 224 /* --root $(lxc_mount_point) --restore-detached
0ab5703f 225 * --restore-sibling
13389b29
TA
226 * --lsm-profile apparmor:whatever
227 */
0ab5703f 228 static_args += 6;
4b54788e 229
0e4be3cf
CB
230 ttys[0] = 0;
231 if (load_tty_major_minor(opts->user->directory, ttys, sizeof(ttys)))
59d8a539 232 return log_error_errno(-EINVAL, EINVAL, "Failed to load tty information");
4b54788e
TA
233
234 /* --inherit-fd fd[%d]:tty[%s] */
0e4be3cf 235 if (ttys[0])
4b54788e 236 static_args += 2;
59d8a539 237
d696c45e 238 static_args += list_len(netdev, &opts->c->lxc_conf->netdevs, head) * 2;
e29fe1dd 239 } else {
59d8a539 240 return log_error_errno(-EINVAL, EINVAL, "Invalid criu operation specified");
e29fe1dd
TA
241 }
242
ff9edd2d
CB
243 if (cgroup_ops->criu_num_hierarchies(cgroup_ops) > 0)
244 static_args += 2 * cgroup_ops->criu_num_hierarchies(cgroup_ops);
0ab5703f 245
b2c3710f 246 if (opts->user->verbose)
e29fe1dd
TA
247 static_args++;
248
b9ee6643
TA
249 if (opts->user->action_script)
250 static_args += 2;
251
d696c45e 252 static_args += 2 * list_len(strentry, &opts->c->lxc_conf->mount_entries, head);
5f4e44a2 253
8eaa5ae3
CB
254 ret = strnprintf(log, sizeof(log), "%s/%s.log", opts->user->directory, opts->action);
255 if (ret < 0)
59d8a539 256 return ret_errno(EIO);
e29fe1dd 257
59d8a539
CB
258 args = zalloc(sizeof(struct criu_exec_args) + (static_args * sizeof(char **)));
259 if (!args)
260 return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate static arguments");
261
262#define DECLARE_ARG(arg) \
263 do { \
264 if (arg == NULL) \
265 return log_error_errno(-EINVAL, EINVAL, \
266 "Got NULL argument for criu"); \
267 args->argv[(args->argc)++] = strdup(arg); \
268 if (!args->argv[args->argc - 1]) \
269 return log_error_errno(-ENOMEM, ENOMEM, \
270 "Failed to duplicate argumen %s", arg); \
e29fe1dd
TA
271 } while (0)
272
59d8a539
CB
273 args->argv[(args->argc)++] = on_path("criu", NULL);
274 if (!args->argv[args->argc - 1])
275 return log_error_errno(-ENOENT, ENOENT, "Failed to find criu binary");
e29fe1dd
TA
276
277 DECLARE_ARG(opts->action);
278 DECLARE_ARG("--tcp-established");
279 DECLARE_ARG("--file-locks");
280 DECLARE_ARG("--link-remap");
0a5fc6df 281 DECLARE_ARG("--manage-cgroups=full");
e29fe1dd
TA
282 DECLARE_ARG("--ext-mount-map");
283 DECLARE_ARG("auto");
284 DECLARE_ARG("--enable-external-sharing");
285 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
286 DECLARE_ARG("--enable-fs");
287 DECLARE_ARG("hugetlbfs");
5b454329
TA
288 DECLARE_ARG("--enable-fs");
289 DECLARE_ARG("tracefs");
e29fe1dd 290 DECLARE_ARG("-D");
b2c3710f 291 DECLARE_ARG(opts->user->directory);
e29fe1dd
TA
292 DECLARE_ARG("-o");
293 DECLARE_ARG(log);
294
59d8a539
CB
295 for (int i = 0; i < cgroup_ops->criu_num_hierarchies(cgroup_ops); i++) {
296 __do_free char *cgroup_base_path = NULL, *controllers;
297 char **controllers_list = NULL;
298 char *tmp;
0ab5703f 299
59d8a539
CB
300 if (!cgroup_ops->criu_get_hierarchies(cgroup_ops, i, &controllers_list))
301 return log_error_errno(-ENOENT, ENOENT, "Failed to retrieve cgroup hierarchies %d", i);
0ab5703f 302
59d8a539
CB
303 /*
304 * If we are in a dump, we have to ask the monitor process what
0ab5703f
TA
305 * the right cgroup is. if this is a restore, we can just use
306 * the handler the restore task created.
307 */
066af2cb 308 if (strequal(opts->action, "dump") || strequal(opts->action, "pre-dump")) {
a9b642ee 309 cgroup_base_path = lxc_cmd_get_limit_cgroup_path(opts->c->name, opts->c->config_path, controllers_list[0]);
59d8a539 310 if (!cgroup_base_path)
a9b642ee 311 return log_error_errno(-ENOENT, ENOENT, "Failed to retrieve limit cgroup path for %s", controllers_list[0] ?: "(null)");
0ab5703f
TA
312 } else {
313 const char *p;
314
a9b642ee 315 p = cgroup_ops->get_limit_cgroup(cgroup_ops, controllers_list[0]);
59d8a539 316 if (!p)
a9b642ee 317 return log_error_errno(-ENOENT, ENOENT, "Failed to retrieve limit cgroup path for %s", controllers_list[0] ?: "(null)");
0ab5703f 318
59d8a539
CB
319 cgroup_base_path = strdup(p);
320 if (!cgroup_base_path)
a9b642ee 321 return log_error_errno(-ENOMEM, ENOMEM, "Failed to duplicate limit cgroup path");
0ab5703f
TA
322 }
323
539c3977 324 tmp = lxc_path_simplify(cgroup_base_path);
59d8a539 325 if (!tmp)
bdb8aeda 326 return log_error_errno(-ENOMEM, ENOMEM, "Failed to remove extraneous slashes from \"%s\"", cgroup_base_path);
59d8a539 327 free_move_ptr(cgroup_base_path, tmp);
0ab5703f 328
92fde26d
CB
329 if (controllers_list[0]) {
330 controllers = lxc_string_join(",", (const char **)controllers_list, false);
331 if (!controllers)
332 return log_error_errno(-ENOMEM, ENOMEM, "Failed to join controllers");
59d8a539 333
92fde26d
CB
334 ret = sprintf(buf, "%s:%s", controllers, cgroup_base_path);
335 } else {
336 WARN("No cgroup controllers configured in container's cgroup %s", cgroup_base_path);
337 ret = sprintf(buf, "%s", cgroup_base_path);
338 }
402770b6 339 if (ret < 0 || (size_t)ret >= sizeof(buf))
59d8a539 340 return log_error_errno(-EIO, EIO, "sprintf of cgroup root arg failed");
0ab5703f
TA
341
342 DECLARE_ARG("--cgroup-root");
343 DECLARE_ARG(buf);
344 }
345
b2c3710f 346 if (opts->user->verbose)
582cb478 347 DECLARE_ARG("-v4");
e29fe1dd 348
b9ee6643
TA
349 if (opts->user->action_script) {
350 DECLARE_ARG("--action-script");
351 DECLARE_ARG(opts->user->action_script);
352 }
353
be0bc4d1 354 f_mnt = make_anonymous_mount_file(&opts->c->lxc_conf->mount_entries,
1800f924 355 opts->c->lxc_conf->lsm_aa_allow_nesting);
6eff3c0d 356 if (!f_mnt)
59d8a539 357 return log_error_errno(-ENOENT, ENOENT, "Failed to create anonymous mount file");
5f4e44a2 358
6eff3c0d 359 while (getmntent_r(f_mnt, &mntent, buf, sizeof(buf))) {
5257b91b 360 __do_free char *mnt_options = NULL;
a08bfbe3 361 unsigned long flags = 0;
5f4e44a2 362 char arg[2 * PATH_MAX + 2];
19d2422b 363
d94eb390 364 if (parse_mntopts_legacy(mntent.mnt_opts, &flags, &mnt_options) < 0)
59d8a539 365 return log_error_errno(-EINVAL, EINVAL, "Failed to parse mount options");
19d2422b 366
19d2422b
TA
367 /* only add --ext-mount-map for actual bind mounts */
368 if (!(flags & MS_BIND))
369 continue;
5f4e44a2 370
066af2cb 371 if (strequal(opts->action, "dump"))
8eaa5ae3 372 ret = strnprintf(arg, sizeof(arg), "/%s:%s", mntent.mnt_dir, mntent.mnt_dir);
d07545c7 373 else
8eaa5ae3
CB
374 ret = strnprintf(arg, sizeof(arg), "%s:%s", mntent.mnt_dir, mntent.mnt_fsname);
375 if (ret < 0)
59d8a539 376 return log_error_errno(-EIO, EIO, "Failed to create mount entry");
5f4e44a2
TA
377
378 DECLARE_ARG("--ext-mount-map");
379 DECLARE_ARG(arg);
380 }
5f4e44a2 381
066af2cb 382 if (strequal(opts->action, "dump") || strequal(opts->action, "pre-dump")) {
2539492c
CB
383 pid_t init_pid;
384 char init_pid_str[INTTYPE_TO_STRLEN(int)];
385 char *freezer_relative;
e29fe1dd 386
2539492c
CB
387 init_pid = opts->c->init_pid(opts->c);
388 if (init_pid < 0)
389 return log_error_errno(-ESRCH, ESRCH, "Failed to retrieve init pid of container");
390
8eaa5ae3
CB
391 ret = strnprintf(init_pid_str, sizeof(init_pid_str), "%d", init_pid);
392 if (ret < 0)
2539492c 393 return log_error_errno(-EIO, EIO, "Failed to create entry for init pid of container");
e29fe1dd
TA
394
395 DECLARE_ARG("-t");
2539492c 396 DECLARE_ARG(init_pid_str);
dc259399 397
a9b642ee
CB
398 freezer_relative = lxc_cmd_get_limit_cgroup_path(opts->c->name,
399 opts->c->config_path,
400 "freezer");
59d8a539
CB
401 if (!freezer_relative)
402 return log_error_errno(-ENOENT, ENOENT, "Failed getting freezer path");
dc259399 403
928b065d 404 if (pure_unified_layout(cgroup_ops))
8eaa5ae3 405 ret = strnprintf(log, sizeof(log), "/sys/fs/cgroup/%s", freezer_relative);
928b065d 406 else
8eaa5ae3
CB
407 ret = strnprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative);
408 if (ret < 0)
59d8a539 409 return log_error_errno(-EIO, EIO, "Failed to freezer cgroup entry");
dc259399 410
f1954503 411 if (!opts->user->disable_skip_in_flight &&
066af2cb 412 strcmp(opts->criu_version, CRIU_IN_FLIGHT_SUPPORT) >= 0)
f1954503
AR
413 DECLARE_ARG("--skip-in-flight");
414
dc259399
TA
415 DECLARE_ARG("--freeze-cgroup");
416 DECLARE_ARG(log);
417
4b54788e 418 if (opts->tty_id[0]) {
36d2096c
TA
419 DECLARE_ARG("--ext-mount-map");
420 DECLARE_ARG("/dev/console:console");
421
4b54788e
TA
422 DECLARE_ARG("--external");
423 DECLARE_ARG(opts->tty_id);
424 }
425
b2c3710f 426 if (opts->user->predump_dir) {
aef3d51e 427 DECLARE_ARG("--prev-images-dir");
b2c3710f 428 DECLARE_ARG(opts->user->predump_dir);
9f99a33f 429 DECLARE_ARG("--track-mem");
74eb576c 430 }
4c0c0319 431
b2c3710f 432 if (opts->user->pageserver_address && opts->user->pageserver_port) {
74eb576c
NE
433 DECLARE_ARG("--page-server");
434 DECLARE_ARG("--address");
b2c3710f 435 DECLARE_ARG(opts->user->pageserver_address);
74eb576c 436 DECLARE_ARG("--port");
b2c3710f 437 DECLARE_ARG(opts->user->pageserver_port);
74eb576c 438 }
aef3d51e 439
19d1509c
TA
440 if (!opts->user->preserves_inodes)
441 DECLARE_ARG("--force-irmap");
442
b2b7b0d2
TA
443 if (opts->user->ghost_limit) {
444 char ghost_limit[32];
445
9b945f13 446 ret = sprintf(ghost_limit, "%"PRIu64, opts->user->ghost_limit);
402770b6 447 if (ret < 0 || (size_t)ret >= sizeof(ghost_limit))
59d8a539 448 return log_error_errno(-EIO, EIO, "Failed to print ghost limit %"PRIu64, opts->user->ghost_limit);
b2b7b0d2
TA
449
450 DECLARE_ARG("--ghost-limit");
451 DECLARE_ARG(ghost_limit);
452 }
453
aef3d51e 454 /* only for final dump */
066af2cb 455 if (strequal(opts->action, "dump") && !opts->user->stop)
e29fe1dd 456 DECLARE_ARG("--leave-running");
066af2cb 457 } else if (strequal(opts->action, "restore")) {
13389b29 458 struct lxc_conf *lxc_conf = opts->c->lxc_conf;
e29fe1dd
TA
459
460 DECLARE_ARG("--root");
461 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
462 DECLARE_ARG("--restore-detached");
463 DECLARE_ARG("--restore-sibling");
e29fe1dd 464
0e4be3cf 465 if (ttys[0]) {
59d8a539
CB
466 if (opts->console_fd < 0)
467 return log_error_errno(-EINVAL, EINVAL, "lxc.console.path configured on source host but not target");
97e4f1a9 468
8eaa5ae3
CB
469 ret = strnprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, ttys);
470 if (ret < 0)
59d8a539 471 return log_error_errno(-EIO, EIO, "Failed to create console entry");
4b54788e
TA
472
473 DECLARE_ARG("--inherit-fd");
474 DECLARE_ARG(buf);
475 }
476 if (opts->console_name) {
8eaa5ae3 477 if (strnprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0)
59d8a539
CB
478 return log_error_errno(-EIO, EIO, "Failed to create console entry");
479
4b54788e
TA
480 DECLARE_ARG("--ext-mount-map");
481 DECLARE_ARG(buf);
482 }
483
13389b29
TA
484 if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) {
485
486 if (lxc_conf->lsm_aa_profile)
8eaa5ae3 487 ret = strnprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile);
13389b29 488 else
8eaa5ae3
CB
489 ret = strnprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context);
490 if (ret < 0)
59d8a539 491 return log_error_errno(-EIO, EIO, "Failed to create lsm entry");
13389b29
TA
492
493 DECLARE_ARG("--lsm-profile");
494 DECLARE_ARG(buf);
495 }
496
87d0990c 497 list_for_each_entry(netdev, &opts->c->lxc_conf->netdevs, head) {
9de31d5a 498 size_t retlen;
e29fe1dd 499 char eth[128], *veth;
46c8ffd5
AR
500 bool external_not_veth;
501
74ad3607 502 if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
46c8ffd5
AR
503 /* Since criu version 2.8 the usage of --veth-pair
504 * has been deprecated:
505 * git tag --contains f2037e6d3445fc400
506 * v2.8 */
507 external_not_veth = true;
508 } else {
509 external_not_veth = false;
510 }
e29fe1dd 511
87d0990c
CB
512 if (netdev->name[0] != '\0') {
513 retlen = strlcpy(eth, netdev->name, sizeof(eth));
9de31d5a 514 if (retlen >= sizeof(eth))
59d8a539 515 return log_error_errno(-E2BIG, E2BIG, "Failed to append veth device name");
796a109d 516 } else {
8eaa5ae3
CB
517 ret = strnprintf(eth, sizeof(eth), "eth%d", netnr);
518 if (ret < 0)
59d8a539 519 return log_error_errno(-E2BIG, E2BIG, "Failed to append veth device name");
796a109d 520 }
e29fe1dd 521
87d0990c 522 switch (netdev->type) {
e2697330 523 case LXC_NET_VETH:
87d0990c 524 veth = netdev->priv.veth_attr.pair;
ea7f6b29 525 if (veth[0] == '\0')
87d0990c 526 veth = netdev->priv.veth_attr.veth1;
e29fe1dd 527
87d0990c 528 if (netdev->link[0] != '\0') {
46c8ffd5 529 if (external_not_veth)
87d0990c 530 ret = strnprintf(buf, sizeof(buf), "veth[%s]:%s@%s", eth, veth, netdev->link);
46c8ffd5 531 else
87d0990c 532 ret = strnprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, netdev->link);
46c8ffd5
AR
533 } else {
534 if (external_not_veth)
8eaa5ae3 535 ret = strnprintf(buf, sizeof(buf), "veth[%s]:%s", eth, veth);
46c8ffd5 536 else
8eaa5ae3 537 ret = strnprintf(buf, sizeof(buf), "%s=%s", eth, veth);
46c8ffd5 538 }
8eaa5ae3 539 if (ret < 0)
59d8a539 540 return log_error_errno(-EIO, EIO, "Failed to append veth device name");
2541dfab
CB
541
542 TRACE("Added veth device entry %s", buf);
e2697330
TA
543 break;
544 case LXC_NET_MACVLAN:
87d0990c
CB
545 if (netdev->link[0] == '\0')
546 return log_error_errno(-EINVAL, EINVAL, "Failed to find host interface for macvlan %s", netdev->name);
e2697330 547
87d0990c 548 ret = strnprintf(buf, sizeof(buf), "macvlan[%s]:%s", eth, netdev->link);
8eaa5ae3 549 if (ret < 0)
59d8a539 550 return log_error_errno(-EIO, EIO, "Failed to add macvlan entry");
2541dfab
CB
551
552 TRACE("Added macvlan device entry %s", buf);
553
0109a13d
CB
554 break;
555 case LXC_NET_PHYS:
556 if (cmp_version(opts->criu_version, CRIU_EXTERNAL_NETDEV) < 0)
557 return syserror_set(-EOPNOTSUPP, "Restoring physical network devices not supported");
558
559 if (is_empty_string(netdev->link))
560 return syserror_set(-EINVAL, "Specifying link is required");
561
562 ret = strnprintf(buf, sizeof(buf), "netdev[%s]:%s", eth, netdev->link);
563 if (ret < 0)
564 return syserror_set(-EIO, "Failed to append phys device name");
565
566 TRACE("Added phys device entry %s", buf);
e2697330
TA
567 break;
568 case LXC_NET_NONE:
0109a13d 569 __fallthrough;
e2697330
TA
570 case LXC_NET_EMPTY:
571 break;
572 default:
573 /* we have screened for this earlier... */
87d0990c 574 return log_error_errno(-EINVAL, EINVAL, "Unsupported network type %d", netdev->type);
e2697330 575 }
e29fe1dd 576
46c8ffd5
AR
577 if (external_not_veth)
578 DECLARE_ARG("--external");
579 else
580 DECLARE_ARG("--veth-pair");
e29fe1dd 581 DECLARE_ARG(buf);
2f3fbc6b 582 netnr++;
e29fe1dd
TA
583 }
584
585 }
586
59d8a539 587 args->argv[args->argc] = NULL;
e29fe1dd 588
2541dfab
CB
589 if (lxc_log_trace()) {
590 buf[0] = 0;
591 for (int i = 0, pos = 0; i < args->argc && args->argv[i]; i++) {
8eaa5ae3
CB
592 ret = strnprintf(buf + pos, sizeof(buf) - pos, "%s ", args->argv[i]);
593 if (ret < 0)
2541dfab
CB
594 return log_error_errno(-EIO, EIO, "Failed to reorder entries");
595 else
596 pos += ret;
597 }
72a30576 598
2541dfab 599 TRACE("Using command line %s", buf);
cf4b07a5
TA
600 }
601
5af85cb1
TA
602 /* before criu inits its log, it sometimes prints things to stdout/err;
603 * let's be sure we capture that.
604 */
59d8a539
CB
605 if (dup2(opts->pipefd, STDOUT_FILENO) < 0)
606 return log_error_errno(-errno, errno, "Failed to duplicate stdout");
5af85cb1 607
59d8a539
CB
608 if (dup2(opts->pipefd, STDERR_FILENO) < 0)
609 return log_error_errno(-errno, errno, "Failed to duplicate stderr");
5af85cb1
TA
610
611 close(opts->pipefd);
612
e29fe1dd 613#undef DECLARE_ARG
59d8a539
CB
614 execv(args->argv[0], args->argv);
615 return -ENOEXEC;
e29fe1dd
TA
616}
617
b5b12b9e
AR
618/*
619 * Function to check if the checks activated in 'features_to_check' are
620 * available with the current architecture/kernel/criu combination.
621 *
622 * Parameter features_to_check is a bit mask of all features that should be
623 * checked (see feature check defines in lxc/lxccontainer.h).
624 *
625 * If the return value is true, all requested features are supported. If
626 * the return value is false the features_to_check parameter is updated
627 * to reflect which features are available. '0' means no feature but
628 * also that something went totally wrong.
629 *
630 * Some of the code flow of criu_version_ok() is duplicated and maybe it
631 * is a good candidate for refactoring.
632 */
633bool __criu_check_feature(uint64_t *features_to_check)
634{
635 pid_t pid;
636 uint64_t current_bit = 0;
637 int ret;
fca23691 638 uint64_t features = *features_to_check;
b5b12b9e
AR
639 /* Feature checking is currently always like
640 * criu check --feature <feature-name>
641 */
642 char *args[] = { "criu", "check", "--feature", NULL, NULL };
643
644 if ((features & ~FEATURE_MEM_TRACK & ~FEATURE_LAZY_PAGES) != 0) {
645 /* There are feature bits activated we do not understand.
646 * Refusing to answer at all */
647 *features_to_check = 0;
648 return false;
649 }
650
6d61f17d 651 while (current_bit < (sizeof(uint64_t) * 8 - 1)) {
b5b12b9e
AR
652 /* only test requested features */
653 if (!(features & (1ULL << current_bit))) {
654 /* skip this */
655 current_bit++;
656 continue;
657 }
658
659 pid = fork();
660 if (pid < 0) {
661 SYSERROR("fork() failed");
662 *features_to_check = 0;
663 return false;
664 }
665
666 if (pid == 0) {
667 if ((1ULL << current_bit) == FEATURE_MEM_TRACK)
668 /* This is needed for pre-dump support, which
669 * enables pre-copy migration. */
670 args[3] = "mem_dirty_track";
671 else if ((1ULL << current_bit) == FEATURE_LAZY_PAGES)
672 /* CRIU has two checks for userfaultfd support.
673 *
674 * The simpler check is only for 'uffd'. If the
675 * kernel supports userfaultfd without noncoop
676 * then only process can be lazily restored
677 * which do not fork. With 'uffd-noncoop'
678 * it is also possible to lazily restore processes
679 * which do fork. For a container runtime like
680 * LXC checking only for 'uffd' makes not much sense. */
681 args[3] = "uffd-noncoop";
682 else
4f43526d 683 _exit(EXIT_FAILURE);
b5b12b9e
AR
684
685 null_stdfds();
686
687 execvp("criu", args);
688 SYSERROR("Failed to exec \"criu\"");
4f43526d 689 _exit(EXIT_FAILURE);
b5b12b9e
AR
690 }
691
692 ret = wait_for_pid(pid);
693
694 if (ret == -1) {
695 /* It is not known why CRIU failed. Either
696 * CRIU is not available, the feature check
697 * does not exist or the feature is not
698 * supported. */
699 INFO("feature not supported");
700 /* Clear not supported feature bit */
701 features &= ~(1ULL << current_bit);
702 }
703
704 current_bit++;
705 /* no more checks requested; exit check loop */
706 if (!(features & ~((1ULL << current_bit)-1)))
707 break;
708 }
709 if (features != *features_to_check) {
710 *features_to_check = features;
711 return false;
712 }
713 return true;
714}
715
8ba5ced7
TA
716/*
717 * Check to see if the criu version is recent enough for all the features we
718 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
719 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
720 * things potentially before a version is released with a particular feature.
721 *
722 * The intent is that when criu development slows down, we can drop this, but
723 * for now we shouldn't attempt to c/r with versions that we know won't work.
5407e2ab
CB
724 *
725 * Note: If version != NULL criu_version() stores the detected criu version in
726 * version. Allocates memory for version which must be freed by caller.
8ba5ced7 727 */
5407e2ab 728static bool criu_version_ok(char **version)
8ba5ced7
TA
729{
730 int pipes[2];
731 pid_t pid;
732
733 if (pipe(pipes) < 0) {
734 SYSERROR("pipe() failed");
735 return false;
736 }
737
738 pid = fork();
739 if (pid < 0) {
740 SYSERROR("fork() failed");
741 return false;
742 }
743
744 if (pid == 0) {
745 char *args[] = { "criu", "--version", NULL };
755fa453 746 char *path;
8ba5ced7
TA
747 close(pipes[0]);
748
749 close(STDERR_FILENO);
750 if (dup2(pipes[1], STDOUT_FILENO) < 0)
665bb114 751 _exit(EXIT_FAILURE);
8ba5ced7 752
755fa453 753 path = on_path("criu", NULL);
d9b32b09 754 if (!path)
665bb114 755 _exit(EXIT_FAILURE);
d9b32b09 756
755fa453 757 execv(path, args);
665bb114 758 _exit(EXIT_FAILURE);
8ba5ced7
TA
759 } else {
760 FILE *f;
5407e2ab 761 char *tmp;
8ba5ced7
TA
762 int patch;
763
764 close(pipes[1]);
765 if (wait_for_pid(pid) < 0) {
766 close(pipes[0]);
4eae4051 767 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
768 return false;
769 }
770
4110345b 771 f = fdopen(pipes[0], "re");
8ba5ced7
TA
772 if (!f) {
773 close(pipes[0]);
774 return false;
775 }
776
5407e2ab
CB
777 tmp = malloc(1024);
778 if (!tmp) {
779 fclose(f);
780 return false;
781 }
782
783 if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1)
8ba5ced7
TA
784 goto version_error;
785
786 if (fgetc(f) != '\n')
787 goto version_error;
788
5407e2ab 789 if (strcmp(tmp, CRIU_VERSION) >= 0)
8ba5ced7
TA
790 goto version_match;
791
5407e2ab 792 if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1)
8ba5ced7
TA
793 goto version_error;
794
795 if (fgetc(f) != '-')
796 goto version_error;
797
798 if (fscanf(f, "%d", &patch) != 1)
799 goto version_error;
800
5407e2ab 801 if (strcmp(tmp, CRIU_GITID_VERSION) < 0)
8ba5ced7
TA
802 goto version_error;
803
804 if (patch < CRIU_GITID_PATCHLEVEL)
805 goto version_error;
806
807version_match:
3158ab5b 808 fclose(f);
5407e2ab
CB
809 if (!version)
810 free(tmp);
811 else
812 *version = tmp;
8ba5ced7
TA
813 return true;
814
815version_error:
3158ab5b 816 fclose(f);
5407e2ab 817 free(tmp);
9f1f54b0 818 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore");
8ba5ced7
TA
819 return false;
820 }
821}
822
e29fe1dd
TA
823/* Check and make sure the container has a configuration that we know CRIU can
824 * dump. */
f1954503 825static bool criu_ok(struct lxc_container *c, char **criu_version)
e29fe1dd 826{
87d0990c 827 struct lxc_netdev *netdev;
e29fe1dd
TA
828
829 if (geteuid()) {
9f1f54b0 830 ERROR("Must be root to checkpoint");
e29fe1dd
TA
831 return false;
832 }
833
7177e6b1
DJ
834 if (!criu_version_ok(criu_version))
835 return false;
836
e29fe1dd 837 /* We only know how to restore containers with veth networks. */
87d0990c
CB
838 list_for_each_entry(netdev, &c->lxc_conf->netdevs, head) {
839 switch(netdev->type) {
65b20221
TA
840 case LXC_NET_VETH:
841 case LXC_NET_NONE:
842 case LXC_NET_EMPTY:
0109a13d 843 case LXC_NET_PHYS:
e2697330 844 case LXC_NET_MACVLAN:
65b20221
TA
845 break;
846 default:
87d0990c 847 ERROR("Found un-dumpable network: %s (%s)", lxc_net_type_to_str(netdev->type), netdev->name);
7177e6b1
DJ
848 if (criu_version) {
849 free(*criu_version);
850 *criu_version = NULL;
851 }
e29fe1dd
TA
852 return false;
853 }
854 }
855
e29fe1dd
TA
856 return true;
857}
858
e29fe1dd
TA
859static bool restore_net_info(struct lxc_container *c)
860{
7eab8fc6 861 int ret;
e29fe1dd 862 bool has_error = true;
87d0990c 863 struct lxc_netdev *netdev;
e29fe1dd
TA
864
865 if (container_mem_lock(c))
866 return false;
867
87d0990c 868 list_for_each_entry(netdev, &c->lxc_conf->netdevs, head) {
e29fe1dd 869 char template[IFNAMSIZ];
65b20221
TA
870
871 if (netdev->type != LXC_NET_VETH)
872 continue;
873
8eaa5ae3
CB
874 ret = strnprintf(template, sizeof(template), "vethXXXXXX");
875 if (ret < 0)
7eab8fc6 876 goto out_unlock;
e29fe1dd 877
de4855a8
CB
878 if (netdev->priv.veth_attr.pair[0] == '\0' &&
879 netdev->priv.veth_attr.veth1[0] == '\0') {
3646ffd9 880 if (!lxc_ifname_alnum_case_sensitive(template))
de4855a8
CB
881 goto out_unlock;
882
cbb9c7c7 883 (void)strlcpy(netdev->priv.veth_attr.veth1, template, IFNAMSIZ);
de4855a8 884 }
e29fe1dd
TA
885 }
886
887 has_error = false;
888
889out_unlock:
890 container_mem_unlock(c);
891 return !has_error;
892}
893
1a0e70ac 894/* do_restore never returns, the calling process is used as the monitor process.
5a24adb8 895 * do_restore calls _exit() if it fails.
1a0e70ac 896 */
c33b0338 897static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts, char *criu_version)
e29fe1dd 898{
5af9369b 899 int fd, ret;
e29fe1dd 900 pid_t pid;
e29fe1dd 901 struct lxc_handler *handler;
113ebd57 902 int status = 0;
9b1e2e6e 903 int pipes[2] = {-1, -1};
d9fc9be8 904 struct cgroup_ops *cgroup_ops;
e29fe1dd 905
a7fb6043 906 /* Try to detach from the current controlling tty if it exists.
69e3b3be 907 * Otherwise, lxc_init (via lxc_console) will attach the container's
a7fb6043
TA
908 * console output to the current tty, which is probably not what any
909 * library user wants, and if they do, they can just manually configure
910 * it :)
911 */
912 fd = open("/dev/tty", O_RDWR);
913 if (fd >= 0) {
914 if (ioctl(fd, TIOCNOTTY, NULL) < 0)
915 SYSERROR("couldn't detach from tty");
916 close(fd);
917 }
918
a42abcce 919 handler = lxc_init_handler(NULL, c->name, c->lxc_conf, c->config_path, false);
e29fe1dd
TA
920 if (!handler)
921 goto out;
922
aa460476
CB
923 if (lxc_init(c->name, handler) < 0)
924 goto out;
d9fc9be8 925 cgroup_ops = handler->cgroup_ops;
aa460476 926
d9fc9be8
CB
927 if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
928 ERROR("Failed to create monitor cgroup");
929 goto out_fini_handler;
930 }
931
932 if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
933 ERROR("Failed to enter monitor cgroup");
934 goto out_fini_handler;
935 }
936
937 if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
938 ERROR("Failed to delegate controllers to monitor cgroup");
939 goto out_fini_handler;
940 }
941
942 if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
943 ERROR("Failed creating cgroups");
e29fe1dd
TA
944 goto out_fini_handler;
945 }
946
947 if (!restore_net_info(c)) {
948 ERROR("failed restoring network info");
949 goto out_fini_handler;
950 }
951
5af9369b
CB
952 ret = resolve_clone_flags(handler);
953 if (ret < 0) {
6d1400b5 954 SYSERROR("Unsupported clone flag specified");
5af9369b
CB
955 goto out_fini_handler;
956 }
e29fe1dd 957
de31cb57 958 if (pipe2(pipes, O_CLOEXEC) < 0) {
3d9a5c85
TA
959 SYSERROR("pipe() failed");
960 goto out_fini_handler;
961 }
962
e29fe1dd
TA
963 pid = fork();
964 if (pid < 0)
965 goto out_fini_handler;
966
967 if (pid == 0) {
968 struct criu_opts os;
969 struct lxc_rootfs *rootfs;
4b54788e 970 int flags;
e29fe1dd 971
3d9a5c85
TA
972 close(status_pipe);
973 status_pipe = -1;
974
975 close(pipes[0]);
976 pipes[0] = -1;
e29fe1dd
TA
977
978 if (unshare(CLONE_NEWNS))
979 goto out_fini_handler;
980
4e86cad3
CB
981 ret = lxc_storage_prepare(c->lxc_conf);
982 if (ret)
983 goto out_fini_handler;
984
e29fe1dd
TA
985 /* CRIU needs the lxc root bind mounted so that it is the root of some
986 * mount. */
987 rootfs = &c->lxc_conf->rootfs;
988
989 if (rootfs_is_blockdev(c->lxc_conf)) {
8ce1abc2
CB
990 if (lxc_setup_rootfs_prepare_root(c->lxc_conf, c->name,
991 c->config_path) < 0)
e29fe1dd
TA
992 goto out_fini_handler;
993 } else {
994 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
995 goto out_fini_handler;
996
997 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
998 SYSERROR("remount / to private failed");
999 goto out_fini_handler;
1000 }
1001
1002 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
f075e955 1003 (void)rmdir(rootfs->mount);
e29fe1dd
TA
1004 goto out_fini_handler;
1005 }
1006 }
1007
5af85cb1 1008 os.pipefd = pipes[1];
e29fe1dd 1009 os.action = "restore";
b2c3710f 1010 os.user = opts;
e29fe1dd 1011 os.c = c;
41808e20 1012 os.console_fd = c->lxc_conf->console.pty;
f1954503 1013 os.criu_version = criu_version;
0ab5703f 1014 os.handler = handler;
4b54788e 1015
97e4f1a9
TA
1016 if (os.console_fd >= 0) {
1017 /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
1018 * via --inherit-fd, so we don't want it to close.
1019 */
1020 flags = fcntl(os.console_fd, F_GETFD);
1021 if (flags < 0) {
1022 SYSERROR("F_GETFD failed: %d", os.console_fd);
1023 goto out_fini_handler;
1024 }
4b54788e 1025
97e4f1a9 1026 flags &= ~FD_CLOEXEC;
4b54788e 1027
97e4f1a9
TA
1028 if (fcntl(os.console_fd, F_SETFD, flags) < 0) {
1029 SYSERROR("F_SETFD failed");
1030 goto out_fini_handler;
1031 }
4b54788e
TA
1032 }
1033 os.console_name = c->lxc_conf->console.name;
e29fe1dd
TA
1034
1035 /* exec_criu() returning is an error */
b41a8023 1036 ret = exec_criu(handler->cgroup_ops, c->lxc_conf, &os);
59d8a539
CB
1037 if (ret)
1038 SYSERROR("Failed to execute criu");
e29fe1dd 1039 umount(rootfs->mount);
f075e955 1040 (void)rmdir(rootfs->mount);
e29fe1dd
TA
1041 goto out_fini_handler;
1042 } else {
e29fe1dd
TA
1043 char title[2048];
1044
3d9a5c85
TA
1045 close(pipes[1]);
1046 pipes[1] = -1;
1047
e29fe1dd
TA
1048 pid_t w = waitpid(pid, &status, 0);
1049 if (w == -1) {
1050 SYSERROR("waitpid");
1051 goto out_fini_handler;
1052 }
1053
e29fe1dd 1054 if (WIFEXITED(status)) {
75d219f0
TA
1055 char buf[4096];
1056
e29fe1dd 1057 if (WEXITSTATUS(status)) {
3d9a5c85
TA
1058 int n;
1059
668ba602 1060 n = lxc_read_nointr(pipes[0], buf, sizeof(buf));
3d9a5c85
TA
1061 if (n < 0) {
1062 SYSERROR("failed reading from criu stderr");
1063 goto out_fini_handler;
1064 }
1065
2735dfae
TA
1066 if (n == sizeof(buf))
1067 n--;
3d9a5c85
TA
1068 buf[n] = 0;
1069
9f1f54b0 1070 ERROR("criu process exited %d, output:\n%s", WEXITSTATUS(status), buf);
e29fe1dd
TA
1071 goto out_fini_handler;
1072 } else {
8eaa5ae3
CB
1073 ret = strnprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
1074 if (ret < 0) {
1075 ERROR("strnprintf'd too many characters: %d", ret);
75d219f0
TA
1076 goto out_fini_handler;
1077 }
1078
4110345b 1079 FILE *f = fopen(buf, "re");
e29fe1dd 1080 if (!f) {
9f1f54b0 1081 SYSERROR("couldn't read restore's children file %s", buf);
e29fe1dd
TA
1082 goto out_fini_handler;
1083 }
1084
1085 ret = fscanf(f, "%d", (int*) &handler->pid);
1086 fclose(f);
1087 if (ret != 1) {
1088 ERROR("reading restore pid failed");
1089 goto out_fini_handler;
1090 }
1091
f8a41688
TA
1092 if (lxc_set_state(c->name, handler, RUNNING)) {
1093 ERROR("error setting running state after restore");
e29fe1dd 1094 goto out_fini_handler;
f8a41688 1095 }
e29fe1dd
TA
1096 }
1097 } else {
9f1f54b0 1098 ERROR("CRIU was killed with signal %d", WTERMSIG(status));
e29fe1dd
TA
1099 goto out_fini_handler;
1100 }
1101
3d9a5c85
TA
1102 close(pipes[0]);
1103
614be9bc 1104 ret = lxc_write_nointr(status_pipe, &status, sizeof(status));
f3886023
TA
1105 close(status_pipe);
1106 status_pipe = -1;
1107
1108 if (sizeof(status) != ret) {
1109 SYSERROR("failed to write all of status");
1110 goto out_fini_handler;
1111 }
1112
e29fe1dd
TA
1113 /*
1114 * See comment in lxcapi_start; we don't care if these
1115 * fail because it's just a beauty thing. We just
1116 * assign the return here to silence potential.
1117 */
8eaa5ae3
CB
1118 ret = strnprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
1119 if (ret < 0)
223e30c1
CB
1120 INFO("Setting truncated process name");
1121
e29fe1dd 1122 ret = setproctitle(title);
223e30c1
CB
1123 if (ret < 0)
1124 INFO("Failed to set process name");
e29fe1dd
TA
1125
1126 ret = lxc_poll(c->name, handler);
1127 if (ret)
0c5859ff 1128 lxc_abort(handler);
fd5be714 1129 lxc_end(handler);
5a24adb8 1130 _exit(ret);
e29fe1dd
TA
1131 }
1132
1133out_fini_handler:
3d9a5c85
TA
1134 if (pipes[0] >= 0)
1135 close(pipes[0]);
1136 if (pipes[1] >= 0)
1137 close(pipes[1]);
1138
fd5be714 1139 lxc_end(handler);
e29fe1dd
TA
1140
1141out:
3d9a5c85 1142 if (status_pipe >= 0) {
f3886023
TA
1143 /* ensure getting here was a failure, e.g. if we failed to
1144 * parse the child pid or something, even after a successful
1145 * restore
1146 */
1147 if (!status)
1148 status = 1;
113ebd57 1149
614be9bc 1150 if (lxc_write_nointr(status_pipe, &status, sizeof(status)) != sizeof(status))
e29fe1dd 1151 SYSERROR("writing status failed");
3d9a5c85 1152 close(status_pipe);
e29fe1dd
TA
1153 }
1154
5a24adb8 1155 _exit(EXIT_FAILURE);
e29fe1dd 1156}
aef3d51e 1157
4b54788e
TA
1158static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len)
1159{
1160 FILE *f;
1161 char path[PATH_MAX];
1162 int ret;
1163 struct stat sb;
1164
066af2cb 1165 if (c->lxc_conf->console.path && strequal(c->lxc_conf->console.path, "none")) {
4b54788e
TA
1166 tty_id[0] = 0;
1167 return 0;
1168 }
1169
8eaa5ae3
CB
1170 ret = strnprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c));
1171 if (ret < 0) {
1172 ERROR("strnprintf'd too many characters: %d", ret);
4b54788e
TA
1173 return -1;
1174 }
1175
1176 ret = stat(path, &sb);
1177 if (ret < 0) {
1178 SYSERROR("stat of %s failed", path);
1179 return -1;
1180 }
1181
8eaa5ae3
CB
1182 ret = strnprintf(path, sizeof(path), "%s/tty.info", directory);
1183 if (ret < 0) {
1184 ERROR("strnprintf'd too many characters: %d", ret);
4b54788e
TA
1185 return -1;
1186 }
1187
8eaa5ae3 1188 ret = strnprintf(tty_id, len, "tty[%llx:%llx]",
f03280a7
TA
1189 (long long unsigned) sb.st_rdev,
1190 (long long unsigned) sb.st_dev);
8eaa5ae3
CB
1191 if (ret < 0) {
1192 ERROR("strnprintf'd too many characters: %d", ret);
4b54788e
TA
1193 return -1;
1194 }
1195
4110345b 1196 f = fopen(path, "we");
4b54788e
TA
1197 if (!f) {
1198 SYSERROR("failed to open %s", path);
1199 return -1;
1200 }
1201
1202 ret = fprintf(f, "%s", tty_id);
1203 fclose(f);
1204 if (ret < 0)
1205 SYSERROR("failed to write to %s", path);
1206 return ret;
1207}
1208
aef3d51e 1209/* do one of either predump or a regular dump */
b2c3710f 1210static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts)
aef3d51e 1211{
0e4adc1a 1212 int ret;
aef3d51e 1213 pid_t pid;
5af85cb1 1214 int criuout[2];
0e4adc1a 1215 char *criu_version = NULL;
aef3d51e 1216
f1954503 1217 if (!criu_ok(c, &criu_version))
aef3d51e
TA
1218 return false;
1219
0e4adc1a
CB
1220 ret = pipe(criuout);
1221 if (ret < 0) {
5af85cb1 1222 SYSERROR("pipe() failed");
7177e6b1 1223 free(criu_version);
aef3d51e 1224 return false;
5af85cb1
TA
1225 }
1226
539c3977 1227 if (lxc_mkdir_p(opts->directory, 0700) < 0)
5af85cb1 1228 goto fail;
aef3d51e
TA
1229
1230 pid = fork();
1231 if (pid < 0) {
1232 SYSERROR("fork failed");
5af85cb1 1233 goto fail;
aef3d51e
TA
1234 }
1235
1236 if (pid == 0) {
1237 struct criu_opts os;
2202afc9 1238 struct cgroup_ops *cgroup_ops;
0ab5703f 1239
5af85cb1
TA
1240 close(criuout[0]);
1241
5a087e05 1242 cgroup_ops = cgroup_init(c->lxc_conf);
2202afc9 1243 if (!cgroup_ops) {
0ab5703f 1244 ERROR("failed to cgroup_init()");
7211378b 1245 _exit(EXIT_FAILURE);
0ab5703f 1246 }
aef3d51e 1247
5af85cb1 1248 os.pipefd = criuout[1];
aef3d51e 1249 os.action = mode;
b2c3710f 1250 os.user = opts;
aef3d51e 1251 os.c = c;
4b54788e 1252 os.console_name = c->lxc_conf->console.path;
f1954503 1253 os.criu_version = criu_version;
e20f46f8 1254 os.handler = NULL;
74eb576c 1255
0e4adc1a
CB
1256 ret = save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id));
1257 if (ret < 0) {
1258 free(criu_version);
7211378b 1259 _exit(EXIT_FAILURE);
0e4adc1a 1260 }
aef3d51e
TA
1261
1262 /* exec_criu() returning is an error */
59d8a539
CB
1263 ret = exec_criu(cgroup_ops, c->lxc_conf, &os);
1264 if (ret)
1265 SYSERROR("Failed to execute criu");
0e4adc1a 1266 free(criu_version);
7211378b 1267 _exit(EXIT_FAILURE);
aef3d51e
TA
1268 } else {
1269 int status;
5af85cb1
TA
1270 ssize_t n;
1271 char buf[4096];
5af85cb1
TA
1272
1273 close(criuout[1]);
1274
aef3d51e
TA
1275 pid_t w = waitpid(pid, &status, 0);
1276 if (w == -1) {
1277 SYSERROR("waitpid");
5af85cb1 1278 close(criuout[0]);
7177e6b1 1279 free(criu_version);
aef3d51e
TA
1280 return false;
1281 }
1282
668ba602 1283 n = lxc_read_nointr(criuout[0], buf, sizeof(buf));
5af85cb1
TA
1284 close(criuout[0]);
1285 if (n < 0) {
1286 SYSERROR("read");
1287 n = 0;
1288 }
40229e95 1289
1290 if (n == sizeof(buf))
1291 buf[n-1] = 0;
1292 else
1293 buf[n] = 0;
5af85cb1 1294
aef3d51e
TA
1295 if (WIFEXITED(status)) {
1296 if (WEXITSTATUS(status)) {
9f1f54b0 1297 ERROR("dump failed with %d", WEXITSTATUS(status));
5af85cb1
TA
1298 ret = false;
1299 } else {
1300 ret = true;
aef3d51e 1301 }
aef3d51e 1302 } else if (WIFSIGNALED(status)) {
9f1f54b0 1303 ERROR("dump signaled with %d", WTERMSIG(status));
5af85cb1 1304 ret = false;
aef3d51e 1305 } else {
9f1f54b0 1306 ERROR("unknown dump exit %d", status);
5af85cb1 1307 ret = false;
aef3d51e 1308 }
5af85cb1
TA
1309
1310 if (!ret)
1311 ERROR("criu output: %s", buf);
7177e6b1
DJ
1312
1313 free(criu_version);
5af85cb1 1314 return ret;
aef3d51e 1315 }
5af85cb1
TA
1316fail:
1317 close(criuout[0]);
1318 close(criuout[1]);
f075e955 1319 (void)rmdir(opts->directory);
0e4adc1a 1320 free(criu_version);
5af85cb1 1321 return false;
aef3d51e
TA
1322}
1323
/* Run do_dump() on container @c in "pre-dump" mode with the given migrate
 * options and return its result.
 */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped;

	dumped = do_dump(c, "pre-dump", opts);
	return dumped;
}
1328
b2c3710f 1329bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts)
aef3d51e
TA
1330{
1331 char path[PATH_MAX];
1332 int ret;
1333
8eaa5ae3
CB
1334 ret = strnprintf(path, sizeof(path), "%s/inventory.img", opts->directory);
1335 if (ret < 0)
aef3d51e
TA
1336 return false;
1337
1338 if (access(path, F_OK) == 0) {
9f1f54b0 1339 ERROR("please use a fresh directory for the dump directory");
aef3d51e
TA
1340 return false;
1341 }
1342
b2c3710f 1343 return do_dump(c, "dump", opts);
aef3d51e
TA
1344}
1345
/* Restore container @c from a criu dump.
 *
 * Requires an effective uid of 0. Forks a child that runs do_restore() and
 * reads one int status back from it over a pipe.
 *
 * Returns true if that status describes a normal exit with code 0, false
 * otherwise.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	char *criu_version = NULL;
	int status_fds[2];
	int status, nread;
	pid_t child;

	if (geteuid()) {
		ERROR("Must be root to restore");
		return false;
	}

	if (pipe(status_fds)) {
		ERROR("failed to create pipe");
		return false;
	}

	if (!criu_ok(c, &criu_version)) {
		close(status_fds[0]);
		close(status_fds[1]);
		return false;
	}

	child = fork();
	if (child < 0) {
		close(status_fds[0]);
		close(status_fds[1]);
		free(criu_version);
		return false;
	}

	if (child == 0) {
		close(status_fds[0]);
		/* this never returns */
		do_restore(c, status_fds[1], opts, criu_version);
	}

	close(status_fds[1]);
	free(criu_version);

	nread = lxc_read_nointr(status_fds[0], &status, sizeof(status));
	close(status_fds[0]);

	if ((size_t)nread != sizeof(status)) {
		ERROR("reading status from pipe failed");
	} else if (WIFEXITED(status) && !WEXITSTATUS(status)) {
		/* criu exited cleanly, so there is nothing to wait for: the
		 * child becomes the monitor process.
		 */
		return true;
	}

	/* If the criu process was killed or exited nonzero, wait() for the
	 * handler, since the restore process died.
	 */
	if (wait_for_pid(child))
		ERROR("restore process died");
	return false;
}