]>
Commit | Line | Data |
---|---|---|
e29fe1dd TA |
1 | /* |
2 | * lxc: linux Container library | |
3 | * | |
4 | * Copyright © 2014-2015 Canonical Ltd. | |
5 | * | |
6 | * Authors: | |
7 | * Tycho Andersen <tycho.andersen@canonical.com> | |
8 | * | |
9 | * This library is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * This library is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with this library; if not, write to the Free Software | |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 | */ | |
23 | #define _GNU_SOURCE | |
24 | #include <assert.h> | |
25 | #include <linux/limits.h> | |
26 | #include <sched.h> | |
27 | #include <stdio.h> | |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <sys/mount.h> | |
31 | #include <sys/types.h> | |
32 | #include <sys/wait.h> | |
33 | #include <unistd.h> | |
34 | ||
35 | #include "config.h" | |
36 | ||
4ec31c52 | 37 | #include "bdev/bdev.h" |
e29fe1dd TA |
38 | #include "cgroup.h" |
39 | #include "conf.h" | |
dc259399 | 40 | #include "commands.h" |
e29fe1dd TA |
41 | #include "criu.h" |
42 | #include "log.h" | |
43 | #include "lxc.h" | |
44 | #include "lxclock.h" | |
45 | #include "network.h" | |
46 | #include "utils.h" | |
47 | ||
73d46752 TA |
48 | #define CRIU_VERSION "2.0" |
49 | ||
50 | #define CRIU_GITID_VERSION "2.0" | |
51 | #define CRIU_GITID_PATCHLEVEL 0 | |
52 | ||
e29fe1dd TA |
53 | lxc_log_define(lxc_criu, lxc); |
54 | ||
73d46752 TA |
/* Everything exec_criu() needs in order to assemble one criu command line. */
struct criu_opts {
	/* The criu sub-command to run: "dump", "pre-dump" or "restore" */
	char *action;

	/* The caller-supplied migrate options that apply to this action */
	struct migrate_opts *user;

	/* The container being dumped or restored */
	struct lxc_container *c;

	/* dump: the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]".
	 * An empty string means there is no console to hand over.
	 */
	char tty_id[32];

	/* restore: the file to write the init process' pid into */
	char *pidfile;

	/* restore: passed to criu as --cgroup-root */
	const char *cgroup_path;

	/* restore: the fd handed to criu with --inherit-fd, < 0 if none */
	int console_fd;

	/* The path that is bind mounted from /dev/console, if any. We don't
	 * want to use `--ext-mount-map auto`'s result here because the pts
	 * device may have a different path (e.g. if the pty number is
	 * different) on the target host. NULL if lxc.console = "none".
	 */
	char *console_name;
};
79 | ||
4b54788e TA |
/*
 * Read the tty identifier stored in "<directory>/tty.info" into output.
 *
 * directory: the checkpoint image directory
 * output:    buffer receiving the first line of the file (fgets() semantics,
 *            so at most len - 1 characters plus the terminating NUL)
 * len:       size of output in bytes
 *
 * Returns 0 on success and also 0, with output left untouched, when the file
 * does not exist. Returns -1 on any other failure.
 */
static int load_tty_major_minor(char *directory, char *output, int len)
{
	FILE *f;
	char path[PATH_MAX];
	int ret;

	ret = snprintf(path, sizeof(path), "%s/tty.info", directory);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("snprintf'd too many characters: %d", ret);
		return -1;
	}

	f = fopen(path, "r");
	if (!f) {
		/* This means we're coming from a liblxc which didn't export
		 * the tty info. In this case they had to have lxc.console =
		 * none, so there's no problem restoring.
		 */
		if (errno == ENOENT)
			return 0;

		SYSERROR("couldn't open %s", path);
		return -1;
	}

	if (!fgets(output, len, f)) {
		/* Log before fclose(): SYSERROR presumably reports errno, and
		 * fclose() is allowed to overwrite it.
		 */
		SYSERROR("couldn't read %s", path);
		fclose(f);
		return -1;
	}

	fclose(f);
	return 0;
}
114 | ||
9451eeff | 115 | static void exec_criu(struct criu_opts *opts) |
e29fe1dd TA |
116 | { |
117 | char **argv, log[PATH_MAX]; | |
19d1509c | 118 | int static_args = 23, argc = 0, i, ret; |
e29fe1dd TA |
119 | int netnr = 0; |
120 | struct lxc_list *it; | |
121 | ||
a17fa3c0 NE |
122 | char buf[4096], tty_info[32]; |
123 | size_t pos; | |
e9195050 TA |
124 | /* If we are currently in a cgroup /foo/bar, and the container is in a |
125 | * cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the | |
126 | * container has an open fd that points to one of the cgroup files | |
127 | * (systemd always opens its "root" cgroup). So, let's escape to the | |
128 | * /actual/ root cgroup so that lxcfs thinks criu has enough rights to | |
129 | * see all cgroups. | |
130 | */ | |
7103fe6f | 131 | if (!cgroup_escape()) { |
e9195050 TA |
132 | ERROR("failed to escape cgroups"); |
133 | return; | |
134 | } | |
135 | ||
e29fe1dd | 136 | /* The command line always looks like: |
19d1509c | 137 | * criu $(action) --tcp-established --file-locks --link-remap \ |
0a5fc6df | 138 | * --manage-cgroups=full action-script foo.sh -D $(directory) \ |
e29fe1dd TA |
139 | * -o $(directory)/$(action).log --ext-mount-map auto |
140 | * --enable-external-sharing --enable-external-masters | |
4b54788e | 141 | * --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n |
e29fe1dd TA |
142 | * +1 for final NULL */ |
143 | ||
aef3d51e | 144 | if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) { |
dc259399 TA |
145 | /* -t pid --freeze-cgroup /lxc/ct */ |
146 | static_args += 4; | |
e29fe1dd | 147 | |
aef3d51e | 148 | /* --prev-images-dir <path-to-directory-A-relative-to-B> */ |
b2c3710f | 149 | if (opts->user->predump_dir) |
aef3d51e TA |
150 | static_args += 2; |
151 | ||
74eb576c | 152 | /* --page-server --address <address> --port <port> */ |
b2c3710f | 153 | if (opts->user->pageserver_address && opts->user->pageserver_port) |
74eb576c NE |
154 | static_args += 5; |
155 | ||
aef3d51e | 156 | /* --leave-running (only for final dump) */ |
b2c3710f | 157 | if (strcmp(opts->action, "dump") == 0 && !opts->user->stop) |
e29fe1dd | 158 | static_args++; |
4b54788e TA |
159 | |
160 | /* --external tty[88,4] */ | |
161 | if (opts->tty_id[0]) | |
162 | static_args += 2; | |
19d1509c TA |
163 | |
164 | /* --force-irmap */ | |
165 | if (!opts->user->preserves_inodes) | |
166 | static_args++; | |
e29fe1dd TA |
167 | } else if (strcmp(opts->action, "restore") == 0) { |
168 | /* --root $(lxc_mount_point) --restore-detached | |
13389b29 TA |
169 | * --restore-sibling --pidfile $foo --cgroup-root $foo |
170 | * --lsm-profile apparmor:whatever | |
171 | */ | |
172 | static_args += 10; | |
4b54788e TA |
173 | |
174 | tty_info[0] = 0; | |
b2c3710f | 175 | if (load_tty_major_minor(opts->user->directory, tty_info, sizeof(tty_info))) |
4b54788e TA |
176 | return; |
177 | ||
178 | /* --inherit-fd fd[%d]:tty[%s] */ | |
179 | if (tty_info[0]) | |
180 | static_args += 2; | |
e29fe1dd TA |
181 | } else { |
182 | return; | |
183 | } | |
184 | ||
b2c3710f | 185 | if (opts->user->verbose) |
e29fe1dd TA |
186 | static_args++; |
187 | ||
b9ee6643 TA |
188 | if (opts->user->action_script) |
189 | static_args += 2; | |
190 | ||
b2c3710f | 191 | ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action); |
e29fe1dd TA |
192 | if (ret < 0 || ret >= PATH_MAX) { |
193 | ERROR("logfile name too long\n"); | |
194 | return; | |
195 | } | |
196 | ||
197 | argv = malloc(static_args * sizeof(*argv)); | |
198 | if (!argv) | |
199 | return; | |
200 | ||
201 | memset(argv, 0, static_args * sizeof(*argv)); | |
202 | ||
203 | #define DECLARE_ARG(arg) \ | |
204 | do { \ | |
205 | if (arg == NULL) { \ | |
206 | ERROR("Got NULL argument for criu"); \ | |
207 | goto err; \ | |
208 | } \ | |
209 | argv[argc++] = strdup(arg); \ | |
210 | if (!argv[argc-1]) \ | |
211 | goto err; \ | |
212 | } while (0) | |
213 | ||
214 | argv[argc++] = on_path("criu", NULL); | |
215 | if (!argv[argc-1]) { | |
216 | ERROR("Couldn't find criu binary\n"); | |
217 | goto err; | |
218 | } | |
219 | ||
220 | DECLARE_ARG(opts->action); | |
221 | DECLARE_ARG("--tcp-established"); | |
222 | DECLARE_ARG("--file-locks"); | |
223 | DECLARE_ARG("--link-remap"); | |
0a5fc6df | 224 | DECLARE_ARG("--manage-cgroups=full"); |
e29fe1dd TA |
225 | DECLARE_ARG("--ext-mount-map"); |
226 | DECLARE_ARG("auto"); | |
227 | DECLARE_ARG("--enable-external-sharing"); | |
228 | DECLARE_ARG("--enable-external-masters"); | |
dd62857a TA |
229 | DECLARE_ARG("--enable-fs"); |
230 | DECLARE_ARG("hugetlbfs"); | |
5b454329 TA |
231 | DECLARE_ARG("--enable-fs"); |
232 | DECLARE_ARG("tracefs"); | |
e29fe1dd | 233 | DECLARE_ARG("-D"); |
b2c3710f | 234 | DECLARE_ARG(opts->user->directory); |
e29fe1dd TA |
235 | DECLARE_ARG("-o"); |
236 | DECLARE_ARG(log); | |
237 | ||
b2c3710f | 238 | if (opts->user->verbose) |
e29fe1dd TA |
239 | DECLARE_ARG("-vvvvvv"); |
240 | ||
b9ee6643 TA |
241 | if (opts->user->action_script) { |
242 | DECLARE_ARG("--action-script"); | |
243 | DECLARE_ARG(opts->user->action_script); | |
244 | } | |
245 | ||
aef3d51e | 246 | if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) { |
dc259399 | 247 | char pid[32], *freezer_relative; |
e29fe1dd TA |
248 | |
249 | if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0) | |
250 | goto err; | |
251 | ||
252 | DECLARE_ARG("-t"); | |
253 | DECLARE_ARG(pid); | |
dc259399 TA |
254 | |
255 | freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name, | |
256 | opts->c->config_path, | |
257 | "freezer"); | |
258 | if (!freezer_relative) { | |
259 | ERROR("failed getting freezer path"); | |
260 | goto err; | |
261 | } | |
262 | ||
263 | ret = snprintf(log, sizeof(log), "/sys/fs/cgroup/freezer/%s", freezer_relative); | |
264 | if (ret < 0 || ret >= sizeof(log)) | |
265 | goto err; | |
266 | ||
267 | DECLARE_ARG("--freeze-cgroup"); | |
268 | DECLARE_ARG(log); | |
269 | ||
4b54788e | 270 | if (opts->tty_id[0]) { |
36d2096c TA |
271 | DECLARE_ARG("--ext-mount-map"); |
272 | DECLARE_ARG("/dev/console:console"); | |
273 | ||
4b54788e TA |
274 | DECLARE_ARG("--external"); |
275 | DECLARE_ARG(opts->tty_id); | |
276 | } | |
277 | ||
b2c3710f | 278 | if (opts->user->predump_dir) { |
aef3d51e | 279 | DECLARE_ARG("--prev-images-dir"); |
b2c3710f | 280 | DECLARE_ARG(opts->user->predump_dir); |
74eb576c | 281 | } |
4c0c0319 | 282 | |
b2c3710f | 283 | if (opts->user->pageserver_address && opts->user->pageserver_port) { |
74eb576c NE |
284 | DECLARE_ARG("--page-server"); |
285 | DECLARE_ARG("--address"); | |
b2c3710f | 286 | DECLARE_ARG(opts->user->pageserver_address); |
74eb576c | 287 | DECLARE_ARG("--port"); |
b2c3710f | 288 | DECLARE_ARG(opts->user->pageserver_port); |
74eb576c | 289 | } |
aef3d51e | 290 | |
19d1509c TA |
291 | if (!opts->user->preserves_inodes) |
292 | DECLARE_ARG("--force-irmap"); | |
293 | ||
aef3d51e | 294 | /* only for final dump */ |
b2c3710f | 295 | if (strcmp(opts->action, "dump") == 0 && !opts->user->stop) |
e29fe1dd TA |
296 | DECLARE_ARG("--leave-running"); |
297 | } else if (strcmp(opts->action, "restore") == 0) { | |
298 | void *m; | |
299 | int additional; | |
13389b29 | 300 | struct lxc_conf *lxc_conf = opts->c->lxc_conf; |
e29fe1dd TA |
301 | |
302 | DECLARE_ARG("--root"); | |
303 | DECLARE_ARG(opts->c->lxc_conf->rootfs.mount); | |
304 | DECLARE_ARG("--restore-detached"); | |
305 | DECLARE_ARG("--restore-sibling"); | |
306 | DECLARE_ARG("--pidfile"); | |
307 | DECLARE_ARG(opts->pidfile); | |
308 | DECLARE_ARG("--cgroup-root"); | |
309 | DECLARE_ARG(opts->cgroup_path); | |
310 | ||
4b54788e | 311 | if (tty_info[0]) { |
97e4f1a9 TA |
312 | if (opts->console_fd < 0) { |
313 | ERROR("lxc.console configured on source host but not target"); | |
314 | goto err; | |
315 | } | |
316 | ||
4b54788e TA |
317 | ret = snprintf(buf, sizeof(buf), "fd[%d]:%s", opts->console_fd, tty_info); |
318 | if (ret < 0 || ret >= sizeof(buf)) | |
319 | goto err; | |
320 | ||
321 | DECLARE_ARG("--inherit-fd"); | |
322 | DECLARE_ARG(buf); | |
323 | } | |
324 | if (opts->console_name) { | |
325 | if (snprintf(buf, sizeof(buf), "console:%s", opts->console_name) < 0) { | |
326 | SYSERROR("sprintf'd too many bytes"); | |
327 | } | |
328 | DECLARE_ARG("--ext-mount-map"); | |
329 | DECLARE_ARG(buf); | |
330 | } | |
331 | ||
13389b29 TA |
332 | if (lxc_conf->lsm_aa_profile || lxc_conf->lsm_se_context) { |
333 | ||
334 | if (lxc_conf->lsm_aa_profile) | |
335 | ret = snprintf(buf, sizeof(buf), "apparmor:%s", lxc_conf->lsm_aa_profile); | |
336 | else | |
337 | ret = snprintf(buf, sizeof(buf), "selinux:%s", lxc_conf->lsm_se_context); | |
338 | ||
339 | if (ret < 0 || ret >= sizeof(buf)) | |
340 | goto err; | |
341 | ||
342 | DECLARE_ARG("--lsm-profile"); | |
343 | DECLARE_ARG(buf); | |
344 | } | |
345 | ||
e29fe1dd TA |
346 | additional = lxc_list_len(&opts->c->lxc_conf->network) * 2; |
347 | ||
fa071249 TA |
348 | m = realloc(argv, (argc + additional + 1) * sizeof(*argv)); |
349 | if (!m) | |
350 | goto err; | |
e29fe1dd TA |
351 | argv = m; |
352 | ||
353 | lxc_list_for_each(it, &opts->c->lxc_conf->network) { | |
354 | char eth[128], *veth; | |
355 | struct lxc_netdev *n = it->elem; | |
356 | ||
65b20221 TA |
357 | if (n->type != LXC_NET_VETH) |
358 | continue; | |
359 | ||
e29fe1dd TA |
360 | if (n->name) { |
361 | if (strlen(n->name) >= sizeof(eth)) | |
362 | goto err; | |
363 | strncpy(eth, n->name, sizeof(eth)); | |
364 | } else | |
365 | sprintf(eth, "eth%d", netnr); | |
366 | ||
367 | veth = n->priv.veth_attr.pair; | |
368 | ||
c1fd648d TA |
369 | if (n->link) |
370 | ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link); | |
371 | else | |
372 | ret = snprintf(buf, sizeof(buf), "%s=%s", eth, veth); | |
e29fe1dd TA |
373 | if (ret < 0 || ret >= sizeof(buf)) |
374 | goto err; | |
375 | ||
376 | DECLARE_ARG("--veth-pair"); | |
377 | DECLARE_ARG(buf); | |
378 | } | |
379 | ||
380 | } | |
381 | ||
382 | argv[argc] = NULL; | |
383 | ||
cf4b07a5 | 384 | buf[0] = 0; |
a17fa3c0 | 385 | pos = 0; |
72a30576 | 386 | |
cf4b07a5 | 387 | for (i = 0; argv[i]; i++) { |
72a30576 NE |
388 | ret = snprintf(buf + pos, sizeof(buf) - pos, "%s ", argv[i]); |
389 | if (ret < 0 || ret >= sizeof(buf) - pos) | |
390 | goto err; | |
391 | else | |
392 | pos += ret; | |
cf4b07a5 TA |
393 | } |
394 | ||
395 | INFO("execing: %s", buf); | |
396 | ||
e29fe1dd TA |
397 | #undef DECLARE_ARG |
398 | execv(argv[0], argv); | |
399 | err: | |
e29fe1dd TA |
400 | for (i = 0; argv[i]; i++) |
401 | free(argv[i]); | |
402 | free(argv); | |
403 | } | |
404 | ||
8ba5ced7 TA |
405 | /* |
406 | * Check to see if the criu version is recent enough for all the features we | |
407 | * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and | |
408 | * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r | |
409 | * things potentially before a version is released with a particular feature. | |
410 | * | |
411 | * The intent is that when criu development slows down, we can drop this, but | |
412 | * for now we shouldn't attempt to c/r with versions that we know won't work. | |
5407e2ab CB |
413 | * |
414 | * Note: If version != NULL criu_version() stores the detected criu version in | |
415 | * version. Allocates memory for version which must be freed by caller. | |
8ba5ced7 | 416 | */ |
5407e2ab | 417 | static bool criu_version_ok(char **version) |
8ba5ced7 TA |
418 | { |
419 | int pipes[2]; | |
420 | pid_t pid; | |
421 | ||
422 | if (pipe(pipes) < 0) { | |
423 | SYSERROR("pipe() failed"); | |
424 | return false; | |
425 | } | |
426 | ||
427 | pid = fork(); | |
428 | if (pid < 0) { | |
429 | SYSERROR("fork() failed"); | |
430 | return false; | |
431 | } | |
432 | ||
433 | if (pid == 0) { | |
434 | char *args[] = { "criu", "--version", NULL }; | |
755fa453 | 435 | char *path; |
8ba5ced7 TA |
436 | close(pipes[0]); |
437 | ||
438 | close(STDERR_FILENO); | |
439 | if (dup2(pipes[1], STDOUT_FILENO) < 0) | |
440 | exit(1); | |
441 | ||
755fa453 | 442 | path = on_path("criu", NULL); |
d9b32b09 SH |
443 | if (!path) |
444 | exit(1); | |
445 | ||
755fa453 | 446 | execv(path, args); |
8ba5ced7 TA |
447 | exit(1); |
448 | } else { | |
449 | FILE *f; | |
5407e2ab | 450 | char *tmp; |
8ba5ced7 TA |
451 | int patch; |
452 | ||
453 | close(pipes[1]); | |
454 | if (wait_for_pid(pid) < 0) { | |
455 | close(pipes[0]); | |
4eae4051 | 456 | SYSERROR("execing criu failed, is it installed?"); |
8ba5ced7 TA |
457 | return false; |
458 | } | |
459 | ||
460 | f = fdopen(pipes[0], "r"); | |
461 | if (!f) { | |
462 | close(pipes[0]); | |
463 | return false; | |
464 | } | |
465 | ||
5407e2ab CB |
466 | tmp = malloc(1024); |
467 | if (!tmp) { | |
468 | fclose(f); | |
469 | return false; | |
470 | } | |
471 | ||
472 | if (fscanf(f, "Version: %1023[^\n]s", tmp) != 1) | |
8ba5ced7 TA |
473 | goto version_error; |
474 | ||
475 | if (fgetc(f) != '\n') | |
476 | goto version_error; | |
477 | ||
5407e2ab | 478 | if (strcmp(tmp, CRIU_VERSION) >= 0) |
8ba5ced7 TA |
479 | goto version_match; |
480 | ||
5407e2ab | 481 | if (fscanf(f, "GitID: v%1023[^-]s", tmp) != 1) |
8ba5ced7 TA |
482 | goto version_error; |
483 | ||
484 | if (fgetc(f) != '-') | |
485 | goto version_error; | |
486 | ||
487 | if (fscanf(f, "%d", &patch) != 1) | |
488 | goto version_error; | |
489 | ||
5407e2ab | 490 | if (strcmp(tmp, CRIU_GITID_VERSION) < 0) |
8ba5ced7 TA |
491 | goto version_error; |
492 | ||
493 | if (patch < CRIU_GITID_PATCHLEVEL) | |
494 | goto version_error; | |
495 | ||
496 | version_match: | |
3158ab5b | 497 | fclose(f); |
5407e2ab CB |
498 | if (!version) |
499 | free(tmp); | |
500 | else | |
501 | *version = tmp; | |
8ba5ced7 TA |
502 | return true; |
503 | ||
504 | version_error: | |
3158ab5b | 505 | fclose(f); |
5407e2ab | 506 | free(tmp); |
8ba5ced7 TA |
507 | ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n"); |
508 | return false; | |
509 | } | |
510 | } | |
511 | ||
e29fe1dd TA |
512 | /* Check and make sure the container has a configuration that we know CRIU can |
513 | * dump. */ | |
73d46752 | 514 | static bool criu_ok(struct lxc_container *c) |
e29fe1dd TA |
515 | { |
516 | struct lxc_list *it; | |
e29fe1dd | 517 | |
5407e2ab | 518 | if (!criu_version_ok(NULL)) |
8ba5ced7 TA |
519 | return false; |
520 | ||
e29fe1dd TA |
521 | if (geteuid()) { |
522 | ERROR("Must be root to checkpoint\n"); | |
523 | return false; | |
524 | } | |
525 | ||
526 | /* We only know how to restore containers with veth networks. */ | |
527 | lxc_list_for_each(it, &c->lxc_conf->network) { | |
528 | struct lxc_netdev *n = it->elem; | |
65b20221 TA |
529 | switch(n->type) { |
530 | case LXC_NET_VETH: | |
531 | case LXC_NET_NONE: | |
532 | case LXC_NET_EMPTY: | |
533 | break; | |
534 | default: | |
e29fe1dd TA |
535 | ERROR("Found network that is not VETH or NONE\n"); |
536 | return false; | |
537 | } | |
538 | } | |
539 | ||
e29fe1dd TA |
540 | return true; |
541 | } | |
542 | ||
e29fe1dd TA |
543 | static bool restore_net_info(struct lxc_container *c) |
544 | { | |
545 | struct lxc_list *it; | |
546 | bool has_error = true; | |
547 | ||
548 | if (container_mem_lock(c)) | |
549 | return false; | |
550 | ||
551 | lxc_list_for_each(it, &c->lxc_conf->network) { | |
552 | struct lxc_netdev *netdev = it->elem; | |
553 | char template[IFNAMSIZ]; | |
65b20221 TA |
554 | |
555 | if (netdev->type != LXC_NET_VETH) | |
556 | continue; | |
557 | ||
e29fe1dd TA |
558 | snprintf(template, sizeof(template), "vethXXXXXX"); |
559 | ||
560 | if (!netdev->priv.veth_attr.pair) | |
561 | netdev->priv.veth_attr.pair = lxc_mkifname(template); | |
562 | ||
563 | if (!netdev->priv.veth_attr.pair) | |
564 | goto out_unlock; | |
565 | } | |
566 | ||
567 | has_error = false; | |
568 | ||
569 | out_unlock: | |
570 | container_mem_unlock(c); | |
571 | return !has_error; | |
572 | } | |
573 | ||
aef3d51e TA |
574 | // do_restore never returns, the calling process is used as the |
575 | // monitor process. do_restore calls exit() if it fails. | |
b2c3710f | 576 | void do_restore(struct lxc_container *c, int status_pipe, struct migrate_opts *opts) |
e29fe1dd TA |
577 | { |
578 | pid_t pid; | |
579 | char pidfile[L_tmpnam]; | |
580 | struct lxc_handler *handler; | |
3d9a5c85 | 581 | int status, pipes[2] = {-1, -1}; |
e29fe1dd TA |
582 | |
583 | if (!tmpnam(pidfile)) | |
584 | goto out; | |
585 | ||
586 | handler = lxc_init(c->name, c->lxc_conf, c->config_path); | |
587 | if (!handler) | |
588 | goto out; | |
589 | ||
590 | if (!cgroup_init(handler)) { | |
591 | ERROR("failed initing cgroups"); | |
592 | goto out_fini_handler; | |
593 | } | |
594 | ||
595 | if (!cgroup_create(handler)) { | |
596 | ERROR("failed creating groups"); | |
597 | goto out_fini_handler; | |
598 | } | |
599 | ||
600 | if (!restore_net_info(c)) { | |
601 | ERROR("failed restoring network info"); | |
602 | goto out_fini_handler; | |
603 | } | |
604 | ||
605 | resolve_clone_flags(handler); | |
606 | ||
3d9a5c85 TA |
607 | if (pipe(pipes) < 0) { |
608 | SYSERROR("pipe() failed"); | |
609 | goto out_fini_handler; | |
610 | } | |
611 | ||
e29fe1dd TA |
612 | pid = fork(); |
613 | if (pid < 0) | |
614 | goto out_fini_handler; | |
615 | ||
616 | if (pid == 0) { | |
617 | struct criu_opts os; | |
618 | struct lxc_rootfs *rootfs; | |
4b54788e | 619 | int flags; |
e29fe1dd | 620 | |
3d9a5c85 TA |
621 | close(status_pipe); |
622 | status_pipe = -1; | |
623 | ||
624 | close(pipes[0]); | |
625 | pipes[0] = -1; | |
626 | if (dup2(pipes[1], STDERR_FILENO) < 0) { | |
627 | SYSERROR("dup2 failed"); | |
628 | goto out_fini_handler; | |
629 | } | |
630 | ||
631 | if (dup2(pipes[1], STDOUT_FILENO) < 0) { | |
632 | SYSERROR("dup2 failed"); | |
633 | goto out_fini_handler; | |
634 | } | |
e29fe1dd TA |
635 | |
636 | if (unshare(CLONE_NEWNS)) | |
637 | goto out_fini_handler; | |
638 | ||
639 | /* CRIU needs the lxc root bind mounted so that it is the root of some | |
640 | * mount. */ | |
641 | rootfs = &c->lxc_conf->rootfs; | |
642 | ||
643 | if (rootfs_is_blockdev(c->lxc_conf)) { | |
644 | if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0) | |
645 | goto out_fini_handler; | |
646 | } else { | |
647 | if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST) | |
648 | goto out_fini_handler; | |
649 | ||
650 | if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) { | |
651 | SYSERROR("remount / to private failed"); | |
652 | goto out_fini_handler; | |
653 | } | |
654 | ||
655 | if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) { | |
656 | rmdir(rootfs->mount); | |
657 | goto out_fini_handler; | |
658 | } | |
659 | } | |
660 | ||
661 | os.action = "restore"; | |
b2c3710f | 662 | os.user = opts; |
e29fe1dd TA |
663 | os.c = c; |
664 | os.pidfile = pidfile; | |
e29fe1dd | 665 | os.cgroup_path = cgroup_canonical_path(handler); |
4b54788e TA |
666 | os.console_fd = c->lxc_conf->console.slave; |
667 | ||
97e4f1a9 TA |
668 | if (os.console_fd >= 0) { |
669 | /* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu | |
670 | * via --inherit-fd, so we don't want it to close. | |
671 | */ | |
672 | flags = fcntl(os.console_fd, F_GETFD); | |
673 | if (flags < 0) { | |
674 | SYSERROR("F_GETFD failed: %d", os.console_fd); | |
675 | goto out_fini_handler; | |
676 | } | |
4b54788e | 677 | |
97e4f1a9 | 678 | flags &= ~FD_CLOEXEC; |
4b54788e | 679 | |
97e4f1a9 TA |
680 | if (fcntl(os.console_fd, F_SETFD, flags) < 0) { |
681 | SYSERROR("F_SETFD failed"); | |
682 | goto out_fini_handler; | |
683 | } | |
4b54788e TA |
684 | } |
685 | os.console_name = c->lxc_conf->console.name; | |
e29fe1dd TA |
686 | |
687 | /* exec_criu() returning is an error */ | |
7103fe6f | 688 | exec_criu(&os); |
e29fe1dd TA |
689 | umount(rootfs->mount); |
690 | rmdir(rootfs->mount); | |
691 | goto out_fini_handler; | |
692 | } else { | |
693 | int ret; | |
694 | char title[2048]; | |
695 | ||
3d9a5c85 TA |
696 | close(pipes[1]); |
697 | pipes[1] = -1; | |
698 | ||
e29fe1dd TA |
699 | pid_t w = waitpid(pid, &status, 0); |
700 | if (w == -1) { | |
701 | SYSERROR("waitpid"); | |
702 | goto out_fini_handler; | |
703 | } | |
704 | ||
3d9a5c85 TA |
705 | ret = write(status_pipe, &status, sizeof(status)); |
706 | close(status_pipe); | |
707 | status_pipe = -1; | |
e29fe1dd TA |
708 | |
709 | if (sizeof(status) != ret) { | |
710 | SYSERROR("failed to write all of status"); | |
711 | goto out_fini_handler; | |
712 | } | |
713 | ||
714 | if (WIFEXITED(status)) { | |
715 | if (WEXITSTATUS(status)) { | |
3d9a5c85 TA |
716 | char buf[4096]; |
717 | int n; | |
718 | ||
719 | n = read(pipes[0], buf, sizeof(buf)); | |
720 | if (n < 0) { | |
721 | SYSERROR("failed reading from criu stderr"); | |
722 | goto out_fini_handler; | |
723 | } | |
724 | ||
725 | buf[n] = 0; | |
726 | ||
727 | ERROR("criu process exited %d, output:\n%s\n", WEXITSTATUS(status), buf); | |
e29fe1dd TA |
728 | goto out_fini_handler; |
729 | } else { | |
730 | int ret; | |
731 | FILE *f = fopen(pidfile, "r"); | |
732 | if (!f) { | |
733 | SYSERROR("couldn't read restore's init pidfile %s\n", pidfile); | |
734 | goto out_fini_handler; | |
735 | } | |
736 | ||
737 | ret = fscanf(f, "%d", (int*) &handler->pid); | |
738 | fclose(f); | |
59c2d406 TA |
739 | if (unlink(pidfile) < 0 && errno != ENOENT) |
740 | SYSERROR("unlinking pidfile failed"); | |
741 | ||
e29fe1dd TA |
742 | if (ret != 1) { |
743 | ERROR("reading restore pid failed"); | |
744 | goto out_fini_handler; | |
745 | } | |
746 | ||
f8a41688 TA |
747 | if (lxc_set_state(c->name, handler, RUNNING)) { |
748 | ERROR("error setting running state after restore"); | |
e29fe1dd | 749 | goto out_fini_handler; |
f8a41688 | 750 | } |
e29fe1dd TA |
751 | } |
752 | } else { | |
753 | ERROR("CRIU was killed with signal %d\n", WTERMSIG(status)); | |
754 | goto out_fini_handler; | |
755 | } | |
756 | ||
3d9a5c85 TA |
757 | close(pipes[0]); |
758 | ||
e29fe1dd TA |
759 | /* |
760 | * See comment in lxcapi_start; we don't care if these | |
761 | * fail because it's just a beauty thing. We just | |
762 | * assign the return here to silence potential. | |
763 | */ | |
764 | ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name); | |
765 | ret = setproctitle(title); | |
766 | ||
767 | ret = lxc_poll(c->name, handler); | |
768 | if (ret) | |
769 | lxc_abort(c->name, handler); | |
770 | lxc_fini(c->name, handler); | |
771 | exit(ret); | |
772 | } | |
773 | ||
774 | out_fini_handler: | |
3d9a5c85 TA |
775 | if (pipes[0] >= 0) |
776 | close(pipes[0]); | |
777 | if (pipes[1] >= 0) | |
778 | close(pipes[1]); | |
779 | ||
e29fe1dd | 780 | lxc_fini(c->name, handler); |
59c2d406 TA |
781 | if (unlink(pidfile) < 0 && errno != ENOENT) |
782 | SYSERROR("unlinking pidfile failed"); | |
e29fe1dd TA |
783 | |
784 | out: | |
3d9a5c85 | 785 | if (status_pipe >= 0) { |
e29fe1dd | 786 | status = 1; |
3d9a5c85 | 787 | if (write(status_pipe, &status, sizeof(status)) != sizeof(status)) { |
e29fe1dd TA |
788 | SYSERROR("writing status failed"); |
789 | } | |
3d9a5c85 | 790 | close(status_pipe); |
e29fe1dd TA |
791 | } |
792 | ||
793 | exit(1); | |
794 | } | |
aef3d51e | 795 | |
4b54788e TA |
796 | static int save_tty_major_minor(char *directory, struct lxc_container *c, char *tty_id, int len) |
797 | { | |
798 | FILE *f; | |
799 | char path[PATH_MAX]; | |
800 | int ret; | |
801 | struct stat sb; | |
802 | ||
803 | if (c->lxc_conf->console.path && !strcmp(c->lxc_conf->console.path, "none")) { | |
804 | tty_id[0] = 0; | |
805 | return 0; | |
806 | } | |
807 | ||
808 | ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/console", c->init_pid(c)); | |
809 | if (ret < 0 || ret >= sizeof(path)) { | |
810 | ERROR("snprintf'd too many chacters: %d", ret); | |
811 | return -1; | |
812 | } | |
813 | ||
814 | ret = stat(path, &sb); | |
815 | if (ret < 0) { | |
816 | SYSERROR("stat of %s failed", path); | |
817 | return -1; | |
818 | } | |
819 | ||
820 | ret = snprintf(path, sizeof(path), "%s/tty.info", directory); | |
821 | if (ret < 0 || ret >= sizeof(path)) { | |
822 | ERROR("snprintf'd too many characters: %d", ret); | |
823 | return -1; | |
824 | } | |
825 | ||
f03280a7 TA |
826 | ret = snprintf(tty_id, len, "tty[%llx:%llx]", |
827 | (long long unsigned) sb.st_rdev, | |
828 | (long long unsigned) sb.st_dev); | |
4b54788e TA |
829 | if (ret < 0 || ret >= sizeof(path)) { |
830 | ERROR("snprintf'd too many characters: %d", ret); | |
831 | return -1; | |
832 | } | |
833 | ||
834 | f = fopen(path, "w"); | |
835 | if (!f) { | |
836 | SYSERROR("failed to open %s", path); | |
837 | return -1; | |
838 | } | |
839 | ||
840 | ret = fprintf(f, "%s", tty_id); | |
841 | fclose(f); | |
842 | if (ret < 0) | |
843 | SYSERROR("failed to write to %s", path); | |
844 | return ret; | |
845 | } | |
846 | ||
aef3d51e | 847 | /* do one of either predump or a regular dump */ |
b2c3710f | 848 | static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *opts) |
aef3d51e TA |
849 | { |
850 | pid_t pid; | |
851 | ||
852 | if (!criu_ok(c)) | |
853 | return false; | |
854 | ||
b2c3710f | 855 | if (mkdir_p(opts->directory, 0700) < 0) |
aef3d51e TA |
856 | return false; |
857 | ||
858 | pid = fork(); | |
859 | if (pid < 0) { | |
860 | SYSERROR("fork failed"); | |
861 | return false; | |
862 | } | |
863 | ||
864 | if (pid == 0) { | |
865 | struct criu_opts os; | |
866 | ||
867 | os.action = mode; | |
b2c3710f | 868 | os.user = opts; |
aef3d51e | 869 | os.c = c; |
4b54788e | 870 | os.console_name = c->lxc_conf->console.path; |
74eb576c | 871 | |
b2c3710f | 872 | if (save_tty_major_minor(opts->directory, c, os.tty_id, sizeof(os.tty_id)) < 0) |
4b54788e | 873 | exit(1); |
aef3d51e TA |
874 | |
875 | /* exec_criu() returning is an error */ | |
7103fe6f | 876 | exec_criu(&os); |
aef3d51e TA |
877 | exit(1); |
878 | } else { | |
879 | int status; | |
880 | pid_t w = waitpid(pid, &status, 0); | |
881 | if (w == -1) { | |
882 | SYSERROR("waitpid"); | |
883 | return false; | |
884 | } | |
885 | ||
886 | if (WIFEXITED(status)) { | |
887 | if (WEXITSTATUS(status)) { | |
888 | ERROR("dump failed with %d\n", WEXITSTATUS(status)); | |
889 | return false; | |
890 | } | |
891 | ||
892 | return true; | |
893 | } else if (WIFSIGNALED(status)) { | |
894 | ERROR("dump signaled with %d\n", WTERMSIG(status)); | |
895 | return false; | |
896 | } else { | |
897 | ERROR("unknown dump exit %d\n", status); | |
898 | return false; | |
899 | } | |
900 | } | |
901 | } | |
902 | ||
/* Run a criu "pre-dump" pass over container c using the options in opts. */
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
{
	bool dumped = do_dump(c, "pre-dump", opts);

	return dumped;
}
907 | ||
b2c3710f | 908 | bool __criu_dump(struct lxc_container *c, struct migrate_opts *opts) |
aef3d51e TA |
909 | { |
910 | char path[PATH_MAX]; | |
911 | int ret; | |
912 | ||
b2c3710f | 913 | ret = snprintf(path, sizeof(path), "%s/inventory.img", opts->directory); |
aef3d51e TA |
914 | if (ret < 0 || ret >= sizeof(path)) |
915 | return false; | |
916 | ||
917 | if (access(path, F_OK) == 0) { | |
918 | ERROR("please use a fresh directory for the dump directory\n"); | |
919 | return false; | |
920 | } | |
921 | ||
b2c3710f | 922 | return do_dump(c, "dump", opts); |
aef3d51e TA |
923 | } |
924 | ||
/*
 * Restore container c from the images described by opts.
 *
 * The actual work happens in a forked child running do_restore(), which
 * reports criu's wait status back over a pipe. Returns true when that status
 * says criu exited with 0; the child is then left running. In every other
 * case the child is waited for and false is returned.
 */
bool __criu_restore(struct lxc_container *c, struct migrate_opts *opts)
{
	pid_t child;
	int status, got;
	int fds[2];

	if (!criu_ok(c))
		return false;

	if (geteuid() != 0) {
		ERROR("Must be root to restore\n");
		return false;
	}

	if (pipe(fds) != 0) {
		ERROR("failed to create pipe");
		return false;
	}

	child = fork();
	if (child < 0) {
		close(fds[0]);
		close(fds[1]);
		return false;
	}

	if (child == 0) {
		close(fds[0]);
		/* does not come back */
		do_restore(c, fds[1], opts);
	}

	close(fds[1]);

	got = read(fds[0], &status, sizeof(status));
	close(fds[0]);

	if (sizeof(status) != got) {
		ERROR("reading status from pipe failed");
	} else if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		/* criu succeeded: the child carries on as the monitor
		 * process, so there is nothing to wait for.
		 */
		return true;
	}

	/* criu was killed or exited nonzero (or we never learned its status):
	 * the restore process is on its way out, so reap it.
	 */
	if (wait_for_pid(child))
		ERROR("restore process died");
	return false;
}