]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
Merge pull request #557 from dangowrt/fix-build-on-ppc
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
e29fe1dd
TA
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23#define _GNU_SOURCE
24#include <assert.h>
25#include <linux/limits.h>
26#include <sched.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/wait.h>
33#include <unistd.h>
34
35#include "config.h"
36
37#include "bdev.h"
38#include "cgroup.h"
39#include "conf.h"
40#include "criu.h"
41#include "log.h"
42#include "lxc.h"
43#include "lxclock.h"
44#include "network.h"
45#include "utils.h"
46
/*
 * Logging category for this file.
 * NOTE(review): presumably registers "lxc_criu" as a child of the "lxc"
 * category for the ERROR()/SYSERROR() calls below; the macro is defined
 * outside this file (log.h) -- confirm there.
 */
lxc_log_define(lxc_criu, lxc);
48
49void exec_criu(struct criu_opts *opts)
50{
51 char **argv, log[PATH_MAX];
dd62857a 52 int static_args = 20, argc = 0, i, ret;
e29fe1dd
TA
53 int netnr = 0;
54 struct lxc_list *it;
55
56 char buf[4096];
57 FILE *mnts = NULL;
58
59 /* The command line always looks like:
60 * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
61 * --manage-cgroups action-script foo.sh -D $(directory) \
62 * -o $(directory)/$(action).log --ext-mount-map auto
63 * --enable-external-sharing --enable-external-masters
dd62857a 64 * --enable-fs hugetlbfs
e29fe1dd
TA
65 * +1 for final NULL */
66
67 if (strcmp(opts->action, "dump") == 0) {
68 /* -t pid */
69 static_args += 2;
70
71 /* --leave-running */
72 if (!opts->stop)
73 static_args++;
74 } else if (strcmp(opts->action, "restore") == 0) {
75 /* --root $(lxc_mount_point) --restore-detached
76 * --restore-sibling --pidfile $foo --cgroup-root $foo */
77 static_args += 8;
78 } else {
79 return;
80 }
81
82 if (opts->verbose)
83 static_args++;
84
85 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
86 if (ret < 0 || ret >= PATH_MAX) {
87 ERROR("logfile name too long\n");
88 return;
89 }
90
91 argv = malloc(static_args * sizeof(*argv));
92 if (!argv)
93 return;
94
95 memset(argv, 0, static_args * sizeof(*argv));
96
97#define DECLARE_ARG(arg) \
98 do { \
99 if (arg == NULL) { \
100 ERROR("Got NULL argument for criu"); \
101 goto err; \
102 } \
103 argv[argc++] = strdup(arg); \
104 if (!argv[argc-1]) \
105 goto err; \
106 } while (0)
107
108 argv[argc++] = on_path("criu", NULL);
109 if (!argv[argc-1]) {
110 ERROR("Couldn't find criu binary\n");
111 goto err;
112 }
113
114 DECLARE_ARG(opts->action);
115 DECLARE_ARG("--tcp-established");
116 DECLARE_ARG("--file-locks");
117 DECLARE_ARG("--link-remap");
118 DECLARE_ARG("--force-irmap");
119 DECLARE_ARG("--manage-cgroups");
120 DECLARE_ARG("--ext-mount-map");
121 DECLARE_ARG("auto");
122 DECLARE_ARG("--enable-external-sharing");
123 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
124 DECLARE_ARG("--enable-fs");
125 DECLARE_ARG("hugetlbfs");
e29fe1dd
TA
126 DECLARE_ARG("-D");
127 DECLARE_ARG(opts->directory);
128 DECLARE_ARG("-o");
129 DECLARE_ARG(log);
130
131 if (opts->verbose)
132 DECLARE_ARG("-vvvvvv");
133
134 if (strcmp(opts->action, "dump") == 0) {
135 char pid[32];
136
137 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
138 goto err;
139
140 DECLARE_ARG("-t");
141 DECLARE_ARG(pid);
142 if (!opts->stop)
143 DECLARE_ARG("--leave-running");
144 } else if (strcmp(opts->action, "restore") == 0) {
145 void *m;
146 int additional;
147
148 DECLARE_ARG("--root");
149 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
150 DECLARE_ARG("--restore-detached");
151 DECLARE_ARG("--restore-sibling");
152 DECLARE_ARG("--pidfile");
153 DECLARE_ARG(opts->pidfile);
154 DECLARE_ARG("--cgroup-root");
155 DECLARE_ARG(opts->cgroup_path);
156
157 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
158
159 m = realloc(argv, (argc + additional + 1) * sizeof(*argv)); \
160 if (!m) \
161 goto err; \
162 argv = m;
163
164 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
165 char eth[128], *veth;
166 struct lxc_netdev *n = it->elem;
167
168 if (n->name) {
169 if (strlen(n->name) >= sizeof(eth))
170 goto err;
171 strncpy(eth, n->name, sizeof(eth));
172 } else
173 sprintf(eth, "eth%d", netnr);
174
175 veth = n->priv.veth_attr.pair;
176
177 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
178 if (ret < 0 || ret >= sizeof(buf))
179 goto err;
180
181 DECLARE_ARG("--veth-pair");
182 DECLARE_ARG(buf);
183 }
184
185 }
186
187 argv[argc] = NULL;
188
189#undef DECLARE_ARG
190 execv(argv[0], argv);
191err:
192 if (mnts)
193 fclose(mnts);
194 for (i = 0; argv[i]; i++)
195 free(argv[i]);
196 free(argv);
197}
198
8ba5ced7
TA
199/*
200 * Check to see if the criu version is recent enough for all the features we
201 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
202 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
203 * things potentially before a version is released with a particular feature.
204 *
205 * The intent is that when criu development slows down, we can drop this, but
206 * for now we shouldn't attempt to c/r with versions that we know won't work.
207 */
208static bool criu_version_ok()
209{
210 int pipes[2];
211 pid_t pid;
212
213 if (pipe(pipes) < 0) {
214 SYSERROR("pipe() failed");
215 return false;
216 }
217
218 pid = fork();
219 if (pid < 0) {
220 SYSERROR("fork() failed");
221 return false;
222 }
223
224 if (pid == 0) {
225 char *args[] = { "criu", "--version", NULL };
755fa453 226 char *path;
8ba5ced7
TA
227 close(pipes[0]);
228
229 close(STDERR_FILENO);
230 if (dup2(pipes[1], STDOUT_FILENO) < 0)
231 exit(1);
232
755fa453 233 path = on_path("criu", NULL);
d9b32b09
SH
234 if (!path)
235 exit(1);
236
755fa453 237 execv(path, args);
8ba5ced7
TA
238 exit(1);
239 } else {
240 FILE *f;
241 char version[1024];
242 int patch;
243
244 close(pipes[1]);
245 if (wait_for_pid(pid) < 0) {
246 close(pipes[0]);
4eae4051 247 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
248 return false;
249 }
250
251 f = fdopen(pipes[0], "r");
252 if (!f) {
253 close(pipes[0]);
254 return false;
255 }
256
257 if (fscanf(f, "Version: %1024[^\n]s", version) != 1)
258 goto version_error;
259
260 if (fgetc(f) != '\n')
261 goto version_error;
262
263 if (strcmp(version, CRIU_VERSION) >= 0)
264 goto version_match;
265
266 if (fscanf(f, "GitID: v%1024[^-]s", version) != 1)
267 goto version_error;
268
269 if (fgetc(f) != '-')
270 goto version_error;
271
272 if (fscanf(f, "%d", &patch) != 1)
273 goto version_error;
274
275 if (strcmp(version, CRIU_GITID_VERSION) < 0)
276 goto version_error;
277
278 if (patch < CRIU_GITID_PATCHLEVEL)
279 goto version_error;
280
281version_match:
282 close(pipes[0]);
283 return true;
284
285version_error:
286 close(pipes[0]);
287 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
288 return false;
289 }
290}
291
e29fe1dd
TA
292/* Check and make sure the container has a configuration that we know CRIU can
293 * dump. */
294bool criu_ok(struct lxc_container *c)
295{
296 struct lxc_list *it;
297 bool found_deny_rule = false;
298
8ba5ced7
TA
299 if (!criu_version_ok())
300 return false;
301
e29fe1dd
TA
302 if (geteuid()) {
303 ERROR("Must be root to checkpoint\n");
304 return false;
305 }
306
307 /* We only know how to restore containers with veth networks. */
308 lxc_list_for_each(it, &c->lxc_conf->network) {
309 struct lxc_netdev *n = it->elem;
310 if (n->type != LXC_NET_VETH && n->type != LXC_NET_NONE) {
311 ERROR("Found network that is not VETH or NONE\n");
312 return false;
313 }
314 }
315
316 // These requirements come from http://criu.org/LXC
317 if (c->lxc_conf->console.path &&
318 strcmp(c->lxc_conf->console.path, "none") != 0) {
319 ERROR("lxc.console must be none\n");
320 return false;
321 }
322
323 if (c->lxc_conf->tty != 0) {
324 ERROR("lxc.tty must be 0\n");
325 return false;
326 }
327
328 lxc_list_for_each(it, &c->lxc_conf->cgroup) {
329 struct lxc_cgroup *cg = it->elem;
330 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
331 strcmp(cg->value, "c 5:1 rwm") == 0) {
332
333 found_deny_rule = true;
334 break;
335 }
336 }
337
338 if (!found_deny_rule) {
339 ERROR("couldn't find devices.deny = c 5:1 rwm");
340 return false;
341 }
342
343 return true;
344}
345
346bool dump_net_info(struct lxc_container *c, char *directory)
347{
348 int netnr;
349 struct lxc_list *it;
350
351 netnr = 0;
352 lxc_list_for_each(it, &c->lxc_conf->network) {
353 char *veth = NULL, *bridge = NULL, veth_path[PATH_MAX], eth[128];
354 struct lxc_netdev *n = it->elem;
355 bool has_error = true;
356 int pret;
357
358 pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.veth.pair", netnr);
359 if (pret < 0 || pret >= PATH_MAX)
360 goto out;
361
362 veth = c->get_running_config_item(c, veth_path);
363 if (!veth) {
364 /* criu_ok() checks that all interfaces are
365 * LXC_NET{VETH,NONE}, and VETHs should have this
366 * config */
367 assert(n->type == LXC_NET_NONE);
368 break;
369 }
370
371 bridge = c->get_running_config_item(c, veth_path);
372 if (!bridge)
373 goto out;
374
375 pret = snprintf(veth_path, PATH_MAX, "%s/veth%d", directory, netnr);
376 if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, veth) < 0)
377 goto out;
378
379 if (n->name) {
380 if (strlen(n->name) >= 128)
381 goto out;
382 strncpy(eth, n->name, 128);
383 } else
384 sprintf(eth, "eth%d", netnr);
385
386 has_error = false;
387out:
388 free(veth);
389 free(bridge);
390 if (has_error)
391 return false;
392 }
393
394 return true;
395}
396
397static bool restore_net_info(struct lxc_container *c)
398{
399 struct lxc_list *it;
400 bool has_error = true;
401
402 if (container_mem_lock(c))
403 return false;
404
405 lxc_list_for_each(it, &c->lxc_conf->network) {
406 struct lxc_netdev *netdev = it->elem;
407 char template[IFNAMSIZ];
408 snprintf(template, sizeof(template), "vethXXXXXX");
409
410 if (!netdev->priv.veth_attr.pair)
411 netdev->priv.veth_attr.pair = lxc_mkifname(template);
412
413 if (!netdev->priv.veth_attr.pair)
414 goto out_unlock;
415 }
416
417 has_error = false;
418
419out_unlock:
420 container_mem_unlock(c);
421 return !has_error;
422}
423
424void do_restore(struct lxc_container *c, int pipe, char *directory, bool verbose)
425{
426 pid_t pid;
427 char pidfile[L_tmpnam];
428 struct lxc_handler *handler;
429 int status;
430
431 if (!tmpnam(pidfile))
432 goto out;
433
434 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
435 if (!handler)
436 goto out;
437
438 if (!cgroup_init(handler)) {
439 ERROR("failed initing cgroups");
440 goto out_fini_handler;
441 }
442
443 if (!cgroup_create(handler)) {
444 ERROR("failed creating groups");
445 goto out_fini_handler;
446 }
447
448 if (!restore_net_info(c)) {
449 ERROR("failed restoring network info");
450 goto out_fini_handler;
451 }
452
453 resolve_clone_flags(handler);
454
455 pid = fork();
456 if (pid < 0)
457 goto out_fini_handler;
458
459 if (pid == 0) {
460 struct criu_opts os;
461 struct lxc_rootfs *rootfs;
462
463 close(pipe);
464 pipe = -1;
465
466 if (unshare(CLONE_NEWNS))
467 goto out_fini_handler;
468
469 /* CRIU needs the lxc root bind mounted so that it is the root of some
470 * mount. */
471 rootfs = &c->lxc_conf->rootfs;
472
473 if (rootfs_is_blockdev(c->lxc_conf)) {
474 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
475 goto out_fini_handler;
476 } else {
477 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
478 goto out_fini_handler;
479
480 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
481 SYSERROR("remount / to private failed");
482 goto out_fini_handler;
483 }
484
485 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
486 rmdir(rootfs->mount);
487 goto out_fini_handler;
488 }
489 }
490
491 os.action = "restore";
492 os.directory = directory;
493 os.c = c;
494 os.pidfile = pidfile;
495 os.verbose = verbose;
496 os.cgroup_path = cgroup_canonical_path(handler);
497
498 /* exec_criu() returning is an error */
499 exec_criu(&os);
500 umount(rootfs->mount);
501 rmdir(rootfs->mount);
502 goto out_fini_handler;
503 } else {
504 int ret;
505 char title[2048];
506
507 pid_t w = waitpid(pid, &status, 0);
508 if (w == -1) {
509 SYSERROR("waitpid");
510 goto out_fini_handler;
511 }
512
513 ret = write(pipe, &status, sizeof(status));
514 close(pipe);
515 pipe = -1;
516
517 if (sizeof(status) != ret) {
518 SYSERROR("failed to write all of status");
519 goto out_fini_handler;
520 }
521
522 if (WIFEXITED(status)) {
523 if (WEXITSTATUS(status)) {
524 goto out_fini_handler;
525 } else {
526 int ret;
527 FILE *f = fopen(pidfile, "r");
528 if (!f) {
529 SYSERROR("couldn't read restore's init pidfile %s\n", pidfile);
530 goto out_fini_handler;
531 }
532
533 ret = fscanf(f, "%d", (int*) &handler->pid);
534 fclose(f);
59c2d406
TA
535 if (unlink(pidfile) < 0 && errno != ENOENT)
536 SYSERROR("unlinking pidfile failed");
537
e29fe1dd
TA
538 if (ret != 1) {
539 ERROR("reading restore pid failed");
540 goto out_fini_handler;
541 }
542
543 if (lxc_set_state(c->name, handler, RUNNING))
544 goto out_fini_handler;
545 }
546 } else {
547 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
548 goto out_fini_handler;
549 }
550
551 /*
552 * See comment in lxcapi_start; we don't care if these
553 * fail because it's just a beauty thing. We just
554 * assign the return here to silence potential.
555 */
556 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
557 ret = setproctitle(title);
558
559 ret = lxc_poll(c->name, handler);
560 if (ret)
561 lxc_abort(c->name, handler);
562 lxc_fini(c->name, handler);
563 exit(ret);
564 }
565
566out_fini_handler:
567 lxc_fini(c->name, handler);
59c2d406
TA
568 if (unlink(pidfile) < 0 && errno != ENOENT)
569 SYSERROR("unlinking pidfile failed");
e29fe1dd
TA
570
571out:
572 if (pipe >= 0) {
573 status = 1;
574 if (write(pipe, &status, sizeof(status)) != sizeof(status)) {
575 SYSERROR("writing status failed");
576 }
577 close(pipe);
578 }
579
580 exit(1);
581}