]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/criu.c
coverity: fix use-after-free in cgmanager.
[mirror_lxc.git] / src / lxc / criu.c
CommitLineData
e29fe1dd
TA
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2014-2015 Canonical Ltd.
5 *
6 * Authors:
7 * Tycho Andersen <tycho.andersen@canonical.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23#define _GNU_SOURCE
24#include <assert.h>
25#include <linux/limits.h>
26#include <sched.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <sys/mount.h>
31#include <sys/types.h>
32#include <sys/wait.h>
33#include <unistd.h>
34
35#include "config.h"
36
37#include "bdev.h"
38#include "cgroup.h"
39#include "conf.h"
40#include "criu.h"
41#include "log.h"
42#include "lxc.h"
43#include "lxclock.h"
44#include "network.h"
45#include "utils.h"
46
47lxc_log_define(lxc_criu, lxc);
48
49void exec_criu(struct criu_opts *opts)
50{
51 char **argv, log[PATH_MAX];
dd62857a 52 int static_args = 20, argc = 0, i, ret;
e29fe1dd
TA
53 int netnr = 0;
54 struct lxc_list *it;
55
56 char buf[4096];
57 FILE *mnts = NULL;
58
59 /* The command line always looks like:
60 * criu $(action) --tcp-established --file-locks --link-remap --force-irmap \
61 * --manage-cgroups action-script foo.sh -D $(directory) \
62 * -o $(directory)/$(action).log --ext-mount-map auto
63 * --enable-external-sharing --enable-external-masters
dd62857a 64 * --enable-fs hugetlbfs
e29fe1dd
TA
65 * +1 for final NULL */
66
67 if (strcmp(opts->action, "dump") == 0) {
68 /* -t pid */
69 static_args += 2;
70
71 /* --leave-running */
72 if (!opts->stop)
73 static_args++;
74 } else if (strcmp(opts->action, "restore") == 0) {
75 /* --root $(lxc_mount_point) --restore-detached
76 * --restore-sibling --pidfile $foo --cgroup-root $foo */
77 static_args += 8;
78 } else {
79 return;
80 }
81
82 if (opts->verbose)
83 static_args++;
84
85 ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
86 if (ret < 0 || ret >= PATH_MAX) {
87 ERROR("logfile name too long\n");
88 return;
89 }
90
91 argv = malloc(static_args * sizeof(*argv));
92 if (!argv)
93 return;
94
95 memset(argv, 0, static_args * sizeof(*argv));
96
97#define DECLARE_ARG(arg) \
98 do { \
99 if (arg == NULL) { \
100 ERROR("Got NULL argument for criu"); \
101 goto err; \
102 } \
103 argv[argc++] = strdup(arg); \
104 if (!argv[argc-1]) \
105 goto err; \
106 } while (0)
107
108 argv[argc++] = on_path("criu", NULL);
109 if (!argv[argc-1]) {
110 ERROR("Couldn't find criu binary\n");
111 goto err;
112 }
113
114 DECLARE_ARG(opts->action);
115 DECLARE_ARG("--tcp-established");
116 DECLARE_ARG("--file-locks");
117 DECLARE_ARG("--link-remap");
118 DECLARE_ARG("--force-irmap");
119 DECLARE_ARG("--manage-cgroups");
120 DECLARE_ARG("--ext-mount-map");
121 DECLARE_ARG("auto");
122 DECLARE_ARG("--enable-external-sharing");
123 DECLARE_ARG("--enable-external-masters");
dd62857a
TA
124 DECLARE_ARG("--enable-fs");
125 DECLARE_ARG("hugetlbfs");
e29fe1dd
TA
126 DECLARE_ARG("-D");
127 DECLARE_ARG(opts->directory);
128 DECLARE_ARG("-o");
129 DECLARE_ARG(log);
130
131 if (opts->verbose)
132 DECLARE_ARG("-vvvvvv");
133
134 if (strcmp(opts->action, "dump") == 0) {
135 char pid[32];
136
137 if (sprintf(pid, "%d", opts->c->init_pid(opts->c)) < 0)
138 goto err;
139
140 DECLARE_ARG("-t");
141 DECLARE_ARG(pid);
142 if (!opts->stop)
143 DECLARE_ARG("--leave-running");
144 } else if (strcmp(opts->action, "restore") == 0) {
145 void *m;
146 int additional;
147
148 DECLARE_ARG("--root");
149 DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
150 DECLARE_ARG("--restore-detached");
151 DECLARE_ARG("--restore-sibling");
152 DECLARE_ARG("--pidfile");
153 DECLARE_ARG(opts->pidfile);
154 DECLARE_ARG("--cgroup-root");
155 DECLARE_ARG(opts->cgroup_path);
156
157 additional = lxc_list_len(&opts->c->lxc_conf->network) * 2;
158
159 m = realloc(argv, (argc + additional + 1) * sizeof(*argv)); \
160 if (!m) \
161 goto err; \
162 argv = m;
163
164 lxc_list_for_each(it, &opts->c->lxc_conf->network) {
165 char eth[128], *veth;
166 struct lxc_netdev *n = it->elem;
167
168 if (n->name) {
169 if (strlen(n->name) >= sizeof(eth))
170 goto err;
171 strncpy(eth, n->name, sizeof(eth));
172 } else
173 sprintf(eth, "eth%d", netnr);
174
175 veth = n->priv.veth_attr.pair;
176
177 ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
178 if (ret < 0 || ret >= sizeof(buf))
179 goto err;
180
181 DECLARE_ARG("--veth-pair");
182 DECLARE_ARG(buf);
183 }
184
185 }
186
187 argv[argc] = NULL;
188
189#undef DECLARE_ARG
190 execv(argv[0], argv);
191err:
192 if (mnts)
193 fclose(mnts);
194 for (i = 0; argv[i]; i++)
195 free(argv[i]);
196 free(argv);
197}
198
8ba5ced7
TA
199/*
200 * Check to see if the criu version is recent enough for all the features we
201 * use. This version allows either CRIU_VERSION or (CRIU_GITID_VERSION and
202 * CRIU_GITID_PATCHLEVEL) to work, enabling users building from git to c/r
203 * things potentially before a version is released with a particular feature.
204 *
205 * The intent is that when criu development slows down, we can drop this, but
206 * for now we shouldn't attempt to c/r with versions that we know won't work.
207 */
208static bool criu_version_ok()
209{
210 int pipes[2];
211 pid_t pid;
212
213 if (pipe(pipes) < 0) {
214 SYSERROR("pipe() failed");
215 return false;
216 }
217
218 pid = fork();
219 if (pid < 0) {
220 SYSERROR("fork() failed");
221 return false;
222 }
223
224 if (pid == 0) {
225 char *args[] = { "criu", "--version", NULL };
755fa453 226 char *path;
8ba5ced7
TA
227 close(pipes[0]);
228
229 close(STDERR_FILENO);
230 if (dup2(pipes[1], STDOUT_FILENO) < 0)
231 exit(1);
232
755fa453
TA
233 path = on_path("criu", NULL);
234 execv(path, args);
8ba5ced7
TA
235 exit(1);
236 } else {
237 FILE *f;
238 char version[1024];
239 int patch;
240
241 close(pipes[1]);
242 if (wait_for_pid(pid) < 0) {
243 close(pipes[0]);
4eae4051 244 SYSERROR("execing criu failed, is it installed?");
8ba5ced7
TA
245 return false;
246 }
247
248 f = fdopen(pipes[0], "r");
249 if (!f) {
250 close(pipes[0]);
251 return false;
252 }
253
254 if (fscanf(f, "Version: %1024[^\n]s", version) != 1)
255 goto version_error;
256
257 if (fgetc(f) != '\n')
258 goto version_error;
259
260 if (strcmp(version, CRIU_VERSION) >= 0)
261 goto version_match;
262
263 if (fscanf(f, "GitID: v%1024[^-]s", version) != 1)
264 goto version_error;
265
266 if (fgetc(f) != '-')
267 goto version_error;
268
269 if (fscanf(f, "%d", &patch) != 1)
270 goto version_error;
271
272 if (strcmp(version, CRIU_GITID_VERSION) < 0)
273 goto version_error;
274
275 if (patch < CRIU_GITID_PATCHLEVEL)
276 goto version_error;
277
278version_match:
279 close(pipes[0]);
280 return true;
281
282version_error:
283 close(pipes[0]);
284 ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
285 return false;
286 }
287}
288
e29fe1dd
TA
289/* Check and make sure the container has a configuration that we know CRIU can
290 * dump. */
291bool criu_ok(struct lxc_container *c)
292{
293 struct lxc_list *it;
294 bool found_deny_rule = false;
295
8ba5ced7
TA
296 if (!criu_version_ok())
297 return false;
298
e29fe1dd
TA
299 if (geteuid()) {
300 ERROR("Must be root to checkpoint\n");
301 return false;
302 }
303
304 /* We only know how to restore containers with veth networks. */
305 lxc_list_for_each(it, &c->lxc_conf->network) {
306 struct lxc_netdev *n = it->elem;
307 if (n->type != LXC_NET_VETH && n->type != LXC_NET_NONE) {
308 ERROR("Found network that is not VETH or NONE\n");
309 return false;
310 }
311 }
312
313 // These requirements come from http://criu.org/LXC
314 if (c->lxc_conf->console.path &&
315 strcmp(c->lxc_conf->console.path, "none") != 0) {
316 ERROR("lxc.console must be none\n");
317 return false;
318 }
319
320 if (c->lxc_conf->tty != 0) {
321 ERROR("lxc.tty must be 0\n");
322 return false;
323 }
324
325 lxc_list_for_each(it, &c->lxc_conf->cgroup) {
326 struct lxc_cgroup *cg = it->elem;
327 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
328 strcmp(cg->value, "c 5:1 rwm") == 0) {
329
330 found_deny_rule = true;
331 break;
332 }
333 }
334
335 if (!found_deny_rule) {
336 ERROR("couldn't find devices.deny = c 5:1 rwm");
337 return false;
338 }
339
340 return true;
341}
342
343bool dump_net_info(struct lxc_container *c, char *directory)
344{
345 int netnr;
346 struct lxc_list *it;
347
348 netnr = 0;
349 lxc_list_for_each(it, &c->lxc_conf->network) {
350 char *veth = NULL, *bridge = NULL, veth_path[PATH_MAX], eth[128];
351 struct lxc_netdev *n = it->elem;
352 bool has_error = true;
353 int pret;
354
355 pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.veth.pair", netnr);
356 if (pret < 0 || pret >= PATH_MAX)
357 goto out;
358
359 veth = c->get_running_config_item(c, veth_path);
360 if (!veth) {
361 /* criu_ok() checks that all interfaces are
362 * LXC_NET{VETH,NONE}, and VETHs should have this
363 * config */
364 assert(n->type == LXC_NET_NONE);
365 break;
366 }
367
368 bridge = c->get_running_config_item(c, veth_path);
369 if (!bridge)
370 goto out;
371
372 pret = snprintf(veth_path, PATH_MAX, "%s/veth%d", directory, netnr);
373 if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, veth) < 0)
374 goto out;
375
376 if (n->name) {
377 if (strlen(n->name) >= 128)
378 goto out;
379 strncpy(eth, n->name, 128);
380 } else
381 sprintf(eth, "eth%d", netnr);
382
383 has_error = false;
384out:
385 free(veth);
386 free(bridge);
387 if (has_error)
388 return false;
389 }
390
391 return true;
392}
393
394static bool restore_net_info(struct lxc_container *c)
395{
396 struct lxc_list *it;
397 bool has_error = true;
398
399 if (container_mem_lock(c))
400 return false;
401
402 lxc_list_for_each(it, &c->lxc_conf->network) {
403 struct lxc_netdev *netdev = it->elem;
404 char template[IFNAMSIZ];
405 snprintf(template, sizeof(template), "vethXXXXXX");
406
407 if (!netdev->priv.veth_attr.pair)
408 netdev->priv.veth_attr.pair = lxc_mkifname(template);
409
410 if (!netdev->priv.veth_attr.pair)
411 goto out_unlock;
412 }
413
414 has_error = false;
415
416out_unlock:
417 container_mem_unlock(c);
418 return !has_error;
419}
420
421void do_restore(struct lxc_container *c, int pipe, char *directory, bool verbose)
422{
423 pid_t pid;
424 char pidfile[L_tmpnam];
425 struct lxc_handler *handler;
426 int status;
427
428 if (!tmpnam(pidfile))
429 goto out;
430
431 handler = lxc_init(c->name, c->lxc_conf, c->config_path);
432 if (!handler)
433 goto out;
434
435 if (!cgroup_init(handler)) {
436 ERROR("failed initing cgroups");
437 goto out_fini_handler;
438 }
439
440 if (!cgroup_create(handler)) {
441 ERROR("failed creating groups");
442 goto out_fini_handler;
443 }
444
445 if (!restore_net_info(c)) {
446 ERROR("failed restoring network info");
447 goto out_fini_handler;
448 }
449
450 resolve_clone_flags(handler);
451
452 pid = fork();
453 if (pid < 0)
454 goto out_fini_handler;
455
456 if (pid == 0) {
457 struct criu_opts os;
458 struct lxc_rootfs *rootfs;
459
460 close(pipe);
461 pipe = -1;
462
463 if (unshare(CLONE_NEWNS))
464 goto out_fini_handler;
465
466 /* CRIU needs the lxc root bind mounted so that it is the root of some
467 * mount. */
468 rootfs = &c->lxc_conf->rootfs;
469
470 if (rootfs_is_blockdev(c->lxc_conf)) {
471 if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
472 goto out_fini_handler;
473 } else {
474 if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
475 goto out_fini_handler;
476
477 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
478 SYSERROR("remount / to private failed");
479 goto out_fini_handler;
480 }
481
482 if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
483 rmdir(rootfs->mount);
484 goto out_fini_handler;
485 }
486 }
487
488 os.action = "restore";
489 os.directory = directory;
490 os.c = c;
491 os.pidfile = pidfile;
492 os.verbose = verbose;
493 os.cgroup_path = cgroup_canonical_path(handler);
494
495 /* exec_criu() returning is an error */
496 exec_criu(&os);
497 umount(rootfs->mount);
498 rmdir(rootfs->mount);
499 goto out_fini_handler;
500 } else {
501 int ret;
502 char title[2048];
503
504 pid_t w = waitpid(pid, &status, 0);
505 if (w == -1) {
506 SYSERROR("waitpid");
507 goto out_fini_handler;
508 }
509
510 ret = write(pipe, &status, sizeof(status));
511 close(pipe);
512 pipe = -1;
513
514 if (sizeof(status) != ret) {
515 SYSERROR("failed to write all of status");
516 goto out_fini_handler;
517 }
518
519 if (WIFEXITED(status)) {
520 if (WEXITSTATUS(status)) {
521 goto out_fini_handler;
522 } else {
523 int ret;
524 FILE *f = fopen(pidfile, "r");
525 if (!f) {
526 SYSERROR("couldn't read restore's init pidfile %s\n", pidfile);
527 goto out_fini_handler;
528 }
529
530 ret = fscanf(f, "%d", (int*) &handler->pid);
531 fclose(f);
59c2d406
TA
532 if (unlink(pidfile) < 0 && errno != ENOENT)
533 SYSERROR("unlinking pidfile failed");
534
e29fe1dd
TA
535 if (ret != 1) {
536 ERROR("reading restore pid failed");
537 goto out_fini_handler;
538 }
539
540 if (lxc_set_state(c->name, handler, RUNNING))
541 goto out_fini_handler;
542 }
543 } else {
544 ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
545 goto out_fini_handler;
546 }
547
548 /*
549 * See comment in lxcapi_start; we don't care if these
550 * fail because it's just a beauty thing. We just
551 * assign the return here to silence potential.
552 */
553 ret = snprintf(title, sizeof(title), "[lxc monitor] %s %s", c->config_path, c->name);
554 ret = setproctitle(title);
555
556 ret = lxc_poll(c->name, handler);
557 if (ret)
558 lxc_abort(c->name, handler);
559 lxc_fini(c->name, handler);
560 exit(ret);
561 }
562
563out_fini_handler:
564 lxc_fini(c->name, handler);
59c2d406
TA
565 if (unlink(pidfile) < 0 && errno != ENOENT)
566 SYSERROR("unlinking pidfile failed");
e29fe1dd
TA
567
568out:
569 if (pipe >= 0) {
570 status = 1;
571 if (write(pipe, &status, sizeof(status)) != sizeof(status)) {
572 SYSERROR("writing status failed");
573 }
574 close(pipe);
575 }
576
577 exit(1);
578}