* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define _GNU_SOURCE
-#include <assert.h>
#include <inttypes.h>
#include <linux/limits.h>
#include <sched.h>
#include "network.h"
#include "utils.h"
+#if IS_BIONIC
+#include <../include/lxcmntent.h>
+#else
+#include <mntent.h>
+#endif
+
#define CRIU_VERSION "2.0"
#define CRIU_GITID_VERSION "2.0"
#define CRIU_GITID_PATCHLEVEL 0
#define CRIU_IN_FLIGHT_SUPPORT "2.4"
+#define CRIU_EXTERNAL_NOT_VETH "2.8"
lxc_log_define(lxc_criu, lxc);
struct criu_opts {
+ /* the thing to hook to stdout and stderr for logging */
+ int pipefd;
+
/* The type of criu invocation, one of "dump" or "restore" */
char *action;
char tty_id[32]; /* the criu tty id for /dev/console, i.e. "tty[${rdev}:${dev}]" */
/* restore: the file to write the init process' pid into */
- const char *cgroup_path;
+ struct lxc_handler *handler;
int console_fd;
/* The path that is bind mounted from /dev/console, if any. We don't
* want to use `--ext-mount-map auto`'s result here because the pts
int static_args = 23, argc = 0, i, ret;
int netnr = 0;
struct lxc_list *it;
+ FILE *mnts;
+ struct mntent mntent;
char buf[4096], tty_info[32];
size_t pos;
+
/* If we are currently in a cgroup /foo/bar, and the container is in a
* cgroup /lxc/foo, lxcfs will give us an ENOENT if some task in the
* container has an open fd that points to one of the cgroup files
/* The command line always looks like:
* criu $(action) --tcp-established --file-locks --link-remap \
- * --manage-cgroups=full action-script foo.sh -D $(directory) \
+ * --manage-cgroups=full --action-script foo.sh -D $(directory) \
* -o $(directory)/$(action).log --ext-mount-map auto
* --enable-external-sharing --enable-external-masters
* --enable-fs hugetlbfs --enable-fs tracefs --ext-mount-map console:/dev/pts/n
static_args += 2;
} else if (strcmp(opts->action, "restore") == 0) {
/* --root $(lxc_mount_point) --restore-detached
- * --restore-sibling --cgroup-root $foo
+ * --restore-sibling
* --lsm-profile apparmor:whatever
*/
- static_args += 8;
+ static_args += 6;
tty_info[0] = 0;
if (load_tty_major_minor(opts->user->directory, tty_info, sizeof(tty_info)))
return;
}
+ if (cgroup_num_hierarchies() > 0)
+ static_args += 2 * cgroup_num_hierarchies();
+
if (opts->user->verbose)
static_args++;
if (opts->user->action_script)
static_args += 2;
+ static_args += 2 * lxc_list_len(&opts->c->lxc_conf->mount_list);
+
ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->user->directory, opts->action);
if (ret < 0 || ret >= PATH_MAX) {
- ERROR("logfile name too long\n");
+ ERROR("logfile name too long");
return;
}
argv[argc++] = on_path("criu", NULL);
if (!argv[argc-1]) {
- ERROR("Couldn't find criu binary\n");
+ ERROR("Couldn't find criu binary");
goto err;
}
DECLARE_ARG("-o");
DECLARE_ARG(log);
+ for (i = 0; i < cgroup_num_hierarchies(); i++) {
+ char **controllers = NULL, *fullname;
+ char *path;
+
+ if (!cgroup_get_hierarchies(i, &controllers)) {
+ ERROR("failed to get hierarchy %d", i);
+ goto err;
+ }
+
+ /* if we are in a dump, we have to ask the monitor process what
+ * the right cgroup is. if this is a restore, we can just use
+ * the handler the restore task created.
+ */
+ if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) {
+ path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]);
+ if (!path) {
+ ERROR("failed to get cgroup path for %s", controllers[0]);
+ goto err;
+ }
+ } else {
+ const char *p;
+
+ p = cgroup_get_cgroup(opts->handler, controllers[0]);
+ if (!p) {
+ ERROR("failed to get cgroup path for %s", controllers[0]);
+ goto err;
+ }
+
+ path = strdup(p);
+ if (!path) {
+ ERROR("strdup failed");
+ goto err;
+ }
+ }
+
+ if (!lxc_deslashify(&path)) {
+ ERROR("failed to deslashify %s", path);
+ free(path);
+ goto err;
+ }
+
+ fullname = lxc_string_join(",", (const char **) controllers, false);
+ if (!fullname) {
+ ERROR("failed to join controllers");
+ free(path);
+ goto err;
+ }
+
+ ret = sprintf(buf, "%s:%s", fullname, path);
+ free(path);
+ free(fullname);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ERROR("sprintf of cgroup root arg failed");
+ goto err;
+ }
+
+ DECLARE_ARG("--cgroup-root");
+ DECLARE_ARG(buf);
+ }
+
if (opts->user->verbose)
DECLARE_ARG("-vvvvvv");
DECLARE_ARG(opts->user->action_script);
}
+ mnts = make_anonymous_mount_file(&opts->c->lxc_conf->mount_list);
+ if (!mnts)
+ goto err;
+
+ while (getmntent_r(mnts, &mntent, buf, sizeof(buf))) {
+ char *fmt, *key, *val, *mntdata;
+ char arg[2 * PATH_MAX + 2];
+ unsigned long flags;
+
+ if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0)
+ goto err;
+
+ free(mntdata);
+
+ /* only add --ext-mount-map for actual bind mounts */
+ if (!(flags & MS_BIND))
+ continue;
+
+ if (strcmp(opts->action, "dump") == 0) {
+ fmt = "/%s:%s";
+ key = mntent.mnt_dir;
+ val = mntent.mnt_dir;
+ } else {
+ fmt = "%s:%s";
+ key = mntent.mnt_dir;
+ val = mntent.mnt_fsname;
+ }
+
+ ret = snprintf(arg, sizeof(arg), fmt, key, val);
+ if (ret < 0 || ret >= sizeof(arg)) {
+ fclose(mnts);
+ ERROR("snprintf failed");
+ goto err;
+ }
+
+ DECLARE_ARG("--ext-mount-map");
+ DECLARE_ARG(arg);
+ }
+ fclose(mnts);
+
if (strcmp(opts->action, "dump") == 0 || strcmp(opts->action, "pre-dump") == 0) {
char pid[32], *freezer_relative;
if (opts->user->predump_dir) {
DECLARE_ARG("--prev-images-dir");
DECLARE_ARG(opts->user->predump_dir);
+ DECLARE_ARG("--track-mem");
}
if (opts->user->pageserver_address && opts->user->pageserver_port) {
DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
DECLARE_ARG("--restore-detached");
DECLARE_ARG("--restore-sibling");
- DECLARE_ARG("--cgroup-root");
- DECLARE_ARG(opts->cgroup_path);
if (tty_info[0]) {
if (opts->console_fd < 0) {
lxc_list_for_each(it, &opts->c->lxc_conf->network) {
char eth[128], *veth;
+ char *fmt;
struct lxc_netdev *n = it->elem;
-
- if (n->type != LXC_NET_VETH)
- continue;
+ bool external_not_veth;
+
+ if (strcmp(opts->criu_version, CRIU_EXTERNAL_NOT_VETH) >= 0) {
+ /* Since criu version 2.8 the usage of --veth-pair
+ * has been deprecated:
+ * git tag --contains f2037e6d3445fc400
+ * v2.8 */
+ external_not_veth = true;
+ } else {
+ external_not_veth = false;
+ }
if (n->name) {
if (strlen(n->name) >= sizeof(eth))
goto err;
strncpy(eth, n->name, sizeof(eth));
- } else
- sprintf(eth, "eth%d", netnr);
+ } else {
+ ret = snprintf(eth, sizeof(eth), "eth%d", netnr);
+ if (ret < 0 || ret >= sizeof(eth))
+ goto err;
+ }
- veth = n->priv.veth_attr.pair;
+ switch (n->type) {
+ case LXC_NET_VETH:
+ veth = n->priv.veth_attr.pair;
- if (n->link)
- ret = snprintf(buf, sizeof(buf), "%s=%s@%s", eth, veth, n->link);
- else
- ret = snprintf(buf, sizeof(buf), "%s=%s", eth, veth);
- if (ret < 0 || ret >= sizeof(buf))
+ if (n->link) {
+ if (external_not_veth)
+ fmt = "veth[%s]:%s@%s";
+ else
+ fmt = "%s=%s@%s";
+
+ ret = snprintf(buf, sizeof(buf), fmt, eth, veth, n->link);
+ } else {
+ if (external_not_veth)
+ fmt = "veth[%s]:%s";
+ else
+ fmt = "%s=%s";
+
+ ret = snprintf(buf, sizeof(buf), fmt, eth, veth);
+ }
+ if (ret < 0 || ret >= sizeof(buf))
+ goto err;
+ break;
+ case LXC_NET_MACVLAN:
+ if (!n->link) {
+ ERROR("no host interface for macvlan %s", n->name);
+ goto err;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "macvlan[%s]:%s", eth, n->link);
+ if (ret < 0 || ret >= sizeof(buf))
+ goto err;
+ break;
+ case LXC_NET_NONE:
+ case LXC_NET_EMPTY:
+ break;
+ default:
+ /* we have screened for this earlier... */
+ ERROR("unexpected network type %d", n->type);
goto err;
+ }
- DECLARE_ARG("--veth-pair");
+ if (external_not_veth)
+ DECLARE_ARG("--external");
+ else
+ DECLARE_ARG("--veth-pair");
DECLARE_ARG(buf);
+ netnr++;
}
}
INFO("execing: %s", buf);
+ /* before criu inits its log, it sometimes prints things to stdout/err;
+ * let's be sure we capture that.
+ */
+ if (dup2(opts->pipefd, STDOUT_FILENO) < 0) {
+ SYSERROR("dup2 stdout failed");
+ goto err;
+ }
+
+ if (dup2(opts->pipefd, STDERR_FILENO) < 0) {
+ SYSERROR("dup2 stderr failed");
+ goto err;
+ }
+
+ close(opts->pipefd);
+
#undef DECLARE_ARG
execv(argv[0], argv);
err:
version_error:
fclose(f);
free(tmp);
- ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore\n");
+ ERROR("must have criu " CRIU_VERSION " or greater to checkpoint/restore");
return false;
}
}
return false;
if (geteuid()) {
- ERROR("Must be root to checkpoint\n");
+ ERROR("Must be root to checkpoint");
return false;
}
case LXC_NET_VETH:
case LXC_NET_NONE:
case LXC_NET_EMPTY:
+ case LXC_NET_MACVLAN:
break;
default:
- ERROR("Found network that is not VETH or NONE\n");
+ ERROR("Found un-dumpable network: %s (%s)", lxc_net_type_to_str(n->type), n->name);
return false;
}
}
{
pid_t pid;
struct lxc_handler *handler;
- int status;
+ int status, fd;
int pipes[2] = {-1, -1};
+ /* Try to detach from the current controlling tty if it exists.
+ * Othwerise, lxc_init (via lxc_console) will attach the container's
+ * console output to the current tty, which is probably not what any
+ * library user wants, and if they do, they can just manually configure
+ * it :)
+ */
+ fd = open("/dev/tty", O_RDWR);
+ if (fd >= 0) {
+ if (ioctl(fd, TIOCNOTTY, NULL) < 0)
+ SYSERROR("couldn't detach from tty");
+ close(fd);
+ }
+
handler = lxc_init(c->name, c->lxc_conf, c->config_path);
if (!handler)
goto out;
close(pipes[0]);
pipes[0] = -1;
- if (dup2(pipes[1], STDERR_FILENO) < 0) {
- SYSERROR("dup2 failed");
- goto out_fini_handler;
- }
-
- if (dup2(pipes[1], STDOUT_FILENO) < 0) {
- SYSERROR("dup2 failed");
- goto out_fini_handler;
- }
if (unshare(CLONE_NEWNS))
goto out_fini_handler;
}
}
+ os.pipefd = pipes[1];
os.action = "restore";
os.user = opts;
os.c = c;
- os.cgroup_path = cgroup_canonical_path(handler);
os.console_fd = c->lxc_conf->console.slave;
os.criu_version = criu_version;
+ os.handler = handler;
if (os.console_fd >= 0) {
/* Twiddle the FD_CLOEXEC bit. We want to pass this FD to criu
goto out_fini_handler;
}
+ if (n == sizeof(buf))
+ n--;
buf[n] = 0;
- ERROR("criu process exited %d, output:\n%s\n", WEXITSTATUS(status), buf);
+ ERROR("criu process exited %d, output:\n%s", WEXITSTATUS(status), buf);
goto out_fini_handler;
} else {
ret = snprintf(buf, sizeof(buf), "/proc/self/task/%lu/children", (unsigned long)syscall(__NR_gettid));
FILE *f = fopen(buf, "r");
if (!f) {
- SYSERROR("couldn't read restore's children file %s\n", buf);
+ SYSERROR("couldn't read restore's children file %s", buf);
goto out_fini_handler;
}
}
}
} else {
- ERROR("CRIU was killed with signal %d\n", WTERMSIG(status));
+ ERROR("CRIU was killed with signal %d", WTERMSIG(status));
goto out_fini_handler;
}
{
pid_t pid;
char *criu_version = NULL;
+ int criuout[2];
if (!criu_ok(c, &criu_version))
return false;
- if (mkdir_p(opts->directory, 0700) < 0)
+ if (pipe(criuout) < 0) {
+ SYSERROR("pipe() failed");
return false;
+ }
+
+ if (mkdir_p(opts->directory, 0700) < 0)
+ goto fail;
pid = fork();
if (pid < 0) {
SYSERROR("fork failed");
- return false;
+ goto fail;
}
if (pid == 0) {
struct criu_opts os;
+ struct lxc_handler h;
+
+ close(criuout[0]);
+ h.name = c->name;
+ if (!cgroup_init(&h)) {
+ ERROR("failed to cgroup_init()");
+ exit(1);
+ }
+
+ os.pipefd = criuout[1];
os.action = mode;
os.user = opts;
os.c = c;
exit(1);
} else {
int status;
+ ssize_t n;
+ char buf[4096];
+ bool ret;
+
+ close(criuout[1]);
+
pid_t w = waitpid(pid, &status, 0);
if (w == -1) {
SYSERROR("waitpid");
+ close(criuout[0]);
return false;
}
+ n = read(criuout[0], buf, sizeof(buf));
+ close(criuout[0]);
+ if (n < 0) {
+ SYSERROR("read");
+ n = 0;
+ }
+ buf[n] = 0;
+
if (WIFEXITED(status)) {
if (WEXITSTATUS(status)) {
- ERROR("dump failed with %d\n", WEXITSTATUS(status));
- return false;
+ ERROR("dump failed with %d", WEXITSTATUS(status));
+ ret = false;
+ } else {
+ ret = true;
}
-
- return true;
} else if (WIFSIGNALED(status)) {
- ERROR("dump signaled with %d\n", WTERMSIG(status));
- return false;
+ ERROR("dump signaled with %d", WTERMSIG(status));
+ ret = false;
} else {
- ERROR("unknown dump exit %d\n", status);
- return false;
+ ERROR("unknown dump exit %d", status);
+ ret = false;
}
+
+ if (!ret)
+ ERROR("criu output: %s", buf);
+ return ret;
}
+fail:
+ close(criuout[0]);
+ close(criuout[1]);
+ rmdir(opts->directory);
+ return false;
}
bool __criu_pre_dump(struct lxc_container *c, struct migrate_opts *opts)
return false;
if (access(path, F_OK) == 0) {
- ERROR("please use a fresh directory for the dump directory\n");
+ ERROR("please use a fresh directory for the dump directory");
return false;
}
return false;
if (geteuid()) {
- ERROR("Must be root to restore\n");
+ ERROR("Must be root to restore");
return false;
}