-/*
- * lxc: linux Container library
- *
- * (C) Copyright IBM Corp. 2007, 2008
- *
- * Authors:
- * Daniel Lezcano <daniel.lezcano at free.fr>
- * Serge Hallyn <serge@hallyn.com>
- * Christian Brauner <christian.brauner@ubuntu.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
+/* SPDX-License-Identifier: LGPL-2.1+ */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
ret = access(copy, X_OK);
if (ret != 0) {
- SYSERROR("Could not access %s. Please grant it x "
- "access, or add an ACL for the container "
- "root", copy);
+ SYSERROR("Could not access %s. Please grant it x access, or add an ACL for the container " "root", copy);
return;
}
*p = saved;
static void lxc_put_nsfds(struct lxc_handler *handler)
{
- int i;
-
- for (i = 0; i < LXC_NS_MAX; i++) {
+ for (int i = 0; i < LXC_NS_MAX; i++) {
if (handler->nsfd[i] < 0)
continue;
- close(handler->nsfd[i]);
- handler->nsfd[i] = -EBADF;
+ close_prot_errno_disarm(handler->nsfd[i]);
}
}
fd = lxc_preserve_ns(pid, ns);
if (fd < 0) {
- if (errno != ENOENT) {
- SYSERROR("Failed to preserve %s namespace", ns);
- return -EINVAL;
- }
+ if (errno != ENOENT)
+ return log_error_errno(-EINVAL,
+ errno, "Failed to preserve %s namespace",
+ ns);
- SYSWARN("Kernel does not support preserving %s namespaces", ns);
- return -EOPNOTSUPP;
+ return log_warn_errno(-EOPNOTSUPP,
+ errno, "Kernel does not support preserving %s namespaces",
+ ns);
}
return fd;
int ret;
ret = snprintf(path, PATH_MAX, "/proc/self/fd/%s", direntp->d_name);
- if (ret < 0 || ret >= PATH_MAX) {
- ERROR("Failed to create file descriptor name");
- return false;
- }
+ if (ret < 0 || ret >= PATH_MAX)
+ return log_error(false, "Failed to create file descriptor name");
linklen = readlink(path, link, PATH_MAX);
- if (linklen < 0) {
- SYSERROR("Failed to read link path - \"%s\"", path);
- return false;
- } else if (linklen >= PATH_MAX) {
- ERROR("The name of link path is too long - \"%s\"", path);
- return false;
- }
-
- if (strcmp(link, "/dev/log_main") == 0 ||
- strcmp(link, "/dev/log_system") == 0 ||
- strcmp(link, "/dev/log_radio") == 0)
+ if (linklen < 0)
+ return log_error(false, "Failed to read link path - \"%s\"", path);
+ else if (linklen >= PATH_MAX)
+ return log_error(false, "The name of link path is too long - \"%s\"", path);
+
+ if (strcmp(link, "/dev/log_main") == 0 ||
+ strcmp(link, "/dev/log_system") == 0 ||
+ strcmp(link, "/dev/log_radio") == 0)
return true;
return false;
restart:
dir = opendir("/proc/self/fd");
- if (!dir) {
- SYSWARN("Failed to open directory");
- return -1;
- }
+ if (!dir)
+ return log_warn(-1, "Failed to open directory");
fddir = dirfd(dir);
}
ret = pthread_sigmask(SIG_BLOCK, &mask, oldmask);
- if (ret < 0) {
- SYSERROR("Failed to set signal mask");
- return -EBADF;
- }
+ if (ret < 0)
+ return log_error_errno(-EBADF, errno,
+ "Failed to set signal mask");
ret = signalfd(-1, &mask, SFD_CLOEXEC);
- if (ret < 0) {
- SYSERROR("Failed to create signal file descriptor");
- return -EBADF;
- }
+ if (ret < 0)
+ return log_error_errno(-EBADF,
+ errno, "Failed to create signal file descriptor");
TRACE("Created signal file descriptor %d", ret);
struct lxc_handler *hdlr = data;
ret = lxc_read_nointr(fd, &siginfo, sizeof(siginfo));
- if (ret < 0) {
- ERROR("Failed to read signal info from signal file descriptor %d", fd);
- return LXC_MAINLOOP_ERROR;
- }
+ if (ret < 0)
+ return log_error(LXC_MAINLOOP_ERROR, "Failed to read signal info from signal file descriptor %d", fd);
- if (ret != sizeof(siginfo)) {
- ERROR("Unexpected size for struct signalfd_siginfo");
- return -EINVAL;
- }
+ if (ret != sizeof(siginfo))
+ return log_error(-EINVAL, "Unexpected size for struct signalfd_siginfo");
/* Check whether init is running. */
info.si_pid = 0;
}
if (siginfo.ssi_signo == SIGHUP) {
- kill(hdlr->pid, SIGTERM);
+ if (hdlr->pidfd >= 0)
+ lxc_raw_pidfd_send_signal(hdlr->pidfd, SIGTERM, NULL, 0);
+ else
+ kill(hdlr->pid, SIGTERM);
INFO("Killing %d since terminal hung up", hdlr->pid);
return hdlr->init_died ? LXC_MAINLOOP_CLOSE
: LXC_MAINLOOP_CONTINUE;
}
if (siginfo.ssi_signo != SIGCHLD) {
- kill(hdlr->pid, siginfo.ssi_signo);
+ if (hdlr->pidfd >= 0)
+ lxc_raw_pidfd_send_signal(hdlr->pidfd,
+ siginfo.ssi_signo, NULL, 0);
+ else
+ kill(hdlr->pid, siginfo.ssi_signo);
INFO("Forwarded signal %d to pid %d", siginfo.ssi_signo, hdlr->pid);
return hdlr->init_died ? LXC_MAINLOOP_CLOSE
: LXC_MAINLOOP_CONTINUE;
: LXC_MAINLOOP_CONTINUE;
}
- DEBUG("Container init process %d exited", hdlr->pid);
-
- return LXC_MAINLOOP_CLOSE;
+ return log_debug(LXC_MAINLOOP_CLOSE, "Container init process %d exited", hdlr->pid);
}
int lxc_serve_state_clients(const char *name, struct lxc_handler *handler,
TRACE("Set container state to %s", lxc_state2str(state));
- if (lxc_list_empty(&handler->conf->state_clients)) {
- TRACE("No state clients registered");
- return 0;
- }
+ if (lxc_list_empty(&handler->conf->state_clients))
+ return log_trace(0, "No state clients registered");
retlen = strlcpy(msg.name, name, sizeof(msg.name));
if (retlen >= sizeof(msg.name))
return 0;
/* Close read end of the socket pair. */
- close(handler->state_socket_pair[0]);
- handler->state_socket_pair[0] = -1;
+ close_prot_errno_disarm(handler->state_socket_pair[0]);
again:
ret = lxc_abstract_unix_send_credential(handler->state_socket_pair[1],
return -1;
}
- if (ret != sizeof(int)) {
- ERROR("Message too long : %d", handler->state_socket_pair[1]);
- return -1;
- }
+ if (ret != sizeof(int))
+ return log_error(-1, "Message too long : %d", handler->state_socket_pair[1]);
TRACE("Sent container state \"%s\" to %d", lxc_state2str(state),
handler->state_socket_pair[1]);
/* Close write end of the socket pair. */
- close(handler->state_socket_pair[1]);
- handler->state_socket_pair[1] = -1;
+ close_prot_errno_disarm(handler->state_socket_pair[1]);
return 0;
}
int ret;
ret = lxc_serve_state_socket_pair(name, handler, state);
- if (ret < 0) {
- ERROR("Failed to synchronize via anonymous pair of unix sockets");
- return -1;
- }
+ if (ret < 0)
+ return log_error(-1, "Failed to synchronize via anonymous pair of unix sockets");
ret = lxc_serve_state_clients(name, handler, state);
if (ret < 0)
goto out_mainloop_console;
}
+ ret = lxc_seccomp_setup_proxy(&handler->conf->seccomp, &descr, handler);
+ if (ret < 0) {
+ ERROR("Failed to setup seccomp proxy");
+ goto out_mainloop_console;
+ }
+
if (has_console) {
struct lxc_terminal *console = &handler->conf->console;
TRACE("Mainloop is ready");
ret = lxc_mainloop(&descr, -1);
- close(descr.epfd);
- descr.epfd = -EBADF;
+ close_prot_errno_disarm(descr.epfd);
if (ret < 0 || !handler->init_died)
goto out_mainloop_console;
TRACE("Closed mainloop");
out_sigfd:
- close(handler->sigfd);
TRACE("Closed signal file descriptor %d", handler->sigfd);
- handler->sigfd = -EBADF;
+ close_prot_errno_disarm(handler->sigfd);
return ret;
}
+/*
+ * Reset @handler to a pristine state: zero the whole structure, mark the
+ * container as STOPPED and set every file descriptor slot to -EBADF so that
+ * no slot is left holding 0, which would read as a valid descriptor.
+ */
void lxc_zero_handler(struct lxc_handler *handler)
{
- int i;
-
memset(handler, 0, sizeof(struct lxc_handler));
- handler->pinfd = -1;
+ handler->state = STOPPED;
- handler->sigfd = -1;
+ handler->pinfd = -EBADF;
- for (i = 0; i < LXC_NS_MAX; i++)
- handler->nsfd[i] = -1;
+ handler->pidfd = -EBADF;
+
+ /* memset() left this slot at 0; lxc_free_handler() hands it to
+ * close_prot_errno_disarm(), so mark it unused like every other fd.
+ */
+ handler->monitor_status_fd = -EBADF;
+
+ handler->sigfd = -EBADF;
+
+ for (int i = 0; i < LXC_NS_MAX; i++)
+ handler->nsfd[i] = -EBADF;
- handler->data_sock[0] = -1;
- handler->data_sock[1] = -1;
+ handler->data_sock[0] = -EBADF;
+ handler->data_sock[1] = -EBADF;
- handler->state_socket_pair[0] = -1;
- handler->state_socket_pair[1] = -1;
+ handler->state_socket_pair[0] = -EBADF;
+ handler->state_socket_pair[1] = -EBADF;
- handler->sync_sock[0] = -1;
- handler->sync_sock[1] = -1;
+ handler->sync_sock[0] = -EBADF;
+ handler->sync_sock[1] = -EBADF;
}
void lxc_free_handler(struct lxc_handler *handler)
{
- if (handler->pinfd >= 0)
- close(handler->pinfd);
-
- if (handler->sigfd >= 0)
- close(handler->sigfd);
-
+ close_prot_errno_disarm(handler->pinfd);
+ close_prot_errno_disarm(handler->pidfd);
+ close_prot_errno_disarm(handler->sigfd);
lxc_put_nsfds(handler);
-
if (handler->conf && handler->conf->reboot == REBOOT_NONE)
- if (handler->conf->maincmd_fd >= 0)
- lxc_abstract_unix_close(handler->conf->maincmd_fd);
-
- if (handler->state_socket_pair[0] >= 0)
- close(handler->state_socket_pair[0]);
-
- if (handler->state_socket_pair[1] >= 0)
- close(handler->state_socket_pair[1]);
-
- if (handler->cgroup_ops)
- cgroup_exit(handler->cgroup_ops);
-
+ close_prot_errno_disarm(handler->conf->maincmd_fd);
+ close_prot_errno_disarm(handler->monitor_status_fd);
+ close_prot_errno_disarm(handler->state_socket_pair[0]);
+ close_prot_errno_disarm(handler->state_socket_pair[1]);
+ cgroup_exit(handler->cgroup_ops);
handler->conf = NULL;
- free(handler);
- handler = NULL;
+ free_disarm(handler);
}
struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
const char *lxcpath, bool daemonize)
{
- int i, ret;
+ int ret;
struct lxc_handler *handler;
handler = malloc(sizeof(*handler));
* as root so this should be fine.
*/
handler->am_root = !am_guest_unpriv();
- handler->data_sock[0] = handler->data_sock[1] = -1;
handler->conf = conf;
handler->lxcpath = lxcpath;
- handler->pinfd = -1;
- handler->sigfd = -EBADF;
handler->init_died = false;
- handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
+ handler->data_sock[0] = -EBADF;
+ handler->data_sock[1] = -EBADF;
+ handler->monitor_status_fd = -EBADF;
+ handler->pinfd = -EBADF;
+ handler->pidfd = -EBADF;
+ handler->sigfd = -EBADF;
+ handler->state_socket_pair[0] = -EBADF;
+ handler->state_socket_pair[1] = -EBADF;
if (handler->conf->reboot == REBOOT_NONE)
lxc_list_init(&handler->conf->state_clients);
- for (i = 0; i < LXC_NS_MAX; i++)
- handler->nsfd[i] = -1;
+ for (int i = 0; i < LXC_NS_MAX; i++)
+ handler->nsfd[i] = -EBADF;
handler->name = name;
+ if (daemonize)
+ handler->transient_pid = lxc_raw_getpid();
+ else
+ handler->transient_pid = -1;
if (daemonize && handler->conf->reboot == REBOOT_NONE) {
/* Create socketpair() to synchronize on daemonized startup.
int lxc_init(const char *name, struct lxc_handler *handler)
{
+ __do_close int status_fd = -EBADF;
int ret;
const char *loglevel;
struct lxc_conf *conf = handler->conf;
handler->monitor_pid = lxc_raw_getpid();
+ status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC);
+ if (status_fd < 0)
+ return log_error_errno(-1, errno, "Failed to open monitor status fd");
lsm_init();
TRACE("Initialized LSM");
ret = lxc_read_seccomp_config(conf);
- if (ret < 0) {
- ERROR("Failed loading seccomp policy");
- goto out_close_maincmd_fd;
- }
+ if (ret < 0)
+ return log_error(-1, "Failed loading seccomp policy");
TRACE("Read seccomp policy");
/* Begin by setting the state to STARTING. */
ret = lxc_set_state(name, handler, STARTING);
- if (ret < 0) {
- ERROR("Failed to set state to \"%s\"", lxc_state2str(STARTING));
- goto out_close_maincmd_fd;
- }
+ if (ret < 0)
+ return log_error(-1, "Failed to set state to \"%s\"", lxc_state2str(STARTING));
TRACE("Set container state to \"STARTING\"");
/* Start of environment variable setup for hooks. */
if (conf->rcfile) {
ret = setenv("LXC_CONFIG_FILE", conf->rcfile, 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_CONFIG_FILE=%s", conf->rcfile);
+ SYSERROR("Failed to set environment variable: LXC_CONFIG_FILE=%s", conf->rcfile);
}
if (conf->rootfs.mount) {
ret = setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_ROOTFS_MOUNT=%s", conf->rootfs.mount);
+ SYSERROR("Failed to set environment variable: LXC_ROOTFS_MOUNT=%s", conf->rootfs.mount);
}
if (conf->rootfs.path) {
ret = setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_ROOTFS_PATH=%s", conf->rootfs.path);
+ SYSERROR("Failed to set environment variable: LXC_ROOTFS_PATH=%s", conf->rootfs.path);
}
if (conf->console.path) {
ret = setenv("LXC_CONSOLE", conf->console.path, 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_CONSOLE=%s", conf->console.path);
+ SYSERROR("Failed to set environment variable: LXC_CONSOLE=%s", conf->console.path);
}
if (conf->console.log_path) {
ret = setenv("LXC_CONSOLE_LOGPATH", conf->console.log_path, 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_CONSOLE_LOGPATH=%s", conf->console.log_path);
+ SYSERROR("Failed to set environment variable: LXC_CONSOLE_LOGPATH=%s", conf->console.log_path);
}
if (cgns_supported()) {
ret = setenv("LXC_CGNS_AWARE", "1", 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable "
- "LXC_CGNS_AWARE=1");
+ SYSERROR("Failed to set environment variable LXC_CGNS_AWARE=1");
}
loglevel = lxc_log_priority_to_string(lxc_log_get_level());
ret = setenv("LXC_LOG_LEVEL", loglevel, 1);
if (ret < 0)
- SYSERROR("Set environment variable LXC_LOG_LEVEL=%s",
- loglevel);
+ SYSERROR("Set environment variable LXC_LOG_LEVEL=%s", loglevel);
if (conf->hooks_version == 0)
ret = setenv("LXC_HOOK_VERSION", "0", 1);
TRACE("Set environment variables");
ret = run_lxc_hooks(name, "pre-start", conf, NULL);
- if (ret < 0) {
- ERROR("Failed to run lxc.hook.pre-start for container \"%s\"", name);
- goto out_aborting;
- }
+ if (ret < 0)
+ return log_error(-1, "Failed to run lxc.hook.pre-start for container \"%s\"", name);
TRACE("Ran pre-start hooks");
/* The signal fd has to be created before forking otherwise if the child
* and the command will be stuck.
*/
handler->sigfd = setup_signal_fd(&handler->oldmask);
- if (handler->sigfd < 0) {
- ERROR("Failed to setup SIGCHLD fd handler.");
- goto out_delete_tty;
- }
+ if (handler->sigfd < 0)
+ return log_error(-1, "Failed to setup SIGCHLD fd handler.");
TRACE("Set up signal fd");
/* Do this after setting up signals since it might unblock SIGWINCH. */
ret = lsm_process_prepare(conf, handler->lxcpath);
if (ret < 0) {
ERROR("Failed to initialize LSM");
- goto out_destroy_cgroups;
+ goto out_delete_terminal;
}
TRACE("Initialized LSM");
INFO("Container \"%s\" is initialized", name);
+ handler->monitor_status_fd = move_fd(status_fd);
return 0;
-out_destroy_cgroups:
- handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
- handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
-
out_delete_terminal:
lxc_terminal_delete(&handler->conf->console);
out_restore_sigmask:
(void)pthread_sigmask(SIG_SETMASK, &handler->oldmask, NULL);
-out_delete_tty:
- lxc_delete_tty(&conf->ttys);
-
-out_aborting:
- (void)lxc_set_state(name, handler, ABORTING);
-
-out_close_maincmd_fd:
- lxc_abstract_unix_close(conf->maincmd_fd);
- conf->maincmd_fd = -1;
return -1;
}
void lxc_fini(const char *name, struct lxc_handler *handler)
{
- int i, ret;
+ int ret;
pid_t self;
struct lxc_list *cur, *next;
char *namespaces[LXC_NS_MAX + 1];
lxc_set_state(name, handler, STOPPING);
self = lxc_raw_getpid();
- for (i = 0; i < LXC_NS_MAX; i++) {
+ for (int i = 0; i < LXC_NS_MAX; i++) {
if (handler->nsfd[i] < 0)
continue;
else
ret = asprintf(&namespaces[namespace_count],
"/proc/%d/fd/%d", self, handler->nsfd[i]);
- if (ret == -1) {
+ if (ret < 0) {
SYSERROR("Failed to allocate memory");
break;
}
if (handler->conf->reboot > REBOOT_NONE) {
ret = setenv("LXC_TARGET", "reboot", 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_TARGET=reboot");
+ SYSERROR("Failed to set environment variable: LXC_TARGET=reboot");
}
if (handler->conf->reboot == REBOOT_NONE) {
ret = setenv("LXC_TARGET", "stop", 1);
if (ret < 0)
- SYSERROR("Failed to set environment variable: "
- "LXC_TARGET=stop");
+ SYSERROR("Failed to set environment variable: LXC_TARGET=stop");
}
if (handler->conf->hooks_version == 0)
lsm_process_cleanup(handler->conf, handler->lxcpath);
- cgroup_ops->payload_destroy(cgroup_ops, handler);
- cgroup_ops->monitor_destroy(cgroup_ops, handler);
+ if (cgroup_ops) {
+ cgroup_ops->payload_destroy(cgroup_ops, handler);
+ cgroup_ops->monitor_destroy(cgroup_ops, handler);
+ }
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
* the command socket causing a new process to get ECONNREFUSED
* because we haven't yet closed the command socket.
*/
- lxc_abstract_unix_close(handler->conf->maincmd_fd);
- handler->conf->maincmd_fd = -1;
+ close_prot_errno_disarm(handler->conf->maincmd_fd);
TRACE("Closed command socket");
/* This function will try to connect to the legacy lxc-monitord
ret = setenv("LXC_TARGET", "stop", 1);
if (ret < 0)
- WARN("Failed to set environment variable: "
- "LXC_TARGET=stop");
+ WARN("Failed to set environment variable: LXC_TARGET=stop");
}
}
void lxc_abort(const char *name, struct lxc_handler *handler)
{
- int ret, status;
+ int ret = 0;
+ int status;
lxc_set_state(name, handler, ABORTING);
- if (handler->pid > 0) {
- ret = kill(handler->pid, SIGKILL);
- if (ret < 0)
- SYSERROR("Failed to send SIGKILL to %d", handler->pid);
+ if (handler->pidfd >= 0) {
+ ret = lxc_raw_pidfd_send_signal(handler->pidfd, SIGKILL, NULL, 0);
+ if (ret)
+ SYSWARN("Failed to send SIGKILL via pidfd %d for process %d",
+ handler->pidfd, handler->pid);
}
- while ((ret = waitpid(-1, &status, 0)) > 0) {
- ;
- }
+ if ((!ret || errno != ESRCH) && handler->pid > 0)
+ if (kill(handler->pid, SIGKILL))
+ SYSWARN("Failed to send SIGKILL to %d", handler->pid);
+
+ do {
+ ret = waitpid(-1, &status, 0);
+ } while (ret > 0);
}
static int do_start(void *data)
{
+ struct lxc_handler *handler = data;
+ __lxc_unused __do_close int data_sock0 = handler->data_sock[0],
+ data_sock1 = handler->data_sock[1];
+ __do_close int status_fd = -EBADF;
int ret;
- char path[PATH_MAX];
uid_t new_uid;
gid_t new_gid;
struct lxc_list *iterator;
uid_t nsuid = 0;
gid_t nsgid = 0;
int devnull_fd = -1;
- struct lxc_handler *handler = data;
lxc_sync_fini_parent(handler);
+ if (lxc_abstract_unix_recv_fds(data_sock1, &status_fd, 1, NULL, 0) < 0) {
+ ERROR("Failed to receive status file descriptor to child process");
+ goto out_warn_father;
+ }
+
/* This prctl must be before the synchro, so if the parent dies before
* we set the parent death signal, we will detect its death with the
* synchro right after, otherwise we have a window where the parent can
* exit before we set the pdeath signal leading to a unsupervized
* container.
*/
- ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
+ ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
}
/* Don't leak the pinfd to the container. */
- if (handler->pinfd >= 0)
- close(handler->pinfd);
+ close_prot_errno_disarm(handler->pinfd);
ret = lxc_sync_wait_parent(handler, LXC_SYNC_STARTUP);
if (ret < 0)
if (ret < 0)
goto out_error;
- ret = lxc_network_recv_veth_names_from_parent(handler);
- if (ret < 0) {
- ERROR("Failed to receive veth names from parent");
- goto out_warn_father;
+ if (handler->ns_clone_flags & CLONE_NEWNET) {
+ ret = lxc_network_recv_from_parent(handler);
+ if (ret < 0) {
+ ERROR("Failed to receive veth names from parent");
+ goto out_warn_father;
+ }
}
/* If we are in a new user namespace, become root there to have
if (!handler->conf->root_nsgid_map)
nsgid = handler->conf->init_gid;
- if (!lxc_switch_uid_gid(nsuid, nsgid))
- goto out_warn_father;
-
/* Drop groups only after we switched to a valid gid in the new
* user namespace.
*/
(handler->am_root || errno != EPERM))
goto out_warn_father;
+ if (!lxc_switch_uid_gid(nsuid, nsgid))
+ goto out_warn_father;
+
ret = prctl(PR_SET_DUMPABLE, prctl_arg(1), prctl_arg(0),
prctl_arg(0), prctl_arg(0));
if (ret < 0)
goto out_warn_father;
/* set{g,u}id() clears deathsignal */
- ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
+ ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
goto out_warn_father;
}
- ret = snprintf(path, sizeof(path), "%s/dev/null",
- handler->conf->rootfs.mount);
- if (ret < 0 || ret >= sizeof(path))
- goto out_warn_father;
-
/* In order to checkpoint restore, we need to have everything in the
* same mount namespace. However, some containers may not have a
* reasonable /dev (in particular, they may not have /dev/null), so we
* where it isn't wanted.
*/
if (handler->daemonize && !handler->conf->autodev) {
+ char path[PATH_MAX];
+
+ ret = snprintf(path, sizeof(path), "%s/dev/null",
+ handler->conf->rootfs.mount);
+ if (ret < 0 || ret >= sizeof(path))
+ goto out_warn_father;
+
ret = access(path, F_OK);
if (ret != 0) {
devnull_fd = open_devnull();
if (devnull_fd < 0)
goto out_warn_father;
- WARN("Using /dev/null from the host for container "
- "init's standard file descriptors. Migration will "
- "not work");
+ WARN("Using /dev/null from the host for container init's standard file descriptors. Migration will not work");
}
}
/* Add the requested environment variables to the current environment to
* allow them to be used by the various hooks, such as the start hook
- * above.
+ * below.
*/
lxc_list_for_each(iterator, &handler->conf->environment) {
ret = putenv((char *)iterator->elem);
/* Setup the container, ip, names, utsname, ... */
ret = lxc_setup(handler);
- close(handler->data_sock[1]);
- close(handler->data_sock[0]);
if (ret < 0) {
ERROR("Failed to setup container \"%s\"", handler->name);
goto out_warn_father;
ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
prctl_arg(0), prctl_arg(0));
if (ret < 0) {
- SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block "
- "execve() gainable privileges");
+ SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges");
goto out_warn_father;
}
- DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable "
- "privileges");
+ DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges");
}
/* Some init's such as busybox will set sane tty settings on stdin,
else
ret = lxc_terminal_set_stdfds(handler->conf->console.slave);
if (ret < 0) {
- ERROR("Failed to redirect std{in,out,err} to pty file "
- "descriptor %d", handler->conf->console.slave);
+ ERROR("Failed to redirect std{in,out,err} to pty file descriptor %d",
+ handler->conf->console.slave);
goto out_warn_father;
}
}
if (ret < 0)
goto out_warn_father;
+ ret = lxc_seccomp_send_notifier_fd(&handler->conf->seccomp, data_sock0);
+ if (ret < 0) {
+ SYSERROR("Failed to send seccomp notify fd to parent");
+ goto out_warn_father;
+ }
+
ret = run_lxc_hooks(handler->name, "start", handler->conf, NULL);
if (ret < 0) {
ERROR("Failed to run lxc.hook.start for container \"%s\"",
goto out_warn_father;
}
- close(handler->sigfd);
+ close_prot_errno_disarm(handler->sigfd);
if (handler->conf->console.slave < 0 && handler->daemonize) {
if (devnull_fd < 0) {
}
}
- if (devnull_fd >= 0) {
- close(devnull_fd);
- devnull_fd = -1;
- }
+ close_prot_errno_disarm(devnull_fd);
setsid();
if (new_gid == nsgid)
new_gid = LXC_INVALID_GID;
- if (!lxc_switch_uid_gid(new_uid, new_gid))
- goto out_warn_father;
-
/* If we are in a new user namespace we already dropped all groups when
* we switched to root in the new user namespace further above. Only
* drop groups if we can, so ensure that we have necessary privilege.
if (!lxc_setgroups(0, NULL))
goto out_warn_father;
+ if (!lxc_switch_uid_gid(new_uid, new_gid))
+ goto out_warn_father;
+
ret = lxc_ambient_caps_down();
if (ret < 0) {
ERROR("Failed to clear ambient capabilities");
}
if (handler->conf->monitor_signal_pdeath != SIGKILL) {
- ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, handler->monitor_pid);
+ ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath,
+ handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to %d",
handler->conf->monitor_signal_pdeath);
lxc_sync_wake_parent(handler, LXC_SYNC_ERROR);
out_error:
- if (devnull_fd >= 0)
- close(devnull_fd);
+ close_prot_errno_disarm(devnull_fd);
return -1;
}
break;
tty = &ttys->tty[i];
- tty->busy = 0;
+ tty->busy = -1;
tty->master = ttyfds[0];
tty->slave = ttyfds[1];
- TRACE("Received pty with master fd %d and slave fd %d from "
- "parent", tty->master, tty->slave);
+ TRACE("Received pty with master fd %d and slave fd %d from child", tty->master, tty->slave);
}
if (ret < 0)
flags = handler->ns_on_clone_flags;
flags |= CLONE_PARENT;
- handler->pid = lxc_raw_clone_cb(do_start, handler, flags);
+ handler->pid = lxc_raw_clone_cb(do_start, handler, CLONE_PIDFD | flags,
+ &handler->pidfd);
if (handler->pid < 0)
return -1;
*/
static int lxc_spawn(struct lxc_handler *handler)
{
+ __do_close int data_sock0 = -EBADF, data_sock1 = -EBADF;
int i, ret;
char pidstr[20];
bool wants_to_map_ids;
handler->data_sock);
if (ret < 0)
goto out_sync_fini;
+ data_sock0 = handler->data_sock[0];
+ data_sock1 = handler->data_sock[1];
ret = resolve_clone_flags(handler);
if (ret < 0)
goto out_sync_fini;
if (handler->ns_clone_flags & CLONE_NEWNET) {
- if (!lxc_list_empty(&conf->network)) {
-
- /* Find gateway addresses from the link device, which is
- * no longer accessible inside the container. Do this
- * before creating network interfaces, since goto
- * out_delete_net does not work before lxc_clone.
- */
- ret = lxc_find_gateway_addresses(handler);
- if (ret < 0) {
- ERROR("Failed to find gateway addresses");
- goto out_sync_fini;
- }
-
- /* That should be done before the clone because we will
- * fill the netdev index and use them in the child.
- */
- ret = lxc_create_network_priv(handler);
- if (ret < 0) {
- ERROR("Failed to create the network");
- goto out_delete_net;
- }
+ ret = lxc_find_gateway_addresses(handler);
+ if (ret) {
+ ERROR("Failed to find gateway addresses");
+ goto out_sync_fini;
}
}
*/
if (!wants_to_map_ids) {
handler->pinfd = pin_rootfs(conf->rootfs.path);
- if (handler->pinfd == -1)
+ if (handler->pinfd == -EBADF)
INFO("Failed to pin the rootfs for container \"%s\"", handler->name);
}
pid_t attacher_pid;
attacher_pid = lxc_clone(do_share_ns, handler,
- CLONE_VFORK | CLONE_VM | CLONE_FILES);
+ CLONE_VFORK | CLONE_VM | CLONE_FILES, NULL);
if (attacher_pid < 0) {
SYSERROR(LXC_CLONE_ERROR);
goto out_delete_net;
}
} else {
handler->pid = lxc_raw_clone_cb(do_start, handler,
- handler->ns_on_clone_flags);
+ CLONE_PIDFD | handler->ns_on_clone_flags,
+ &handler->pidfd);
}
if (handler->pid < 0) {
SYSERROR(LXC_CLONE_ERROR);
}
TRACE("Cloned child process %d", handler->pid);
+ /* Verify that we can actually make use of pidfds. */
+ if (!lxc_can_use_pidfd(handler->pidfd))
+ close_prot_errno_disarm(handler->pidfd);
+
+ ret = snprintf(pidstr, 20, "%d", handler->pid);
+ if (ret < 0 || ret >= 20)
+ goto out_delete_net;
+
+ ret = setenv("LXC_PID", pidstr, 1);
+ if (ret < 0)
+ SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr);
+
for (i = 0; i < LXC_NS_MAX; i++)
if (handler->ns_on_clone_flags & ns_info[i].clone_flag)
INFO("Cloned %s", ns_info[i].flag_name);
lxc_sync_fini_child(handler);
+ if (lxc_abstract_unix_send_fds(handler->data_sock[0], &handler->monitor_status_fd, 1, NULL, 0) < 0) {
+ ERROR("Failed to send status file descriptor to child process");
+ goto out_delete_net;
+ }
+ close_prot_errno_disarm(handler->monitor_status_fd);
+
/* Map the container uids. The container became an invalid userid the
* moment it was cloned with CLONE_NEWUSER. This call doesn't change
* anything immediately, but allows the container to setuid(0) (0 being
if (ret < 0)
goto out_delete_net;
- if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) {
+ if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net;
}
- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
+ if (!cgroup_ops->payload_enter(cgroup_ops, handler)) {
goto out_delete_net;
+ }
+
+ if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
+ ERROR("Failed to delegate controllers to payload cgroup");
+ goto out_delete_net;
+ }
+
+ if (!cgroup_ops->setup_limits(cgroup_ops, handler)) {
+ ERROR("Failed to setup cgroup limits for container \"%s\"", name);
+ goto out_delete_net;
+ }
if (!cgroup_ops->chown(cgroup_ops, handler->conf))
goto out_delete_net;
- /* Now we're ready to preserve the network namespace */
- ret = lxc_try_preserve_ns(handler->pid, "net");
- if (ret < 0) {
- if (ret != -EOPNOTSUPP) {
- SYSERROR("Failed to preserve net namespace");
- goto out_delete_net;
+ /* If not done yet, we're now ready to preserve the network namespace */
+ if (handler->nsfd[LXC_NS_NET] < 0) {
+ ret = lxc_try_preserve_ns(handler->pid, "net");
+ if (ret < 0) {
+ if (ret != -EOPNOTSUPP) {
+ SYSERROR("Failed to preserve net namespace");
+ goto out_delete_net;
+ }
+ } else {
+ handler->nsfd[LXC_NS_NET] = ret;
+ DEBUG("Preserved net namespace via fd %d", ret);
}
- } else {
- handler->nsfd[LXC_NS_NET] = ret;
- DEBUG("Preserved net namespace via fd %d", ret);
-
- ret = lxc_netns_set_nsid(handler->nsfd[LXC_NS_NET]);
- if (ret < 0)
- SYSWARN("Failed to allocate new network namespace id");
- else
- TRACE("Allocated new network namespace id");
}
+ ret = lxc_netns_set_nsid(handler->nsfd[LXC_NS_NET]);
+ if (ret < 0)
+ SYSWARN("Failed to allocate new network namespace id");
+ else
+ TRACE("Allocated new network namespace id");
/* Create the network configuration. */
if (handler->ns_clone_flags & CLONE_NEWNET) {
- ret = lxc_network_move_created_netdev_priv(handler->lxcpath,
- handler->name,
- &conf->network,
- handler->pid);
+ ret = lxc_create_network(handler);
if (ret < 0) {
- ERROR("Failed to create the configured network");
+ ERROR("Failed to create the network");
goto out_delete_net;
}
- ret = lxc_create_network_unpriv(handler->lxcpath, handler->name,
- &conf->network, handler->pid, conf->hooks_version);
+ ret = lxc_network_send_to_child(handler);
if (ret < 0) {
- ERROR("Failed to create the configured network");
+ ERROR("Failed to send veth names to child");
goto out_delete_net;
}
}
- ret = lxc_network_send_veth_names_to_child(handler);
- if (ret < 0) {
- ERROR("Failed to send veth names to child");
- goto out_delete_net;
- }
-
if (!lxc_list_empty(&conf->procs)) {
ret = setup_proc_filesystem(&conf->procs, handler->pid);
if (ret < 0)
if (ret < 0)
goto out_delete_net;
- if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
+ if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
ERROR("Failed to setup legacy device cgroup controller limits");
goto out_delete_net;
}
TRACE("Set up legacy device cgroup controller limits");
+ if (!cgroup_ops->devices_activate(cgroup_ops, handler)) {
+ ERROR("Failed to setup cgroup2 device controller limits");
+ goto out_delete_net;
+ }
+ TRACE("Set up cgroup2 device controller limits");
+
if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
/* Now we're ready to preserve the cgroup namespace */
ret = lxc_try_preserve_ns(handler->pid, "cgroup");
}
}
- ret = snprintf(pidstr, 20, "%d", handler->pid);
- if (ret < 0 || ret >= 20)
- goto out_delete_net;
-
- ret = setenv("LXC_PID", pidstr, 1);
- if (ret < 0)
- SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr);
+ cgroup_ops->payload_finalize(cgroup_ops);
+ TRACE("Finished setting up cgroups");
/* Run any host-side start hooks */
ret = run_lxc_hooks(name, "start-host", conf, NULL);
if (ret < 0)
goto out_delete_net;
- ret = lxc_network_recv_name_and_ifindex_from_child(handler);
- if (ret < 0) {
- ERROR("Failed to receive names and ifindices for network "
- "devices from child");
- goto out_delete_net;
+ if (handler->ns_clone_flags & CLONE_NEWNET) {
+ ret = lxc_network_recv_name_and_ifindex_from_child(handler);
+ if (ret < 0) {
+ ERROR("Failed to receive names and ifindices for network devices from child");
+ goto out_delete_net;
+ }
}
/* Now all networks are created, network devices are moved into place,
goto out_delete_net;
}
+ ret = lxc_seccomp_recv_notifier_fd(&handler->conf->seccomp, data_sock1);
+ if (ret < 0) {
+ SYSERROR("Failed to receive seccomp notify fd from child");
+ goto out_delete_net;
+ }
+
ret = handler->ops->post_start(handler, handler->data);
if (ret < 0)
goto out_abort;
out_sync_fini:
lxc_sync_fini(handler);
- if (handler->pinfd >= 0) {
- close(handler->pinfd);
- handler->pinfd = -1;
- }
+ close_prot_errno_disarm(handler->pinfd);
return -1;
}
ret = lxc_init(name, handler);
if (ret < 0) {
ERROR("Failed to initialize container \"%s\"", name);
- return -1;
+ goto out_fini_nonet;
}
handler->ops = ops;
handler->data = data;
goto out_fini_nonet;
}
- if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
+ if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
ERROR("Failed to enter monitor cgroup");
ret = -1;
goto out_fini_nonet;
}
+ if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
+ ERROR("Failed to delegate controllers to monitor cgroup");
+ ret = -1;
+ goto out_fini_nonet;
+ }
+
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */
if (rootfs_is_blockdev(conf)) {
ERROR("Failed to spawn container \"%s\"", name);
goto out_detach_blockdev;
}
- /* close parent side of data socket */
- close(handler->data_sock[0]);
- handler->data_sock[0] = -1;
- close(handler->data_sock[1]);
- handler->data_sock[1] = -1;
handler->conf->reboot = REBOOT_NONE;
ret = lxc_restore_phys_nics_to_netns(handler);
if (ret < 0)
- ERROR("Failed to move physical network devices back to parent "
- "network namespace");
+ ERROR("Failed to move physical network devices back to parent network namespace");
- if (handler->pinfd >= 0) {
- close(handler->pinfd);
- handler->pinfd = -1;
- }
+ close_prot_errno_disarm(handler->pinfd);
lxc_monitor_send_exit_code(name, status, handler->lxcpath);
lxc_error_set_and_log(handler->pid, status);
if (error_num)
*error_num = handler->exit_status;
-out_fini:
+/* These are the goto targets you are not allowed to jump to. */
+__out_fini:
lxc_delete_network(handler);
-out_detach_blockdev:
+__out_detach_blockdev:
detach_block_device(handler->conf);
-out_fini_nonet:
+__out_fini_nonet:
lxc_fini(name, handler);
+
return ret;
+/* These are the goto targets you want to jump to. */
+out_fini_nonet:
+ lxc_abort(name, handler);
+ goto __out_fini_nonet;
+
+out_detach_blockdev:
+ lxc_abort(name, handler);
+ goto __out_detach_blockdev;
+
out_abort:
lxc_abort(name, handler);
- goto out_fini;
+ goto __out_fini;
+
}
struct start_args {