-/*
- * lxc: linux Container library
- *
- * (C) Copyright IBM Corp. 2007, 2008
- *
- * Authors:
- * Daniel Lezcano <daniel.lezcano at free.fr>
- * Serge Hallyn <serge@hallyn.com>
- * Christian Brauner <christian.brauner@ubuntu.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
+/* SPDX-License-Identifier: LGPL-2.1+ */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
if (siginfo.ssi_signo == SIGHUP) {
if (hdlr->pidfd >= 0)
lxc_raw_pidfd_send_signal(hdlr->pidfd, SIGTERM, NULL, 0);
- else if (hdlr->proc_pidfd >= 0)
- lxc_raw_pidfd_send_signal(hdlr->proc_pidfd, SIGTERM, NULL, 0);
else
kill(hdlr->pid, SIGTERM);
INFO("Killing %d since terminal hung up", hdlr->pid);
if (hdlr->pidfd >= 0)
lxc_raw_pidfd_send_signal(hdlr->pidfd,
siginfo.ssi_signo, NULL, 0);
- else if (hdlr->proc_pidfd >= 0)
- lxc_raw_pidfd_send_signal(hdlr->proc_pidfd,
- siginfo.ssi_signo, NULL, 0);
else
kill(hdlr->pid, siginfo.ssi_signo);
INFO("Forwarded signal %d to pid %d", siginfo.ssi_signo, hdlr->pid);
handler->pidfd = -EBADF;
- handler->proc_pidfd = -EBADF;
-
handler->sigfd = -1;
for (i = 0; i < LXC_NS_MAX; i++)
if (handler->pidfd >= 0)
close(handler->pidfd);
- if (handler->proc_pidfd >= 0)
- close(handler->proc_pidfd);
-
if (handler->sigfd >= 0)
close(handler->sigfd);
if (handler->conf->maincmd_fd >= 0)
lxc_abstract_unix_close(handler->conf->maincmd_fd);
+ if (handler->monitor_status_fd >= 0)
+ close(handler->monitor_status_fd);
+
if (handler->state_socket_pair[0] >= 0)
close(handler->state_socket_pair[0]);
handler->data_sock[0] = handler->data_sock[1] = -1;
handler->conf = conf;
handler->lxcpath = lxcpath;
+ handler->monitor_status_fd = -EBADF;
handler->pinfd = -1;
handler->pidfd = -EBADF;
- handler->proc_pidfd = -EBADF;
handler->sigfd = -EBADF;
handler->init_died = false;
handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
handler->nsfd[i] = -1;
handler->name = name;
+ if (daemonize)
+ handler->transient_pid = lxc_raw_getpid();
+ else
+ handler->transient_pid = -1;
if (daemonize && handler->conf->reboot == REBOOT_NONE) {
/* Create socketpair() to synchronize on daemonized startup.
int lxc_init(const char *name, struct lxc_handler *handler)
{
+ __do_close_prot_errno int status_fd = -EBADF;
int ret;
const char *loglevel;
struct lxc_conf *conf = handler->conf;
handler->monitor_pid = lxc_raw_getpid();
+ status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC);
+ if (status_fd < 0) {
+ SYSERROR("Failed to open monitor status fd");
+ goto out_close_maincmd_fd;
+ }
lsm_init();
TRACE("Initialized LSM");
ret = lsm_process_prepare(conf, handler->lxcpath);
if (ret < 0) {
ERROR("Failed to initialize LSM");
- goto out_destroy_cgroups;
+ goto out_delete_terminal;
}
TRACE("Initialized LSM");
INFO("Container \"%s\" is initialized", name);
+ handler->monitor_status_fd = move_fd(status_fd);
return 0;
-out_destroy_cgroups:
- handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
- handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
-
out_delete_terminal:
lxc_terminal_delete(&handler->conf->console);
lsm_process_cleanup(handler->conf, handler->lxcpath);
- cgroup_ops->payload_destroy(cgroup_ops, handler);
- cgroup_ops->monitor_destroy(cgroup_ops, handler);
+ if (cgroup_ops) {
+ cgroup_ops->payload_destroy(cgroup_ops, handler);
+ cgroup_ops->monitor_destroy(cgroup_ops, handler);
+ }
if (handler->conf->reboot == REBOOT_NONE) {
/* For all new state clients simply close the command socket.
lxc_set_state(name, handler, ABORTING);
- if (handler->pidfd > 0)
+ if (handler->pidfd >= 0) {
ret = lxc_raw_pidfd_send_signal(handler->pidfd, SIGKILL, NULL, 0);
- else if (handler->proc_pidfd > 0)
- ret = lxc_raw_pidfd_send_signal(handler->proc_pidfd, SIGKILL, NULL, 0);
- else if (handler->pid > 0)
- ret = kill(handler->pid, SIGKILL);
- if (ret < 0)
- SYSERROR("Failed to send SIGKILL to %d", handler->pid);
+ if (ret)
+ SYSWARN("Failed to send SIGKILL via pidfd %d for process %d",
+ handler->pidfd, handler->pid);
+ }
+
+ if (!ret || errno != ESRCH)
+ if (kill(handler->pid, SIGKILL))
+ SYSWARN("Failed to send SIGKILL to %d", handler->pid);
do {
ret = waitpid(-1, &status, 0);
static int do_start(void *data)
{
struct lxc_handler *handler = data;
- ATTR_UNUSED __do_close_prot_errno int data_sock0 = handler->data_sock[0],
- data_sock1 = handler->data_sock[1];
+ __lxc_unused __do_close_prot_errno int data_sock0 = handler->data_sock[0],
+ data_sock1 = handler->data_sock[1];
+ __do_close_prot_errno int status_fd = -EBADF;
int ret;
- char path[PATH_MAX];
uid_t new_uid;
gid_t new_gid;
struct lxc_list *iterator;
lxc_sync_fini_parent(handler);
+ if (lxc_abstract_unix_recv_fds(data_sock1, &status_fd, 1, NULL, 0) < 0) {
+ ERROR("Failed to receive status file descriptor to child process");
+ goto out_warn_father;
+ }
+
/* This prctl must be before the synchro, so if the parent dies before
* we set the parent death signal, we will detect its death with the
* synchro right after, otherwise we have a window where the parent can
* exit before we set the pdeath signal leading to an unsupervised
* container.
*/
- ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
+ ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
goto out_error;
if (handler->ns_clone_flags & CLONE_NEWNET) {
- ret = lxc_network_recv_veth_names_from_parent(handler);
+ ret = lxc_network_recv_from_parent(handler);
if (ret < 0) {
ERROR("Failed to receive veth names from parent");
goto out_warn_father;
goto out_warn_father;
/* set{g,u}id() clears deathsignal */
- ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid);
+ ret = lxc_set_death_signal(SIGKILL, handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
goto out_warn_father;
goto out_warn_father;
}
- ret = snprintf(path, sizeof(path), "%s/dev/null",
- handler->conf->rootfs.mount);
- if (ret < 0 || ret >= sizeof(path))
- goto out_warn_father;
-
/* In order to checkpoint restore, we need to have everything in the
* same mount namespace. However, some containers may not have a
* reasonable /dev (in particular, they may not have /dev/null), so we
* where it isn't wanted.
*/
if (handler->daemonize && !handler->conf->autodev) {
+ char path[PATH_MAX];
+
+ ret = snprintf(path, sizeof(path), "%s/dev/null",
+ handler->conf->rootfs.mount);
+ if (ret < 0 || ret >= sizeof(path))
+ goto out_warn_father;
+
ret = access(path, F_OK);
if (ret != 0) {
devnull_fd = open_devnull();
/* Add the requested environment variables to the current environment to
* allow them to be used by the various hooks, such as the start hook
- * above.
+ * below.
*/
lxc_list_for_each(iterator, &handler->conf->environment) {
ret = putenv((char *)iterator->elem);
}
if (handler->conf->monitor_signal_pdeath != SIGKILL) {
- ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, handler->monitor_pid);
+ ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath,
+ handler->monitor_pid, status_fd);
if (ret < 0) {
SYSERROR("Failed to set PR_SET_PDEATHSIG to %d",
handler->conf->monitor_signal_pdeath);
break;
tty = &ttys->tty[i];
- tty->busy = 0;
+ tty->busy = -1;
tty->master = ttyfds[0];
tty->slave = ttyfds[1];
TRACE("Received pty with master fd %d and slave fd %d from "
- "parent", tty->master, tty->slave);
+ "child", tty->master, tty->slave);
}
if (ret < 0)
return 0;
}
-static int proc_pidfd_open(pid_t pid)
-{
- __do_close_prot_errno int proc_pidfd = -EBADF;
- char path[100];
-
- snprintf(path, sizeof(path), "/proc/%d", pid);
- proc_pidfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (proc_pidfd < 0) {
- SYSERROR("Failed to open %s", path);
- return -1;
- }
-
- /* Test whether we can send signals. */
- if (lxc_raw_pidfd_send_signal(proc_pidfd, 0, NULL, 0)) {
- SYSERROR("Failed to send signal through pidfd");
- return -1;
- }
-
- return move_fd(proc_pidfd);
-}
-
/* lxc_spawn() performs crucial setup tasks and clone()s the new process which
* exec()s the requested container binary.
* Note that lxc_spawn() runs in the parent namespaces. Any operations performed
if (ret < 0)
goto out_sync_fini;
+ if (handler->ns_clone_flags & CLONE_NEWNET) {
+ ret = lxc_find_gateway_addresses(handler);
+ if (ret) {
+ ERROR("Failed to find gateway addresses");
+ goto out_sync_fini;
+ }
+ }
+
if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
ERROR("Failed creating cgroups");
goto out_delete_net;
}
TRACE("Cloned child process %d", handler->pid);
- if (handler->pidfd < 0) {
- handler->proc_pidfd = proc_pidfd_open(handler->pid);
- if (handler->proc_pidfd < 0 && (errno != ENOSYS))
- goto out_delete_net;
- }
+ ret = snprintf(pidstr, 20, "%d", handler->pid);
+ if (ret < 0 || ret >= 20)
+ goto out_delete_net;
+
+ ret = setenv("LXC_PID", pidstr, 1);
+ if (ret < 0)
+ SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr);
for (i = 0; i < LXC_NS_MAX; i++)
if (handler->ns_on_clone_flags & ns_info[i].clone_flag)
lxc_sync_fini_child(handler);
+ if (lxc_abstract_unix_send_fds(handler->data_sock[0], &handler->monitor_status_fd, 1, NULL, 0) < 0) {
+ ERROR("Failed to send status file descriptor to child process");
+ goto out_delete_net;
+ }
+ close_prot_errno_disarm(handler->monitor_status_fd);
+
/* Map the container uids. The container became an invalid userid the
* moment it was cloned with CLONE_NEWUSER. This call doesn't change
* anything immediately, but allows the container to setuid(0) (0 being
if (ret < 0)
goto out_delete_net;
- if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) {
+ if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, false)) {
ERROR("Failed to setup cgroup limits for container \"%s\"", name);
goto out_delete_net;
}
- if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
+ if (!cgroup_ops->payload_enter(cgroup_ops, handler)) {
goto out_delete_net;
+ }
+
+ if (!cgroup_ops->payload_delegate_controllers(cgroup_ops)) {
+ ERROR("Failed to delegate controllers to payload cgroup");
+ goto out_delete_net;
+ }
+
+ if (!cgroup_ops->setup_limits(cgroup_ops, handler)) {
+ ERROR("Failed to setup cgroup limits for container \"%s\"", name);
+ goto out_delete_net;
+ }
if (!cgroup_ops->chown(cgroup_ops, handler->conf))
goto out_delete_net;
goto out_delete_net;
}
- ret = lxc_network_send_veth_names_to_child(handler);
+ ret = lxc_network_send_to_child(handler);
if (ret < 0) {
ERROR("Failed to send veth names to child");
goto out_delete_net;
if (ret < 0)
goto out_delete_net;
- if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
+ if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) {
ERROR("Failed to setup legacy device cgroup controller limits");
goto out_delete_net;
}
TRACE("Set up legacy device cgroup controller limits");
+ if (!cgroup_ops->devices_activate(cgroup_ops, handler)) {
+ ERROR("Failed to setup cgroup2 device controller limits");
+ goto out_delete_net;
+ }
+ TRACE("Set up cgroup2 device controller limits");
+
if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
/* Now we're ready to preserve the cgroup namespace */
ret = lxc_try_preserve_ns(handler->pid, "cgroup");
}
}
- ret = snprintf(pidstr, 20, "%d", handler->pid);
- if (ret < 0 || ret >= 20)
- goto out_delete_net;
-
- ret = setenv("LXC_PID", pidstr, 1);
- if (ret < 0)
- SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr);
+ cgroup_ops->payload_finalize(cgroup_ops);
+ TRACE("Finished setting up cgroups");
/* Run any host-side start hooks */
ret = run_lxc_hooks(name, "start-host", conf, NULL);
goto out_fini_nonet;
}
- if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
+ if (!cgroup_ops->monitor_enter(cgroup_ops, handler)) {
ERROR("Failed to enter monitor cgroup");
ret = -1;
goto out_fini_nonet;
}
+ if (!cgroup_ops->monitor_delegate_controllers(cgroup_ops)) {
+ ERROR("Failed to delegate controllers to monitor cgroup");
+ ret = -1;
+ goto out_fini_nonet;
+ }
+
if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
/* If the backing store is a device, mount it here and now. */
if (rootfs_is_blockdev(conf)) {