/* SPDX-License-Identifier: LGPL-2.1+ */
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
+#include "config.h"
+
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>
+#include "lxc.h"
+
#include "af_unix.h"
+#include "attach_options.h"
#include "caps.h"
#include "cgroups/cgroup.h"
#include "cgroups/cgroup_utils.h"
#include "commands_utils.h"
#include "compiler.h"
#include "conf.h"
-#include "config.h"
#include "confile_utils.h"
#include "error.h"
#include "file_utils.h"
#include "list.h"
#include "log.h"
#include "lsm/lsm.h"
-#include "lxccontainer.h"
#include "lxclock.h"
#include "lxcseccomp.h"
#include "macro.h"
#include <sys/capability.h>
#endif
-#ifndef HAVE_STRLCPY
-#include "include/strlcpy.h"
+#if !HAVE_STRLCPY
+#include "strlcpy.h"
#endif
lxc_log_define(start, lxc);
#endif
- if (fd <= listen_fds_max) {
+ if ((size_t)fd <= listen_fds_max) {
INFO("Inheriting fd %d (using the LISTEN_FDS environment variable)", fd);
continue;
}
if (ret < 0)
return -EBADF;
- for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
+ for (size_t sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
ret = sigdelset(&mask, signals[sig]);
if (ret < 0)
return -EBADF;
/* More robustness, protect ourself from a SIGCHLD sent
* by a process different from the container init.
*/
- if (siginfo.ssi_pid != hdlr->pid) {
+ if ((__u64)siginfo.ssi_pid != (__u64)hdlr->pid) {
NOTICE("Received %d from pid %d instead of container init %d",
siginfo.ssi_signo, siginfo.ssi_pid, hdlr->pid);
return hdlr->init_died ? LXC_MAINLOOP_CLOSE
TRACE("Mainloop is ready");
ret = lxc_mainloop(&descr, -1);
- close_prot_errno_disarm(descr.epfd);
+ if (descr.type == LXC_MAINLOOP_EPOLL)
+ close_prot_errno_disarm(descr.epfd);
if (ret < 0 || !handler->init_died)
goto out_mainloop_console;
{
struct lxc_handler *handler = data;
__lxc_unused __do_close int data_sock0 = handler->data_sock[0],
- data_sock1 = handler->data_sock[1];
+ data_sock1 = handler->data_sock[1];
__do_close int devnull_fd = -EBADF, status_fd = -EBADF;
int ret;
uid_t new_uid;
gid_t new_gid;
- struct lxc_list *iterator;
uid_t nsuid = 0;
gid_t nsgid = 0;
/* If we are in a new user namespace, become root there to have
* privilege over our namespace.
*/
- if (!lxc_list_empty(&handler->conf->id_map)) {
+ if (!list_empty(&handler->conf->id_map)) {
if (!handler->conf->root_nsuid_map)
nsuid = handler->conf->init_uid;
}
}
- /* Add the requested environment variables to the current environment to
- * allow them to be used by the various hooks, such as the start hook
- * below.
+ /*
+ * Add the requested environment variables to the current environment
+ * to allow them to be used by the various hooks, such as the start
+ * hook below.
*/
- lxc_list_for_each(iterator, &handler->conf->environment) {
- ret = putenv((char *)iterator->elem);
- if (ret < 0) {
- SYSERROR("Failed to set environment variable: %s",
- (char *)iterator->elem);
- goto out_warn_father;
- }
- }
+ ret = lxc_set_environment(handler->conf);
+ if (ret < 0)
+ goto out_warn_father;
if (!lxc_sync_wait_parent(handler, START_SYNC_POST_CONFIGURE))
goto out_warn_father;
if (ret < 0)
SYSERROR("Failed to clear environment.");
- lxc_list_for_each(iterator, &handler->conf->environment) {
- ret = putenv((char *)iterator->elem);
- if (ret < 0) {
- SYSERROR("Failed to set environment variable: %s",
- (char *)iterator->elem);
- goto out_warn_father;
- }
- }
+ ret = lxc_set_environment(handler->conf);
+ if (ret < 0)
+ goto out_warn_father;
ret = putenv("container=lxc");
if (ret < 0) {
* we switched to root in the new user namespace further above. Only
* drop groups if we can, so ensure that we have necessary privilege.
*/
- if (lxc_list_empty(&handler->conf->id_map)) {
+ if (list_empty(&handler->conf->id_map)) {
#if HAVE_LIBCAP
if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE))
#endif
if ((conf->ns_clone & ns_info[i].clone_flag))
handler->ns_clone_flags |= ns_info[i].clone_flag;
} else {
- if (i == LXC_NS_USER && lxc_list_empty(&handler->conf->id_map))
+ if (i == LXC_NS_USER && list_empty(&handler->conf->id_map))
continue;
if (i == LXC_NS_NET && lxc_requests_empty_network(handler))
return 0;
}
+static int core_scheduling(struct lxc_handler *handler)
+{
+ struct lxc_conf *conf = handler->conf;
+ int ret;
+ /*
+  * Create a new core scheduling domain for handler->pid when
+  * conf->sched_core is set, and store the resulting cookie in
+  * conf->sched_core_cookie.
+  *
+  * Returns 0 on success and when core scheduling is skipped. Failures are
+  * returned through the syserror()/syserror_set() helpers; the call site
+  * treats a negative return value as an error.
+  * NOTE(review): log_trace(0, ...) presumably evaluates to its first
+  * argument, i.e. 0 - confirm against the macro definition.
+  *
+  * Nothing requested in the config: only leave a trace message.
+  */
+ if (!conf->sched_core)
+ return log_trace(0, "No new core scheduling domain requested");
+ /*
+  * The request is rejected with -EINVAL unless the container is started
+  * with CLONE_NEWPID set in handler->ns_clone_flags.
+  */
+ if (!(handler->ns_clone_flags & CLONE_NEWPID))
+ return syserror_set(-EINVAL, "Core scheduling currently requires a separate pid namespace");
+ /*
+  * Ask for a new cookie for handler->pid. The helper is expected to hand
+  * back a negative errno value on failure; three cases are told apart:
+  *  -ENODEV: logged at INFO level, conf->sched_core is cleared and the
+  *           function still returns 0, so this is not a start failure.
+  *  -EINVAL: reported as missing kernel support for core scheduling.
+  *  other:   reported as a generic creation failure.
+  */
+ ret = core_scheduling_cookie_create_threadgroup(handler->pid);
+ if (ret < 0) {
+ if (ret == -ENODEV) {
+ INFO("The kernel doesn't support or doesn't use simultaneous multithreading (SMT)");
+ conf->sched_core = false;
+ return 0;
+ }
+ if (ret == -EINVAL)
+ return syserror("The kernel does not support core scheduling");
+ /* Any other negative value: generic failure. */
+ return syserror("Failed to create new core scheduling domain");
+ }
+ /*
+  * Read the cookie back into conf->sched_core_cookie. A non-zero return
+  * or a cookie rejected by core_scheduling_cookie_valid() is an error.
+  */
+ ret = core_scheduling_cookie_get(handler->pid, &conf->sched_core_cookie);
+ if (ret || !core_scheduling_cookie_valid(conf->sched_core_cookie))
+ return syserror("Failed to retrieve core scheduling domain cookie");
+ /* Record the cookie value in the trace log. */
+ TRACE("Created new core scheduling domain with cookie %llu",
+ (llu)conf->sched_core_cookie);
+ /* Domain created and cookie stored. */
+ return 0;
+}
+
/* lxc_spawn() performs crucial setup tasks and clone()s the new process which
* exec()s the requested container binary.
* Note that lxc_spawn() runs in the parent namespaces. Any operations performed
int i, ret;
char pidstr[20];
bool wants_to_map_ids;
- struct lxc_list *id_map;
+ struct list_head *id_map;
const char *name = handler->name;
const char *lxcpath = handler->lxcpath;
bool share_ns = false;
struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
id_map = &conf->id_map;
- wants_to_map_ids = !lxc_list_empty(id_map);
+ wants_to_map_ids = !list_empty(id_map);
for (i = 0; i < LXC_NS_MAX; i++) {
if (!conf->ns_share[i])
handler->clone_flags &= ~CLONE_PIDFD;
TRACE("Cloned child process %d", handler->pid);
+ ret = core_scheduling(handler);
+ if (ret < 0)
+ goto out_delete_net;
+
/* Verify that we can actually make use of pidfds. */
if (!lxc_can_use_pidfd(handler->pidfd))
close_prot_errno_disarm(handler->pidfd);
}
}
- if (!lxc_list_empty(&conf->procs)) {
- ret = setup_proc_filesystem(&conf->procs, handler->pid);
- if (ret < 0)
- goto out_delete_net;
+ ret = setup_proc_filesystem(conf, handler->pid);
+ if (ret < 0) {
+ ERROR("Failed to setup procfs limits");
+ goto out_delete_net;
}
- if (!lxc_list_empty(&conf->limits)) {
- ret = setup_resource_limits(&conf->limits, handler->pid);
- if (ret < 0) {
- ERROR("Failed to setup resource limits");
- goto out_delete_net;
- }
+ ret = setup_resource_limits(conf, handler->pid);
+ if (ret < 0) {
+ ERROR("Failed to setup resource limits");
+ goto out_delete_net;
}
/* Tell the child to continue its initialization. */
* it readonly.
* If the container is unprivileged then skip rootfs pinning.
*/
- ret = lxc_rootfs_init(conf, !lxc_list_empty(&conf->id_map));
+ ret = lxc_rootfs_init(conf, !list_empty(&conf->id_map));
if (ret) {
ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
ret = -1;
goto out_abort;
}
- if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
+ if (geteuid() == 0 && !list_empty(&conf->id_map)) {
/*
* Most filesystems can't be mounted inside a userns so handle them here.
*/