git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/start.c
start: check event loop type before closing fd
[mirror_lxc.git] / src / lxc / start.c
index 4f09e4e5c07ff784759dac5fa9389868c8fc270e..1a6046c7a40d8b4bb3aebcf521545b3fce7db355 100644 (file)
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: LGPL-2.1+ */
 
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
+#include "config.h"
+
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include "lxc.h"
+
 #include "af_unix.h"
+#include "attach_options.h"
 #include "caps.h"
 #include "cgroups/cgroup.h"
 #include "cgroups/cgroup_utils.h"
 #include "commands_utils.h"
 #include "compiler.h"
 #include "conf.h"
-#include "config.h"
 #include "confile_utils.h"
 #include "error.h"
 #include "file_utils.h"
 #include "list.h"
 #include "log.h"
 #include "lsm/lsm.h"
-#include "lxccontainer.h"
 #include "lxclock.h"
 #include "lxcseccomp.h"
 #include "macro.h"
@@ -62,8 +62,8 @@
 #include <sys/capability.h>
 #endif
 
-#ifndef HAVE_STRLCPY
-#include "include/strlcpy.h"
+#if !HAVE_STRLCPY
+#include "strlcpy.h"
 #endif
 
 lxc_log_define(start, lxc);
@@ -316,7 +316,7 @@ restart:
 
 #endif
 
-               if (fd <= listen_fds_max) {
+               if ((size_t)fd <= listen_fds_max) {
                        INFO("Inheriting fd %d (using the LISTEN_FDS environment variable)", fd);
                        continue;
                }
@@ -354,7 +354,7 @@ static int setup_signal_fd(sigset_t *oldmask)
        if (ret < 0)
                return -EBADF;
 
-       for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
+       for (size_t sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
                ret = sigdelset(&mask, signals[sig]);
                if (ret < 0)
                        return -EBADF;
@@ -445,7 +445,7 @@ static int signal_handler(int fd, uint32_t events, void *data,
        /* More robustness, protect ourself from a SIGCHLD sent
         * by a process different from the container init.
         */
-       if (siginfo.ssi_pid != hdlr->pid) {
+       if ((__u64)siginfo.ssi_pid != (__u64)hdlr->pid) {
                NOTICE("Received %d from pid %d instead of container init %d",
                       siginfo.ssi_signo, siginfo.ssi_pid, hdlr->pid);
                return hdlr->init_died ? LXC_MAINLOOP_CLOSE
@@ -629,7 +629,8 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
        TRACE("Mainloop is ready");
 
        ret = lxc_mainloop(&descr, -1);
-       close_prot_errno_disarm(descr.epfd);
+       if (descr.type == LXC_MAINLOOP_EPOLL)
+               close_prot_errno_disarm(descr.epfd);
        if (ret < 0 || !handler->init_died)
                goto out_mainloop_console;
 
@@ -1052,12 +1053,11 @@ static int do_start(void *data)
 {
        struct lxc_handler *handler = data;
        __lxc_unused __do_close int data_sock0 = handler->data_sock[0],
-                                          data_sock1 = handler->data_sock[1];
+                                   data_sock1 = handler->data_sock[1];
        __do_close int devnull_fd = -EBADF, status_fd = -EBADF;
        int ret;
        uid_t new_uid;
        gid_t new_gid;
-       struct lxc_list *iterator;
        uid_t nsuid = 0;
        gid_t nsgid = 0;
 
@@ -1110,7 +1110,7 @@ static int do_start(void *data)
        /* If we are in a new user namespace, become root there to have
         * privilege over our namespace.
         */
-       if (!lxc_list_empty(&handler->conf->id_map)) {
+       if (!list_empty(&handler->conf->id_map)) {
                if (!handler->conf->root_nsuid_map)
                        nsuid = handler->conf->init_uid;
 
@@ -1257,18 +1257,14 @@ static int do_start(void *data)
                }
        }
 
-       /* Add the requested environment variables to the current environment to
-        * allow them to be used by the various hooks, such as the start hook
-        * below.
+       /*
+        * Add the requested environment variables to the current environment
+        * to allow them to be used by the various hooks, such as the start
+        * hook below.
         */
-       lxc_list_for_each(iterator, &handler->conf->environment) {
-               ret = putenv((char *)iterator->elem);
-               if (ret < 0) {
-                       SYSERROR("Failed to set environment variable: %s",
-                                (char *)iterator->elem);
-                       goto out_warn_father;
-               }
-       }
+       ret = lxc_set_environment(handler->conf);
+       if (ret < 0)
+               goto out_warn_father;
 
        if (!lxc_sync_wait_parent(handler, START_SYNC_POST_CONFIGURE))
                goto out_warn_father;
@@ -1361,14 +1357,9 @@ static int do_start(void *data)
        if (ret < 0)
                SYSERROR("Failed to clear environment.");
 
-       lxc_list_for_each(iterator, &handler->conf->environment) {
-               ret = putenv((char *)iterator->elem);
-               if (ret < 0) {
-                       SYSERROR("Failed to set environment variable: %s",
-                                (char *)iterator->elem);
-                       goto out_warn_father;
-               }
-       }
+       ret = lxc_set_environment(handler->conf);
+       if (ret < 0)
+               goto out_warn_father;
 
        ret = putenv("container=lxc");
        if (ret < 0) {
@@ -1406,7 +1397,7 @@ static int do_start(void *data)
         * we switched to root in the new user namespace further above. Only
         * drop groups if we can, so ensure that we have necessary privilege.
         */
-       if (lxc_list_empty(&handler->conf->id_map)) {
+       if (list_empty(&handler->conf->id_map)) {
                #if HAVE_LIBCAP
                if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE))
                #endif
@@ -1473,7 +1464,7 @@ int resolve_clone_flags(struct lxc_handler *handler)
                        if ((conf->ns_clone & ns_info[i].clone_flag))
                                handler->ns_clone_flags |= ns_info[i].clone_flag;
                } else {
-                       if (i == LXC_NS_USER && lxc_list_empty(&handler->conf->id_map))
+                       if (i == LXC_NS_USER && list_empty(&handler->conf->id_map))
                                continue;
 
                        if (i == LXC_NS_NET && lxc_requests_empty_network(handler))
@@ -1563,6 +1554,40 @@ static inline int do_share_ns(void *arg)
        return 0;
 }
 
+static int core_scheduling(struct lxc_handler *handler) /* If conf->sched_core is set, create a new core scheduling domain for handler->pid and record its cookie in conf. */
+{
+       struct lxc_conf *conf = handler->conf;
+       int ret;
+
+       if (!conf->sched_core) /* Feature not requested: nothing to set up. */
+               return log_trace(0, "No new core scheduling domain requested"); /* NOTE(review): presumably logs and yields the 0 passed in - confirm against log.h. */
+
+       if (!(handler->ns_clone_flags & CLONE_NEWPID)) /* CLONE_NEWPID must be among handler->ns_clone_flags, otherwise the request is rejected. */
+               return syserror_set(-EINVAL, "Core scheduling currently requires a separate pid namespace");
+
+       ret = core_scheduling_cookie_create_threadgroup(handler->pid);
+       if (ret < 0) {
+               if (ret == -ENODEV) { /* Non-fatal: switch the feature off in conf and report success to the caller. */
+                       INFO("The kernel doesn't support or doesn't use simultaneous multithreading (SMT)");
+                       conf->sched_core = false;
+                       return 0;
+               }
+               if (ret == -EINVAL) /* Reported with its own message; unlike -ENODEV this is returned as an error. */
+                       return syserror("The kernel does not support core scheduling");
+
+               return syserror("Failed to create new core scheduling domain"); /* Any other negative return from the create call. */
+       }
+
+       ret = core_scheduling_cookie_get(handler->pid, &conf->sched_core_cookie); /* Read the cookie back into conf->sched_core_cookie. */
+       if (ret || !core_scheduling_cookie_valid(conf->sched_core_cookie)) /* Any non-zero return, or a cookie the validity helper rejects, is a failure. */
+               return syserror("Failed to retrieve core scheduling domain cookie");
+
+       TRACE("Created new core scheduling domain with cookie %llu",
+             (llu)conf->sched_core_cookie);
+
+       return 0; /* Domain created and cookie stored. */
+}
+
 /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
  * exec()s the requested container binary.
  * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1576,7 +1601,7 @@ static int lxc_spawn(struct lxc_handler *handler)
        int i, ret;
        char pidstr[20];
        bool wants_to_map_ids;
-       struct lxc_list *id_map;
+       struct list_head *id_map;
        const char *name = handler->name;
        const char *lxcpath = handler->lxcpath;
        bool share_ns = false;
@@ -1584,7 +1609,7 @@ static int lxc_spawn(struct lxc_handler *handler)
        struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
 
        id_map = &conf->id_map;
-       wants_to_map_ids = !lxc_list_empty(id_map);
+       wants_to_map_ids = !list_empty(id_map);
 
        for (i = 0; i < LXC_NS_MAX; i++) {
                if (!conf->ns_share[i])
@@ -1719,6 +1744,10 @@ static int lxc_spawn(struct lxc_handler *handler)
                handler->clone_flags &= ~CLONE_PIDFD;
        TRACE("Cloned child process %d", handler->pid);
 
+       ret = core_scheduling(handler);
+       if (ret < 0)
+               goto out_delete_net;
+
        /* Verify that we can actually make use of pidfds. */
        if (!lxc_can_use_pidfd(handler->pidfd))
                close_prot_errno_disarm(handler->pidfd);
@@ -2012,14 +2041,14 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
         * it readonly.
         * If the container is unprivileged then skip rootfs pinning.
         */
-       ret = lxc_rootfs_init(conf, !lxc_list_empty(&conf->id_map));
+       ret = lxc_rootfs_init(conf, !list_empty(&conf->id_map));
        if (ret) {
                ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
                ret = -1;
                goto out_abort;
        }
 
-       if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
+       if (geteuid() == 0 && !list_empty(&conf->id_map)) {
                /*
                 * Most filesystems can't be mounted inside a userns so handle them here.
                 */