start: check event loop type before closing fd

[mirror_lxc.git] / src / lxc / start.c
diff --git a/src/lxc/start.c b/src/lxc/start.c

index 41f820279d9912a776f672adb01cc5c7bc3b2d15..1a6046c7a40d8b4bb3aebcf521545b3fce7db355 100644 (file)
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1,8 +1,7 @@
  /* SPDX-License-Identifier: LGPL-2.1+ */
  
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
+#include "config.h"
+
  #include <dirent.h>
  #include <errno.h>
  #include <fcntl.h>
@@ -25,7 +24,10 @@
  #include <sys/wait.h>
  #include <unistd.h>
  
+#include "lxc.h"
+
  #include "af_unix.h"
+#include "attach_options.h"
  #include "caps.h"
  #include "cgroups/cgroup.h"
  #include "cgroups/cgroup_utils.h"
@@ -33,14 +35,12 @@
  #include "commands_utils.h"
  #include "compiler.h"
  #include "conf.h"
-#include "config.h"
  #include "confile_utils.h"
  #include "error.h"
  #include "file_utils.h"
  #include "list.h"
  #include "log.h"
  #include "lsm/lsm.h"
-#include "lxccontainer.h"
  #include "lxclock.h"
  #include "lxcseccomp.h"
  #include "macro.h"
@@ -62,8 +62,8 @@
  #include <sys/capability.h>
  #endif
  
-#ifndef HAVE_STRLCPY
-#include "include/strlcpy.h"
+#if !HAVE_STRLCPY
+#include "strlcpy.h"
  #endif
  
  lxc_log_define(start, lxc);
@@ -316,7 +316,7 @@ restart:
  
  #endif
  
-               if (fd <= listen_fds_max) {
+               if ((size_t)fd <= listen_fds_max) {
                         INFO("Inheriting fd %d (using the LISTEN_FDS environment variable)", fd);
                         continue;
                 }
@@ -354,7 +354,7 @@ static int setup_signal_fd(sigset_t *oldmask)
         if (ret < 0)
                 return -EBADF;
  
-       for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
+       for (size_t sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
                 ret = sigdelset(&mask, signals[sig]);
                 if (ret < 0)
                         return -EBADF;
@@ -445,7 +445,7 @@ static int signal_handler(int fd, uint32_t events, void *data,
         /* More robustness, protect ourself from a SIGCHLD sent
          * by a process different from the container init.
          */
-       if (siginfo.ssi_pid != hdlr->pid) {
+       if ((__u64)siginfo.ssi_pid != (__u64)hdlr->pid) {
                 NOTICE("Received %d from pid %d instead of container init %d",
                        siginfo.ssi_signo, siginfo.ssi_pid, hdlr->pid);
                 return hdlr->init_died ? LXC_MAINLOOP_CLOSE
@@ -629,7 +629,8 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
         TRACE("Mainloop is ready");
  
         ret = lxc_mainloop(&descr, -1);
-       close_prot_errno_disarm(descr.epfd);
+       if (descr.type == LXC_MAINLOOP_EPOLL)
+               close_prot_errno_disarm(descr.epfd);
         if (ret < 0 || !handler->init_died)
                 goto out_mainloop_console;
  
@@ -1052,12 +1053,11 @@ static int do_start(void *data)
  {
         struct lxc_handler *handler = data;
         __lxc_unused __do_close int data_sock0 = handler->data_sock[0],
-                                          data_sock1 = handler->data_sock[1];
+                                   data_sock1 = handler->data_sock[1];
         __do_close int devnull_fd = -EBADF, status_fd = -EBADF;
         int ret;
         uid_t new_uid;
         gid_t new_gid;
-       struct lxc_list *iterator;
         uid_t nsuid = 0;
         gid_t nsgid = 0;
  
@@ -1110,7 +1110,7 @@ static int do_start(void *data)
         /* If we are in a new user namespace, become root there to have
          * privilege over our namespace.
          */
-       if (!lxc_list_empty(&handler->conf->id_map)) {
+       if (!list_empty(&handler->conf->id_map)) {
                 if (!handler->conf->root_nsuid_map)
                         nsuid = handler->conf->init_uid;
  
@@ -1257,18 +1257,14 @@ static int do_start(void *data)
                 }
         }
  
-       /* Add the requested environment variables to the current environment to
-        * allow them to be used by the various hooks, such as the start hook
-        * below.
+       /*
+        * Add the requested environment variables to the current environment
+        * to allow them to be used by the various hooks, such as the start
+        * hook below.
          */
-       lxc_list_for_each(iterator, &handler->conf->environment) {
-               ret = putenv((char *)iterator->elem);
-               if (ret < 0) {
-                       SYSERROR("Failed to set environment variable: %s",
-                                (char *)iterator->elem);
-                       goto out_warn_father;
-               }
-       }
+       ret = lxc_set_environment(handler->conf);
+       if (ret < 0)
+               goto out_warn_father;
  
         if (!lxc_sync_wait_parent(handler, START_SYNC_POST_CONFIGURE))
                 goto out_warn_father;
@@ -1361,14 +1357,9 @@ static int do_start(void *data)
         if (ret < 0)
                 SYSERROR("Failed to clear environment.");
  
-       lxc_list_for_each(iterator, &handler->conf->environment) {
-               ret = putenv((char *)iterator->elem);
-               if (ret < 0) {
-                       SYSERROR("Failed to set environment variable: %s",
-                                (char *)iterator->elem);
-                       goto out_warn_father;
-               }
-       }
+       ret = lxc_set_environment(handler->conf);
+       if (ret < 0)
+               goto out_warn_father;
  
         ret = putenv("container=lxc");
         if (ret < 0) {
@@ -1406,7 +1397,7 @@ static int do_start(void *data)
          * we switched to root in the new user namespace further above. Only
          * drop groups if we can, so ensure that we have necessary privilege.
          */
-       if (lxc_list_empty(&handler->conf->id_map)) {
+       if (list_empty(&handler->conf->id_map)) {
                 #if HAVE_LIBCAP
                 if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE))
                 #endif
@@ -1473,7 +1464,7 @@ int resolve_clone_flags(struct lxc_handler *handler)
                         if ((conf->ns_clone & ns_info[i].clone_flag))
                                 handler->ns_clone_flags |= ns_info[i].clone_flag;
                 } else {
-                       if (i == LXC_NS_USER && lxc_list_empty(&handler->conf->id_map))
+                       if (i == LXC_NS_USER && list_empty(&handler->conf->id_map))
                                 continue;
  
                         if (i == LXC_NS_NET && lxc_requests_empty_network(handler))
@@ -1563,6 +1554,40 @@ static inline int do_share_ns(void *arg)
         return 0;
  }
  
+/*
+ * Set up a new core scheduling domain for the process in handler->pid.
+ *
+ * lxc_spawn() calls this right after the child has been cloned and treats a
+ * negative return value as fatal for the spawn.
+ *
+ * Nothing is done unless conf->sched_core is set. Otherwise a cookie is
+ * created via core_scheduling_cookie_create_threadgroup() and then read back
+ * into conf->sched_core_cookie.
+ *
+ * Returns 0 on success, when core scheduling was not requested, or when the
+ * cookie creation reported -ENODEV (in which case conf->sched_core is
+ * cleared). All other failures are returned through syserror()/syserror_set().
+ * NOTE(review): those helpers are defined elsewhere; presumably they yield a
+ * negative errno-style value - confirm against their definitions.
+ */
+static int core_scheduling(struct lxc_handler *handler)
+{
+       struct lxc_conf *conf = handler->conf;
+       int ret;
+
+       /* Core scheduling is opt-in; without the config flag this is a no-op. */
+       if (!conf->sched_core)
+               return log_trace(0, "No new core scheduling domain requested");
+
+       /* Refuse the request unless the container gets its own pid namespace. */
+       if (!(handler->ns_clone_flags & CLONE_NEWPID))
+               return syserror_set(-EINVAL, "Core scheduling currently requires a separate pid namespace");
+
+       ret = core_scheduling_cookie_create_threadgroup(handler->pid);
+       if (ret < 0) {
+               /*
+                * -ENODEV is not treated as an error: the request is dropped
+                * by clearing conf->sched_core and startup continues.
+                */
+               if (ret == -ENODEV) {
+                       INFO("The kernel doesn't support or doesn't use simultaneous multithreading (SMT)");
+                       conf->sched_core = false;
+                       return 0;
+               }
+               /* -EINVAL is reported as missing kernel support. */
+               if (ret == -EINVAL)
+                       return syserror("The kernel does not support core scheduling");
+
+               return syserror("Failed to create new core scheduling domain");
+       }
+
+       /*
+        * Read the cookie back and store it in the config. Both a failing
+        * lookup and a cookie rejected by core_scheduling_cookie_valid() are
+        * treated as errors.
+        */
+       ret = core_scheduling_cookie_get(handler->pid, &conf->sched_core_cookie);
+       if (ret || !core_scheduling_cookie_valid(conf->sched_core_cookie))
+               return syserror("Failed to retrieve core scheduling domain cookie");
+
+       TRACE("Created new core scheduling domain with cookie %llu",
+             (llu)conf->sched_core_cookie);
+
+       return 0;
+}
+
  /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
   * exec()s the requested container binary.
   * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1576,7 +1601,7 @@ static int lxc_spawn(struct lxc_handler *handler)
         int i, ret;
         char pidstr[20];
         bool wants_to_map_ids;
-       struct lxc_list *id_map;
+       struct list_head *id_map;
         const char *name = handler->name;
         const char *lxcpath = handler->lxcpath;
         bool share_ns = false;
@@ -1584,7 +1609,7 @@ static int lxc_spawn(struct lxc_handler *handler)
         struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
  
         id_map = &conf->id_map;
-       wants_to_map_ids = !lxc_list_empty(id_map);
+       wants_to_map_ids = !list_empty(id_map);
  
         for (i = 0; i < LXC_NS_MAX; i++) {
                 if (!conf->ns_share[i])
@@ -1719,6 +1744,10 @@ static int lxc_spawn(struct lxc_handler *handler)
                 handler->clone_flags &= ~CLONE_PIDFD;
         TRACE("Cloned child process %d", handler->pid);
  
+       ret = core_scheduling(handler);
+       if (ret < 0)
+               goto out_delete_net;
+
         /* Verify that we can actually make use of pidfds. */
         if (!lxc_can_use_pidfd(handler->pidfd))
                 close_prot_errno_disarm(handler->pidfd);
@@ -1816,18 +1845,16 @@ static int lxc_spawn(struct lxc_handler *handler)
                 }
         }
  
-       if (!lxc_list_empty(&conf->procs)) {
-               ret = setup_proc_filesystem(&conf->procs, handler->pid);
-               if (ret < 0)
-                       goto out_delete_net;
+       ret = setup_proc_filesystem(conf, handler->pid);
+       if (ret < 0) {
+               ERROR("Failed to setup procfs limits");
+               goto out_delete_net;
         }
  
-       if (!lxc_list_empty(&conf->limits)) {
-               ret = setup_resource_limits(&conf->limits, handler->pid);
-               if (ret < 0) {
-                       ERROR("Failed to setup resource limits");
-                       goto out_delete_net;
-               }
+       ret = setup_resource_limits(conf, handler->pid);
+       if (ret < 0) {
+               ERROR("Failed to setup resource limits");
+               goto out_delete_net;
         }
  
         /* Tell the child to continue its initialization. */
@@ -2014,14 +2041,14 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
          * it readonly.
          * If the container is unprivileged then skip rootfs pinning.
          */
-       ret = lxc_rootfs_init(conf, !lxc_list_empty(&conf->id_map));
+       ret = lxc_rootfs_init(conf, !list_empty(&conf->id_map));
         if (ret) {
                 ERROR("Failed to handle rootfs pinning for container \"%s\"", handler->name);
                 ret = -1;
                 goto out_abort;
         }
  
-       if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
+       if (geteuid() == 0 && !list_empty(&conf->id_map)) {
                 /*
                  * Most filesystems can't be mounted inside a userns so handle them here.
                  */