git.proxmox.com Git - mirror_lxc.git/commitdiff
Enable network namespace sharing in lxc-start
author Marek Majkowski <marek@cloudflare.com>
Wed, 6 Nov 2013 23:16:33 +0000 (15:16 -0800)
committer Marek Majkowski <marek@cloudflare.com>
Wed, 6 Nov 2013 23:16:33 +0000 (15:16 -0800)
Right now lxc-start always does one of two things: it creates
a new namespace or inherits it from the parent environment.
This patch adds a third option: share a namespace with another
container (actually: a process).

In some situations this is handy. For example by sharing a network
namespace it is possible to migrate services between containers
without (or with little) downtime.

This patch creates an infrastructure for inheriting any type
of namespace, but only the network namespace is supported for now.

src/lxc/arguments.h
src/lxc/conf.c
src/lxc/conf.h
src/lxc/lxc_start.c
src/lxc/start.c
src/lxc/start.h

index dc2d117eb9027461fdb5ccefde9ec536e8ef3c5e..bf262f859034d21e10b1b9cb5813ad82b6b60503 100644 (file)
@@ -53,6 +53,9 @@ struct lxc_arguments {
        /* set to 0 to accept only 1 lxcpath, -1 for unlimited */
        int lxcpath_additional;
 
+       /* for lxc-start */
+       const char *share_net;
+
        /* for lxc-checkpoint/restart */
        const char *statefile;
        int statefd;
index a75673126f55ccf413436934ffb345fa42ada5b7..3bebe46d157d07946c6654f1d0ff5db19f7ab9be 100644 (file)
@@ -2399,6 +2399,9 @@ struct lxc_conf *lxc_conf_init(void)
        new->lsm_se_context = NULL;
        new->lsm_umount_proc = 0;
 
+       for (i = 0; i < LXC_NS_MAX; i++)
+               new->inherit_ns_fd[i] = -1;
+
        return new;
 }
 
index 940d493719344e842c070b9f5784cfedcb0646f0..606ad5123e605f346d5958ca9c3023301b3bb9ac 100644 (file)
@@ -318,6 +318,8 @@ struct lxc_conf {
        // store the config file specified values here.
        char *logfile;  // the logfile as specifed in config
        int loglevel;   // loglevel as specifed in config (if any)
+
+       int inherit_ns_fd[LXC_NS_MAX];
 };
 
 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
index add2542f22da36cd9dfde116732681ca3c5386a4..69fde9a493ff719b4263ec75e93ce78c3a22f59d 100644 (file)
@@ -51,6 +51,8 @@
 #include "confile.h"
 #include "arguments.h"
 
+#define OPT_SHARE_NET (OPT_USAGE + 1) /* id of the long-only --share-net option */
+
 lxc_log_define(lxc_start_ui, lxc_start);
 
 static struct lxc_list defines;
@@ -101,6 +103,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg)
        case 'C': args->close_all_fds = 1; break;
        case 's': return lxc_config_define_add(&defines, arg);
        case 'p': args->pidfile = arg; break;
+       case OPT_SHARE_NET: args->share_net = arg; break;
        }
        return 0;
 }
@@ -113,6 +116,7 @@ static const struct option my_longopts[] = {
        {"console-log", required_argument, 0, 'L'},
        {"close-all-fds", no_argument, 0, 'C'},
        {"pidfile", required_argument, 0, 'p'},
+       {"share-net", required_argument, 0, OPT_SHARE_NET},
        LXC_COMMON_OPTIONS
 };
 
@@ -133,7 +137,9 @@ Options :\n\
   -C, --close-all-fds    If any fds are inherited, close them\n\
                          If not specified, exit with failure instead\n\
                         Note: --daemon implies --close-all-fds\n\
-  -s, --define KEY=VAL   Assign VAL to configuration variable KEY\n",
+  -s, --define KEY=VAL   Assign VAL to configuration variable KEY\n\
+      --share-net=PID    Share a network namespace with another container\n\
+",
        .options   = my_longopts,
        .parser    = my_parser,
        .checker   = NULL,
@@ -249,6 +255,27 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (my_args.share_net != NULL) {
+               /* Resolve --share-net=PID to an open fd on /proc/PID/ns/net
+                * and record it in conf->inherit_ns_fd[LXC_NS_NET]. */
+               char *eptr;
+               char path[MAXPATHLEN];
+               int fd, ret;
+               long pid = strtol(my_args.share_net, &eptr, 10);
+
+               /* Reject empty, non-numeric and non-positive input. strtol does
+                * not set errno for these, so log with ERROR, not SYSERROR. */
+               if (eptr == my_args.share_net || *eptr != '\0' || pid <= 0) {
+                       ERROR("'%s' is not a valid pid number", my_args.share_net);
+                       goto out;
+               }
+
+               ret = snprintf(path, MAXPATHLEN, "/proc/%ld/ns/net", pid);
+               if (ret < 0 || ret >= MAXPATHLEN)
+                       goto out;
+
+               fd = open(path, O_RDONLY);
+               if (fd < 0) {
+                       SYSERROR("failed to open %s", path);
+                       goto out;
+               }
+               conf->inherit_ns_fd[LXC_NS_NET] = fd;
+       }
+
        if (my_args.daemonize) {
                c->want_daemonize(c);
        }
index 3b2ba8fbd06f0cb9b0bbf7da578be0bd54401d0c..50992fcc032ce4e57260890b0c80fe43b7150943 100644 (file)
 
 lxc_log_define(lxc_start, lxc);
 
+/* Per-namespace table indexed by LXC_NS_*: the entry name used under
+ * /proc/self/ns/ and the matching CLONE_NEW* flag. */
+const struct ns_info ns_info[LXC_NS_MAX] = {
+       [LXC_NS_MNT]  = { .proc_name = "mnt",  .clone_flag = CLONE_NEWNS },
+       [LXC_NS_PID]  = { .proc_name = "pid",  .clone_flag = CLONE_NEWPID },
+       [LXC_NS_UTS]  = { .proc_name = "uts",  .clone_flag = CLONE_NEWUTS },
+       [LXC_NS_IPC]  = { .proc_name = "ipc",  .clone_flag = CLONE_NEWIPC },
+       [LXC_NS_USER] = { .proc_name = "user", .clone_flag = CLONE_NEWUSER },
+       [LXC_NS_NET]  = { .proc_name = "net",  .clone_flag = CLONE_NEWNET },
+};
+
+/*
+ * Close every open descriptor in ns_fd and reset its slot to -1.
+ * Slots that are already negative are left untouched. The whole sweep
+ * runs between process_lock() and process_unlock().
+ */
+static void close_ns(int ns_fd[LXC_NS_MAX]) {
+       int idx;
+
+       process_lock();
+       for (idx = 0; idx < LXC_NS_MAX; idx++) {
+               if (ns_fd[idx] < 0)
+                       continue;
+
+               close(ns_fd[idx]);
+               ns_fd[idx] = -1;
+       }
+       process_unlock();
+}
+
+/*
+ * Open /proc/self/ns/<name> for every namespace whose CLONE_NEW* flag is
+ * set in clone_flags and store the descriptor in the matching ns_fd slot,
+ * so that attach_ns() can switch back to those namespaces later. Slots
+ * that were not requested are set to -1.
+ *
+ * Returns 0 on success (including when clone_flags is 0). On failure
+ * returns -1 with every slot closed and reset to -1.
+ */
+static int preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags) {
+       int i, saved_errno;
+       char path[MAXPATHLEN];
+
+       /* Initialise the output first: no return path may leave
+        * indeterminate descriptors behind for a later attach_ns(). */
+       for (i = 0; i < LXC_NS_MAX; i++)
+               ns_fd[i] = -1;
+
+       /* Nothing to preserve, so /proc/self/ns is not needed at all. */
+       if (clone_flags == 0)
+               return 0;
+
+       if (access("/proc/self/ns", X_OK)) {
+               ERROR("Does this kernel version support 'attach'?");
+               return -1;
+       }
+
+       for (i = 0; i < LXC_NS_MAX; i++) {
+               if ((clone_flags & ns_info[i].clone_flag) == 0)
+                       continue;
+               snprintf(path, MAXPATHLEN, "/proc/self/ns/%s", ns_info[i].proc_name);
+               process_lock();
+               ns_fd[i] = open(path, O_RDONLY | O_CLOEXEC);
+               process_unlock();
+               if (ns_fd[i] < 0)
+                       goto error;
+       }
+
+       return 0;
+
+error:
+       /* close_ns() may clobber errno; keep the earlier value for SYSERROR. */
+       saved_errno = errno;
+       close_ns(ns_fd);
+       errno = saved_errno;
+       SYSERROR("failed to open '%s'", path);
+       return -1;
+}
+
+/*
+ * Call setns() on every non-negative descriptor in ns_fd, in index order.
+ * Negative slots are skipped. Stops at the first failure, logs the name
+ * of the namespace that could not be entered and returns -1; returns 0
+ * when every call succeeded.
+ */
+static int attach_ns(const int ns_fd[LXC_NS_MAX]) {
+       int idx;
+
+       for (idx = 0; idx < LXC_NS_MAX; idx++) {
+               if (ns_fd[idx] >= 0 && setns(ns_fd[idx], 0) != 0) {
+                       SYSERROR("failed to set namespace '%s'", ns_info[idx].proc_name);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 static int match_fd(int fd)
 {
        return (fd == 0 || fd == 1 || fd == 2);
@@ -645,6 +717,12 @@ int lxc_spawn(struct lxc_handler *handler)
        const char *name = handler->name;
        struct cgroup_meta_data *cgroup_meta = NULL;
        const char *cgroup_pattern = NULL;
+       int saved_ns_fd[LXC_NS_MAX];
+       int preserve_mask = 0, i;
+
+       for (i = 0; i < LXC_NS_MAX; i++)
+               if (handler->conf->inherit_ns_fd[i] > -1)
+                       preserve_mask |= ns_info[i].clone_flag;
 
        if (lxc_sync_init(handler))
                return -1;
@@ -654,34 +732,40 @@ int lxc_spawn(struct lxc_handler *handler)
                INFO("Cloning a new user namespace");
                handler->clone_flags |= CLONE_NEWUSER;
        }
-       if (!lxc_list_empty(&handler->conf->network)) {
-
-               handler->clone_flags |= CLONE_NEWNET;
 
-               /* Find gateway addresses from the link device, which is
-                * no longer accessible inside the container. Do this
-                * before creating network interfaces, since goto
-                * out_delete_net does not work before lxc_clone. */
-               if (lxc_find_gateway_addresses(handler)) {
-                       ERROR("failed to find gateway addresses");
-                       lxc_sync_fini(handler);
-                       return -1;
+       if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1) {
+               if (!lxc_list_empty(&handler->conf->network)) {
+
+                       handler->clone_flags |= CLONE_NEWNET;
+
+                       /* Find gateway addresses from the link device, which is
+                        * no longer accessible inside the container. Do this
+                        * before creating network interfaces, since goto
+                        * out_delete_net does not work before lxc_clone. */
+                       if (lxc_find_gateway_addresses(handler)) {
+                               ERROR("failed to find gateway addresses");
+                               lxc_sync_fini(handler);
+                               return -1;
+                       }
+
+                       /* that should be done before the clone because we will
+                        * fill the netdev index and use them in the child
+                        */
+                       if (lxc_create_network(handler)) {
+                               ERROR("failed to create the network");
+                               lxc_sync_fini(handler);
+                               return -1;
+                       }
                }
 
-               /* that should be done before the clone because we will
-                * fill the netdev index and use them in the child
-                */
-               if (lxc_create_network(handler)) {
-                       ERROR("failed to create the network");
-                       lxc_sync_fini(handler);
-                       return -1;
+               if (save_phys_nics(handler->conf)) {
+                       ERROR("failed to save physical nic info");
+                       goto out_abort;
                }
+       } else {
+               INFO("Inheriting a net namespace");
        }
 
-       if (save_phys_nics(handler->conf)) {
-               ERROR("failed to save physical nic info");
-               goto out_abort;
-       }
 
        cgroup_meta = lxc_cgroup_load_meta();
        if (!cgroup_meta) {
@@ -716,6 +800,9 @@ int lxc_spawn(struct lxc_handler *handler)
        if (handler->pinfd == -1)
                INFO("failed to pin the container's rootfs");
 
+       preserve_ns(saved_ns_fd, preserve_mask);
+       attach_ns(handler->conf->inherit_ns_fd);
+
        /* Create a process in a new set of namespaces */
        handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
        if (handler->pid < 0) {
@@ -723,6 +810,8 @@ int lxc_spawn(struct lxc_handler *handler)
                goto out_delete_net;
        }
 
+       attach_ns(saved_ns_fd);
+
        lxc_sync_fini_child(handler);
 
        if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
index c35c5c481d791fa617dd524bcf2139dc1042bc6e..c1f790901580491846bac02d4578645a9d5db699 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <lxc/state.h>
 #include <sys/param.h>
+#include "namespace.h"
 
 struct lxc_conf;
 
@@ -39,6 +40,23 @@ struct lxc_operations {
 
 struct cgroup_desc;
 
+enum { /* indices into ns_info[] and the per-namespace fd arrays */
+       LXC_NS_MNT,  /* "mnt"  / CLONE_NEWNS */
+       LXC_NS_PID,  /* "pid"  / CLONE_NEWPID */
+       LXC_NS_UTS,  /* "uts"  / CLONE_NEWUTS */
+       LXC_NS_IPC,  /* "ipc"  / CLONE_NEWIPC */
+       LXC_NS_USER, /* "user" / CLONE_NEWUSER */
+       LXC_NS_NET,  /* "net"  / CLONE_NEWNET */
+       LXC_NS_MAX   /* number of entries above; used as the array size */
+};
+
+struct ns_info { /* one entry of the ns_info[] table */
+       const char *proc_name; /* entry name under /proc/self/ns/, e.g. "net" */
+       int clone_flag;        /* matching CLONE_NEW* flag */
+};
+
+extern const struct ns_info ns_info[LXC_NS_MAX]; /* declaration only; defined in start.c */
+
 struct lxc_handler {
        pid_t pid;
        char *name;