git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/attach.c
af_unix: fix return value & cleanups
[mirror_lxc.git] / src / lxc / attach.c
index fa6c3cf295784811c6ec166b95bd3293c994ee43..9bc3e23d19540ae3a630690028751cf97c23a4b2 100644 (file)
@@ -24,6 +24,7 @@
 #define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
+#include <termios.h>
 #include <grp.h>
 #include <pwd.h>
 #include <signal.h>
@@ -65,7 +66,9 @@
 #include "lsm/lsm.h"
 #include "lxclock.h"
 #include "lxcseccomp.h"
+#include "mainloop.h"
 #include "namespace.h"
+#include "terminal.h"
 #include "utils.h"
 
 #if HAVE_SYS_PERSONALITY_H
 #define MS_SLAVE (1 << 19)
 #endif
 
-lxc_log_define(lxc_attach, lxc);
-
-/* /proc/pid-to-str/current\0 = (5 + 21 + 7 + 1) */
-#define __LSMATTRLEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
-static int lsm_openat(int procfd, pid_t pid, int on_exec)
-{
-       int ret = -1;
-       int labelfd = -1;
-       const char *name;
-       char path[__LSMATTRLEN];
-
-       name = lsm_name();
-
-       if (strcmp(name, "nop") == 0)
-               return 0;
-
-       if (strcmp(name, "none") == 0)
-               return 0;
-
-       /* We don't support on-exec with AppArmor */
-       if (strcmp(name, "AppArmor") == 0)
-               on_exec = 0;
-
-       if (on_exec)
-               ret = snprintf(path, __LSMATTRLEN, "%d/attr/exec", pid);
-       else
-               ret = snprintf(path, __LSMATTRLEN, "%d/attr/current", pid);
-       if (ret < 0 || ret >= __LSMATTRLEN)
-               return -1;
-
-       labelfd = openat(procfd, path, O_RDWR);
-       if (labelfd < 0) {
-               SYSERROR("Unable to open file descriptor to set LSM label.");
-               return -1;
-       }
-
-       return labelfd;
-}
-
-static int lsm_set_label_at(int lsm_labelfd, int on_exec, char *lsm_label)
-{
-       int fret = -1;
-       const char *name;
-       char *command = NULL;
-
-       name = lsm_name();
-
-       if (strcmp(name, "nop") == 0)
-               return 0;
-
-       if (strcmp(name, "none") == 0)
-               return 0;
-
-       /* We don't support on-exec with AppArmor */
-       if (strcmp(name, "AppArmor") == 0)
-               on_exec = 0;
-
-       if (strcmp(name, "AppArmor") == 0) {
-               int size;
-
-               command =
-                   malloc(strlen(lsm_label) + strlen("changeprofile ") + 1);
-               if (!command) {
-                       SYSERROR("Failed to write apparmor profile.");
-                       goto out;
-               }
-
-               size = sprintf(command, "changeprofile %s", lsm_label);
-               if (size < 0) {
-                       SYSERROR("Failed to write apparmor profile.");
-                       goto out;
-               }
-
-               if (write(lsm_labelfd, command, size + 1) < 0) {
-                       SYSERROR("Unable to set LSM label: %s.", command);
-                       goto out;
-               }
-               INFO("Set LSM label to: %s.", command);
-       } else if (strcmp(name, "SELinux") == 0) {
-               if (write(lsm_labelfd, lsm_label, strlen(lsm_label) + 1) < 0) {
-                       SYSERROR("Unable to set LSM label: %s.", lsm_label);
-                       goto out;
-               }
-               INFO("Set LSM label to: %s.", lsm_label);
-       } else {
-               ERROR("Unable to restore label for unknown LSM: %s.", name);
-               goto out;
-       }
-       fret = 0;
-
-out:
-       free(command);
-
-       if (lsm_labelfd != -1)
-               close(lsm_labelfd);
-
-       return fret;
-}
+lxc_log_define(attach, lxc);
 
 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
 #define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
@@ -258,8 +164,13 @@ static inline void lxc_proc_close_ns_fd(struct lxc_proc_context_info *ctx)
 static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
 {
        free(ctx->lsm_label);
-       if (ctx->container)
+       ctx->lsm_label = NULL;
+
+       if (ctx->container) {
                lxc_container_put(ctx->container);
+               ctx->container = NULL;
+       }
+
        lxc_proc_close_ns_fd(ctx);
        free(ctx);
 }
@@ -402,17 +313,22 @@ static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
                        continue;
 
                if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
-                       SYSERROR("Failed to remove capability id %d.", cap);
+                       SYSERROR("Failed to drop capability %d", cap);
                        return -1;
                }
+               TRACE("Dropped capability %d", cap);
        }
 
        return 0;
 }
 
-static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy,
+static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx,
+                                     enum lxc_attach_env_policy_t policy,
                                      char **extra_env, char **extra_keep)
 {
+       int ret;
+       struct lxc_list *iterator;
+
        if (policy == LXC_ATTACH_CLEAR_ENV) {
                int path_kept = 0;
                char **extra_keep_store = NULL;
@@ -420,44 +336,41 @@ static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy,
                if (extra_keep) {
                        size_t count, i;
 
-                       for (count = 0; extra_keep[count]; count++);
+                       for (count = 0; extra_keep[count]; count++)
+                               ;
 
                        extra_keep_store = calloc(count, sizeof(char *));
-                       if (!extra_keep_store) {
-                               SYSERROR("Failed to allocate memory for storing current "
-                                        "environment variable values that will be kept.");
+                       if (!extra_keep_store)
                                return -1;
-                       }
+
                        for (i = 0; i < count; i++) {
                                char *v = getenv(extra_keep[i]);
                                if (v) {
                                        extra_keep_store[i] = strdup(v);
                                        if (!extra_keep_store[i]) {
-                                               SYSERROR("Failed to allocate memory for storing current "
-                                                        "environment variable values that will be kept.");
                                                while (i > 0)
                                                        free(extra_keep_store[--i]);
                                                free(extra_keep_store);
                                                return -1;
                                        }
+
                                        if (strcmp(extra_keep[i], "PATH") == 0)
                                                path_kept = 1;
                                }
-                               /* Calloc sets entire array to zero, so we don't
-                                * need an else.
-                                */
                        }
                }
 
                if (clearenv()) {
-                       char **p;
-
-                       SYSERROR("Failed to clear environment.");
                        if (extra_keep_store) {
+                               char **p;
+
                                for (p = extra_keep_store; *p; p++)
                                        free(*p);
+
                                free(extra_keep_store);
                        }
+
+                       SYSERROR("Failed to clear environment");
                        return -1;
                }
 
@@ -466,8 +379,9 @@ static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy,
 
                        for (i = 0; extra_keep[i]; i++) {
                                if (extra_keep_store[i]) {
-                                       if (setenv(extra_keep[i], extra_keep_store[i], 1) < 0)
-                                               SYSERROR("Unable to set environment variable.");
+                                       ret = setenv(extra_keep[i], extra_keep_store[i], 1);
+                                       if (ret < 0)
+                                               SYSWARN("Failed to set environment variable");
                                }
                                free(extra_keep_store[i]);
                        }
@@ -479,31 +393,50 @@ static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy,
                 * out there that just assume that getenv("PATH") is never NULL
                 * and then die a painful segfault death.
                 */
-               if (!path_kept)
-                       setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
+               if (!path_kept) {
+                       ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
+                       if (ret < 0)
+                               SYSWARN("Failed to set environment variable");
+               }
        }
 
-       if (putenv("container=lxc")) {
-               SYSERROR("Failed to set environment variable.");
+       ret = putenv("container=lxc");
+       if (ret) { /* putenv() signals failure with any non-zero value */
+               SYSWARN("Failed to set environment variable");
                return -1;
        }
 
+       /* Set container environment variables. */
+       if (init_ctx && init_ctx->container && init_ctx->container->lxc_conf) {
+               lxc_list_for_each(iterator, &init_ctx->container->lxc_conf->environment) {
+                       char *env_tmp;
+
+                       env_tmp = strdup((char *)iterator->elem);
+                       if (!env_tmp)
+                               return -1;
+
+                       ret = putenv(env_tmp);
+                       if (ret) {
+                               SYSERROR("Failed to set environment variable: %s", (char *)iterator->elem);
+                               return -1;
+                       }
+               }
+       }
+
        /* Set extra environment variables. */
        if (extra_env) {
                for (; *extra_env; extra_env++) {
-                       /* Duplicate the string, just to be on the safe side,
-                        * because putenv does not do it for us.
-                        */
-                       char *p = strdup(*extra_env);
+                       char *p;
                        /* We just assume the user knows what they are doing, so
                         * we don't do any checks.
                         */
-                       if (!p) {
-                               SYSERROR("Failed to allocate memory for additional environment "
-                                        "variables.");
+                       p = strdup(*extra_env);
+                       if (!p)
                                return -1;
-                       }
-                       putenv(p);
+
+                       ret = putenv(p);
+                       if (ret)
+                               SYSWARN("Failed to set environment variable");
                }
        }
 
@@ -515,12 +448,15 @@ static char *lxc_attach_getpwshell(uid_t uid)
        int fd, ret;
        pid_t pid;
        int pipes[2];
-       char *result = NULL;
+       FILE *pipe_f;
+       bool found = false;
+       size_t line_bufsz = 0;
+       char *line = NULL, *result = NULL;
 
        /* We need to fork off a process that runs the getent program, and we
         * need to capture its output, so we use a pipe for that purpose.
         */
-       ret = pipe(pipes);
+       ret = pipe2(pipes, O_CLOEXEC);
        if (ret < 0)
                return NULL;
 
@@ -531,100 +467,7 @@ static char *lxc_attach_getpwshell(uid_t uid)
                return NULL;
        }
 
-       if (pid) {
-               int status;
-               FILE *pipe_f;
-               int found = 0;
-               size_t line_bufsz = 0;
-               char *line = NULL;
-
-               close(pipes[1]);
-
-               pipe_f = fdopen(pipes[0], "r");
-               while (getline(&line, &line_bufsz, pipe_f) != -1) {
-                       int i;
-                       long value;
-                       char *token;
-                       char *endptr = NULL, *saveptr = NULL;
-
-                       /* If we already found something, just continue to read
-                        * until the pipe doesn't deliver any more data, but
-                        * don't modify the existing data structure.
-                        */
-                       if (found)
-                               continue;
-
-                       /* Trim line on the right hand side. */
-                       for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
-                               line[i - 1] = '\0';
-
-                       /* Split into tokens: first: user name. */
-                       token = strtok_r(line, ":", &saveptr);
-                       if (!token)
-                               continue;
-                       /* next: dummy password field */
-                       token = strtok_r(NULL, ":", &saveptr);
-                       if (!token)
-                               continue;
-                       /* next: user id */
-                       token = strtok_r(NULL, ":", &saveptr);
-                       value = token ? strtol(token, &endptr, 10) : 0;
-                       if (!token || !endptr || *endptr || value == LONG_MIN || value == LONG_MAX)
-                               continue;
-                       /* dummy sanity check: user id matches */
-                       if ((uid_t) value != uid)
-                               continue;
-                       /* skip fields: gid, gecos, dir, go to next field 'shell' */
-                       for (i = 0; i < 4; i++) {
-                               token = strtok_r(NULL, ":", &saveptr);
-                               if (!token)
-                                       break;
-                       }
-                       if (!token)
-                               continue;
-                       free(result);
-                       result = strdup(token);
-
-                       /* Sanity check that there are no fields after that. */
-                       token = strtok_r(NULL, ":", &saveptr);
-                       if (token)
-                               continue;
-
-                       found = 1;
-               }
-
-               free(line);
-               fclose(pipe_f);
-       again:
-               if (waitpid(pid, &status, 0) < 0) {
-                       if (errno == EINTR)
-                               goto again;
-                       free(result);
-                       return NULL;
-               }
-
-               /* Some sanity checks. If anything even hinted at going wrong,
-                * we can't be sure we have a valid result, so we assume we
-                * don't.
-                */
-
-               if (!WIFEXITED(status)) {
-                       free(result);
-                       return NULL;
-               }
-
-               if (WEXITSTATUS(status) != 0) {
-                       free(result);
-                       return NULL;
-               }
-
-               if (!found) {
-                       free(result);
-                       return NULL;
-               }
-
-               return result;
-       } else {
+       if (!pid) {
                char uid_buf[32];
                char *arguments[] = {
                        "getent",
@@ -636,31 +479,108 @@ static char *lxc_attach_getpwshell(uid_t uid)
                close(pipes[0]);
 
                /* We want to capture stdout. */
-               dup2(pipes[1], 1);
+               ret = dup2(pipes[1], STDOUT_FILENO);
                close(pipes[1]);
+               if (ret < 0)
+                       exit(EXIT_FAILURE);
 
                /* Get rid of stdin/stderr, so we try to associate it with
                 * /dev/null.
                 */
-               fd = open("/dev/null", O_RDWR);
+               fd = open_devnull();
                if (fd < 0) {
-                       close(0);
-                       close(2);
+                       close(STDIN_FILENO);
+                       close(STDERR_FILENO);
                } else {
-                       dup2(fd, 0);
-                       dup2(fd, 2);
+                       (void)dup2(fd, STDIN_FILENO);  /* no O_CLOEXEC: must survive execvp() */
+                       (void)dup2(fd, STDERR_FILENO); /* stderr, not stdout: stdout is the pipe */
                        close(fd);
                }
 
                /* Finish argument list. */
-               ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
-               if (ret <= 0)
-                       exit(-1);
+               ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid);
+               if (ret <= 0 || ret >= sizeof(uid_buf))
+                       exit(EXIT_FAILURE);
 
                /* Try to run getent program. */
-               (void) execvp("getent", arguments);
-               exit(-1);
+               (void)execvp("getent", arguments);
+               exit(EXIT_FAILURE);
+       }
+
+       close(pipes[1]);
+
+       pipe_f = fdopen(pipes[0], "r");
+       while (getline(&line, &line_bufsz, pipe_f) != -1) {
+               int i;
+               long value;
+               char *token;
+               char *endptr = NULL, *saveptr = NULL;
+
+               /* If we already found something, just continue to read
+               * until the pipe doesn't deliver any more data, but
+               * don't modify the existing data structure.
+                */
+               if (found)
+                       continue;
+
+               /* Trim line on the right hand side. */
+               for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
+                       line[i - 1] = '\0';
+
+               /* Split into tokens: first: user name. */
+               token = strtok_r(line, ":", &saveptr);
+               if (!token)
+                       continue;
+
+               /* next: dummy password field */
+               token = strtok_r(NULL, ":", &saveptr);
+               if (!token)
+                       continue;
+
+               /* next: user id */
+               token = strtok_r(NULL, ":", &saveptr);
+               value = token ? strtol(token, &endptr, 10) : 0;
+               if (!token || !endptr || *endptr || value == LONG_MIN ||
+                               value == LONG_MAX)
+                       continue;
+
+               /* dummy sanity check: user id matches */
+               if ((uid_t)value != uid)
+                       continue;
+
+               /* skip fields: gid, gecos, dir, go to next field 'shell' */
+               for (i = 0; i < 4; i++) {
+                       token = strtok_r(NULL, ":", &saveptr);
+                       if (!token)
+                               break; /* short line; the check after the loop skips it */
+               }
+               if (!token)
+                       continue;
+               free(result);
+               result = strdup(token);
+
+               /* Sanity check that there are no fields after that. */
+               token = strtok_r(NULL, ":", &saveptr);
+               if (token)
+                       continue;
+
+               found = true;
+       }
+       free(line);
+       fclose(pipe_f);
+
+       ret = wait_for_pid(pid);
+       if (ret < 0) {
+               free(result);
+               return NULL;
+       }
+
+       if (!found) {
+               free(result);
+               return NULL;
        }
+
+       return result;
 }
 
 static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid)
@@ -674,8 +594,9 @@ static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid)
        uid_t uid = (uid_t)-1;
        gid_t gid = (gid_t)-1;
 
-       /* Read capabilities. */
-       snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", 1);
+       ret = snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", 1);
+       if (ret < 0 || ret >= __PROC_STATUS_LEN)
+               return;
 
        proc_file = fopen(proc_fn, "r");
        if (!proc_file)
@@ -711,16 +632,6 @@ static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid)
         */
 }
 
-struct attach_clone_payload {
-       int ipc_socket;
-       lxc_attach_options_t *options;
-       struct lxc_proc_context_info *init_ctx;
-       lxc_attach_exec_t exec_function;
-       void *exec_payload;
-};
-
-static int attach_child_main(void* data);
-
 /* Help the optimizer along if it doesn't know that exit always exits. */
 #define rexit(c)                                                               \
        do {                                                                   \
@@ -732,9 +643,10 @@ static int attach_child_main(void* data);
 /* Define default options if no options are supplied by the user. */
 static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
 
-static bool fetch_seccomp(struct lxc_container *c,
-                         lxc_attach_options_t *options)
+static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options)
 {
+       int ret;
+       bool bret;
        char *path;
 
        if (!(options->namespaces & CLONE_NEWNS) ||
@@ -745,62 +657,61 @@ static bool fetch_seccomp(struct lxc_container *c,
        }
 
        /* Remove current setting. */
-       if (!c->set_config_item(c, "lxc.seccomp", "") &&
-           !c->set_config_item(c, "lxc.seccomp.profile", "")) {
+       if (!c->set_config_item(c, "lxc.seccomp.profile", "") &&
+           !c->set_config_item(c, "lxc.seccomp", "")) {
                return false;
        }
 
        /* Fetch the current profile path over the cmd interface. */
        path = c->get_running_config_item(c, "lxc.seccomp.profile");
        if (!path) {
-               INFO("Failed to get running config item for lxc.seccomp.profile");
+               INFO("Failed to retrieve lxc.seccomp.profile");
                path = c->get_running_config_item(c, "lxc.seccomp");
-       }
-       if (!path) {
-               INFO("Failed to get running config item for lxc.seccomp");
-               return true;
+               if (!path) {
+                       INFO("Failed to retrieve lxc.seccomp");
+                       return true;
+               }
        }
 
        /* Copy the value into the new lxc_conf. */
-       if (!c->set_config_item(c, "lxc.seccomp.profile", path)) {
-               free(path);
-               return false;
-       }
+       bret = c->set_config_item(c, "lxc.seccomp.profile", path);
        free(path);
+       if (!bret)
+               return false;
 
        /* Attempt to parse the resulting config. */
-       if (lxc_read_seccomp_config(c->lxc_conf) < 0) {
-               ERROR("Error reading seccomp policy.");
+       ret = lxc_read_seccomp_config(c->lxc_conf);
+       if (ret < 0) {
+               ERROR("Failed to retrieve seccomp policy");
                return false;
        }
 
-       INFO("Retrieved seccomp policy.");
+       INFO("Retrieved seccomp policy");
        return true;
 }
 
 static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options)
 {
+       bool bret;
        char *val;
 
        /* Remove current setting. */
-       if (!c->set_config_item(c, "lxc.no_new_privs", ""))
+       if (!c->set_config_item(c, "lxc.no_new_privs", "")) {
+               INFO("Failed to unset lxc.no_new_privs");
                return false;
+       }
 
        /* Retrieve currently active setting. */
        val = c->get_running_config_item(c, "lxc.no_new_privs");
        if (!val) {
-               INFO("Failed to get running config item for lxc.no_new_privs.");
+               INFO("Failed to retrieve lxc.no_new_privs");
                return false;
        }
 
        /* Set currently active setting. */
-       if (!c->set_config_item(c, "lxc.no_new_privs", val)) {
-               free(val);
-               return false;
-       }
+       bret = c->set_config_item(c, "lxc.no_new_privs", val);
        free(val);
-
-       return true;
+       return bret;
 }
 
 static signed long get_personality(const char *name, const char *lxcpath)
@@ -818,79 +729,402 @@ static signed long get_personality(const char *name, const char *lxcpath)
        return ret;
 }
 
-int lxc_attach(const char *name, const char *lxcpath,
-              lxc_attach_exec_t exec_function, void *exec_payload,
-              lxc_attach_options_t *options, pid_t *attached_process)
-{
-       int i, ret, status;
-       int ipc_sockets[2];
-       char *cwd, *new_cwd;
-       signed long personality;
-       pid_t attached_pid, expected, init_pid, pid;
+struct attach_clone_payload {
+       int ipc_socket;
+       int terminal_slave_fd;
+       lxc_attach_options_t *options;
        struct lxc_proc_context_info *init_ctx;
+       lxc_attach_exec_t exec_function;
+       void *exec_payload;
+};
 
-       ret = access("/proc/self/ns", X_OK);
-       if (ret) {
-               ERROR("Does this kernel version support namespaces?");
-               return -1;
-       }
-
-       if (!options)
-               options = &attach_static_default_options;
-
-       init_pid = lxc_cmd_get_init_pid(name, lxcpath);
-       if (init_pid < 0) {
-               ERROR("Failed to get init pid.");
-               return -1;
+static void lxc_put_attach_clone_payload(struct attach_clone_payload *p)
+{
+       if (p->ipc_socket >= 0) {
+               shutdown(p->ipc_socket, SHUT_RDWR);
+               close(p->ipc_socket);
+               p->ipc_socket = -EBADF;
        }
 
-       init_ctx = lxc_proc_get_context_info(init_pid);
-       if (!init_ctx) {
-               ERROR("Failed to get context of init process: %ld", (long)init_pid);
-               return -1;
+       if (p->terminal_slave_fd >= 0) {
+               close(p->terminal_slave_fd);
+               p->terminal_slave_fd = -EBADF;
        }
 
-       personality = get_personality(name, lxcpath);
-       if (init_ctx->personality < 0) {
-               ERROR("Failed to get personality of the container");
-               lxc_proc_put_context_info(init_ctx);
-               return -1;
+       if (p->init_ctx) {
+               lxc_proc_put_context_info(p->init_ctx);
+               p->init_ctx = NULL;
        }
-       init_ctx->personality = personality;
+}
 
-       init_ctx->container = lxc_container_new(name, lxcpath);
-       if (!init_ctx->container)
-               return -1;
+static int attach_child_main(struct attach_clone_payload *payload)
+{
+       int fd, lsm_fd, ret;
+       uid_t new_uid;
+       gid_t new_gid;
+       lxc_attach_options_t* options = payload->options;
+       struct lxc_proc_context_info* init_ctx = payload->init_ctx;
+       bool needs_lsm = (options->namespaces & CLONE_NEWNS) &&
+                        (options->attach_flags & LXC_ATTACH_LSM) &&
+                        init_ctx->lsm_label;
 
-       if (!init_ctx->container->lxc_conf) {
-               init_ctx->container->lxc_conf = lxc_conf_init();
-               if (!init_ctx->container->lxc_conf)
-                       return -ENOMEM;
+       /* A description of the purpose of this functionality is provided in the
+        * lxc-attach(1) manual page. We have to remount here and not in the
+        * parent process, otherwise /proc may not properly reflect the new pid
+        * namespace.
+        */
+       if (!(options->namespaces & CLONE_NEWNS) &&
+           (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
+               ret = lxc_attach_remount_sys_proc();
+               if (ret < 0)
+                       goto on_error;
+               TRACE("Remounted \"/proc\" and \"/sys\"");
        }
 
-       if (!fetch_seccomp(init_ctx->container, options))
-               WARN("Failed to get seccomp policy.");
+/* Now perform additional attachments. */
+#if HAVE_SYS_PERSONALITY_H
+       if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
+               long new_personality;
 
-       if (!no_new_privs(init_ctx->container, options))
-               WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set.");
+               if (options->personality < 0)
+                       new_personality = init_ctx->personality;
+               else
+                       new_personality = options->personality;
+               ret = personality(new_personality);
+               if (ret < 0)
+                       goto on_error;
+               TRACE("Set new personality");
+       }
+#endif
 
-       cwd = getcwd(NULL, 0);
+       if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
+               ret = lxc_attach_drop_privs(init_ctx);
+               if (ret < 0)
+                       goto on_error;
+               TRACE("Dropped capabilities");
+       }
 
-       /* Determine which namespaces the container was created with
-        * by asking lxc-start, if necessary.
+       /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
+        * if you want this to be a no-op).
         */
-       if (options->namespaces == -1) {
-               options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
-               /* call failed */
-               if (options->namespaces == -1) {
-                       ERROR("Failed to automatically determine the "
-                             "namespaces which the container uses");
-                       free(cwd);
-                       lxc_proc_put_context_info(init_ctx);
-                       return -1;
+       ret = lxc_attach_set_environment(init_ctx,
+                                        options->env_policy,
+                                        options->extra_env_vars,
+                                        options->extra_keep_env);
+       if (ret < 0)
+               goto on_error;
+       TRACE("Set up environment");
+
+       /* This remark only affects fully unprivileged containers:
+        * Receive fd for LSM security module before we set{g,u}id(). The reason
+        * is that on set{g,u}id() the kernel will a) make us undumpable and b)
+        * we will change our effective uid. This means our effective uid will
+        * be different from the effective uid of the process that created us
+        * which means that this process no longer has capabilities in our
+        * namespace including CAP_SYS_PTRACE. This means we will not be able to
+        * read any /proc/<pid> files for the process anymore when /proc is
+        * mounted with hidepid={1,2}. So let's get the lsm label fd before the
+        * set{g,u}id().
+        */
+       if (needs_lsm) {
+               ret = lxc_abstract_unix_recv_fds(payload->ipc_socket, &lsm_fd, 1, NULL, 0);
+               if (ret <= 0) {
+                       if (ret < 0)
+                               SYSERROR("Failed to receive lsm label fd");
+
+                       goto on_error;
                }
 
-               for (i = 0; i < LXC_NS_MAX; i++) {
+               TRACE("Received LSM label file descriptor %d from parent", lsm_fd);
+       }
+
+       if (options->stdin_fd > 0 && isatty(options->stdin_fd)) {
+               ret = lxc_make_controlling_terminal(options->stdin_fd);
+               if (ret < 0)
+                       goto on_error;
+       }
+
+       /* Set {u,g}id. */
+       new_uid = 0;
+       new_gid = 0;
+       /* Ignore errors, we will fall back to root in that case (/proc was not
+        * mounted etc.).
+        */
+       if (options->namespaces & CLONE_NEWUSER)
+               lxc_attach_get_init_uidgid(&new_uid, &new_gid);
+
+       if (options->uid != (uid_t)-1)
+               new_uid = options->uid;
+       if (options->gid != (gid_t)-1)
+               new_gid = options->gid;
+
+       /* Drop supplementary groups first: setgroups() gets EPERM after the uid switch. */
+       ret = lxc_setgroups(0, NULL);
+       if (ret < 0 && errno != EPERM)
+               goto on_error;
+       /* Try to set the {u,g}id combination. */
+       if (new_uid != 0 || new_gid != 0 || options->namespaces & CLONE_NEWUSER) {
+               ret = lxc_switch_uid_gid(new_uid, new_gid);
+               if (ret < 0)
+                       goto on_error;
+       }
+
+       if ((init_ctx->container && init_ctx->container->lxc_conf &&
+            init_ctx->container->lxc_conf->no_new_privs) ||
+           (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
+               ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+               if (ret < 0)
+                       goto on_error;
+               TRACE("Set PR_SET_NO_NEW_PRIVS");
+       }
+
+       if (needs_lsm) {
+               bool on_exec;
+
+               /* Change into our new LSM profile. */
+               on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
+               ret = lsm_process_label_set_at(lsm_fd, init_ctx->lsm_label, on_exec);
+               close(lsm_fd);
+               if (ret < 0)
+                       goto on_error;
+               TRACE("Set %s LSM label to \"%s\"", lsm_name(), init_ctx->lsm_label);
+       }
+
+       if (init_ctx->container && init_ctx->container->lxc_conf &&
+           init_ctx->container->lxc_conf->seccomp) {
+               ret = lxc_seccomp_load(init_ctx->container->lxc_conf);
+               if (ret < 0)
+                       goto on_error;
+               TRACE("Loaded seccomp profile");
+       }
+       shutdown(payload->ipc_socket, SHUT_RDWR);
+       close(payload->ipc_socket);
+       payload->ipc_socket = -EBADF;
+       lxc_proc_put_context_info(init_ctx);
+       payload->init_ctx = NULL;
+
+       /* The following is done after the communication socket is shut down.
+        * That way, all errors that might (though unlikely) occur up until this
+        * point will have their messages printed to the original stderr (if
+        * logging is so configured) and not the fd the user supplied, if any.
+        */
+
+       /* Fd handling for stdin, stdout and stderr; ignore errors here, user
+        * may want to make sure the fds are closed, for example.
+        */
+       if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO)
+               (void)dup2(options->stdin_fd, STDIN_FILENO);
+
+       if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO)
+               (void)dup2(options->stdout_fd, STDOUT_FILENO);
+
+       if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO)
+               (void)dup2(options->stderr_fd, STDERR_FILENO);
+
+       /* close the old fds */
+       if (options->stdin_fd > STDERR_FILENO)
+               close(options->stdin_fd);
+
+       if (options->stdout_fd > STDERR_FILENO)
+               close(options->stdout_fd);
+
+       if (options->stderr_fd > STDERR_FILENO)
+               close(options->stderr_fd);
+
+       /* Remove the FD_CLOEXEC flag from stdin/stdout/stderr so they survive
+        * exec; unlike the dup2() calls above, a failure here is fatal.
+        */
+       for (fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
+               ret = fd_cloexec(fd, false);
+               if (ret < 0) {
+                       SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
+                       goto on_error;
+               }
+       }
+
+       if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+               ret = lxc_terminal_prepare_login(payload->terminal_slave_fd);
+               if (ret < 0) {
+                       SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd);
+                       goto on_error;
+               }
+               TRACE("Prepared terminal file descriptor %d", payload->terminal_slave_fd);
+       }
+
+       /* We're done, so we can now do whatever the user intended us to do. */
+       rexit(payload->exec_function(payload->exec_payload));
+
+on_error:
+       lxc_put_attach_clone_payload(payload);
+       rexit(EXIT_FAILURE);
+}
+
+static int lxc_attach_terminal(struct lxc_conf *conf,
+                              struct lxc_terminal *terminal)
+{
+       int ret;
+
+       lxc_terminal_init(terminal);
+
+       ret = lxc_terminal_create(terminal);
+       if (ret < 0) {
+               SYSERROR("Failed to create terminal");
+               return -1;
+       }
+
+       /* Shift ttys to container. */
+       ret = lxc_terminal_map_ids(conf, terminal);
+       if (ret < 0) {
+               ERROR("Failed to chown terminal");
+               goto on_error;
+       }
+
+       return 0;
+
+on_error:
+       lxc_terminal_delete(terminal);
+       lxc_terminal_conf_free(terminal);
+       return -1;
+}
+
+static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
+                                            struct lxc_epoll_descr *descr)
+{
+       int ret;
+
+       ret = lxc_mainloop_open(descr);
+       if (ret < 0) {
+               ERROR("Failed to create mainloop");
+               return -1;
+       }
+
+       ret = lxc_terminal_mainloop_add(descr, terminal);
+       if (ret < 0) {
+               ERROR("Failed to add handlers to mainloop");
+               lxc_mainloop_close(descr);
+               return -1;
+       }
+
+       return 0;
+}
+
+static inline void lxc_attach_terminal_close_master(struct lxc_terminal *terminal)
+{
+       if (terminal->master < 0)
+               return;
+
+       close(terminal->master);
+       terminal->master = -EBADF;
+}
+
+static inline void lxc_attach_terminal_close_slave(struct lxc_terminal *terminal)
+{
+       if (terminal->slave < 0)
+               return;
+
+       close(terminal->slave);
+       terminal->slave = -EBADF;
+}
+
+static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal)
+{
+       if (terminal->peer < 0)
+               return;
+
+       close(terminal->peer);
+       terminal->peer = -EBADF;
+}
+
+static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal)
+{
+       if (terminal->log_fd < 0)
+               return;
+
+       close(terminal->log_fd);
+       terminal->log_fd = -EBADF;
+}
+
+int lxc_attach(const char *name, const char *lxcpath,
+              lxc_attach_exec_t exec_function, void *exec_payload,
+              lxc_attach_options_t *options, pid_t *attached_process)
+{
+       int i, ret, status;
+       int ipc_sockets[2];
+       char *cwd, *new_cwd;
+       signed long personality;
+       pid_t attached_pid, init_pid, pid;
+       struct lxc_proc_context_info *init_ctx;
+       struct lxc_terminal terminal;
+       struct lxc_conf *conf;
+       struct attach_clone_payload payload = {0};
+
+       ret = access("/proc/self/ns", X_OK);
+       if (ret) {
+               ERROR("Does this kernel version support namespaces?");
+               return -1;
+       }
+
+       if (!options)
+               options = &attach_static_default_options;
+
+       init_pid = lxc_cmd_get_init_pid(name, lxcpath);
+       if (init_pid < 0) {
+               ERROR("Failed to get init pid");
+               return -1;
+       }
+
+       init_ctx = lxc_proc_get_context_info(init_pid);
+       if (!init_ctx) {
+               ERROR("Failed to get context of init process: %ld", (long)init_pid);
+               return -1;
+       }
+
+       personality = get_personality(name, lxcpath);
+       if (init_ctx->personality < 0) {
+               ERROR("Failed to get personality of the container");
+               lxc_proc_put_context_info(init_ctx);
+               return -1;
+       }
+       init_ctx->personality = personality;
+
+       init_ctx->container = lxc_container_new(name, lxcpath);
+       if (!init_ctx->container) {
+               lxc_proc_put_context_info(init_ctx);
+               return -1;
+       }
+
+       if (!init_ctx->container->lxc_conf) {
+               init_ctx->container->lxc_conf = lxc_conf_init();
+               if (!init_ctx->container->lxc_conf) {
+                       lxc_proc_put_context_info(init_ctx);
+                       return -ENOMEM;
+               }
+       }
+       conf = init_ctx->container->lxc_conf;
+
+       if (!fetch_seccomp(init_ctx->container, options))
+               WARN("Failed to get seccomp policy");
+
+       if (!no_new_privs(init_ctx->container, options))
+               WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
+
+       cwd = getcwd(NULL, 0);
+
+       /* Determine which namespaces the container was created with
+        * by asking lxc-start, if necessary.
+        */
+       if (options->namespaces == -1) {
+               options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
+               /* call failed */
+               if (options->namespaces == -1) {
+                       ERROR("Failed to automatically determine the "
+                             "namespaces which the container uses");
+                       free(cwd);
+                       lxc_proc_put_context_info(init_ctx);
+                       return -1;
+               }
+
+               for (i = 0; i < LXC_NS_MAX; i++) {
                        if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
                                if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
                                    !cgns_supported())
@@ -903,7 +1137,7 @@ int lxc_attach(const char *name, const char *lxcpath,
                }
        }
 
-       pid = getpid();
+       pid = lxc_raw_getpid();
        for (i = 0; i < LXC_NS_MAX; i++) {
                int j, saved_errno;
 
@@ -939,6 +1173,20 @@ int lxc_attach(const char *name, const char *lxcpath,
                return -1;
        }
 
+       if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+               ret = lxc_attach_terminal(conf, &terminal);
+               if (ret < 0) {
+                       ERROR("Failed to setup new terminal");
+                       free(cwd);
+                       lxc_proc_put_context_info(init_ctx);
+                       return -1;
+               }
+
+               terminal.log_fd = options->log_fd;
+       } else {
+               lxc_terminal_init(&terminal);
+       }
+
        /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
         * to make sure we don't irritate other threads that want to fork+exec
         * away
@@ -974,80 +1222,87 @@ int lxc_attach(const char *name, const char *lxcpath,
         */
        ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
        if (ret < 0) {
-               SYSERROR("Could not set up required IPC mechanism for attaching.");
+               SYSERROR("Could not set up required IPC mechanism for attaching");
                free(cwd);
                lxc_proc_put_context_info(init_ctx);
                return -1;
        }
 
-       /* Create intermediate subprocess, three reasons:
-        *       1. Runs all pthread_atfork handlers and the child will no
-        *          longer be threaded (we can't properly setns() in a threaded
-        *          process).
-        *       2. We can't setns() in the child itself, since we want to make
+       /* Create intermediate subprocess, two reasons:
+        *       1. We can't setns() in the child itself, since we want to make
         *          sure we are properly attached to the pidns.
-        *       3. Also, the initial thread has to put the attached process
+        *       2. Also, the initial thread has to put the attached process
         *          into the cgroup, which we can only do if we didn't already
         *          setns() (otherwise, user namespaces will hate us).
         */
        pid = fork();
        if (pid < 0) {
-               SYSERROR("Failed to create first subprocess.");
+               SYSERROR("Failed to create first subprocess");
                free(cwd);
                lxc_proc_put_context_info(init_ctx);
                return -1;
        }
 
        if (pid) {
-               int procfd = -1;
+               int ret_parent = -1;
                pid_t to_cleanup_pid = pid;
+               struct lxc_epoll_descr descr = {0};
 
-               /* close file namespace descriptors */
-               lxc_proc_close_ns_fd(init_ctx);
-
-               /* Initial thread, we close the socket that is for the
-                * subprocesses.
-                */
+               /* close unneeded file descriptors */
                close(ipc_sockets[1]);
                free(cwd);
+               lxc_proc_close_ns_fd(init_ctx);
+               if (options->attach_flags & LXC_ATTACH_TERMINAL)
+                       lxc_attach_terminal_close_slave(&terminal);
 
                /* Attach to cgroup, if requested. */
                if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
-                       if (!cgroup_attach(name, lxcpath, pid))
+                       struct cgroup_ops *cgroup_ops;
+
+                       cgroup_ops = cgroup_init(NULL);
+                       if (!cgroup_ops)
+                               goto on_error;
+
+                       if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
+                               goto on_error;
+
+                       cgroup_exit(cgroup_ops);
+                       TRACE("Moved intermediate process %d into container's cgroups", pid);
+               }
+
+               /* Setup /proc limits */
+               if (!lxc_list_empty(&conf->procs)) {
+                       ret = setup_proc_filesystem(&conf->procs, pid);
+                       if (ret < 0)
                                goto on_error;
                }
 
                /* Setup resource limits */
-               if (!lxc_list_empty(&init_ctx->container->lxc_conf->limits))
-                       if (setup_resource_limits(&init_ctx->container->lxc_conf->limits, pid) < 0)
+               if (!lxc_list_empty(&conf->limits)) {
+                       ret = setup_resource_limits(&conf->limits, pid);
+                       if (ret < 0)
                                goto on_error;
+               }
 
-               /* Open /proc before setns() to the containers namespace so we
-                * don't rely on any information from inside the container.
-                */
-               procfd = open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-               if (procfd < 0) {
-                       SYSERROR("Unable to open /proc.");
-                       goto on_error;
+               if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+                       ret = lxc_attach_terminal_mainloop_init(&terminal, &descr);
+                       if (ret < 0)
+                               goto on_error;
+                       TRACE("Initialized terminal mainloop");
                }
 
                /* Let the child process know to go ahead. */
                status = 0;
                ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
-               if (ret <= 0) {
-                       ERROR("Intended to send sequence number 0: %s.",
-                             strerror(errno));
-                       goto on_error;
-               }
+               if (ret != sizeof(status))
+                       goto close_mainloop;
+               TRACE("Told intermediate process to start initializing");
 
                /* Get pid of attached process from intermediate process. */
-               ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid,
-                                            sizeof(attached_pid), NULL);
-               if (ret <= 0) {
-                       if (ret != 0)
-                               ERROR("Expected to receive pid: %s.", strerror(errno));
-                       goto on_error;
-               }
+               ret = lxc_read_nointr(ipc_sockets[0], &attached_pid, sizeof(attached_pid));
+               if (ret != sizeof(attached_pid))
+                       goto close_mainloop;
+               TRACE("Received pid %d of attached process in parent pid namespace", attached_pid);
 
                /* Ignore SIGINT (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
                if (options->stdin_fd == 0) {
@@ -1058,78 +1313,39 @@ int lxc_attach(const char *name, const char *lxcpath,
                /* Reap intermediate process. */
                ret = wait_for_pid(pid);
                if (ret < 0)
-                       goto on_error;
+                       goto close_mainloop;
+               TRACE("Intermediate process %d exited", pid);
 
                /* We will always have to reap the attached process now. */
                to_cleanup_pid = attached_pid;
 
-               /* Tell attached process it may start initializing. */
-               status = 0;
-               ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
-               if (ret <= 0) {
-                       ERROR("Intended to send sequence number 0: %s.", strerror(errno));
-                       goto on_error;
-               }
-
-               /* Wait for the attached process to finish initializing. */
-               expected = 1;
-               ret = lxc_read_nointr_expect(ipc_sockets[0], &status,
-                                            sizeof(status), &expected);
-               if (ret <= 0) {
-                       if (ret != 0)
-                               ERROR("Expected to receive sequence number 1: %s.", strerror(errno));
-                       goto on_error;
-               }
-
-               /* Tell attached process we're done. */
-               status = 2;
-               ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
-               if (ret <= 0) {
-                       ERROR("Intended to send sequence number 2: %s.", strerror(errno));
-                       goto on_error;
-               }
-
-               /* Wait for the (grand)child to tell us that it's ready to set
-                * up its LSM labels.
-                */
-               expected = 3;
-               ret = lxc_read_nointr_expect(ipc_sockets[0], &status,
-                                            sizeof(status), &expected);
-               if (ret <= 0) {
-                       ERROR("Expected to receive sequence number 3: %s.",
-                             strerror(errno));
-                       goto on_error;
-               }
-
                /* Open LSM fd and send it to child. */
                if ((options->namespaces & CLONE_NEWNS) &&
                    (options->attach_flags & LXC_ATTACH_LSM) &&
                    init_ctx->lsm_label) {
-                       int on_exec, saved_errno;
-                       int labelfd = -1;
+                       int ret = -1;
+                       int labelfd;
+                       bool on_exec;
 
-                       on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
-                       /* Open fd for the LSM security module. */
-                       labelfd = lsm_openat(procfd, attached_pid, on_exec);
+                       on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
+                       labelfd = lsm_process_label_fd_get(attached_pid, on_exec);
                        if (labelfd < 0)
-                               goto on_error;
+                               goto close_mainloop;
+                       TRACE("Opened LSM label file descriptor %d", labelfd);
 
                        /* Send child fd of the LSM security module to write to. */
                        ret = lxc_abstract_unix_send_fds(ipc_sockets[0], &labelfd, 1, NULL, 0);
-                       saved_errno = errno;
-                       close(labelfd);
                        if (ret <= 0) {
-                               ERROR("Intended to send file descriptor %d: %s.", labelfd, strerror(saved_errno));
-                               goto on_error;
+                               if (ret < 0)
+                                       SYSERROR("Failed to send lsm label fd");
+
+                               close(labelfd);
+                               goto close_mainloop;
                        }
-               }
 
-               if (procfd >= 0)
-                       close(procfd);
-               /* Now shut down communication with child, we're done. */
-               shutdown(ipc_sockets[0], SHUT_RDWR);
-               close(ipc_sockets[0]);
-               lxc_proc_put_context_info(init_ctx);
+                       close(labelfd);
+                       TRACE("Sent LSM label file descriptor %d to child", labelfd);
+               }
 
                /* We're done, the child process should now execute whatever it
                 * is that the user requested. The parent can now track it with
@@ -1137,45 +1353,69 @@ int lxc_attach(const char *name, const char *lxcpath,
                 */
 
                *attached_process = attached_pid;
-               return 0;
 
-       on_error:
-               /* First shut down the socket, then wait for the pid, otherwise
-                * the pid we're waiting for may never exit.
-                */
-               if (procfd >= 0)
-                       close(procfd);
+               /* Now shut down communication with child, we're done. */
                shutdown(ipc_sockets[0], SHUT_RDWR);
                close(ipc_sockets[0]);
-               if (to_cleanup_pid)
+               ipc_sockets[0] = -1;
+
+               ret_parent = 0;
+               to_cleanup_pid = -1;
+               if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+                       ret = lxc_mainloop(&descr, -1);
+                       if (ret < 0) {
+                               ret_parent = -1;
+                               to_cleanup_pid = attached_pid;
+                       }
+               }
+
+       close_mainloop:
+               if (options->attach_flags & LXC_ATTACH_TERMINAL)
+                       lxc_mainloop_close(&descr);
+
+       on_error:
+               if (ipc_sockets[0] >= 0) {
+                       shutdown(ipc_sockets[0], SHUT_RDWR);
+                       close(ipc_sockets[0]);
+               }
+
+               if (to_cleanup_pid > 0)
                        (void)wait_for_pid(to_cleanup_pid);
+
+               if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+                       lxc_terminal_delete(&terminal);
+                       lxc_terminal_conf_free(&terminal);
+               }
                lxc_proc_put_context_info(init_ctx);
-               return -1;
+               return ret_parent;
        }
 
-       /* First subprocess begins here, we close the socket that is for the
-        * initial thread.
-        */
+       /* close unneeded file descriptors */
        close(ipc_sockets[0]);
+       ipc_sockets[0] = -EBADF;
+       if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+               lxc_attach_terminal_close_master(&terminal);
+               lxc_attach_terminal_close_peer(&terminal);
+               lxc_attach_terminal_close_log(&terminal);
+       }
 
        /* Wait for the parent to have setup cgroups. */
-       expected = 0;
-       status = -1;
-       ret = lxc_read_nointr_expect(ipc_sockets[1], &status, sizeof(status),
-                                    &expected);
-       if (ret <= 0) {
-               ERROR("Expected to receive sequence number 0: %s.", strerror(errno));
+       ret = lxc_read_nointr(ipc_sockets[1], &status, sizeof(status));
+       if (ret != sizeof(status)) {
                shutdown(ipc_sockets[1], SHUT_RDWR);
+               lxc_proc_put_context_info(init_ctx);
                rexit(-1);
        }
+       TRACE("Intermediate process starting to initialize");
 
        /* Attach now, create another subprocess later, since pid namespaces
         * only really affect the children of the current process.
         */
        ret = lxc_attach_to_ns(init_pid, init_ctx);
        if (ret < 0) {
-               ERROR("Failed to enter namespaces.");
+               ERROR("Failed to enter namespaces");
                shutdown(ipc_sockets[1], SHUT_RDWR);
+               lxc_proc_put_context_info(init_ctx);
                rexit(-1);
        }
        /* close namespace file descriptors */
@@ -1186,35 +1426,38 @@ int lxc_attach(const char *name, const char *lxcpath,
                new_cwd = options->initial_cwd;
        else
                new_cwd = cwd;
-       ret = chdir(new_cwd);
-       if (ret < 0)
-               WARN("Could not change directory to \"%s\".", new_cwd);
+       if (new_cwd) {
+               ret = chdir(new_cwd);
+               if (ret < 0)
+                       WARN("Could not change directory to \"%s\"", new_cwd);
+       }
        free(cwd);
 
-       /* Now create the real child process. */
-       {
-               struct attach_clone_payload payload = {
-                       .ipc_socket = ipc_sockets[1],
-                       .options = options,
-                       .init_ctx = init_ctx,
-                       .exec_function = exec_function,
-                       .exec_payload = exec_payload,
-               };
-               /* We use clone_parent here to make this subprocess a direct
-                * child of the initial process. Then this intermediate process
-                * can exit and the parent can directly track the attached
-                * process.
-                */
-               pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
-       }
+       /* Create attached process. */
+       payload.ipc_socket = ipc_sockets[1];
+       payload.options = options;
+       payload.init_ctx = init_ctx;
+       payload.terminal_slave_fd = terminal.slave;
+       payload.exec_function = exec_function;
+       payload.exec_payload = exec_payload;
 
-       /* Shouldn't happen, clone() should always return positive pid. */
-       if (pid <= 0) {
-               SYSERROR("Failed to create subprocess.");
+       pid = lxc_raw_clone(CLONE_PARENT);
+       if (pid < 0) {
+               SYSERROR("Failed to clone attached process");
                shutdown(ipc_sockets[1], SHUT_RDWR);
+               lxc_proc_put_context_info(init_ctx);
                rexit(-1);
        }
 
+       if (pid == 0) {
+               ret = attach_child_main(&payload);
+               if (ret < 0)
+                       ERROR("Failed to exec");
+               _exit(EXIT_FAILURE);
+       }
+       if (options->attach_flags & LXC_ATTACH_TERMINAL)
+               lxc_attach_terminal_close_slave(&terminal);
+
        /* Tell grandparent the pid of the newly created child. */
        ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
        if (ret != sizeof(pid)) {
@@ -1224,253 +1467,15 @@ int lxc_attach(const char *name, const char *lxcpath,
                 * CLONE_PARENT) so the parent won't be able to reap it and the
                 * attached process will remain a zombie.
                 */
-               ERROR("Intended to send pid %d: %s.", pid, strerror(errno));
                shutdown(ipc_sockets[1], SHUT_RDWR);
+               lxc_proc_put_context_info(init_ctx);
                rexit(-1);
        }
+       TRACE("Sending pid %d of attached process", pid);
 
        /* The rest is in the hands of the initial and the attached process. */
-       rexit(0);
-}
-
-static int attach_child_main(void* data)
-{
-       int expected, fd, lsm_labelfd, ret, status;
-       long flags;
-#if HAVE_SYS_PERSONALITY_H
-       long new_personality;
-#endif
-       uid_t new_uid;
-       gid_t new_gid;
-       struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
-       int ipc_socket = payload->ipc_socket;
-       lxc_attach_options_t* options = payload->options;
-       struct lxc_proc_context_info* init_ctx = payload->init_ctx;
-
-       /* Wait for the initial thread to signal us that it's ready for us to
-        * start initializing.
-        */
-       expected = 0;
-       status = -1;
-       ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
-       if (ret <= 0) {
-               ERROR("Expected to receive sequence number 0: %s.", strerror(errno));
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       /* A description of the purpose of this functionality is provided in the
-        * lxc-attach(1) manual page. We have to remount here and not in the
-        * parent process, otherwise /proc may not properly reflect the new pid
-        * namespace.
-        */
-       if (!(options->namespaces & CLONE_NEWNS) &&
-           (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
-               ret = lxc_attach_remount_sys_proc();
-               if (ret < 0) {
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-       }
-
-       /* Now perform additional attachments. */
-#if HAVE_SYS_PERSONALITY_H
-       if (options->personality < 0)
-               new_personality = init_ctx->personality;
-       else
-               new_personality = options->personality;
-
-       if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
-               ret = personality(new_personality);
-               if (ret < 0) {
-                       SYSERROR("Could not ensure correct architecture.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-       }
-#endif
-
-       if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
-               ret = lxc_attach_drop_privs(init_ctx);
-               if (ret < 0) {
-                       ERROR("Could not drop privileges.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-       }
-
-       /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
-        * if you want this to be a no-op).
-        */
-       ret = lxc_attach_set_environment(options->env_policy,
-                                        options->extra_env_vars,
-                                        options->extra_keep_env);
-       if (ret < 0) {
-               ERROR("Could not set initial environment for attached process.");
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       /* Set {u,g}id. */
-       new_uid = 0;
-       new_gid = 0;
-       /* Ignore errors, we will fall back to root in that case (/proc was not
-        * mounted etc.).
-        */
-       if (options->namespaces & CLONE_NEWUSER)
-               lxc_attach_get_init_uidgid(&new_uid, &new_gid);
-
-       if (options->uid != (uid_t)-1)
-               new_uid = options->uid;
-       if (options->gid != (gid_t)-1)
-               new_gid = options->gid;
-
-       /* Setup the controlling tty. */
-       if (options->stdin_fd && isatty(options->stdin_fd)) {
-               if (setsid() < 0) {
-                       SYSERROR("Unable to setsid.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-
-               if (ioctl(options->stdin_fd, TIOCSCTTY, (char *)NULL) < 0) {
-                       SYSERROR("Unable to set TIOCSTTY.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-       }
-
-       /* Try to set the {u,g}id combination. */
-       if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) {
-               if (setgid(new_gid) || setgroups(0, NULL)) {
-                       SYSERROR("Switching to container gid.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-       }
-       if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) &&
-           setuid(new_uid)) {
-               SYSERROR("Switching to container uid.");
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       /* Tell initial process it may now put us into cgroups. */
-       status = 1;
-       ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
-       if (ret != sizeof(status)) {
-               ERROR("Intended to send sequence number 1: %s.", strerror(errno));
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       /* Wait for the initial thread to signal us that it has done everything
-        * for us when it comes to cgroups etc.
-        */
-       expected = 2;
-       status = -1;
-       ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
-       if (ret <= 0) {
-               ERROR("Expected to receive sequence number 2: %s", strerror(errno));
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       if ((init_ctx->container && init_ctx->container->lxc_conf &&
-            init_ctx->container->lxc_conf->no_new_privs) ||
-           (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
-               if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
-                       SYSERROR("PR_SET_NO_NEW_PRIVS could not be set. "
-                                "Process can use execve() gainable "
-                                "privileges.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-               INFO("PR_SET_NO_NEW_PRIVS is set. Process cannot use execve() "
-                    "gainable privileges.");
-       }
-
-       /* Tell the (grand)parent to send us LSM label fd. */
-       status = 3;
-       ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
-       if (ret <= 0) {
-               ERROR("Intended to send sequence number 3: %s.", strerror(errno));
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       if ((options->namespaces & CLONE_NEWNS) &&
-           (options->attach_flags & LXC_ATTACH_LSM) && init_ctx->lsm_label) {
-               int on_exec;
-               /* Receive fd for LSM security module. */
-               ret = lxc_abstract_unix_recv_fds(ipc_socket, &lsm_labelfd, 1, NULL, 0);
-               if (ret <= 0) {
-                       ERROR("Expected to receive file descriptor: %s.", strerror(errno));
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       rexit(-1);
-               }
-
-               /* Change into our new LSM profile. */
-               on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
-               if (lsm_set_label_at(lsm_labelfd, on_exec, init_ctx->lsm_label) < 0) {
-                       SYSERROR("Failed to set LSM label.");
-                       shutdown(ipc_socket, SHUT_RDWR);
-                       close(lsm_labelfd);
-                       rexit(-1);
-               }
-               close(lsm_labelfd);
-       }
-
-       if (init_ctx->container && init_ctx->container->lxc_conf &&
-           init_ctx->container->lxc_conf->seccomp &&
-           (lxc_seccomp_load(init_ctx->container->lxc_conf) != 0)) {
-               ERROR("Failed to load seccomp policy.");
-               shutdown(ipc_socket, SHUT_RDWR);
-               rexit(-1);
-       }
-
-       shutdown(ipc_socket, SHUT_RDWR);
-       close(ipc_socket);
        lxc_proc_put_context_info(init_ctx);
-
-       /* The following is done after the communication socket is shut down.
-        * That way, all errors that might (though unlikely) occur up until this
-        * point will have their messages printed to the original stderr (if
-        * logging is so configured) and not the fd the user supplied, if any.
-        */
-
-       /* Fd handling for stdin, stdout and stderr; ignore errors here, user
-        * may want to make sure the fds are closed, for example.
-        */
-       if (options->stdin_fd >= 0 && options->stdin_fd != 0)
-               dup2(options->stdin_fd, 0);
-       if (options->stdout_fd >= 0 && options->stdout_fd != 1)
-               dup2(options->stdout_fd, 1);
-       if (options->stderr_fd >= 0 && options->stderr_fd != 2)
-               dup2(options->stderr_fd, 2);
-
-       /* close the old fds */
-       if (options->stdin_fd > 2)
-               close(options->stdin_fd);
-       if (options->stdout_fd > 2)
-               close(options->stdout_fd);
-       if (options->stderr_fd > 2)
-               close(options->stderr_fd);
-
-       /* Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
-        * here, ignore errors.
-        */
-       for (fd = 0; fd <= 2; fd++) {
-               flags = fcntl(fd, F_GETFL);
-               if (flags < 0)
-                       continue;
-               if (flags & FD_CLOEXEC)
-                       if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0)
-                               SYSERROR("Unable to clear FD_CLOEXEC from file descriptor.");
-       }
-
-       /* We're done, so we can now do whatever the user intended us to do. */
-       rexit(payload->exec_function(payload->exec_payload));
+       rexit(0);
 }
 
 int lxc_attach_run_command(void* payload)
@@ -1485,14 +1490,32 @@ int lxc_attach_run_command(void* payload)
 int lxc_attach_run_shell(void* payload)
 {
        uid_t uid;
-       struct passwd *passwd;
+       struct passwd pwent;
+       struct passwd *pwentp = NULL;
        char *user_shell;
+       char *buf;
+       size_t bufsize;
+       int ret;
 
        /* Ignore payload parameter. */
        (void)payload;
 
        uid = getuid();
-       passwd = getpwuid(uid);
+
+       bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+       if (bufsize == -1)
+               bufsize = 1024;
+
+       buf = malloc(bufsize);
+       if (buf) {
+               ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
+               if (!pwentp) {
+                       if (ret == 0)
+                               WARN("Could not find matched password record.");
+
+                       WARN("Failed to get password record - %u", uid);
+               }
+       }
 
        /* This probably happens because of incompatible nss implementations in
         * host and container (remember, this code is still using the host's
@@ -1500,11 +1523,10 @@ int lxc_attach_run_shell(void* payload)
         * the information by spawning a [getent passwd uid] process and parsing
         * the result.
         */
-       if (!passwd)
+       if (!pwentp)
                user_shell = lxc_attach_getpwshell(uid);
        else
-               user_shell = passwd->pw_shell;
-
+               user_shell = pwent.pw_shell;
        if (user_shell)
                execlp(user_shell, user_shell, (char *)NULL);
 
@@ -1512,6 +1534,9 @@ int lxc_attach_run_shell(void* payload)
         * on /bin/sh as a default shell.
         */
        execlp("/bin/sh", "/bin/sh", (char *)NULL);
-       SYSERROR("Failed to exec shell.");
+       SYSERROR("Failed to execute shell");
+       if (!pwentp)
+               free(user_shell);
+       free(buf);
        return -1;
 }