X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=src%2Flxc%2Fattach.c;h=117e3778fab182de29b24cf7c5754a149d973649;hb=1d801260c637ad72e204778cec7cebb0de714b67;hp=243c91c653a54e27931dce574b09a15932ba419e;hpb=0ed9b1bc5ccad490807e19c9e71df3abb6380655;p=mirror_lxc.git diff --git a/src/lxc/attach.c b/src/lxc/attach.c index 243c91c65..117e3778f 100644 --- a/src/lxc/attach.c +++ b/src/lxc/attach.c @@ -21,38 +21,30 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#define _GNU_SOURCE +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif #include #include #include +#include #include +#include #include #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include #include -#ifndef HAVE_DECL_PR_CAPBSET_DROP -#define PR_CAPBSET_DROP 24 -#endif - -#ifndef HAVE_DECL_PR_SET_NO_NEW_PRIVS -#define PR_SET_NO_NEW_PRIVS 38 -#endif - -#ifndef HAVE_DECL_PR_GET_NO_NEW_PRIVS -#define PR_GET_NO_NEW_PRIVS 39 -#endif - #include "af_unix.h" #include "attach.h" #include "caps.h" @@ -65,8 +57,11 @@ #include "lsm/lsm.h" #include "lxclock.h" #include "lxcseccomp.h" +#include "macro.h" #include "mainloop.h" #include "namespace.h" +#include "raw_syscalls.h" +#include "syscall_wrappers.h" #include "terminal.h" #include "utils.h" @@ -74,51 +69,41 @@ #include #endif -#ifndef SOCK_CLOEXEC -#define SOCK_CLOEXEC 02000000 -#endif - -#ifndef MS_REC -#define MS_REC 16384 -#endif - -#ifndef MS_SLAVE -#define MS_SLAVE (1 << 19) -#endif +lxc_log_define(attach, lxc); -lxc_log_define(lxc_attach, lxc); +/* Define default options if no options are supplied by the user. */ +static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT; -/* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */ -#define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1) static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid) { int ret; bool found; FILE *proc_file; - char proc_fn[__PROC_STATUS_LEN]; + char proc_fn[LXC_PROC_STATUS_LEN]; size_t line_bufsz = 0; char *line = NULL; struct lxc_proc_context_info *info = NULL; /* Read capabilities. */ - ret = snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", pid); - if (ret < 0 || ret >= __PROC_STATUS_LEN) + ret = snprintf(proc_fn, LXC_PROC_STATUS_LEN, "/proc/%d/status", pid); + if (ret < 0 || ret >= LXC_PROC_STATUS_LEN) goto on_error; proc_file = fopen(proc_fn, "r"); if (!proc_file) { - SYSERROR("Could not open %s.", proc_fn); + SYSERROR("Could not open %s", proc_fn); goto on_error; } info = calloc(1, sizeof(*info)); if (!info) { - SYSERROR("Could not allocate memory."); + SYSERROR("Could not allocate memory"); fclose(proc_file); return NULL; } found = false; + while (getline(&line, &line_bufsz, proc_file) != -1) { ret = sscanf(line, "CapBnd: %llx", &info->capability_mask); if (ret != EOF && ret == 1) { @@ -131,9 +116,8 @@ static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid) fclose(proc_file); if (!found) { - SYSERROR("Could not read capability bounding set from %s.", - proc_fn); - errno = ENOENT; + ERROR("Could not read capability bounding set from %s", + proc_fn); goto on_error; } @@ -155,6 +139,7 @@ static inline void lxc_proc_close_ns_fd(struct lxc_proc_context_info *ctx) for (i = 0; i < LXC_NS_MAX; i++) { if (ctx->ns_fd[i] < 0) continue; + close(ctx->ns_fd[i]); ctx->ns_fd[i] = -EBADF; } @@ -172,7 +157,6 @@ static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx) lxc_proc_close_ns_fd(ctx); free(ctx); - ctx = NULL; } /** @@ -189,6 +173,7 @@ static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx) static int in_same_namespace(pid_t pid1, pid_t pid2, const char *ns) { int ns_fd1 = -1, ns_fd2 = -1, ret = -1; + int saved_errno; struct stat ns_st1, ns_st2; ns_fd1 = lxc_preserve_ns(pid1, ns); @@ -215,21 +200,25 @@ static int in_same_namespace(pid_t pid1, pid_t pid2, const char *ns) goto out; /* processes are in the same namespace */ - ret = -EINVAL; - if ((ns_st1.st_dev == ns_st2.st_dev ) && (ns_st1.st_ino == ns_st2.st_ino)) + if ((ns_st1.st_dev == ns_st2.st_dev ) && (ns_st1.st_ino == ns_st2.st_ino)) { + ret = -EINVAL; goto out; + } /* processes are in different namespaces */ ret = ns_fd2; ns_fd2 = -1; out: + saved_errno = errno; if (ns_fd1 >= 0) close(ns_fd1); + if (ns_fd2 >= 0) close(ns_fd2); + errno = saved_errno; return ret; } @@ -244,7 +233,7 @@ static int lxc_attach_to_ns(pid_t pid, struct lxc_proc_context_info *ctx) ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag); if (ret < 0) { SYSERROR("Failed to attach to %s namespace of %d", - ns_info[i].proc_name, pid); + ns_info[i].proc_name, pid); return -1; } @@ -260,13 +249,13 @@ static int lxc_attach_remount_sys_proc(void) ret = unshare(CLONE_NEWNS); if (ret < 0) { - SYSERROR("Failed to unshare mount namespace."); + SYSERROR("Failed to unshare mount namespace"); return -1; } if (detect_shared_rootfs()) { if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) { - SYSERROR("Failed to make / rslave."); + SYSERROR("Failed to make / rslave"); ERROR("Continuing..."); } } @@ -274,13 +263,13 @@ static int lxc_attach_remount_sys_proc(void) /* Assume /proc is always mounted, so remount it. */ ret = umount2("/proc", MNT_DETACH); if (ret < 0) { - SYSERROR("Failed to unmount /proc."); + SYSERROR("Failed to unmount /proc"); return -1; } ret = mount("none", "/proc", "proc", 0, NULL); if (ret < 0) { - SYSERROR("Failed to remount /proc."); + SYSERROR("Failed to remount /proc"); return -1; } @@ -289,13 +278,13 @@ static int lxc_attach_remount_sys_proc(void) */ ret = umount2("/sys", MNT_DETACH); if (ret < 0 && errno != EINVAL) { - SYSERROR("Failed to unmount /sys."); + SYSERROR("Failed to unmount /sys"); return -1; } else if (ret == 0) { /* Remount it. */ ret = mount("none", "/sys", "sysfs", 0, NULL); if (ret < 0) { - SYSERROR("Failed to remount /sys."); + SYSERROR("Failed to remount /sys"); return -1; } } @@ -312,10 +301,12 @@ static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx) if (ctx->capability_mask & (1LL << cap)) continue; - if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) { + if (prctl(PR_CAPBSET_DROP, prctl_arg(cap), prctl_arg(0), + prctl_arg(0), prctl_arg(0))) { SYSERROR("Failed to drop capability %d", cap); return -1; } + TRACE("Dropped capability %d", cap); } @@ -350,6 +341,7 @@ static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx, if (!extra_keep_store[i]) { while (i > 0) free(extra_keep_store[--i]); + free(extra_keep_store); return -1; } @@ -370,7 +362,7 @@ static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx, free(extra_keep_store); } - SYSERROR("Failed to clear environment"); + ERROR("Failed to clear environment"); return -1; } @@ -381,10 +373,12 @@ static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx, if (extra_keep_store[i]) { ret = setenv(extra_keep[i], extra_keep_store[i], 1); if (ret < 0) - WARN("%s - Failed to set environment variable", strerror(errno)); + SYSWARN("Failed to set environment variable"); } + free(extra_keep_store[i]); } + free(extra_keep_store); } @@ -396,13 +390,13 @@ static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx, if (!path_kept) { ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1); if (ret < 0) - WARN("%s - Failed to set environment variable", strerror(errno)); + SYSWARN("Failed to set environment variable"); } } ret = putenv("container=lxc"); if (ret < 0) { - WARN("%s - Failed to set environment variable", strerror(errno)); + SYSWARN("Failed to set environment variable"); return -1; } @@ -427,6 +421,7 @@ static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx, if (extra_env) { for (; *extra_env; extra_env++) { char *p; + /* We just assume the user knows what they are doing, so * we don't do any checks. */ @@ -436,7 +431,7 @@ static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx, ret = putenv(p); if (ret < 0) - WARN("%s - Failed to set environment variable", strerror(errno)); + SYSWARN("Failed to set environment variable"); } } @@ -448,12 +443,15 @@ static char *lxc_attach_getpwshell(uid_t uid) int fd, ret; pid_t pid; int pipes[2]; - char *result = NULL; + FILE *pipe_f; + bool found = false; + size_t line_bufsz = 0; + char *line = NULL, *result = NULL; /* We need to fork off a process that runs the getent program, and we * need to capture its output, so we use a pipe for that purpose. */ - ret = pipe(pipes); + ret = pipe2(pipes, O_CLOEXEC); if (ret < 0) return NULL; @@ -464,100 +462,7 @@ static char *lxc_attach_getpwshell(uid_t uid) return NULL; } - if (pid) { - int status; - FILE *pipe_f; - int found = 0; - size_t line_bufsz = 0; - char *line = NULL; - - close(pipes[1]); - - pipe_f = fdopen(pipes[0], "r"); - while (getline(&line, &line_bufsz, pipe_f) != -1) { - int i; - long value; - char *token; - char *endptr = NULL, *saveptr = NULL; - - /* If we already found something, just continue to read - * until the pipe doesn't deliver any more data, but - * don't modify the existing data structure. - */ - if (found) - continue; - - /* Trim line on the right hand side. */ - for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i) - line[i - 1] = '\0'; - - /* Split into tokens: first: user name. */ - token = strtok_r(line, ":", &saveptr); - if (!token) - continue; - /* next: dummy password field */ - token = strtok_r(NULL, ":", &saveptr); - if (!token) - continue; - /* next: user id */ - token = strtok_r(NULL, ":", &saveptr); - value = token ? strtol(token, &endptr, 10) : 0; - if (!token || !endptr || *endptr || value == LONG_MIN || value == LONG_MAX) - continue; - /* dummy sanity check: user id matches */ - if ((uid_t) value != uid) - continue; - /* skip fields: gid, gecos, dir, go to next field 'shell' */ - for (i = 0; i < 4; i++) { - token = strtok_r(NULL, ":", &saveptr); - if (!token) - break; - } - if (!token) - continue; - free(result); - result = strdup(token); - - /* Sanity check that there are no fields after that. */ - token = strtok_r(NULL, ":", &saveptr); - if (token) - continue; - - found = 1; - } - - free(line); - fclose(pipe_f); - again: - if (waitpid(pid, &status, 0) < 0) { - if (errno == EINTR) - goto again; - free(result); - return NULL; - } - - /* Some sanity checks. If anything even hinted at going wrong, - * we can't be sure we have a valid result, so we assume we - * don't. - */ - - if (!WIFEXITED(status)) { - free(result); - return NULL; - } - - if (WEXITSTATUS(status) != 0) { - free(result); - return NULL; - } - - if (!found) { - free(result); - return NULL; - } - - return result; - } else { + if (!pid) { char uid_buf[32]; char *arguments[] = { "getent", @@ -569,37 +474,120 @@ static char *lxc_attach_getpwshell(uid_t uid) close(pipes[0]); /* We want to capture stdout. */ - dup2(pipes[1], 1); + ret = dup2(pipes[1], STDOUT_FILENO); close(pipes[1]); + if (ret < 0) + _exit(EXIT_FAILURE); /* Get rid of stdin/stderr, so we try to associate it with * /dev/null. */ - fd = open("/dev/null", O_RDWR); + fd = open_devnull(); if (fd < 0) { - close(0); - close(2); + close(STDIN_FILENO); + close(STDERR_FILENO); } else { - dup2(fd, 0); - dup2(fd, 2); + (void)dup3(fd, STDIN_FILENO, O_CLOEXEC); + (void)dup3(fd, STDOUT_FILENO, O_CLOEXEC); close(fd); } /* Finish argument list. */ - ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid); - if (ret <= 0) - exit(-1); + ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid); + if (ret <= 0 || ret >= sizeof(uid_buf)) + _exit(EXIT_FAILURE); /* Try to run getent program. */ - (void) execvp("getent", arguments); - exit(-1); + (void)execvp("getent", arguments); + _exit(EXIT_FAILURE); + } + + close(pipes[1]); + + pipe_f = fdopen(pipes[0], "r"); + while (getline(&line, &line_bufsz, pipe_f) != -1) { + int i; + long value; + char *token; + char *endptr = NULL, *saveptr = NULL; + + /* If we already found something, just continue to read + * until the pipe doesn't deliver any more data, but + * don't modify the existing data structure. + */ + if (found) + continue; + + if (!line) + continue; + + /* Trim line on the right hand side. */ + for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i) + line[i - 1] = '\0'; + + /* Split into tokens: first: user name. */ + token = strtok_r(line, ":", &saveptr); + if (!token) + continue; + + /* next: dummy password field */ + token = strtok_r(NULL, ":", &saveptr); + if (!token) + continue; + + /* next: user id */ + token = strtok_r(NULL, ":", &saveptr); + value = token ? strtol(token, &endptr, 10) : 0; + if (!token || !endptr || *endptr || value == LONG_MIN || + value == LONG_MAX) + continue; + + /* dummy sanity check: user id matches */ + if ((uid_t)value != uid) + continue; + + /* skip fields: gid, gecos, dir, go to next field 'shell' */ + for (i = 0; i < 4; i++) { + token = strtok_r(NULL, ":", &saveptr); + if (!token) + continue; + } + + if (!token) + continue; + + free(result); + result = strdup(token); + + /* Sanity check that there are no fields after that. */ + token = strtok_r(NULL, ":", &saveptr); + if (token) + continue; + + found = true; + } + + free(line); + fclose(pipe_f); + + ret = wait_for_pid(pid); + if (ret < 0) { + free(result); + return NULL; + } + + if (!found) { + free(result); + return NULL; } + + return result; } static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid) { FILE *proc_file; - char proc_fn[__PROC_STATUS_LEN]; + char proc_fn[LXC_PROC_STATUS_LEN]; int ret; char *line = NULL; size_t line_bufsz = 0; @@ -607,8 +595,9 @@ static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid) uid_t uid = (uid_t)-1; gid_t gid = (gid_t)-1; - /* Read capabilities. */ - snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", 1); + ret = snprintf(proc_fn, LXC_PROC_STATUS_LEN, "/proc/%d/status", 1); + if (ret < 0 || ret >= LXC_PROC_STATUS_LEN) + return; proc_file = fopen(proc_fn, "r"); if (!proc_file) @@ -626,6 +615,7 @@ static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid) if (ret != EOF && ret == 1) gid = (gid_t)value; } + if (uid != (uid_t)-1 && gid != (gid_t)-1) break; } @@ -636,6 +626,7 @@ static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid) /* Only override arguments if we found something. */ if (uid != (uid_t)-1) *init_uid = uid; + if (gid != (gid_t)-1) *init_gid = gid; @@ -644,20 +635,10 @@ static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid) */ } -/* Help the optimizer along if it doesn't know that exit always exits. */ -#define rexit(c) \ - do { \ - int __c = (c); \ - _exit(__c); \ - return __c; \ - } while (0) - -/* Define default options if no options are supplied by the user. */ -static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT; - -static bool fetch_seccomp(struct lxc_container *c, - lxc_attach_options_t *options) +static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options) { + int ret; + bool bret; char *path; if (!(options->namespaces & CLONE_NEWNS) || @@ -668,62 +649,61 @@ static bool fetch_seccomp(struct lxc_container *c, } /* Remove current setting. */ - if (!c->set_config_item(c, "lxc.seccomp", "") && - !c->set_config_item(c, "lxc.seccomp.profile", "")) { + if (!c->set_config_item(c, "lxc.seccomp.profile", "") && + !c->set_config_item(c, "lxc.seccomp", "")) return false; - } /* Fetch the current profile path over the cmd interface. */ path = c->get_running_config_item(c, "lxc.seccomp.profile"); if (!path) { - INFO("Failed to get running config item for lxc.seccomp.profile"); + INFO("Failed to retrieve lxc.seccomp.profile"); + path = c->get_running_config_item(c, "lxc.seccomp"); - } - if (!path) { - INFO("Failed to get running config item for lxc.seccomp"); - return true; + if (!path) { + INFO("Failed to retrieve lxc.seccomp"); + return true; + } } /* Copy the value into the new lxc_conf. */ - if (!c->set_config_item(c, "lxc.seccomp.profile", path)) { - free(path); - return false; - } + bret = c->set_config_item(c, "lxc.seccomp.profile", path); free(path); + if (!bret) + return false; /* Attempt to parse the resulting config. */ - if (lxc_read_seccomp_config(c->lxc_conf) < 0) { - ERROR("Error reading seccomp policy."); + ret = lxc_read_seccomp_config(c->lxc_conf); + if (ret < 0) { + ERROR("Failed to retrieve seccomp policy"); return false; } - INFO("Retrieved seccomp policy."); + INFO("Retrieved seccomp policy"); return true; } static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options) { + bool bret; char *val; /* Remove current setting. */ - if (!c->set_config_item(c, "lxc.no_new_privs", "")) + if (!c->set_config_item(c, "lxc.no_new_privs", "")) { + INFO("Failed to unset lxc.no_new_privs"); return false; + } /* Retrieve currently active setting. */ val = c->get_running_config_item(c, "lxc.no_new_privs"); if (!val) { - INFO("Failed to get running config item for lxc.no_new_privs."); + INFO("Failed to retrieve lxc.no_new_privs"); return false; } /* Set currently active setting. */ - if (!c->set_config_item(c, "lxc.no_new_privs", val)) { - free(val); - return false; - } + bret = c->set_config_item(c, "lxc.no_new_privs", val); free(val); - - return true; + return bret; } static signed long get_personality(const char *name, const char *lxcpath) @@ -743,7 +723,7 @@ static signed long get_personality(const char *name, const char *lxcpath) struct attach_clone_payload { int ipc_socket; - int pty_fd; + int terminal_slave_fd; lxc_attach_options_t *options; struct lxc_proc_context_info *init_ctx; lxc_attach_exec_t exec_function; @@ -753,14 +733,13 @@ struct attach_clone_payload { static void lxc_put_attach_clone_payload(struct attach_clone_payload *p) { if (p->ipc_socket >= 0) { - shutdown(p->ipc_socket, SHUT_RDWR); close(p->ipc_socket); p->ipc_socket = -EBADF; } - if (p->pty_fd >= 0) { - close(p->pty_fd); - p->pty_fd = -EBADF; + if (p->terminal_slave_fd >= 0) { + close(p->terminal_slave_fd); + p->terminal_slave_fd = -EBADF; } if (p->init_ctx) { @@ -774,6 +753,8 @@ static int attach_child_main(struct attach_clone_payload *payload) int fd, lsm_fd, ret; uid_t new_uid; gid_t new_gid; + uid_t ns_root_uid = 0; + gid_t ns_root_gid = 0; lxc_attach_options_t* options = payload->options; struct lxc_proc_context_info* init_ctx = payload->init_ctx; bool needs_lsm = (options->namespaces & CLONE_NEWNS) && @@ -790,6 +771,7 @@ static int attach_child_main(struct attach_clone_payload *payload) ret = lxc_attach_remount_sys_proc(); if (ret < 0) goto on_error; + TRACE("Remounted \"/proc\" and \"/sys\""); } @@ -802,9 +784,11 @@ static int attach_child_main(struct attach_clone_payload *payload) new_personality = init_ctx->personality; else new_personality = options->personality; + ret = personality(new_personality); if (ret < 0) goto on_error; + TRACE("Set new personality"); } #endif @@ -813,6 +797,7 @@ static int attach_child_main(struct attach_clone_payload *payload) ret = lxc_attach_drop_privs(init_ctx); if (ret < 0) goto on_error; + TRACE("Dropped capabilities"); } @@ -825,6 +810,7 @@ static int attach_child_main(struct attach_clone_payload *payload) options->extra_keep_env); if (ret < 0) goto on_error; + TRACE("Set up environment"); /* This remark only affects fully unprivileged containers: @@ -840,48 +826,64 @@ static int attach_child_main(struct attach_clone_payload *payload) */ if (needs_lsm) { ret = lxc_abstract_unix_recv_fds(payload->ipc_socket, &lsm_fd, 1, NULL, 0); - if (ret <= 0) + if (ret <= 0) { + if (ret < 0) + SYSERROR("Failed to receive lsm label fd"); + goto on_error; + } + TRACE("Received LSM label file descriptor %d from parent", lsm_fd); } if (options->stdin_fd > 0 && isatty(options->stdin_fd)) { - ret = lxc_make_controlling_pty(options->stdin_fd); + ret = lxc_make_controlling_terminal(options->stdin_fd); if (ret < 0) goto on_error; } - /* Set {u,g}id. */ - new_uid = 0; - new_gid = 0; - /* Ignore errors, we will fall back to root in that case (/proc was not - * mounted etc.). - */ - if (options->namespaces & CLONE_NEWUSER) - lxc_attach_get_init_uidgid(&new_uid, &new_gid); + if (options->namespaces & CLONE_NEWUSER) { + /* Check whether nsuid 0 has a mapping. */ + ns_root_uid = get_ns_uid(0); - if (options->uid != (uid_t)-1) - new_uid = options->uid; - if (options->gid != (gid_t)-1) - new_gid = options->gid; + /* Check whether nsgid 0 has a mapping. */ + ns_root_gid = get_ns_gid(0); - /* Try to set the {u,g}id combination. */ - if (new_uid != 0 || new_gid != 0 || options->namespaces & CLONE_NEWUSER) { - ret = lxc_switch_uid_gid(new_uid, new_gid); - if (ret < 0) + /* If there's no mapping for nsuid 0 try to retrieve the nsuid + * init was started with. + */ + if (ns_root_uid == LXC_INVALID_UID) + lxc_attach_get_init_uidgid(&ns_root_uid, &ns_root_gid); + + if (ns_root_uid == LXC_INVALID_UID) goto on_error; - ret = lxc_setgroups(0, NULL); - if (ret < 0) + if (!lxc_switch_uid_gid(ns_root_uid, ns_root_gid)) goto on_error; } + if (!lxc_setgroups(0, NULL) && errno != EPERM) + goto on_error; + + /* Set {u,g}id. */ + if (options->uid != LXC_INVALID_UID) + new_uid = options->uid; + else + new_uid = ns_root_uid; + + if (options->gid != LXC_INVALID_GID) + new_gid = options->gid; + else + new_gid = ns_root_gid; + if ((init_ctx->container && init_ctx->container->lxc_conf && init_ctx->container->lxc_conf->no_new_privs) || (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) { - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0), + prctl_arg(0), prctl_arg(0)); if (ret < 0) goto on_error; + TRACE("Set PR_SET_NO_NEW_PRIVS"); } @@ -890,10 +892,12 @@ static int attach_child_main(struct attach_clone_payload *payload) /* Change into our new LSM profile. */ on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false; + ret = lsm_process_label_set_at(lsm_fd, init_ctx->lsm_label, on_exec); close(lsm_fd); if (ret < 0) goto on_error; + TRACE("Set %s LSM label to \"%s\"", lsm_name(), init_ctx->lsm_label); } @@ -902,12 +906,14 @@ static int attach_child_main(struct attach_clone_payload *payload) ret = lxc_seccomp_load(init_ctx->container->lxc_conf); if (ret < 0) goto on_error; + TRACE("Loaded seccomp profile"); } - shutdown(payload->ipc_socket, SHUT_RDWR); + close(payload->ipc_socket); payload->ipc_socket = -EBADF; lxc_proc_put_context_info(init_ctx); + payload->init_ctx = NULL; /* The following is done after the communication socket is shut down. * That way, all errors that might (though unlikely) occur up until this @@ -919,13 +925,13 @@ static int attach_child_main(struct attach_clone_payload *payload) * may want to make sure the fds are closed, for example. */ if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO) - dup2(options->stdin_fd, STDIN_FILENO); + (void)dup2(options->stdin_fd, STDIN_FILENO); if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO) - dup2(options->stdout_fd, STDOUT_FILENO); + (void)dup2(options->stdout_fd, STDOUT_FILENO); if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO) - dup2(options->stderr_fd, STDERR_FILENO); + (void)dup2(options->stderr_fd, STDERR_FILENO); /* close the old fds */ if (options->stdin_fd > STDERR_FILENO) @@ -941,68 +947,71 @@ static int attach_child_main(struct attach_clone_payload *payload) * here, ignore errors. */ for (fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) { - int flags; - - flags = fcntl(fd, F_GETFL); - if (flags < 0) - continue; - - if ((flags & FD_CLOEXEC) == 0) - continue; - - ret = fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC); + ret = fd_cloexec(fd, false); if (ret < 0) { SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd); goto on_error; } } - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) { - ret = lxc_login_pty(payload->pty_fd); + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + ret = lxc_terminal_prepare_login(payload->terminal_slave_fd); if (ret < 0) { - SYSERROR("Failed to prepare pty file descriptor %d", payload->pty_fd); + SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd); goto on_error; } - TRACE("Prepared pty file descriptor %d", payload->pty_fd); + + TRACE("Prepared terminal file descriptor %d", payload->terminal_slave_fd); } + /* Avoid unnecessary syscalls. */ + if (new_uid == ns_root_uid) + new_uid = LXC_INVALID_UID; + + if (new_gid == ns_root_gid) + new_gid = LXC_INVALID_GID; + + if (!lxc_switch_uid_gid(new_uid, new_gid)) + goto on_error; + /* We're done, so we can now do whatever the user intended us to do. */ - rexit(payload->exec_function(payload->exec_payload)); + _exit(payload->exec_function(payload->exec_payload)); on_error: lxc_put_attach_clone_payload(payload); - rexit(EXIT_FAILURE); + _exit(EXIT_FAILURE); } -static int lxc_attach_pty(struct lxc_conf *conf, struct lxc_terminal *pty) +static int lxc_attach_terminal(struct lxc_conf *conf, + struct lxc_terminal *terminal) { int ret; - lxc_terminal_init(pty); + lxc_terminal_init(terminal); - ret = lxc_terminal_create(pty); + ret = lxc_terminal_create(terminal); if (ret < 0) { - SYSERROR("Failed to create pty"); + ERROR("Failed to create terminal"); return -1; } /* Shift ttys to container. */ - ret = lxc_terminal_map_ids(conf, pty); + ret = lxc_terminal_map_ids(conf, terminal); if (ret < 0) { - ERROR("Failed to shift pty"); + ERROR("Failed to chown terminal"); goto on_error; } return 0; on_error: - lxc_terminal_delete(pty); - lxc_terminal_conf_free(pty); + lxc_terminal_delete(terminal); + lxc_terminal_conf_free(terminal); return -1; } -static int lxc_attach_pty_mainloop_init(struct lxc_terminal *pty, - struct lxc_epoll_descr *descr) +static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal, + struct lxc_epoll_descr *descr) { int ret; @@ -1012,7 +1021,7 @@ static int lxc_attach_pty_mainloop_init(struct lxc_terminal *pty, return -1; } - ret = lxc_terminal_mainloop_add(descr, pty); + ret = lxc_terminal_mainloop_add(descr, terminal); if (ret < 0) { ERROR("Failed to add handlers to mainloop"); lxc_mainloop_close(descr); @@ -1022,40 +1031,40 @@ static int lxc_attach_pty_mainloop_init(struct lxc_terminal *pty, return 0; } -static inline void lxc_attach_pty_close_master(struct lxc_terminal *pty) +static inline void lxc_attach_terminal_close_master(struct lxc_terminal *terminal) { - if (pty->master < 0) + if (terminal->master < 0) return; - close(pty->master); - pty->master = -EBADF; + close(terminal->master); + terminal->master = -EBADF; } -static inline void lxc_attach_pty_close_slave(struct lxc_terminal *pty) +static inline void lxc_attach_terminal_close_slave(struct lxc_terminal *terminal) { - if (pty->slave < 0) + if (terminal->slave < 0) return; - close(pty->slave); - pty->slave = -EBADF; + close(terminal->slave); + terminal->slave = -EBADF; } -static inline void lxc_attach_pty_close_peer(struct lxc_terminal *pty) +static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal) { - if (pty->peer < 0) + if (terminal->peer < 0) return; - close(pty->peer); - pty->peer = -EBADF; + close(terminal->peer); + terminal->peer = -EBADF; } -static inline void lxc_attach_pty_close_log(struct lxc_terminal *pty) +static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal) { - if (pty->log_fd < 0) + if (terminal->log_fd < 0) return; - close(pty->log_fd); - pty->log_fd = -EBADF; + close(terminal->log_fd); + terminal->log_fd = -EBADF; } int lxc_attach(const char *name, const char *lxcpath, @@ -1068,13 +1077,13 @@ int lxc_attach(const char *name, const char *lxcpath, signed long personality; pid_t attached_pid, init_pid, pid; struct lxc_proc_context_info *init_ctx; - struct lxc_terminal pty; + struct lxc_terminal terminal; struct lxc_conf *conf; struct attach_clone_payload payload = {0}; ret = access("/proc/self/ns", X_OK); if (ret) { - ERROR("Does this kernel version support namespaces?"); + SYSERROR("Does this kernel version support namespaces?"); return -1; } @@ -1083,7 +1092,7 @@ int lxc_attach(const char *name, const char *lxcpath, init_pid = lxc_cmd_get_init_pid(name, lxcpath); if (init_pid < 0) { - ERROR("Failed to get init pid."); + ERROR("Failed to get init pid"); return -1; } @@ -1111,16 +1120,16 @@ int lxc_attach(const char *name, const char *lxcpath, init_ctx->container->lxc_conf = lxc_conf_init(); if (!init_ctx->container->lxc_conf) { lxc_proc_put_context_info(init_ctx); - return -ENOMEM; + return -1; } } conf = init_ctx->container->lxc_conf; if (!fetch_seccomp(init_ctx->container, options)) - WARN("Failed to get seccomp policy."); + WARN("Failed to get seccomp policy"); if (!no_new_privs(init_ctx->container, options)) - WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set."); + WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set"); cwd = getcwd(NULL, 0); @@ -1152,8 +1161,9 @@ int lxc_attach(const char *name, const char *lxcpath, } pid = lxc_raw_getpid(); + for (i = 0; i < LXC_NS_MAX; i++) { - int j, saved_errno; + int j; if (options->namespaces & ns_info[i].clone_flag) init_ctx->ns_fd[i] = lxc_preserve_ns(init_pid, ns_info[i].proc_name); @@ -1161,6 +1171,7 @@ int lxc_attach(const char *name, const char *lxcpath, init_ctx->ns_fd[i] = in_same_namespace(pid, init_pid, ns_info[i].proc_name); else continue; + if (init_ctx->ns_fd[i] >= 0) continue; @@ -1172,33 +1183,32 @@ int lxc_attach(const char *name, const char *lxcpath, } /* We failed to preserve the namespace. */ - saved_errno = errno; + SYSERROR("Failed to attach to %s namespace of %d", + ns_info[i].proc_name, pid); + /* Close all already opened file descriptors before we return an * error, so we don't leak them. */ for (j = 0; j < i; j++) close(init_ctx->ns_fd[j]); - errno = saved_errno; - SYSERROR("Failed to attach to %s namespace of %d", - ns_info[i].proc_name, pid); free(cwd); lxc_proc_put_context_info(init_ctx); return -1; } - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) { - ret = lxc_attach_pty(conf, &pty); + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + ret = lxc_attach_terminal(conf, &terminal); if (ret < 0) { - ERROR("Failed to allocate pty"); + ERROR("Failed to setup new terminal"); free(cwd); lxc_proc_put_context_info(init_ctx); return -1; } - pty.log_fd = options->log_fd; + terminal.log_fd = options->log_fd; } else { - lxc_terminal_init(&pty); + lxc_terminal_init(&terminal); } /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order @@ -1209,7 +1219,7 @@ int lxc_attach(const char *name, const char *lxcpath, * just fork()s away without exec'ing directly after, the socket fd will * exist in the forked process from the other thread and any close() in * our own child process will not really cause the socket to close - * properly, potentiall causing the parent to hang. + * properly, potentially causing the parent to hang. * * For this reason, while IPC is still active, we have to use shutdown() * if the child exits prematurely in order to signal that the socket is @@ -1236,7 +1246,7 @@ int lxc_attach(const char *name, const char *lxcpath, */ ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); if (ret < 0) { - SYSERROR("Could not set up required IPC mechanism for attaching."); + SYSERROR("Could not set up required IPC mechanism for attaching"); free(cwd); lxc_proc_put_context_info(init_ctx); return -1; @@ -1251,7 +1261,7 @@ int lxc_attach(const char *name, const char *lxcpath, */ pid = fork(); if (pid < 0) { - SYSERROR("Failed to create first subprocess."); + SYSERROR("Failed to create first subprocess"); free(cwd); lxc_proc_put_context_info(init_ctx); return -1; @@ -1266,15 +1276,22 @@ int lxc_attach(const char *name, const char *lxcpath, close(ipc_sockets[1]); free(cwd); lxc_proc_close_ns_fd(init_ctx); - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) - lxc_attach_pty_close_slave(&pty); + if (options->attach_flags & LXC_ATTACH_TERMINAL) + lxc_attach_terminal_close_slave(&terminal); /* Attach to cgroup, if requested. */ if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { - if (!cgroup_attach(name, lxcpath, pid)) + struct cgroup_ops *cgroup_ops; + + cgroup_ops = cgroup_init(conf); + if (!cgroup_ops) goto on_error; - TRACE("Moved intermediate process %d into container's " - "cgroups", pid); + + if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid)) + goto on_error; + + cgroup_exit(cgroup_ops); + TRACE("Moved intermediate process %d into container's cgroups", pid); } /* Setup /proc limits */ @@ -1291,11 +1308,12 @@ int lxc_attach(const char *name, const char *lxcpath, goto on_error; } - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) { - ret = lxc_attach_pty_mainloop_init(&pty, &descr); + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + ret = lxc_attach_terminal_mainloop_init(&terminal, &descr); if (ret < 0) goto on_error; - TRACE("Initalized pty mainloop"); + + TRACE("Initialized terminal mainloop"); } /* Let the child process know to go ahead. */ @@ -1303,12 +1321,14 @@ int lxc_attach(const char *name, const char *lxcpath, ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status)); if (ret != sizeof(status)) goto close_mainloop; + TRACE("Told intermediate process to start initializing"); /* Get pid of attached process from intermediate process. */ ret = lxc_read_nointr(ipc_sockets[0], &attached_pid, sizeof(attached_pid)); if (ret != sizeof(attached_pid)) goto close_mainloop; + TRACE("Received pid %d of attached process in parent pid namespace", attached_pid); /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */ @@ -1321,6 +1341,7 @@ int lxc_attach(const char *name, const char *lxcpath, ret = wait_for_pid(pid); if (ret < 0) goto close_mainloop; + TRACE("Intermediate process %d exited", pid); /* We will always have to reap the attached process now. */ @@ -1338,15 +1359,20 @@ int lxc_attach(const char *name, const char *lxcpath, labelfd = lsm_process_label_fd_get(attached_pid, on_exec); if (labelfd < 0) goto close_mainloop; + TRACE("Opened LSM label file descriptor %d", labelfd); /* Send child fd of the LSM security module to write to. */ ret = lxc_abstract_unix_send_fds(ipc_sockets[0], &labelfd, 1, NULL, 0); - close(labelfd); if (ret <= 0) { - SYSERROR("%d", (int)ret); + if (ret < 0) + SYSERROR("Failed to send lsm label fd"); + + close(labelfd); goto close_mainloop; } + + close(labelfd); TRACE("Sent LSM label file descriptor %d to child", labelfd); } @@ -1364,7 +1390,8 @@ int lxc_attach(const char *name, const char *lxcpath, ret_parent = 0; to_cleanup_pid = -1; - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) { + + if (options->attach_flags & LXC_ATTACH_TERMINAL) { ret = lxc_mainloop(&descr, -1); if (ret < 0) { ret_parent = -1; @@ -1373,7 +1400,7 @@ int lxc_attach(const char *name, const char *lxcpath, } close_mainloop: - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) + if (options->attach_flags & LXC_ATTACH_TERMINAL) lxc_mainloop_close(&descr); on_error: @@ -1385,10 +1412,11 @@ int lxc_attach(const char *name, const char *lxcpath, if (to_cleanup_pid > 0) (void)wait_for_pid(to_cleanup_pid); - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) { - lxc_terminal_delete(&pty); - lxc_terminal_conf_free(&pty); + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + lxc_terminal_delete(&terminal); + lxc_terminal_conf_free(&terminal); } + lxc_proc_put_context_info(init_ctx); return ret_parent; } @@ -1396,10 +1424,11 @@ int lxc_attach(const char *name, const char *lxcpath, /* close unneeded file descriptors */ close(ipc_sockets[0]); ipc_sockets[0] = -EBADF; - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) { - lxc_attach_pty_close_master(&pty); - lxc_attach_pty_close_peer(&pty); - lxc_attach_pty_close_log(&pty); + + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + lxc_attach_terminal_close_master(&terminal); + lxc_attach_terminal_close_peer(&terminal); + lxc_attach_terminal_close_log(&terminal); } /* Wait for the parent to have setup cgroups. */ @@ -1407,8 +1436,9 @@ int lxc_attach(const char *name, const char *lxcpath, if (ret != sizeof(status)) { shutdown(ipc_sockets[1], SHUT_RDWR); lxc_proc_put_context_info(init_ctx); - rexit(-1); + _exit(EXIT_FAILURE); } + TRACE("Intermediate process starting to initialize"); /* Attach now, create another subprocess later, since pid namespaces @@ -1419,8 +1449,9 @@ int lxc_attach(const char *name, const char *lxcpath, ERROR("Failed to enter namespaces"); shutdown(ipc_sockets[1], SHUT_RDWR); lxc_proc_put_context_info(init_ctx); - rexit(-1); + _exit(EXIT_FAILURE); } + /* close namespace file descriptors */ lxc_proc_close_ns_fd(init_ctx); @@ -1440,7 +1471,7 @@ int lxc_attach(const char *name, const char *lxcpath, payload.ipc_socket = ipc_sockets[1]; payload.options = options; payload.init_ctx = init_ctx; - payload.pty_fd = pty.slave; + payload.terminal_slave_fd = terminal.slave; payload.exec_function = exec_function; payload.exec_payload = exec_payload; @@ -1449,17 +1480,28 @@ int lxc_attach(const char *name, const char *lxcpath, SYSERROR("Failed to clone attached process"); shutdown(ipc_sockets[1], SHUT_RDWR); lxc_proc_put_context_info(init_ctx); - rexit(-1); + _exit(EXIT_FAILURE); } if (pid == 0) { + if (options->attach_flags & LXC_ATTACH_TERMINAL) { + ret = pthread_sigmask(SIG_SETMASK, + &terminal.tty_state->oldmask, NULL); + if (ret < 0) { + SYSERROR("Failed to reset signal mask"); + _exit(EXIT_FAILURE); + } + } + ret = attach_child_main(&payload); if (ret < 0) ERROR("Failed to exec"); + _exit(EXIT_FAILURE); } - if (options->attach_flags & LXC_ATTACH_ALLOCATE_PTY) - lxc_attach_pty_close_slave(&pty); + + if (options->attach_flags & LXC_ATTACH_TERMINAL) + lxc_attach_terminal_close_slave(&terminal); /* Tell grandparent the pid of the pid of the newly created child. */ ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid)); @@ -1472,35 +1514,66 @@ int lxc_attach(const char *name, const char *lxcpath, */ shutdown(ipc_sockets[1], SHUT_RDWR); lxc_proc_put_context_info(init_ctx); - rexit(-1); + _exit(EXIT_FAILURE); } + TRACE("Sending pid %d of attached process", pid); /* The rest is in the hands of the initial and the attached process. */ lxc_proc_put_context_info(init_ctx); - rexit(0); + _exit(0); } -int lxc_attach_run_command(void* payload) +int lxc_attach_run_command(void *payload) { - lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload; + int ret = -1; + lxc_attach_command_t *cmd = payload; - execvp(cmd->program, cmd->argv); - SYSERROR("Failed to exec \"%s\".", cmd->program); - return -1; + ret = execvp(cmd->program, cmd->argv); + if (ret < 0) { + switch (errno) { + case ENOEXEC: + ret = 126; + break; + case ENOENT: + ret = 127; + break; + } + } + + SYSERROR("Failed to exec \"%s\"", cmd->program); + return ret; } int lxc_attach_run_shell(void* payload) { uid_t uid; - struct passwd *passwd; + struct passwd pwent; + struct passwd *pwentp = NULL; char *user_shell; + char *buf; + size_t bufsize; + int ret; /* Ignore payload parameter. */ (void)payload; uid = getuid(); - passwd = getpwuid(uid); + + bufsize = sysconf(_SC_GETPW_R_SIZE_MAX); + if (bufsize == -1) + bufsize = 1024; + + buf = malloc(bufsize); + if (buf) { + ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp); + if (!pwentp) { + if (ret == 0) + WARN("Could not find matched password record"); + + WARN("Failed to get password record - %u", uid); + } + } /* This probably happens because of incompatible nss implementations in * host and container (remember, this code is still using the host's @@ -1508,10 +1581,11 @@ int lxc_attach_run_shell(void* payload) * the information by spawning a [getent passwd uid] process and parsing * the result. */ - if (!passwd) + if (!pwentp) user_shell = lxc_attach_getpwshell(uid); else - user_shell = passwd->pw_shell; + user_shell = pwent.pw_shell; + if (user_shell) execlp(user_shell, user_shell, (char *)NULL); @@ -1519,8 +1593,11 @@ int lxc_attach_run_shell(void* payload) * on /bin/sh as a default shell. */ execlp("/bin/sh", "/bin/sh", (char *)NULL); + SYSERROR("Failed to execute shell"); - if (!passwd) + if (!pwentp) free(user_shell); + + free(buf); return -1; }