]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
coverity: Check fstat return value
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
e0732705
CS
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e0732705
CS
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e0732705
CS
22 */
23
24#define _GNU_SOURCE
25#include <unistd.h>
26#include <stdio.h>
27#include <string.h>
28#include <stdlib.h>
29#include <errno.h>
30#include <fcntl.h>
c476bdce 31#include <grp.h>
e0732705
CS
32#include <sys/param.h>
33#include <sys/prctl.h>
7a0b0b56 34#include <sys/mount.h>
5ec27989 35#include <sys/socket.h>
1ba0013f 36#include <sys/syscall.h>
905022f7 37#include <sys/wait.h>
910bb4fa 38#include <linux/unistd.h>
905022f7 39#include <pwd.h>
e0732705
CS
40
41#if !HAVE_DECL_PR_CAPBSET_DROP
42#define PR_CAPBSET_DROP 24
43#endif
44
45#include "namespace.h"
46#include "log.h"
47#include "attach.h"
48#include "caps.h"
e0732705 49#include "config.h"
6a44839f 50#include "utils.h"
9c4693b8
CS
51#include "commands.h"
52#include "cgroup.h"
025ed0f3 53#include "lxclock.h"
fe4de9a6 54#include "lsm/lsm.h"
9c4693b8
CS
55
56#if HAVE_SYS_PERSONALITY_H
57#include <sys/personality.h>
58#endif
e0732705 59
a3da2f3b
SG
60#ifndef SOCK_CLOEXEC
61# define SOCK_CLOEXEC 02000000
62#endif
63
e0732705
CS
64lxc_log_define(lxc_attach, lxc);
65
74a3920a 66static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
e0732705
CS
67{
68 struct lxc_proc_context_info *info = calloc(1, sizeof(*info));
69 FILE *proc_file;
70 char proc_fn[MAXPATHLEN];
460a1cf0 71 char *line = NULL;
e0732705 72 size_t line_bufsz = 0;
460a1cf0 73 int ret, found;
e0732705
CS
74
75 if (!info) {
76 SYSERROR("Could not allocate memory.");
77 return NULL;
78 }
79
80 /* read capabilities */
81 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/status", pid);
82
83 proc_file = fopen(proc_fn, "r");
84 if (!proc_file) {
85 SYSERROR("Could not open %s", proc_fn);
86 goto out_error;
87 }
88
89 found = 0;
90 while (getline(&line, &line_bufsz, proc_file) != -1) {
91 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
92 if (ret != EOF && ret > 0) {
93 found = 1;
94 break;
95 }
96 }
97
fa9ac567
SH
98 if (line)
99 free(line);
e0732705
CS
100 fclose(proc_file);
101
102 if (!found) {
103 SYSERROR("Could not read capability bounding set from %s", proc_fn);
104 errno = ENOENT;
105 goto out_error;
106 }
107
108 /* read personality */
109 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/personality", pid);
110
111 proc_file = fopen(proc_fn, "r");
112 if (!proc_file) {
113 SYSERROR("Could not open %s", proc_fn);
114 goto out_error;
115 }
116
117 ret = fscanf(proc_file, "%lx", &info->personality);
118 fclose(proc_file);
119
120 if (ret == EOF || ret == 0) {
121 SYSERROR("Could not read personality from %s", proc_fn);
122 errno = ENOENT;
123 goto out_error;
124 }
fe4de9a6 125 info->lsm_label = lsm_process_label_get(pid);
e0732705 126
e0732705
CS
127 return info;
128
129out_error:
460a1cf0 130 free(info);
e0732705
CS
131 return NULL;
132}
133
fe4de9a6
DE
134static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
135{
136 if (ctx->lsm_label)
137 free(ctx->lsm_label);
138 free(ctx);
139}
140
/*
 * Move the calling process into the namespaces of process `pid`.
 *
 * `which` is a mask of CLONE_NEW* flags selecting the namespaces to enter;
 * -1 selects every namespace listed in ns[] below.
 *
 * All namespace files are opened before the first setns() call. On any
 * failure every descriptor that is still open is closed and errno is
 * restored to the value of the failing call before it is logged.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxc_attach_to_ns(pid_t pid, int which)
{
	char path[MAXPATHLEN];
	/* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
	 * the file for user namespaces in /proc/$pid/ns will be called
	 * 'user' once the kernel supports it
	 */
	static char *ns[] = { "user", "mnt", "pid", "uts", "ipc", "net" };
	static int flags[] = {
		CLONE_NEWUSER, CLONE_NEWNS, CLONE_NEWPID, CLONE_NEWUTS, CLONE_NEWIPC,
		CLONE_NEWNET
	};
	static const int size = sizeof(ns) / sizeof(char *);
	int fd[size];
	int i, j, saved_errno;

	snprintf(path, MAXPATHLEN, "/proc/%d/ns", pid);
	if (access(path, X_OK)) {
		ERROR("Does this kernel version support 'attach' ?");
		return -1;
	}

	for (i = 0; i < size; i++) {
		/* ignore if we are not supposed to attach to that
		 * namespace; -1 marks the slot as "nothing to close"
		 */
		if (which != -1 && !(which & flags[i])) {
			fd[i] = -1;
			continue;
		}

		snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]);
		fd[i] = open(path, O_RDONLY | O_CLOEXEC);
		if (fd[i] < 0) {
			saved_errno = errno;

			/* close all already opened file descriptors before
			 * we return an error, so we don't leak them; skipped
			 * namespaces hold -1 and must not be passed to close()
			 */
			for (j = 0; j < i; j++) {
				if (fd[j] >= 0)
					close(fd[j]);
			}

			errno = saved_errno;
			SYSERROR("failed to open '%s'", path);
			return -1;
		}
	}

	for (i = 0; i < size; i++) {
		if (fd[i] < 0)
			continue;

		if (setns(fd[i], 0) != 0) {
			saved_errno = errno;

			/* close this descriptor and all that were not used yet */
			for (j = i; j < size; j++) {
				if (fd[j] >= 0)
					close(fd[j]);
			}

			errno = saved_errno;
			SYSERROR("failed to set namespace '%s'", ns[i]);
			return -1;
		}

		close(fd[i]);
	}

	return 0;
}
207
/*
 * Unshare the mount namespace and mount fresh instances of /proc and, if it
 * was mounted, /sys.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxc_attach_remount_sys_proc(void)
{
	if (unshare(CLONE_NEWNS) < 0) {
		SYSERROR("failed to unshare mount namespace");
		return -1;
	}

	/* /proc is assumed to be mounted: detach it and mount a new one */
	if (umount2("/proc", MNT_DETACH) < 0) {
		SYSERROR("failed to unmount /proc");
		return -1;
	}

	if (mount("none", "/proc", "proc", 0, NULL) < 0) {
		SYSERROR("failed to remount /proc");
		return -1;
	}

	/* /sys may not have been mounted in the first place: EINVAL from
	 * umount2() means "not a mount point" and is not an error, but then
	 * there is nothing to remount either
	 */
	if (umount2("/sys", MNT_DETACH) < 0) {
		if (errno == EINVAL)
			return 0;

		SYSERROR("failed to unmount /sys");
		return -1;
	}

	if (mount("none", "/sys", "sysfs", 0, NULL) < 0) {
		SYSERROR("failed to remount /sys");
		return -1;
	}

	return 0;
}
250
74a3920a 251static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
e0732705
CS
252{
253 int last_cap = lxc_caps_last_cap();
254 int cap;
255
256 for (cap = 0; cap <= last_cap; cap++) {
257 if (ctx->capability_mask & (1LL << cap))
258 continue;
259
260 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
261 SYSERROR("failed to remove capability id %d", cap);
262 return -1;
263 }
264 }
265
266 return 0;
267}
905022f7 268
74a3920a 269static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep)
b3a39ba6 270{
799f96fd 271 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 272 char **extra_keep_store = NULL;
3d5e9f48
CS
273 int path_kept = 0;
274
275 if (extra_keep) {
276 size_t count, i;
277
278 for (count = 0; extra_keep[count]; count++);
279
280 extra_keep_store = calloc(count, sizeof(char *));
281 if (!extra_keep_store) {
282 SYSERROR("failed to allocate memory for storing current "
283 "environment variable values that will be kept");
284 return -1;
285 }
286 for (i = 0; i < count; i++) {
287 char *v = getenv(extra_keep[i]);
288 if (v) {
289 extra_keep_store[i] = strdup(v);
290 if (!extra_keep_store[i]) {
291 SYSERROR("failed to allocate memory for storing current "
292 "environment variable values that will be kept");
293 while (i > 0)
294 free(extra_keep_store[--i]);
295 free(extra_keep_store);
296 return -1;
297 }
298 if (strcmp(extra_keep[i], "PATH") == 0)
299 path_kept = 1;
300 }
301 /* calloc sets entire array to zero, so we don't
302 * need an else */
303 }
304 }
305
799f96fd 306 if (clearenv()) {
a9cab7e3 307 char **p;
799f96fd 308 SYSERROR("failed to clear environment");
a9cab7e3
CS
309 if (extra_keep_store) {
310 for (p = extra_keep_store; *p; p++)
311 free(*p);
312 free(extra_keep_store);
313 }
3d5e9f48
CS
314 return -1;
315 }
316
317 if (extra_keep_store) {
318 size_t i;
319 for (i = 0; extra_keep[i]; i++) {
320 if (extra_keep_store[i])
321 setenv(extra_keep[i], extra_keep_store[i], 1);
322 free(extra_keep_store[i]);
323 }
324 free(extra_keep_store);
325 }
326
327 /* always set a default path; shells and execlp tend
328 * to be fine without it, but there is a disturbing
329 * number of C programs out there that just assume
330 * that getenv("PATH") is never NULL and then die a
331 * painful segfault death. */
332 if (!path_kept) {
511a6936
SG
333#ifdef HAVE_CONFSTR
334 size_t n;
335 char *path_env;
336
3d5e9f48
CS
337 n = confstr(_CS_PATH, NULL, 0);
338 path_env = malloc(n);
339 if (path_env) {
340 confstr(_CS_PATH, path_env, n);
341 setenv("PATH", path_env, 1);
342 free(path_env);
343 }
344 /* don't error out, this is just an extra service */
511a6936
SG
345#else
346 setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
347#endif
799f96fd 348 }
b3a39ba6
DW
349 }
350
351 if (putenv("container=lxc")) {
352 SYSERROR("failed to set environment variable");
353 return -1;
354 }
355
3d5e9f48
CS
356 /* set extra environment variables */
357 if (extra_env) {
358 for (; *extra_env; extra_env++) {
359 /* duplicate the string, just to be on
360 * the safe side, because putenv does not
361 * do it for us */
362 char *p = strdup(*extra_env);
363 /* we just assume the user knows what they
364 * are doing, so we don't do any checks */
365 if (!p) {
366 SYSERROR("failed to allocate memory for additional environment "
367 "variables");
368 return -1;
369 }
370 putenv(p);
371 }
372 }
373
b3a39ba6
DW
374 return 0;
375}
376
/*
 * Extract the login shell from one line of `getent passwd` output
 * (name:passwd:uid:gid:gecos:dir:shell) if the line describes `uid`.
 *
 * The line is split in place at every ':' so that empty fields (an empty
 * gecos field, for instance) keep their position. `line` is modified.
 *
 * Returns a malloc()ed copy of the shell, or NULL if the line is malformed,
 * belongs to another uid, has an empty shell field, or memory ran out.
 */
static char *lxc_attach_parse_passwd_shell(char *line, uid_t uid)
{
	enum { PASSWD_FIELDS = 7, FIELD_UID = 2, FIELD_SHELL = 6 };
	char *fields[PASSWD_FIELDS];
	char *cursor = line;
	char *endptr = NULL;
	size_t len;
	long value;
	int nfields = 0;

	/* trim line on the right hand side */
	for (len = strlen(line); len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r'); --len)
		line[len - 1] = '\0';

	while (nfields < PASSWD_FIELDS) {
		fields[nfields++] = cursor;
		cursor = strchr(cursor, ':');
		if (!cursor)
			break;
		*cursor++ = '\0';
	}

	/* too few fields, or another ':' after the shell field */
	if (nfields != PASSWD_FIELDS || cursor)
		return NULL;

	value = strtol(fields[FIELD_UID], &endptr, 10);
	if (endptr == fields[FIELD_UID] || *endptr || value == LONG_MIN || value == LONG_MAX)
		return NULL;

	/* dummy sanity check: user id matches */
	if ((uid_t) value != uid)
		return NULL;

	if (!*fields[FIELD_SHELL])
		return NULL;

	return strdup(fields[FIELD_SHELL]);
}

/*
 * Determine the login shell of `uid` by running `getent passwd <uid>` in a
 * child process and parsing what it prints.
 *
 * Returns a malloc()ed string owned by the caller, or NULL if no shell
 * could be determined or anything at all went wrong.
 */
static char *lxc_attach_getpwshell(uid_t uid)
{
	pid_t pid;
	int pipes[2];
	int ret;
	int fd;
	char *result = NULL;

	/* we need to fork off a process that runs the
	 * getent program, and we need to capture its
	 * output, so we use a pipe for that purpose
	 */
	ret = pipe(pipes);
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(pipes[0]);
		close(pipes[1]);
		return NULL;
	}

	if (pid) {
		/* parent process */
		FILE *pipe_f;
		char *line = NULL;
		size_t line_bufsz = 0;
		int status;

		close(pipes[1]);

		pipe_f = fdopen(pipes[0], "r");
		if (pipe_f) {
			/* once a shell has been found keep reading until the
			 * pipe doesn't deliver any more data, but keep the
			 * first match
			 */
			while (getline(&line, &line_bufsz, pipe_f) != -1) {
				if (!result)
					result = lxc_attach_parse_passwd_shell(line, uid);
			}

			free(line);
			fclose(pipe_f);
		} else {
			/* no stream took ownership of the descriptor */
			close(pipes[0]);
		}

		/* the child has to be reaped on every path */
		do {
			ret = waitpid(pid, &status, 0);
		} while (ret < 0 && errno == EINTR);

		/* some sanity checks: if anything even hinted at going
		 * wrong: we can't be sure we have a valid result, so
		 * we assume we don't
		 */
		if (ret < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
			free(result);
			return NULL;
		}

		return result;
	} else {
		/* child process */
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		close(pipes[0]);

		/* we want to capture stdout */
		dup2(pipes[1], 1);
		close(pipes[1]);

		/* get rid of stdin/stderr, so we try to associate it
		 * with /dev/null
		 */
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			close(0);
			close(2);
		} else {
			dup2(fd, 0);
			dup2(fd, 2);
			close(fd);
		}

		/* finish argument list */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
		if (ret <= 0)
			exit(-1);

		/* try to run getent program */
		(void) execvp("getent", arguments);
		exit(-1);
	}
}
cb3e61fa 530
/*
 * Look up the real uid and gid of pid 1 in /proc/1/status.
 *
 * *init_uid and *init_gid are only overwritten for the values that were
 * actually found; if the file cannot be opened both are left untouched.
 */
static void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid)
{
	char status_path[MAXPATHLEN];
	FILE *status_f;
	char *buf = NULL;
	size_t buf_cap = 0;
	long parsed = -1;
	uid_t found_uid = (uid_t)-1;
	gid_t found_gid = (gid_t)-1;

	/* status file of the init process */
	snprintf(status_path, MAXPATHLEN, "/proc/%d/status", 1);

	status_f = fopen(status_path, "r");
	if (!status_f)
		return;

	while (getline(&buf, &buf_cap, status_f) != -1) {
		/* format is: real, effective, saved set user, fs
		 * we only care about the first (real) id
		 */
		if (sscanf(buf, "Uid: %ld", &parsed) > 0)
			found_uid = (uid_t) parsed;
		else if (sscanf(buf, "Gid: %ld", &parsed) > 0)
			found_gid = (gid_t) parsed;

		/* nothing left to look for once both are known */
		if (found_uid != (uid_t)-1 && found_gid != (gid_t)-1)
			break;
	}

	fclose(status_f);
	free(buf);

	/* only override arguments if we found something */
	if (found_uid != (uid_t)-1)
		*init_uid = found_uid;
	if (found_gid != (gid_t)-1)
		*init_gid = found_gid;

	/* TODO: we should also parse supplementary groups and use
	 * setgroups() to set them */
}
9c4693b8
CS
577
578struct attach_clone_payload {
579 int ipc_socket;
580 lxc_attach_options_t* options;
581 struct lxc_proc_context_info* init_ctx;
582 lxc_attach_exec_t exec_function;
583 void* exec_payload;
584};
585
586static int attach_child_main(void* data);
587
588/* help the optimizer along if it doesn't know that exit always exits */
589#define rexit(c) do { int __c = (c); exit(__c); return __c; } while(0)
590
591/* define default options if no options are supplied by the user */
592static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
593
594int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
595{
596 int ret, status;
f4364484 597 pid_t init_pid, pid, attached_pid, expected;
9c4693b8
CS
598 struct lxc_proc_context_info *init_ctx;
599 char* cwd;
600 char* new_cwd;
601 int ipc_sockets[2];
602
603 if (!options)
604 options = &attach_static_default_options;
605
606 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
607 if (init_pid < 0) {
608 ERROR("failed to get the init pid");
609 return -1;
610 }
611
612 init_ctx = lxc_proc_get_context_info(init_pid);
613 if (!init_ctx) {
614 ERROR("failed to get context of the init process, pid = %ld", (long)init_pid);
615 return -1;
616 }
617
618 cwd = getcwd(NULL, 0);
619
620 /* determine which namespaces the container was created with
621 * by asking lxc-start, if necessary
622 */
623 if (options->namespaces == -1) {
624 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
625 /* call failed */
626 if (options->namespaces == -1) {
627 ERROR("failed to automatically determine the "
628 "namespaces which the container unshared");
629 free(cwd);
fe4de9a6 630 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
631 return -1;
632 }
633 }
634
635 /* create a socket pair for IPC communication; set SOCK_CLOEXEC in order
636 * to make sure we don't irritate other threads that want to fork+exec away
637 *
638 * IMPORTANT: if the initial process is multithreaded and another call
639 * just fork()s away without exec'ing directly after, the socket fd will
640 * exist in the forked process from the other thread and any close() in
641 * our own child process will not really cause the socket to close properly,
642 * potentiall causing the parent to hang.
643 *
644 * For this reason, while IPC is still active, we have to use shutdown()
645 * if the child exits prematurely in order to signal that the socket
646 * is closed and cannot assume that the child exiting will automatically
647 * do that.
648 *
649 * IPC mechanism: (X is receiver)
650 * initial process intermediate attached
651 * X <--- send pid of
652 * attached proc,
653 * then exit
654 * send 0 ------------------------------------> X
655 * [do initialization]
656 * X <------------------------------------ send 1
657 * [add to cgroup, ...]
658 * send 2 ------------------------------------> X
659 * close socket close socket
660 * run program
661 */
662 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
663 if (ret < 0) {
664 SYSERROR("could not set up required IPC mechanism for attaching");
665 free(cwd);
fe4de9a6 666 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
667 return -1;
668 }
669
670 /* create intermediate subprocess, three reasons:
671 * 1. runs all pthread_atfork handlers and the
672 * child will no longer be threaded
673 * (we can't properly setns() in a threaded process)
674 * 2. we can't setns() in the child itself, since
675 * we want to make sure we are properly attached to
676 * the pidns
677 * 3. also, the initial thread has to put the attached
678 * process into the cgroup, which we can only do if
679 * we didn't already setns() (otherwise, user
680 * namespaces will hate us)
681 */
682 pid = fork();
683
684 if (pid < 0) {
685 SYSERROR("failed to create first subprocess");
686 free(cwd);
fe4de9a6 687 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
688 return -1;
689 }
690
691 if (pid) {
692 pid_t to_cleanup_pid = pid;
9c4693b8
CS
693
694 /* inital thread, we close the socket that is for the
695 * subprocesses
696 */
697 close(ipc_sockets[1]);
698 free(cwd);
699
f4364484
SG
700 /* attach to cgroup, if requested */
701 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
4fb3cba5 702 if (!cgroup_attach(name, lxcpath, pid))
f4364484 703 goto cleanup_error;
f4364484
SG
704 }
705
706 /* Let the child process know to go ahead */
707 status = 0;
708 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
709 if (ret <= 0) {
710 ERROR("error using IPC to notify attached process for initialization (0)");
711 goto cleanup_error;
712 }
713
9c4693b8
CS
714 /* get pid from intermediate process */
715 ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
716 if (ret <= 0) {
717 if (ret != 0)
718 ERROR("error using IPC to receive pid of attached process");
719 goto cleanup_error;
720 }
721
722 /* reap intermediate process */
723 ret = wait_for_pid(pid);
724 if (ret < 0)
725 goto cleanup_error;
726
727 /* we will always have to reap the grandchild now */
728 to_cleanup_pid = attached_pid;
729
730 /* tell attached process it may start initializing */
731 status = 0;
732 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
733 if (ret <= 0) {
734 ERROR("error using IPC to notify attached process for initialization (0)");
735 goto cleanup_error;
736 }
737
738 /* wait for the attached process to finish initializing */
739 expected = 1;
740 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
741 if (ret <= 0) {
742 if (ret != 0)
743 ERROR("error using IPC to receive notification from attached process (1)");
744 goto cleanup_error;
745 }
746
9c4693b8
CS
747 /* tell attached process we're done */
748 status = 2;
749 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
750 if (ret <= 0) {
751 ERROR("error using IPC to notify attached process for initialization (2)");
752 goto cleanup_error;
753 }
754
755 /* now shut down communication with child, we're done */
756 shutdown(ipc_sockets[0], SHUT_RDWR);
757 close(ipc_sockets[0]);
fe4de9a6 758 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
759
760 /* we're done, the child process should now execute whatever
761 * it is that the user requested. The parent can now track it
762 * with waitpid() or similar.
763 */
764
765 *attached_process = attached_pid;
766 return 0;
767
768 cleanup_error:
769 /* first shut down the socket, then wait for the pid,
770 * otherwise the pid we're waiting for may never exit
771 */
772 shutdown(ipc_sockets[0], SHUT_RDWR);
773 close(ipc_sockets[0]);
774 if (to_cleanup_pid)
775 (void) wait_for_pid(to_cleanup_pid);
fe4de9a6 776 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
777 return -1;
778 }
779
780 /* first subprocess begins here, we close the socket that is for the
781 * initial thread
782 */
783 close(ipc_sockets[0]);
784
f4364484
SG
785 /* Wait for the parent to have setup cgroups */
786 expected = 0;
787 status = -1;
788 ret = lxc_read_nointr_expect(ipc_sockets[1], &status, sizeof(status), &expected);
789 if (ret <= 0) {
790 ERROR("error communicating with child process");
791 shutdown(ipc_sockets[1], SHUT_RDWR);
792 rexit(-1);
793 }
794
9c4693b8
CS
795 /* attach now, create another subprocess later, since pid namespaces
796 * only really affect the children of the current process
797 */
798 ret = lxc_attach_to_ns(init_pid, options->namespaces);
799 if (ret < 0) {
800 ERROR("failed to enter the namespace");
801 shutdown(ipc_sockets[1], SHUT_RDWR);
802 rexit(-1);
803 }
804
805 /* attach succeeded, try to cwd */
806 if (options->initial_cwd)
807 new_cwd = options->initial_cwd;
808 else
809 new_cwd = cwd;
810 ret = chdir(new_cwd);
811 if (ret < 0)
812 WARN("could not change directory to '%s'", new_cwd);
813 free(cwd);
814
815 /* now create the real child process */
816 {
817 struct attach_clone_payload payload = {
818 .ipc_socket = ipc_sockets[1],
819 .options = options,
820 .init_ctx = init_ctx,
821 .exec_function = exec_function,
822 .exec_payload = exec_payload
823 };
824 /* We use clone_parent here to make this subprocess a direct child of
825 * the initial process. Then this intermediate process can exit and
826 * the parent can directly track the attached process.
827 */
828 pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
829 }
830
831 /* shouldn't happen, clone() should always return positive pid */
832 if (pid <= 0) {
833 SYSERROR("failed to create subprocess");
834 shutdown(ipc_sockets[1], SHUT_RDWR);
835 rexit(-1);
836 }
837
838 /* tell grandparent the pid of the pid of the newly created child */
839 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
840 if (ret != sizeof(pid)) {
841 /* if this really happens here, this is very unfortunate, since the
842 * parent will not know the pid of the attached process and will
843 * not be able to wait for it (and we won't either due to CLONE_PARENT)
844 * so the parent won't be able to reap it and the attached process
845 * will remain a zombie
846 */
847 ERROR("error using IPC to notify main process of pid of the attached process");
848 shutdown(ipc_sockets[1], SHUT_RDWR);
849 rexit(-1);
850 }
851
852 /* the rest is in the hands of the initial and the attached process */
853 rexit(0);
854}
855
74a3920a 856static int attach_child_main(void* data)
9c4693b8
CS
857{
858 struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
859 int ipc_socket = payload->ipc_socket;
860 lxc_attach_options_t* options = payload->options;
861 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
1a2e58cf 862#if HAVE_SYS_PERSONALITY_H
9c4693b8 863 long new_personality;
1a2e58cf 864#endif
9c4693b8
CS
865 int ret;
866 int status;
867 int expected;
868 long flags;
869 int fd;
870 uid_t new_uid;
871 gid_t new_gid;
872
873 /* wait for the initial thread to signal us that it's ready
874 * for us to start initializing
875 */
876 expected = 0;
877 status = -1;
878 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
879 if (ret <= 0) {
880 ERROR("error using IPC to receive notification from initial process (0)");
881 shutdown(ipc_socket, SHUT_RDWR);
882 rexit(-1);
883 }
884
9c4693b8
CS
885 /* A description of the purpose of this functionality is
886 * provided in the lxc-attach(1) manual page. We have to
887 * remount here and not in the parent process, otherwise
888 * /proc may not properly reflect the new pid namespace.
889 */
890 if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
891 ret = lxc_attach_remount_sys_proc();
892 if (ret < 0) {
893 shutdown(ipc_socket, SHUT_RDWR);
894 rexit(-1);
895 }
896 }
897
898 /* now perform additional attachments*/
899#if HAVE_SYS_PERSONALITY_H
900 if (options->personality < 0)
901 new_personality = init_ctx->personality;
902 else
903 new_personality = options->personality;
904
905 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
906 ret = personality(new_personality);
907 if (ret < 0) {
908 SYSERROR("could not ensure correct architecture");
909 shutdown(ipc_socket, SHUT_RDWR);
910 rexit(-1);
911 }
912 }
913#endif
914
915 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
916 ret = lxc_attach_drop_privs(init_ctx);
917 if (ret < 0) {
918 ERROR("could not drop privileges");
919 shutdown(ipc_socket, SHUT_RDWR);
920 rexit(-1);
921 }
922 }
923
924 /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */
925 ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
926 if (ret < 0) {
927 ERROR("could not set initial environment for attached process");
928 shutdown(ipc_socket, SHUT_RDWR);
929 rexit(-1);
930 }
931
932 /* set user / group id */
933 new_uid = 0;
934 new_gid = 0;
935 /* ignore errors, we will fall back to root in that case
936 * (/proc was not mounted etc.)
937 */
938 if (options->namespaces & CLONE_NEWUSER)
939 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
940
941 if (options->uid != (uid_t)-1)
942 new_uid = options->uid;
943 if (options->gid != (gid_t)-1)
944 new_gid = options->gid;
945
946 /* try to set the uid/gid combination */
c476bdce
SH
947 if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) {
948 if (setgid(new_gid) || setgroups(0, NULL)) {
949 SYSERROR("switching to container gid");
950 shutdown(ipc_socket, SHUT_RDWR);
951 rexit(-1);
952 }
9c4693b8
CS
953 }
954 if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
955 SYSERROR("switching to container uid");
956 shutdown(ipc_socket, SHUT_RDWR);
957 rexit(-1);
958 }
959
960 /* tell initial process it may now put us into the cgroups */
961 status = 1;
962 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
963 if (ret != sizeof(status)) {
964 ERROR("error using IPC to notify initial process for initialization (1)");
965 shutdown(ipc_socket, SHUT_RDWR);
966 rexit(-1);
967 }
968
969 /* wait for the initial thread to signal us that it has done
970 * everything for us when it comes to cgroups etc.
971 */
972 expected = 2;
973 status = -1;
974 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
975 if (ret <= 0) {
976 ERROR("error using IPC to receive final notification from initial process (2)");
977 shutdown(ipc_socket, SHUT_RDWR);
978 rexit(-1);
979 }
980
981 shutdown(ipc_socket, SHUT_RDWR);
982 close(ipc_socket);
72863294
DE
983
984 /* set new apparmor profile/selinux context */
985 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM)) {
986 int on_exec;
987
988 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
989 ret = lsm_process_label_set(init_ctx->lsm_label, 0, on_exec);
990 if (ret < 0) {
991 rexit(-1);
992 }
993 }
fe4de9a6 994 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
995
996 /* The following is done after the communication socket is
997 * shut down. That way, all errors that might (though
998 * unlikely) occur up until this point will have their messages
999 * printed to the original stderr (if logging is so configured)
1000 * and not the fd the user supplied, if any.
1001 */
1002
1003 /* fd handling for stdin, stdout and stderr;
1004 * ignore errors here, user may want to make sure
1005 * the fds are closed, for example */
1006 if (options->stdin_fd >= 0 && options->stdin_fd != 0)
1007 dup2(options->stdin_fd, 0);
1008 if (options->stdout_fd >= 0 && options->stdout_fd != 1)
1009 dup2(options->stdout_fd, 1);
1010 if (options->stderr_fd >= 0 && options->stderr_fd != 2)
1011 dup2(options->stderr_fd, 2);
1012
1013 /* close the old fds */
1014 if (options->stdin_fd > 2)
1015 close(options->stdin_fd);
1016 if (options->stdout_fd > 2)
1017 close(options->stdout_fd);
1018 if (options->stderr_fd > 2)
1019 close(options->stderr_fd);
1020
1021 /* try to remove CLOEXEC flag from stdin/stdout/stderr,
1022 * but also here, ignore errors */
1023 for (fd = 0; fd <= 2; fd++) {
1024 flags = fcntl(fd, F_GETFL);
1025 if (flags < 0)
1026 continue;
1027 if (flags & FD_CLOEXEC)
1028 fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC);
1029 }
1030
1031 /* we're done, so we can now do whatever the user intended us to do */
1032 rexit(payload->exec_function(payload->exec_payload));
1033}
1034
1035int lxc_attach_run_command(void* payload)
1036{
1037 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1038
1039 execvp(cmd->program, cmd->argv);
1040 SYSERROR("failed to exec '%s'", cmd->program);
1041 return -1;
1042}
1043
/*
 * Exec function for lxc_attach(): run the login shell of the current user,
 * falling back to /bin/sh. `payload` is ignored.
 *
 * Only returns (with -1) if no shell could be executed.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd *passwd;
	char *user_shell;
	/* set only when the shell string was allocated for us */
	char *allocated_shell = NULL;

	/* ignore payload parameter */
	(void)payload;

	uid = getuid();
	passwd = getpwuid(uid);

	/* a failing lookup probably happens because of incompatible nss
	 * implementations in host and container (remember, this
	 * code is still using the host's glibc but our mount
	 * namespace is in the container)
	 * we may try to get the information by spawning a
	 * [getent passwd uid] process and parsing the result
	 */
	if (!passwd) {
		allocated_shell = lxc_attach_getpwshell(uid);
		user_shell = allocated_shell;
	} else {
		user_shell = passwd->pw_shell;
	}

	/* the argument list of execlp() has to end with a null pointer of
	 * type char *, a bare NULL is not guaranteed to be one in a
	 * variadic call
	 */
	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* executed if either no passwd entry or execlp fails,
	 * we will fall back on /bin/sh as a default shell
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);

	/* log first so that errno still belongs to the failed exec */
	SYSERROR("failed to exec shell");
	free(allocated_shell);
	return -1;
}
1077}