]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
Don't define new_personality when building without personalities
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
e0732705
CS
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e0732705
CS
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#define _GNU_SOURCE
25#include <unistd.h>
26#include <stdio.h>
27#include <string.h>
28#include <stdlib.h>
29#include <errno.h>
30#include <fcntl.h>
31#include <sys/param.h>
32#include <sys/prctl.h>
7a0b0b56 33#include <sys/mount.h>
5ec27989 34#include <sys/socket.h>
1ba0013f 35#include <sys/syscall.h>
905022f7 36#include <sys/wait.h>
910bb4fa 37#include <linux/unistd.h>
905022f7 38#include <pwd.h>
e0732705
CS
39
40#if !HAVE_DECL_PR_CAPBSET_DROP
41#define PR_CAPBSET_DROP 24
42#endif
43
44#include "namespace.h"
45#include "log.h"
46#include "attach.h"
47#include "caps.h"
e0732705 48#include "config.h"
9958532b 49#include "apparmor.h"
6a44839f 50#include "utils.h"
9c4693b8
CS
51#include "commands.h"
52#include "cgroup.h"
53
54#if HAVE_SYS_PERSONALITY_H
55#include <sys/personality.h>
56#endif
e0732705 57
a3da2f3b
SG
58#ifndef SOCK_CLOEXEC
59# define SOCK_CLOEXEC 02000000
60#endif
61
e0732705
CS
62lxc_log_define(lxc_attach, lxc);
63
e0732705
CS
64struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
65{
66 struct lxc_proc_context_info *info = calloc(1, sizeof(*info));
67 FILE *proc_file;
68 char proc_fn[MAXPATHLEN];
460a1cf0 69 char *line = NULL;
e0732705 70 size_t line_bufsz = 0;
460a1cf0 71 int ret, found;
e0732705
CS
72
73 if (!info) {
74 SYSERROR("Could not allocate memory.");
75 return NULL;
76 }
77
78 /* read capabilities */
79 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/status", pid);
80
81 proc_file = fopen(proc_fn, "r");
82 if (!proc_file) {
83 SYSERROR("Could not open %s", proc_fn);
84 goto out_error;
85 }
86
87 found = 0;
88 while (getline(&line, &line_bufsz, proc_file) != -1) {
89 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
90 if (ret != EOF && ret > 0) {
91 found = 1;
92 break;
93 }
94 }
95
fa9ac567
SH
96 if (line)
97 free(line);
e0732705
CS
98 fclose(proc_file);
99
100 if (!found) {
101 SYSERROR("Could not read capability bounding set from %s", proc_fn);
102 errno = ENOENT;
103 goto out_error;
104 }
105
106 /* read personality */
107 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/personality", pid);
108
109 proc_file = fopen(proc_fn, "r");
110 if (!proc_file) {
111 SYSERROR("Could not open %s", proc_fn);
112 goto out_error;
113 }
114
115 ret = fscanf(proc_file, "%lx", &info->personality);
116 fclose(proc_file);
117
118 if (ret == EOF || ret == 0) {
119 SYSERROR("Could not read personality from %s", proc_fn);
120 errno = ENOENT;
121 goto out_error;
122 }
9958532b 123 info->aa_profile = aa_get_profile(pid);
e0732705 124
e0732705
CS
125 return info;
126
127out_error:
460a1cf0 128 free(info);
e0732705
CS
129 return NULL;
130}
131
fc763ab7 132int lxc_attach_to_ns(pid_t pid, int which)
99d50954
CS
133{
134 char path[MAXPATHLEN];
fc763ab7
CS
135 /* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
136 * the file for user namepsaces in /proc/$pid/ns will be called
137 * 'user' once the kernel supports it
138 */
139 static char *ns[] = { "mnt", "pid", "uts", "ipc", "user", "net" };
140 static int flags[] = {
141 CLONE_NEWNS, CLONE_NEWPID, CLONE_NEWUTS, CLONE_NEWIPC,
142 CLONE_NEWUSER, CLONE_NEWNET
143 };
144 static const int size = sizeof(ns) / sizeof(char *);
99d50954 145 int fd[size];
fc763ab7
CS
146 int i, j, saved_errno;
147
99d50954
CS
148
149 snprintf(path, MAXPATHLEN, "/proc/%d/ns", pid);
150 if (access(path, X_OK)) {
151 ERROR("Does this kernel version support 'attach' ?");
152 return -1;
153 }
154
155 for (i = 0; i < size; i++) {
fc763ab7
CS
156 /* ignore if we are not supposed to attach to that
157 * namespace
158 */
159 if (which != -1 && !(which & flags[i])) {
160 fd[i] = -1;
161 continue;
162 }
163
99d50954 164 snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]);
9c4693b8 165 fd[i] = open(path, O_RDONLY | O_CLOEXEC);
99d50954 166 if (fd[i] < 0) {
fc763ab7
CS
167 saved_errno = errno;
168
169 /* close all already opened file descriptors before
170 * we return an error, so we don't leak them
171 */
172 for (j = 0; j < i; j++)
173 close(fd[j]);
174
175 errno = saved_errno;
99d50954
CS
176 SYSERROR("failed to open '%s'", path);
177 return -1;
178 }
179 }
180
181 for (i = 0; i < size; i++) {
fc763ab7
CS
182 if (fd[i] >= 0 && setns(fd[i], 0) != 0) {
183 saved_errno = errno;
184
185 for (j = i; j < size; j++)
186 close(fd[j]);
187
188 errno = saved_errno;
99d50954
CS
189 SYSERROR("failed to set namespace '%s'", ns[i]);
190 return -1;
191 }
192
193 close(fd[i]);
194 }
195
196 return 0;
197}
198
7a0b0b56
CS
199int lxc_attach_remount_sys_proc()
200{
201 int ret;
202
203 ret = unshare(CLONE_NEWNS);
204 if (ret < 0) {
205 SYSERROR("failed to unshare mount namespace");
206 return -1;
207 }
208
209 /* assume /proc is always mounted, so remount it */
210 ret = umount2("/proc", MNT_DETACH);
211 if (ret < 0) {
212 SYSERROR("failed to unmount /proc");
213 return -1;
214 }
215
216 ret = mount("none", "/proc", "proc", 0, NULL);
217 if (ret < 0) {
218 SYSERROR("failed to remount /proc");
219 return -1;
220 }
221
222 /* try to umount /sys - if it's not a mount point,
223 * we'll get EINVAL, then we ignore it because it
224 * may not have been mounted in the first place
225 */
226 ret = umount2("/sys", MNT_DETACH);
227 if (ret < 0 && errno != EINVAL) {
228 SYSERROR("failed to unmount /sys");
229 return -1;
230 } else if (ret == 0) {
231 /* remount it */
232 ret = mount("none", "/sys", "sysfs", 0, NULL);
233 if (ret < 0) {
234 SYSERROR("failed to remount /sys");
235 return -1;
236 }
237 }
238
239 return 0;
240}
241
e0732705
CS
242int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
243{
244 int last_cap = lxc_caps_last_cap();
245 int cap;
246
247 for (cap = 0; cap <= last_cap; cap++) {
248 if (ctx->capability_mask & (1LL << cap))
249 continue;
250
251 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
252 SYSERROR("failed to remove capability id %d", cap);
253 return -1;
254 }
255 }
256
257 return 0;
258}
905022f7 259
799f96fd 260int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep)
b3a39ba6 261{
799f96fd 262 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48
CS
263 char **extra_keep_store = NULL;
264 char *path_env;
265 size_t n;
266 int path_kept = 0;
267
268 if (extra_keep) {
269 size_t count, i;
270
271 for (count = 0; extra_keep[count]; count++);
272
273 extra_keep_store = calloc(count, sizeof(char *));
274 if (!extra_keep_store) {
275 SYSERROR("failed to allocate memory for storing current "
276 "environment variable values that will be kept");
277 return -1;
278 }
279 for (i = 0; i < count; i++) {
280 char *v = getenv(extra_keep[i]);
281 if (v) {
282 extra_keep_store[i] = strdup(v);
283 if (!extra_keep_store[i]) {
284 SYSERROR("failed to allocate memory for storing current "
285 "environment variable values that will be kept");
286 while (i > 0)
287 free(extra_keep_store[--i]);
288 free(extra_keep_store);
289 return -1;
290 }
291 if (strcmp(extra_keep[i], "PATH") == 0)
292 path_kept = 1;
293 }
294 /* calloc sets entire array to zero, so we don't
295 * need an else */
296 }
297 }
298
799f96fd
CS
299 if (clearenv()) {
300 SYSERROR("failed to clear environment");
3d5e9f48
CS
301 return -1;
302 }
303
304 if (extra_keep_store) {
305 size_t i;
306 for (i = 0; extra_keep[i]; i++) {
307 if (extra_keep_store[i])
308 setenv(extra_keep[i], extra_keep_store[i], 1);
309 free(extra_keep_store[i]);
310 }
311 free(extra_keep_store);
312 }
313
314 /* always set a default path; shells and execlp tend
315 * to be fine without it, but there is a disturbing
316 * number of C programs out there that just assume
317 * that getenv("PATH") is never NULL and then die a
318 * painful segfault death. */
319 if (!path_kept) {
320 n = confstr(_CS_PATH, NULL, 0);
321 path_env = malloc(n);
322 if (path_env) {
323 confstr(_CS_PATH, path_env, n);
324 setenv("PATH", path_env, 1);
325 free(path_env);
326 }
327 /* don't error out, this is just an extra service */
799f96fd 328 }
b3a39ba6
DW
329 }
330
331 if (putenv("container=lxc")) {
332 SYSERROR("failed to set environment variable");
333 return -1;
334 }
335
3d5e9f48
CS
336 /* set extra environment variables */
337 if (extra_env) {
338 for (; *extra_env; extra_env++) {
339 /* duplicate the string, just to be on
340 * the safe side, because putenv does not
341 * do it for us */
342 char *p = strdup(*extra_env);
343 /* we just assume the user knows what they
344 * are doing, so we don't do any checks */
345 if (!p) {
346 SYSERROR("failed to allocate memory for additional environment "
347 "variables");
348 return -1;
349 }
350 putenv(p);
351 }
352 }
353
b3a39ba6
DW
354 return 0;
355}
356
905022f7
CS
/*
 * Determine the login shell of user 'uid' by running "getent passwd <uid>"
 * in a child process and parsing the passwd-format line it prints.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL if
 * getent could not be run, did not exit with status 0, or printed no line
 * that parses as a seven-field entry for 'uid'.
 *
 * NOTE(review): fields are split with strtok_r(), which treats a run of
 * ':' as a single separator, so an entry containing an empty field (for
 * example an empty gecos) is not recognised.
 */
char *lxc_attach_getpwshell(uid_t uid)
{
	pid_t pid;
	int pipes[2];
	int ret;
	int fd;
	char *result = NULL;

	/* we need to fork off a process that runs the
	 * getent program, and we need to capture its
	 * output, so we use a pipe for that purpose
	 */
	ret = pipe(pipes);
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(pipes[0]);
		close(pipes[1]);
		return NULL;
	}

	if (pid) {
		/* parent process */
		FILE *pipe_f;
		char *line = NULL;
		size_t line_bufsz = 0;
		int found = 0;
		int status;

		close(pipes[1]);

		pipe_f = fdopen(pipes[0], "r");
		if (!pipe_f) {
			/* no stream to read from: drop the read end so the
			 * child cannot block on a full pipe, then fall through
			 * to reap it below
			 */
			close(pipes[0]);
		}

		while (pipe_f && getline(&line, &line_bufsz, pipe_f) != -1) {
			char *token;
			char *saveptr = NULL;
			long value;
			char *endptr = NULL;
			int i;

			/* if we already found something, just continue
			 * to read until the pipe doesn't deliver any more
			 * data, but don't modify the existing data
			 * structure
			 */
			if (found)
				continue;

			/* trim line on the right hand side */
			for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
				line[i - 1] = '\0';

			/* split into tokens: first user name */
			token = strtok_r(line, ":", &saveptr);
			if (!token)
				continue;
			/* next: dummy password field */
			token = strtok_r(NULL, ":", &saveptr);
			if (!token)
				continue;
			/* next: user id */
			token = strtok_r(NULL, ":", &saveptr);
			value = token ? strtol(token, &endptr, 10) : 0;
			if (!token || !endptr || *endptr || value == LONG_MIN || value == LONG_MAX)
				continue;
			/* dummy sanity check: user id matches */
			if ((uid_t) value != uid)
				continue;
			/* skip fields: gid, gecos, dir, go to next field 'shell' */
			for (i = 0; i < 4; i++) {
				token = strtok_r(NULL, ":", &saveptr);
				if (!token)
					break;
			}
			if (!token)
				continue;

			/* replace a candidate left over from a rejected line */
			free(result);
			result = strdup(token);
			if (!result)
				continue;

			/* sanity check that there are no fields after that */
			token = strtok_r(NULL, ":", &saveptr);
			if (token)
				continue;

			found = 1;
		}

		free(line);
		if (pipe_f)
			fclose(pipe_f);

	again:
		if (waitpid(pid, &status, 0) < 0) {
			if (errno == EINTR)
				goto again;
			goto no_result;
		}

		/* some sanity checks: if anything even hinted at going
		 * wrong: we can't be sure we have a valid result, so
		 * we assume we don't
		 */
		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 || !found)
			goto no_result;

		return result;

	no_result:
		/* a candidate may have been stored before the failure showed */
		free(result);
		return NULL;
	} else {
		/* child process */
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		close(pipes[0]);

		/* we want to capture stdout */
		dup2(pipes[1], 1);
		close(pipes[1]);

		/* get rid of stdin/stderr, so we try to associate it
		 * with /dev/null
		 */
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			close(0);
			close(2);
		} else {
			dup2(fd, 0);
			dup2(fd, 2);
			close(fd);
		}

		/* finish argument list */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
		if (ret <= 0)
			_exit(-1);

		/* try to run getent program */
		(void) execvp("getent", arguments);

		/* _exit() rather than exit(): this is a forked copy of the
		 * caller, so its atexit handlers must not run here and its
		 * buffered stdio must not be flushed into the pipe
		 */
		_exit(-1);
	}
}
cb3e61fa
CS
510
/*
 * Look up the real uid and gid of pid 1 as seen by the calling process,
 * using the "Uid:" and "Gid:" lines of /proc/1/status.
 *
 * *init_uid and *init_gid are only written for the values that were
 * actually found; if the file cannot be opened both stay untouched.
 */
void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid)
{
	const uid_t no_uid = (uid_t)-1;
	const gid_t no_gid = (gid_t)-1;
	uid_t found_uid = no_uid;
	gid_t found_gid = no_gid;
	char status_path[MAXPATHLEN];
	char *buf = NULL;
	size_t buf_cap = 0;
	long parsed = -1;
	FILE *status;

	snprintf(status_path, MAXPATHLEN, "/proc/%d/status", 1);

	status = fopen(status_path, "r");
	if (!status)
		return;

	while (getline(&buf, &buf_cap, status) != -1) {
		/* each line lists: real, effective, saved set, fs id;
		 * only the first (real) value is read
		 */
		if (sscanf(buf, "Uid: %ld", &parsed) > 0)
			found_uid = (uid_t) parsed;
		else if (sscanf(buf, "Gid: %ld", &parsed) > 0)
			found_gid = (gid_t) parsed;

		/* nothing more to look for once both are known */
		if (found_uid != no_uid && found_gid != no_gid)
			break;
	}

	fclose(status);
	free(buf);

	/* only override arguments if we found something */
	if (found_uid != no_uid)
		*init_uid = found_uid;
	if (found_gid != no_gid)
		*init_gid = found_gid;

	/* TODO: we should also parse supplementary groups and use
	 * setgroups() to set them */
}
9c4693b8
CS
557
558struct attach_clone_payload {
559 int ipc_socket;
560 lxc_attach_options_t* options;
561 struct lxc_proc_context_info* init_ctx;
562 lxc_attach_exec_t exec_function;
563 void* exec_payload;
564};
565
566static int attach_child_main(void* data);
567
568/* help the optimizer along if it doesn't know that exit always exits */
569#define rexit(c) do { int __c = (c); exit(__c); return __c; } while(0)
570
571/* define default options if no options are supplied by the user */
572static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
573
574int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
575{
576 int ret, status;
577 pid_t init_pid, pid, attached_pid;
578 struct lxc_proc_context_info *init_ctx;
579 char* cwd;
580 char* new_cwd;
581 int ipc_sockets[2];
582
583 if (!options)
584 options = &attach_static_default_options;
585
586 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
587 if (init_pid < 0) {
588 ERROR("failed to get the init pid");
589 return -1;
590 }
591
592 init_ctx = lxc_proc_get_context_info(init_pid);
593 if (!init_ctx) {
594 ERROR("failed to get context of the init process, pid = %ld", (long)init_pid);
595 return -1;
596 }
597
598 cwd = getcwd(NULL, 0);
599
600 /* determine which namespaces the container was created with
601 * by asking lxc-start, if necessary
602 */
603 if (options->namespaces == -1) {
604 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
605 /* call failed */
606 if (options->namespaces == -1) {
607 ERROR("failed to automatically determine the "
608 "namespaces which the container unshared");
609 free(cwd);
610 free(init_ctx->aa_profile);
611 free(init_ctx);
612 return -1;
613 }
614 }
615
616 /* create a socket pair for IPC communication; set SOCK_CLOEXEC in order
617 * to make sure we don't irritate other threads that want to fork+exec away
618 *
619 * IMPORTANT: if the initial process is multithreaded and another call
620 * just fork()s away without exec'ing directly after, the socket fd will
621 * exist in the forked process from the other thread and any close() in
622 * our own child process will not really cause the socket to close properly,
623 * potentiall causing the parent to hang.
624 *
625 * For this reason, while IPC is still active, we have to use shutdown()
626 * if the child exits prematurely in order to signal that the socket
627 * is closed and cannot assume that the child exiting will automatically
628 * do that.
629 *
630 * IPC mechanism: (X is receiver)
631 * initial process intermediate attached
632 * X <--- send pid of
633 * attached proc,
634 * then exit
635 * send 0 ------------------------------------> X
636 * [do initialization]
637 * X <------------------------------------ send 1
638 * [add to cgroup, ...]
639 * send 2 ------------------------------------> X
640 * close socket close socket
641 * run program
642 */
643 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
644 if (ret < 0) {
645 SYSERROR("could not set up required IPC mechanism for attaching");
646 free(cwd);
647 free(init_ctx->aa_profile);
648 free(init_ctx);
649 return -1;
650 }
651
652 /* create intermediate subprocess, three reasons:
653 * 1. runs all pthread_atfork handlers and the
654 * child will no longer be threaded
655 * (we can't properly setns() in a threaded process)
656 * 2. we can't setns() in the child itself, since
657 * we want to make sure we are properly attached to
658 * the pidns
659 * 3. also, the initial thread has to put the attached
660 * process into the cgroup, which we can only do if
661 * we didn't already setns() (otherwise, user
662 * namespaces will hate us)
663 */
664 pid = fork();
665
666 if (pid < 0) {
667 SYSERROR("failed to create first subprocess");
668 free(cwd);
669 free(init_ctx->aa_profile);
670 free(init_ctx);
671 return -1;
672 }
673
674 if (pid) {
675 pid_t to_cleanup_pid = pid;
676 int expected = 0;
677
678 /* inital thread, we close the socket that is for the
679 * subprocesses
680 */
681 close(ipc_sockets[1]);
682 free(cwd);
683
684 /* get pid from intermediate process */
685 ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
686 if (ret <= 0) {
687 if (ret != 0)
688 ERROR("error using IPC to receive pid of attached process");
689 goto cleanup_error;
690 }
691
692 /* reap intermediate process */
693 ret = wait_for_pid(pid);
694 if (ret < 0)
695 goto cleanup_error;
696
697 /* we will always have to reap the grandchild now */
698 to_cleanup_pid = attached_pid;
699
700 /* tell attached process it may start initializing */
701 status = 0;
702 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
703 if (ret <= 0) {
704 ERROR("error using IPC to notify attached process for initialization (0)");
705 goto cleanup_error;
706 }
707
708 /* wait for the attached process to finish initializing */
709 expected = 1;
710 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
711 if (ret <= 0) {
712 if (ret != 0)
713 ERROR("error using IPC to receive notification from attached process (1)");
714 goto cleanup_error;
715 }
716
717 /* attach to cgroup, if requested */
718 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
719 ret = lxc_cgroup_attach(attached_pid, name, lxcpath);
720 if (ret < 0) {
721 ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
722 goto cleanup_error;
723 }
724 }
725
726 /* tell attached process we're done */
727 status = 2;
728 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
729 if (ret <= 0) {
730 ERROR("error using IPC to notify attached process for initialization (2)");
731 goto cleanup_error;
732 }
733
734 /* now shut down communication with child, we're done */
735 shutdown(ipc_sockets[0], SHUT_RDWR);
736 close(ipc_sockets[0]);
737 free(init_ctx->aa_profile);
738 free(init_ctx);
739
740 /* we're done, the child process should now execute whatever
741 * it is that the user requested. The parent can now track it
742 * with waitpid() or similar.
743 */
744
745 *attached_process = attached_pid;
746 return 0;
747
748 cleanup_error:
749 /* first shut down the socket, then wait for the pid,
750 * otherwise the pid we're waiting for may never exit
751 */
752 shutdown(ipc_sockets[0], SHUT_RDWR);
753 close(ipc_sockets[0]);
754 if (to_cleanup_pid)
755 (void) wait_for_pid(to_cleanup_pid);
756 free(init_ctx->aa_profile);
757 free(init_ctx);
758 return -1;
759 }
760
761 /* first subprocess begins here, we close the socket that is for the
762 * initial thread
763 */
764 close(ipc_sockets[0]);
765
766 /* attach now, create another subprocess later, since pid namespaces
767 * only really affect the children of the current process
768 */
769 ret = lxc_attach_to_ns(init_pid, options->namespaces);
770 if (ret < 0) {
771 ERROR("failed to enter the namespace");
772 shutdown(ipc_sockets[1], SHUT_RDWR);
773 rexit(-1);
774 }
775
776 /* attach succeeded, try to cwd */
777 if (options->initial_cwd)
778 new_cwd = options->initial_cwd;
779 else
780 new_cwd = cwd;
781 ret = chdir(new_cwd);
782 if (ret < 0)
783 WARN("could not change directory to '%s'", new_cwd);
784 free(cwd);
785
786 /* now create the real child process */
787 {
788 struct attach_clone_payload payload = {
789 .ipc_socket = ipc_sockets[1],
790 .options = options,
791 .init_ctx = init_ctx,
792 .exec_function = exec_function,
793 .exec_payload = exec_payload
794 };
795 /* We use clone_parent here to make this subprocess a direct child of
796 * the initial process. Then this intermediate process can exit and
797 * the parent can directly track the attached process.
798 */
799 pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
800 }
801
802 /* shouldn't happen, clone() should always return positive pid */
803 if (pid <= 0) {
804 SYSERROR("failed to create subprocess");
805 shutdown(ipc_sockets[1], SHUT_RDWR);
806 rexit(-1);
807 }
808
809 /* tell grandparent the pid of the pid of the newly created child */
810 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
811 if (ret != sizeof(pid)) {
812 /* if this really happens here, this is very unfortunate, since the
813 * parent will not know the pid of the attached process and will
814 * not be able to wait for it (and we won't either due to CLONE_PARENT)
815 * so the parent won't be able to reap it and the attached process
816 * will remain a zombie
817 */
818 ERROR("error using IPC to notify main process of pid of the attached process");
819 shutdown(ipc_sockets[1], SHUT_RDWR);
820 rexit(-1);
821 }
822
823 /* the rest is in the hands of the initial and the attached process */
824 rexit(0);
825}
826
827int attach_child_main(void* data)
828{
829 struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
830 int ipc_socket = payload->ipc_socket;
831 lxc_attach_options_t* options = payload->options;
832 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
1a2e58cf 833#if HAVE_SYS_PERSONALITY_H
9c4693b8 834 long new_personality;
1a2e58cf 835#endif
9c4693b8
CS
836 int ret;
837 int status;
838 int expected;
839 long flags;
840 int fd;
841 uid_t new_uid;
842 gid_t new_gid;
843
844 /* wait for the initial thread to signal us that it's ready
845 * for us to start initializing
846 */
847 expected = 0;
848 status = -1;
849 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
850 if (ret <= 0) {
851 ERROR("error using IPC to receive notification from initial process (0)");
852 shutdown(ipc_socket, SHUT_RDWR);
853 rexit(-1);
854 }
855
856 /* load apparmor profile */
857 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_APPARMOR)) {
858 ret = attach_apparmor(init_ctx->aa_profile);
859 if (ret < 0) {
860 shutdown(ipc_socket, SHUT_RDWR);
861 rexit(-1);
862 }
863 }
864
865 /* A description of the purpose of this functionality is
866 * provided in the lxc-attach(1) manual page. We have to
867 * remount here and not in the parent process, otherwise
868 * /proc may not properly reflect the new pid namespace.
869 */
870 if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
871 ret = lxc_attach_remount_sys_proc();
872 if (ret < 0) {
873 shutdown(ipc_socket, SHUT_RDWR);
874 rexit(-1);
875 }
876 }
877
878 /* now perform additional attachments*/
879#if HAVE_SYS_PERSONALITY_H
880 if (options->personality < 0)
881 new_personality = init_ctx->personality;
882 else
883 new_personality = options->personality;
884
885 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
886 ret = personality(new_personality);
887 if (ret < 0) {
888 SYSERROR("could not ensure correct architecture");
889 shutdown(ipc_socket, SHUT_RDWR);
890 rexit(-1);
891 }
892 }
893#endif
894
895 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
896 ret = lxc_attach_drop_privs(init_ctx);
897 if (ret < 0) {
898 ERROR("could not drop privileges");
899 shutdown(ipc_socket, SHUT_RDWR);
900 rexit(-1);
901 }
902 }
903
904 /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */
905 ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
906 if (ret < 0) {
907 ERROR("could not set initial environment for attached process");
908 shutdown(ipc_socket, SHUT_RDWR);
909 rexit(-1);
910 }
911
912 /* set user / group id */
913 new_uid = 0;
914 new_gid = 0;
915 /* ignore errors, we will fall back to root in that case
916 * (/proc was not mounted etc.)
917 */
918 if (options->namespaces & CLONE_NEWUSER)
919 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
920
921 if (options->uid != (uid_t)-1)
922 new_uid = options->uid;
923 if (options->gid != (gid_t)-1)
924 new_gid = options->gid;
925
926 /* try to set the uid/gid combination */
927 if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER) && setgid(new_gid)) {
928 SYSERROR("switching to container gid");
929 shutdown(ipc_socket, SHUT_RDWR);
930 rexit(-1);
931 }
932 if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
933 SYSERROR("switching to container uid");
934 shutdown(ipc_socket, SHUT_RDWR);
935 rexit(-1);
936 }
937
938 /* tell initial process it may now put us into the cgroups */
939 status = 1;
940 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
941 if (ret != sizeof(status)) {
942 ERROR("error using IPC to notify initial process for initialization (1)");
943 shutdown(ipc_socket, SHUT_RDWR);
944 rexit(-1);
945 }
946
947 /* wait for the initial thread to signal us that it has done
948 * everything for us when it comes to cgroups etc.
949 */
950 expected = 2;
951 status = -1;
952 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
953 if (ret <= 0) {
954 ERROR("error using IPC to receive final notification from initial process (2)");
955 shutdown(ipc_socket, SHUT_RDWR);
956 rexit(-1);
957 }
958
959 shutdown(ipc_socket, SHUT_RDWR);
960 close(ipc_socket);
961 free(init_ctx->aa_profile);
962 free(init_ctx);
963
964 /* The following is done after the communication socket is
965 * shut down. That way, all errors that might (though
966 * unlikely) occur up until this point will have their messages
967 * printed to the original stderr (if logging is so configured)
968 * and not the fd the user supplied, if any.
969 */
970
971 /* fd handling for stdin, stdout and stderr;
972 * ignore errors here, user may want to make sure
973 * the fds are closed, for example */
974 if (options->stdin_fd >= 0 && options->stdin_fd != 0)
975 dup2(options->stdin_fd, 0);
976 if (options->stdout_fd >= 0 && options->stdout_fd != 1)
977 dup2(options->stdout_fd, 1);
978 if (options->stderr_fd >= 0 && options->stderr_fd != 2)
979 dup2(options->stderr_fd, 2);
980
981 /* close the old fds */
982 if (options->stdin_fd > 2)
983 close(options->stdin_fd);
984 if (options->stdout_fd > 2)
985 close(options->stdout_fd);
986 if (options->stderr_fd > 2)
987 close(options->stderr_fd);
988
989 /* try to remove CLOEXEC flag from stdin/stdout/stderr,
990 * but also here, ignore errors */
991 for (fd = 0; fd <= 2; fd++) {
992 flags = fcntl(fd, F_GETFL);
993 if (flags < 0)
994 continue;
995 if (flags & FD_CLOEXEC)
996 fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC);
997 }
998
999 /* we're done, so we can now do whatever the user intended us to do */
1000 rexit(payload->exec_function(payload->exec_payload));
1001}
1002
1003int lxc_attach_run_command(void* payload)
1004{
1005 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1006
1007 execvp(cmd->program, cmd->argv);
1008 SYSERROR("failed to exec '%s'", cmd->program);
1009 return -1;
1010}
1011
/*
 * Exec function for lxc_attach(): start the login shell of the current
 * user, falling back to /bin/sh if that shell is unknown or cannot be
 * executed. 'payload' is ignored.
 *
 * Only returns if no shell could be executed, in which case the result
 * is -1.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd *passwd;
	char *user_shell;
	char *getent_shell = NULL;

	/* ignore payload parameter */
	(void)payload;

	uid = getuid();
	passwd = getpwuid(uid);

	/* this probably happens because of incompatible nss
	 * implementations in host and container (remember, this
	 * code is still using the host's glibc but our mount
	 * namespace is in the container)
	 * we may try to get the information by spawning a
	 * [getent passwd uid] process and parsing the result
	 */
	if (!passwd) {
		/* heap string owned by us, unlike passwd->pw_shell */
		getent_shell = lxc_attach_getpwshell(uid);
		user_shell = getent_shell;
	} else {
		user_shell = passwd->pw_shell;
	}

	/* the argument list of a variadic exec call has to end in a null
	 * pointer of type char *, hence the cast */
	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* still here: the shell could not be executed, so the string from
	 * getent is of no further use */
	free(getent_shell);

	/* executed if either no passwd entry or execlp fails,
	 * we will fall back on /bin/sh as a default shell
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);
	SYSERROR("failed to exec shell");
	return -1;
}