]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/attach.c
refactor AppArmor into LSM backend, add SELinux support
[mirror_lxc.git] / src / lxc / attach.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <sys/param.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34 #include <sys/socket.h>
35 #include <sys/syscall.h>
36 #include <sys/wait.h>
37 #include <linux/unistd.h>
38 #include <pwd.h>
39
40 #if !HAVE_DECL_PR_CAPBSET_DROP
41 #define PR_CAPBSET_DROP 24
42 #endif
43
44 #include "namespace.h"
45 #include "log.h"
46 #include "attach.h"
47 #include "caps.h"
48 #include "config.h"
49 #include "utils.h"
50 #include "commands.h"
51 #include "cgroup.h"
52 #include "lxclock.h"
53 #include "lsm/lsm.h"
54
55 #if HAVE_SYS_PERSONALITY_H
56 #include <sys/personality.h>
57 #endif
58
59 #ifndef SOCK_CLOEXEC
60 # define SOCK_CLOEXEC 02000000
61 #endif
62
63 lxc_log_define(lxc_attach, lxc);
64
65 struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
66 {
67 struct lxc_proc_context_info *info = calloc(1, sizeof(*info));
68 FILE *proc_file;
69 char proc_fn[MAXPATHLEN];
70 char *line = NULL;
71 size_t line_bufsz = 0;
72 int ret, found;
73
74 if (!info) {
75 SYSERROR("Could not allocate memory.");
76 return NULL;
77 }
78
79 /* read capabilities */
80 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/status", pid);
81
82 process_lock();
83 proc_file = fopen(proc_fn, "r");
84 process_unlock();
85 if (!proc_file) {
86 SYSERROR("Could not open %s", proc_fn);
87 goto out_error;
88 }
89
90 found = 0;
91 while (getline(&line, &line_bufsz, proc_file) != -1) {
92 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
93 if (ret != EOF && ret > 0) {
94 found = 1;
95 break;
96 }
97 }
98
99 if (line)
100 free(line);
101 process_lock();
102 fclose(proc_file);
103 process_unlock();
104
105 if (!found) {
106 SYSERROR("Could not read capability bounding set from %s", proc_fn);
107 errno = ENOENT;
108 goto out_error;
109 }
110
111 /* read personality */
112 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/personality", pid);
113
114 process_lock();
115 proc_file = fopen(proc_fn, "r");
116 process_unlock();
117 if (!proc_file) {
118 SYSERROR("Could not open %s", proc_fn);
119 goto out_error;
120 }
121
122 ret = fscanf(proc_file, "%lx", &info->personality);
123 process_lock();
124 fclose(proc_file);
125 process_unlock();
126
127 if (ret == EOF || ret == 0) {
128 SYSERROR("Could not read personality from %s", proc_fn);
129 errno = ENOENT;
130 goto out_error;
131 }
132 info->lsm_label = lsm_process_label_get(pid);
133
134 return info;
135
136 out_error:
137 free(info);
138 return NULL;
139 }
140
141 static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
142 {
143 if (ctx->lsm_label)
144 free(ctx->lsm_label);
145 free(ctx);
146 }
147
/*
 * Move the calling process into the namespaces of process 'pid'.
 *
 * 'which' is a mask of CLONE_NEW* flags selecting the namespaces to
 * enter; -1 selects all of them. All namespace files are opened first and
 * only then entered, so a missing file fails before anything is changed.
 *
 * Returns 0 on success, -1 on failure (errno is that of the failing
 * open()/setns()).
 */
int lxc_attach_to_ns(pid_t pid, int which)
{
	/* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
	 * the file for user namespaces in /proc/$pid/ns will be called
	 * 'user' once the kernel supports it
	 */
	static const char *const ns[] = { "mnt", "pid", "uts", "ipc", "user", "net" };
	static const int flags[] = {
		CLONE_NEWNS, CLONE_NEWPID, CLONE_NEWUTS, CLONE_NEWIPC,
		CLONE_NEWUSER, CLONE_NEWNET
	};
	/* compile-time constant so fd[] is a plain array rather than a VLA */
	enum { NS_COUNT = sizeof(ns) / sizeof(ns[0]) };
	char path[MAXPATHLEN];
	int fd[NS_COUNT];
	int i, j, saved_errno;

	snprintf(path, MAXPATHLEN, "/proc/%d/ns", pid);
	if (access(path, X_OK)) {
		ERROR("Does this kernel version support 'attach' ?");
		return -1;
	}

	for (i = 0; i < NS_COUNT; i++) {
		/* ignore if we are not supposed to attach to that
		 * namespace
		 */
		if (which != -1 && !(which & flags[i])) {
			fd[i] = -1;
			continue;
		}

		snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]);
		process_lock();
		fd[i] = open(path, O_RDONLY | O_CLOEXEC);
		process_unlock();
		if (fd[i] < 0) {
			saved_errno = errno;

			/* close all already opened file descriptors before
			 * we return an error, so we don't leak them; slots
			 * of skipped namespaces hold -1 and are left alone
			 */
			process_lock();
			for (j = 0; j < i; j++) {
				if (fd[j] >= 0)
					close(fd[j]);
			}
			process_unlock();

			errno = saved_errno;
			SYSERROR("failed to open '%s'", path);
			return -1;
		}
	}

	for (i = 0; i < NS_COUNT; i++) {
		/* namespace was not requested */
		if (fd[i] < 0)
			continue;

		if (setns(fd[i], 0) != 0) {
			saved_errno = errno;

			/* drop this descriptor and every one not yet used */
			process_lock();
			for (j = i; j < NS_COUNT; j++) {
				if (fd[j] >= 0)
					close(fd[j]);
			}
			process_unlock();

			errno = saved_errno;
			SYSERROR("failed to set namespace '%s'", ns[i]);
			return -1;
		}

		process_lock();
		close(fd[i]);
		process_unlock();
	}

	return 0;
}
220
/*
 * Give the caller a private mount namespace and mount fresh instances of
 * /proc and, if it was mounted, /sys in it.
 *
 * Returns 0 on success, -1 on failure.
 */
int lxc_attach_remount_sys_proc()
{
	if (unshare(CLONE_NEWNS) < 0) {
		SYSERROR("failed to unshare mount namespace");
		return -1;
	}

	/* /proc is assumed to be mounted at all times: replace it */
	if (umount2("/proc", MNT_DETACH) < 0) {
		SYSERROR("failed to unmount /proc");
		return -1;
	}

	if (mount("none", "/proc", "proc", 0, NULL) < 0) {
		SYSERROR("failed to remount /proc");
		return -1;
	}

	/* /sys is optional: EINVAL from umount2() means it was not a mount
	 * point, in which case there is nothing to replace
	 */
	if (umount2("/sys", MNT_DETACH) < 0) {
		if (errno == EINVAL)
			return 0;

		SYSERROR("failed to unmount /sys");
		return -1;
	}

	/* it was mounted, so put a fresh instance back */
	if (mount("none", "/sys", "sysfs", 0, NULL) < 0) {
		SYSERROR("failed to remount /sys");
		return -1;
	}

	return 0;
}
263
264 int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
265 {
266 int last_cap = lxc_caps_last_cap();
267 int cap;
268
269 for (cap = 0; cap <= last_cap; cap++) {
270 if (ctx->capability_mask & (1LL << cap))
271 continue;
272
273 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
274 SYSERROR("failed to remove capability id %d", cap);
275 return -1;
276 }
277 }
278
279 return 0;
280 }
281
282 int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep)
283 {
284 if (policy == LXC_ATTACH_CLEAR_ENV) {
285 char **extra_keep_store = NULL;
286 int path_kept = 0;
287
288 if (extra_keep) {
289 size_t count, i;
290
291 for (count = 0; extra_keep[count]; count++);
292
293 extra_keep_store = calloc(count, sizeof(char *));
294 if (!extra_keep_store) {
295 SYSERROR("failed to allocate memory for storing current "
296 "environment variable values that will be kept");
297 return -1;
298 }
299 for (i = 0; i < count; i++) {
300 char *v = getenv(extra_keep[i]);
301 if (v) {
302 extra_keep_store[i] = strdup(v);
303 if (!extra_keep_store[i]) {
304 SYSERROR("failed to allocate memory for storing current "
305 "environment variable values that will be kept");
306 while (i > 0)
307 free(extra_keep_store[--i]);
308 free(extra_keep_store);
309 return -1;
310 }
311 if (strcmp(extra_keep[i], "PATH") == 0)
312 path_kept = 1;
313 }
314 /* calloc sets entire array to zero, so we don't
315 * need an else */
316 }
317 }
318
319 if (clearenv()) {
320 char **p;
321 SYSERROR("failed to clear environment");
322 if (extra_keep_store) {
323 for (p = extra_keep_store; *p; p++)
324 free(*p);
325 free(extra_keep_store);
326 }
327 return -1;
328 }
329
330 if (extra_keep_store) {
331 size_t i;
332 for (i = 0; extra_keep[i]; i++) {
333 if (extra_keep_store[i])
334 setenv(extra_keep[i], extra_keep_store[i], 1);
335 free(extra_keep_store[i]);
336 }
337 free(extra_keep_store);
338 }
339
340 /* always set a default path; shells and execlp tend
341 * to be fine without it, but there is a disturbing
342 * number of C programs out there that just assume
343 * that getenv("PATH") is never NULL and then die a
344 * painful segfault death. */
345 if (!path_kept) {
346 #ifdef HAVE_CONFSTR
347 size_t n;
348 char *path_env;
349
350 n = confstr(_CS_PATH, NULL, 0);
351 path_env = malloc(n);
352 if (path_env) {
353 confstr(_CS_PATH, path_env, n);
354 setenv("PATH", path_env, 1);
355 free(path_env);
356 }
357 /* don't error out, this is just an extra service */
358 #else
359 setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
360 #endif
361 }
362 }
363
364 if (putenv("container=lxc")) {
365 SYSERROR("failed to set environment variable");
366 return -1;
367 }
368
369 /* set extra environment variables */
370 if (extra_env) {
371 for (; *extra_env; extra_env++) {
372 /* duplicate the string, just to be on
373 * the safe side, because putenv does not
374 * do it for us */
375 char *p = strdup(*extra_env);
376 /* we just assume the user knows what they
377 * are doing, so we don't do any checks */
378 if (!p) {
379 SYSERROR("failed to allocate memory for additional environment "
380 "variables");
381 return -1;
382 }
383 putenv(p);
384 }
385 }
386
387 return 0;
388 }
389
/*
 * Cut the next ':'-separated field off a passwd(5) line.
 *
 * *cursor points at the unparsed rest of the line and is advanced past
 * the separator, or set to NULL once the last field has been returned.
 * Unlike strtok_r() this keeps empty fields, which are perfectly normal
 * in passwd entries (e.g. an empty gecos field).
 */
static char *lxc_attach_next_passwd_field(char **cursor)
{
	char *field = *cursor;
	char *sep;

	if (!field)
		return NULL;

	sep = strchr(field, ':');
	if (sep) {
		*sep = '\0';
		*cursor = sep + 1;
	} else {
		*cursor = NULL;
	}

	return field;
}

/*
 * Determine the login shell of 'uid' by running "getent passwd <uid>" in
 * a child process and parsing its output.
 *
 * Returns a newly allocated string the caller has to free(), or NULL if
 * the shell could not be determined (getent missing or failing, no
 * matching well-formed entry, empty shell field, out of memory).
 */
char *lxc_attach_getpwshell(uid_t uid)
{
	/* local variables */
	pid_t pid;
	int pipes[2];
	int ret;
	int fd;
	char *result = NULL;

	/* we need to fork off a process that runs the
	 * getent program, and we need to capture its
	 * output, so we use a pipe for that purpose
	 */
	process_lock();
	ret = pipe(pipes);
	process_unlock();
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		process_lock();
		close(pipes[0]);
		close(pipes[1]);
		process_unlock();
		return NULL;
	}

	if (pid) {
		/* parent process */
		FILE *pipe_f;
		char *line = NULL;
		size_t line_bufsz = 0;
		int found = 0;
		int status;

		process_lock();
		close(pipes[1]);
		pipe_f = fdopen(pipes[0], "r");
		/* without a stream we cannot read anything; close our end
		 * so the child does not block on a full pipe, then fall
		 * through to reaping it
		 */
		if (!pipe_f)
			close(pipes[0]);
		process_unlock();

		while (pipe_f && getline(&line, &line_bufsz, pipe_f) != -1) {
			char *cursor = line;
			char *field;
			char *shell;
			char *endptr = NULL;
			long value;
			int i;

			/* if we already found something, just continue
			 * to read until the pipe doesn't deliver any more
			 * data, but don't modify the existing data
			 * structure
			 */
			if (found)
				continue;

			/* trim line on the right hand side */
			for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
				line[i - 1] = '\0';

			/* first: user name */
			if (!lxc_attach_next_passwd_field(&cursor))
				continue;
			/* next: dummy password field */
			if (!lxc_attach_next_passwd_field(&cursor))
				continue;
			/* next: user id, which must be a complete number */
			field = lxc_attach_next_passwd_field(&cursor);
			if (!field)
				continue;
			value = strtol(field, &endptr, 10);
			if (endptr == field || *endptr || value == LONG_MIN || value == LONG_MAX)
				continue;
			/* dummy sanity check: user id matches */
			if ((uid_t) value != uid)
				continue;
			/* skip fields: gid, gecos, dir */
			for (i = 0; i < 3; i++) {
				if (!lxc_attach_next_passwd_field(&cursor))
					break;
			}
			if (i < 3)
				continue;
			/* next: shell; an empty one is treated as "unknown" so
			 * that the caller applies its own default
			 */
			shell = lxc_attach_next_passwd_field(&cursor);
			if (!shell || !*shell)
				continue;
			/* sanity check that there are no fields after that */
			if (cursor)
				continue;

			free(result);
			result = strdup(shell);
			if (result)
				found = 1;
		}

		free(line);
		if (pipe_f) {
			process_lock();
			fclose(pipe_f);
			process_unlock();
		}
again:
		if (waitpid(pid, &status, 0) < 0) {
			if (errno == EINTR)
				goto again;
			free(result);
			return NULL;
		}

		/* some sanity checks: if anything even hinted at going
		 * wrong: we can't be sure we have a valid result, so
		 * we assume we don't
		 */
		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 || !found) {
			free(result);
			return NULL;
		}

		return result;
	} else {
		/* child process */
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		process_unlock(); // we're no longer sharing
		close(pipes[0]);

		/* we want to capture stdout */
		dup2(pipes[1], 1);
		close(pipes[1]);

		/* get rid of stdin/stderr, so we try to associate it
		 * with /dev/null
		 */
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			close(0);
			close(2);
		} else {
			dup2(fd, 0);
			dup2(fd, 2);
			close(fd);
		}

		/* finish argument list */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
		if (ret <= 0)
			exit(-1);

		/* try to run getent program */
		(void) execvp("getent", arguments);
		exit(-1);
	}
}
554
/*
 * Look up the real uid and gid of pid 1 as seen from the current pid
 * namespace by scanning the "Uid:" and "Gid:" lines of /proc/1/status.
 *
 * *init_uid and *init_gid are only written for the values that were
 * actually found; if the file cannot be opened both are left untouched.
 */
void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid)
{
	char status_path[MAXPATHLEN];
	FILE *status_file;
	char *buf = NULL;
	size_t buf_size = 0;
	long parsed = -1;
	uid_t found_uid = (uid_t)-1;
	gid_t found_gid = (gid_t)-1;

	snprintf(status_path, MAXPATHLEN, "/proc/%d/status", 1);

	status_file = fopen(status_path, "r");
	if (!status_file)
		return;

	while (getline(&buf, &buf_size, status_file) != -1) {
		/* both lines list: real, effective, saved set, fs id;
		 * the first number, i.e. the real id, is all we want
		 */
		if (sscanf(buf, "Uid: %ld", &parsed) > 0)
			found_uid = (uid_t) parsed;
		else if (sscanf(buf, "Gid: %ld", &parsed) > 0)
			found_gid = (gid_t) parsed;

		/* stop reading once both ids are known */
		if (found_uid != (uid_t)-1 && found_gid != (gid_t)-1)
			break;
	}

	free(buf);
	fclose(status_file);

	/* leave the caller's defaults alone for anything we did not see */
	if (found_uid != (uid_t)-1)
		*init_uid = found_uid;
	if (found_gid != (gid_t)-1)
		*init_gid = found_gid;

	/* TODO: we should also parse supplementary groups and use
	 * setgroups() to set them */
}
601
602 struct attach_clone_payload {
603 int ipc_socket;
604 lxc_attach_options_t* options;
605 struct lxc_proc_context_info* init_ctx;
606 lxc_attach_exec_t exec_function;
607 void* exec_payload;
608 };
609
610 static int attach_child_main(void* data);
611
612 /* help the optimizer along if it doesn't know that exit always exits */
613 #define rexit(c) do { int __c = (c); exit(__c); return __c; } while(0)
614
615 /* define default options if no options are supplied by the user */
616 static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
617
618 int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
619 {
620 int ret, status;
621 pid_t init_pid, pid, attached_pid;
622 struct lxc_proc_context_info *init_ctx;
623 char* cwd;
624 char* new_cwd;
625 int ipc_sockets[2];
626
627 if (!options)
628 options = &attach_static_default_options;
629
630 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
631 if (init_pid < 0) {
632 ERROR("failed to get the init pid");
633 return -1;
634 }
635
636 init_ctx = lxc_proc_get_context_info(init_pid);
637 if (!init_ctx) {
638 ERROR("failed to get context of the init process, pid = %ld", (long)init_pid);
639 return -1;
640 }
641
642 cwd = getcwd(NULL, 0);
643
644 /* determine which namespaces the container was created with
645 * by asking lxc-start, if necessary
646 */
647 if (options->namespaces == -1) {
648 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
649 /* call failed */
650 if (options->namespaces == -1) {
651 ERROR("failed to automatically determine the "
652 "namespaces which the container unshared");
653 free(cwd);
654 lxc_proc_put_context_info(init_ctx);
655 return -1;
656 }
657 }
658
659 /* create a socket pair for IPC communication; set SOCK_CLOEXEC in order
660 * to make sure we don't irritate other threads that want to fork+exec away
661 *
662 * IMPORTANT: if the initial process is multithreaded and another call
663 * just fork()s away without exec'ing directly after, the socket fd will
664 * exist in the forked process from the other thread and any close() in
665 * our own child process will not really cause the socket to close properly,
666 * potentiall causing the parent to hang.
667 *
668 * For this reason, while IPC is still active, we have to use shutdown()
669 * if the child exits prematurely in order to signal that the socket
670 * is closed and cannot assume that the child exiting will automatically
671 * do that.
672 *
673 * IPC mechanism: (X is receiver)
674 * initial process intermediate attached
675 * X <--- send pid of
676 * attached proc,
677 * then exit
678 * send 0 ------------------------------------> X
679 * [do initialization]
680 * X <------------------------------------ send 1
681 * [add to cgroup, ...]
682 * send 2 ------------------------------------> X
683 * close socket close socket
684 * run program
685 */
686 process_lock();
687 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
688 process_unlock();
689 if (ret < 0) {
690 SYSERROR("could not set up required IPC mechanism for attaching");
691 free(cwd);
692 lxc_proc_put_context_info(init_ctx);
693 return -1;
694 }
695
696 /* create intermediate subprocess, three reasons:
697 * 1. runs all pthread_atfork handlers and the
698 * child will no longer be threaded
699 * (we can't properly setns() in a threaded process)
700 * 2. we can't setns() in the child itself, since
701 * we want to make sure we are properly attached to
702 * the pidns
703 * 3. also, the initial thread has to put the attached
704 * process into the cgroup, which we can only do if
705 * we didn't already setns() (otherwise, user
706 * namespaces will hate us)
707 */
708 pid = fork();
709
710 if (pid < 0) {
711 SYSERROR("failed to create first subprocess");
712 free(cwd);
713 lxc_proc_put_context_info(init_ctx);
714 return -1;
715 }
716
717 if (pid) {
718 pid_t to_cleanup_pid = pid;
719 int expected = 0;
720
721 /* inital thread, we close the socket that is for the
722 * subprocesses
723 */
724 process_lock();
725 close(ipc_sockets[1]);
726 process_unlock();
727 free(cwd);
728
729 /* get pid from intermediate process */
730 ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
731 if (ret <= 0) {
732 if (ret != 0)
733 ERROR("error using IPC to receive pid of attached process");
734 goto cleanup_error;
735 }
736
737 /* reap intermediate process */
738 ret = wait_for_pid(pid);
739 if (ret < 0)
740 goto cleanup_error;
741
742 /* we will always have to reap the grandchild now */
743 to_cleanup_pid = attached_pid;
744
745 /* tell attached process it may start initializing */
746 status = 0;
747 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
748 if (ret <= 0) {
749 ERROR("error using IPC to notify attached process for initialization (0)");
750 goto cleanup_error;
751 }
752
753 /* wait for the attached process to finish initializing */
754 expected = 1;
755 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
756 if (ret <= 0) {
757 if (ret != 0)
758 ERROR("error using IPC to receive notification from attached process (1)");
759 goto cleanup_error;
760 }
761
762 /* attach to cgroup, if requested */
763 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
764 struct cgroup_meta_data *meta_data;
765 struct cgroup_process_info *container_info;
766
767 meta_data = lxc_cgroup_load_meta();
768 if (!meta_data) {
769 ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
770 goto cleanup_error;
771 }
772
773 container_info = lxc_cgroup_get_container_info(name, lxcpath, meta_data);
774 lxc_cgroup_put_meta(meta_data);
775 if (!container_info) {
776 ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
777 goto cleanup_error;
778 }
779
780 ret = lxc_cgroup_enter(container_info, attached_pid, false);
781 lxc_cgroup_process_info_free(container_info);
782 if (ret < 0) {
783 ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid);
784 goto cleanup_error;
785 }
786 }
787
788 /* tell attached process we're done */
789 status = 2;
790 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
791 if (ret <= 0) {
792 ERROR("error using IPC to notify attached process for initialization (2)");
793 goto cleanup_error;
794 }
795
796 /* now shut down communication with child, we're done */
797 shutdown(ipc_sockets[0], SHUT_RDWR);
798 process_lock();
799 close(ipc_sockets[0]);
800 process_unlock();
801 lxc_proc_put_context_info(init_ctx);
802
803 /* we're done, the child process should now execute whatever
804 * it is that the user requested. The parent can now track it
805 * with waitpid() or similar.
806 */
807
808 *attached_process = attached_pid;
809 return 0;
810
811 cleanup_error:
812 /* first shut down the socket, then wait for the pid,
813 * otherwise the pid we're waiting for may never exit
814 */
815 shutdown(ipc_sockets[0], SHUT_RDWR);
816 process_lock();
817 close(ipc_sockets[0]);
818 process_unlock();
819 if (to_cleanup_pid)
820 (void) wait_for_pid(to_cleanup_pid);
821 lxc_proc_put_context_info(init_ctx);
822 return -1;
823 }
824
825 process_unlock(); // we're no longer sharing
826 /* first subprocess begins here, we close the socket that is for the
827 * initial thread
828 */
829 close(ipc_sockets[0]);
830
831 /* attach now, create another subprocess later, since pid namespaces
832 * only really affect the children of the current process
833 */
834 ret = lxc_attach_to_ns(init_pid, options->namespaces);
835 if (ret < 0) {
836 ERROR("failed to enter the namespace");
837 shutdown(ipc_sockets[1], SHUT_RDWR);
838 rexit(-1);
839 }
840
841 /* attach succeeded, try to cwd */
842 if (options->initial_cwd)
843 new_cwd = options->initial_cwd;
844 else
845 new_cwd = cwd;
846 ret = chdir(new_cwd);
847 if (ret < 0)
848 WARN("could not change directory to '%s'", new_cwd);
849 free(cwd);
850
851 /* now create the real child process */
852 {
853 struct attach_clone_payload payload = {
854 .ipc_socket = ipc_sockets[1],
855 .options = options,
856 .init_ctx = init_ctx,
857 .exec_function = exec_function,
858 .exec_payload = exec_payload
859 };
860 /* We use clone_parent here to make this subprocess a direct child of
861 * the initial process. Then this intermediate process can exit and
862 * the parent can directly track the attached process.
863 */
864 pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
865 }
866
867 /* shouldn't happen, clone() should always return positive pid */
868 if (pid <= 0) {
869 SYSERROR("failed to create subprocess");
870 shutdown(ipc_sockets[1], SHUT_RDWR);
871 rexit(-1);
872 }
873
874 /* tell grandparent the pid of the pid of the newly created child */
875 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
876 if (ret != sizeof(pid)) {
877 /* if this really happens here, this is very unfortunate, since the
878 * parent will not know the pid of the attached process and will
879 * not be able to wait for it (and we won't either due to CLONE_PARENT)
880 * so the parent won't be able to reap it and the attached process
881 * will remain a zombie
882 */
883 ERROR("error using IPC to notify main process of pid of the attached process");
884 shutdown(ipc_sockets[1], SHUT_RDWR);
885 rexit(-1);
886 }
887
888 /* the rest is in the hands of the initial and the attached process */
889 rexit(0);
890 }
891
892 int attach_child_main(void* data)
893 {
894 struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
895 int ipc_socket = payload->ipc_socket;
896 lxc_attach_options_t* options = payload->options;
897 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
898 #if HAVE_SYS_PERSONALITY_H
899 long new_personality;
900 #endif
901 int ret;
902 int status;
903 int expected;
904 long flags;
905 int fd;
906 uid_t new_uid;
907 gid_t new_gid;
908
909 /* wait for the initial thread to signal us that it's ready
910 * for us to start initializing
911 */
912 expected = 0;
913 status = -1;
914 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
915 if (ret <= 0) {
916 ERROR("error using IPC to receive notification from initial process (0)");
917 shutdown(ipc_socket, SHUT_RDWR);
918 rexit(-1);
919 }
920
921 /* load apparmor profile */
922 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_APPARMOR)) {
923 ret = lsm_process_label_set(init_ctx->lsm_label, 0);
924 if (ret < 0) {
925 shutdown(ipc_socket, SHUT_RDWR);
926 rexit(-1);
927 }
928 }
929
930 /* A description of the purpose of this functionality is
931 * provided in the lxc-attach(1) manual page. We have to
932 * remount here and not in the parent process, otherwise
933 * /proc may not properly reflect the new pid namespace.
934 */
935 if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
936 ret = lxc_attach_remount_sys_proc();
937 if (ret < 0) {
938 shutdown(ipc_socket, SHUT_RDWR);
939 rexit(-1);
940 }
941 }
942
943 /* now perform additional attachments*/
944 #if HAVE_SYS_PERSONALITY_H
945 if (options->personality < 0)
946 new_personality = init_ctx->personality;
947 else
948 new_personality = options->personality;
949
950 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
951 ret = personality(new_personality);
952 if (ret < 0) {
953 SYSERROR("could not ensure correct architecture");
954 shutdown(ipc_socket, SHUT_RDWR);
955 rexit(-1);
956 }
957 }
958 #endif
959
960 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
961 ret = lxc_attach_drop_privs(init_ctx);
962 if (ret < 0) {
963 ERROR("could not drop privileges");
964 shutdown(ipc_socket, SHUT_RDWR);
965 rexit(-1);
966 }
967 }
968
969 /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */
970 ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
971 if (ret < 0) {
972 ERROR("could not set initial environment for attached process");
973 shutdown(ipc_socket, SHUT_RDWR);
974 rexit(-1);
975 }
976
977 /* set user / group id */
978 new_uid = 0;
979 new_gid = 0;
980 /* ignore errors, we will fall back to root in that case
981 * (/proc was not mounted etc.)
982 */
983 if (options->namespaces & CLONE_NEWUSER)
984 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
985
986 if (options->uid != (uid_t)-1)
987 new_uid = options->uid;
988 if (options->gid != (gid_t)-1)
989 new_gid = options->gid;
990
991 /* try to set the uid/gid combination */
992 if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER) && setgid(new_gid)) {
993 SYSERROR("switching to container gid");
994 shutdown(ipc_socket, SHUT_RDWR);
995 rexit(-1);
996 }
997 if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
998 SYSERROR("switching to container uid");
999 shutdown(ipc_socket, SHUT_RDWR);
1000 rexit(-1);
1001 }
1002
1003 /* tell initial process it may now put us into the cgroups */
1004 status = 1;
1005 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
1006 if (ret != sizeof(status)) {
1007 ERROR("error using IPC to notify initial process for initialization (1)");
1008 shutdown(ipc_socket, SHUT_RDWR);
1009 rexit(-1);
1010 }
1011
1012 /* wait for the initial thread to signal us that it has done
1013 * everything for us when it comes to cgroups etc.
1014 */
1015 expected = 2;
1016 status = -1;
1017 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
1018 if (ret <= 0) {
1019 ERROR("error using IPC to receive final notification from initial process (2)");
1020 shutdown(ipc_socket, SHUT_RDWR);
1021 rexit(-1);
1022 }
1023
1024 shutdown(ipc_socket, SHUT_RDWR);
1025 close(ipc_socket);
1026 lxc_proc_put_context_info(init_ctx);
1027
1028 /* The following is done after the communication socket is
1029 * shut down. That way, all errors that might (though
1030 * unlikely) occur up until this point will have their messages
1031 * printed to the original stderr (if logging is so configured)
1032 * and not the fd the user supplied, if any.
1033 */
1034
1035 /* fd handling for stdin, stdout and stderr;
1036 * ignore errors here, user may want to make sure
1037 * the fds are closed, for example */
1038 if (options->stdin_fd >= 0 && options->stdin_fd != 0)
1039 dup2(options->stdin_fd, 0);
1040 if (options->stdout_fd >= 0 && options->stdout_fd != 1)
1041 dup2(options->stdout_fd, 1);
1042 if (options->stderr_fd >= 0 && options->stderr_fd != 2)
1043 dup2(options->stderr_fd, 2);
1044
1045 /* close the old fds */
1046 if (options->stdin_fd > 2)
1047 close(options->stdin_fd);
1048 if (options->stdout_fd > 2)
1049 close(options->stdout_fd);
1050 if (options->stderr_fd > 2)
1051 close(options->stderr_fd);
1052
1053 /* try to remove CLOEXEC flag from stdin/stdout/stderr,
1054 * but also here, ignore errors */
1055 for (fd = 0; fd <= 2; fd++) {
1056 flags = fcntl(fd, F_GETFL);
1057 if (flags < 0)
1058 continue;
1059 if (flags & FD_CLOEXEC)
1060 fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC);
1061 }
1062
1063 /* we're done, so we can now do whatever the user intended us to do */
1064 rexit(payload->exec_function(payload->exec_payload));
1065 }
1066
1067 int lxc_attach_run_command(void* payload)
1068 {
1069 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1070
1071 execvp(cmd->program, cmd->argv);
1072 SYSERROR("failed to exec '%s'", cmd->program);
1073 return -1;
1074 }
1075
/*
 * Exec function for lxc_attach(): run the login shell of the current
 * user, falling back to /bin/sh if it is unknown or cannot be executed.
 * 'payload' is ignored. Only returns, with -1, if no shell could be
 * executed at all.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd *passwd;
	char *user_shell;
	char *getent_shell = NULL;

	/* ignore payload parameter */
	(void)payload;

	uid = getuid();
	passwd = getpwuid(uid);

	/* this probably happens because of incompatible nss
	 * implementations in host and container (remember, this
	 * code is still using the host's glibc but our mount
	 * namespace is in the container)
	 * we may try to get the information by spawning a
	 * [getent passwd uid] process and parsing the result
	 */
	if (!passwd) {
		/* this string is heap allocated and ours to release */
		getent_shell = lxc_attach_getpwshell(uid);
		user_shell = getent_shell;
	} else {
		user_shell = passwd->pw_shell;
	}

	/* the terminator of a variadic exec call has to be a null
	 * *pointer*; a bare NULL may expand to a plain integer 0
	 */
	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* the exec did not happen, the string is of no further use */
	free(getent_shell);

	/* executed if either no passwd entry or execlp fails,
	 * we will fall back on /bin/sh as a default shell
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);
	SYSERROR("failed to exec shell");
	return -1;
}