]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/attach.c
coverity: Check fcntl return value
[mirror_lxc.git] / src / lxc / attach.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <unistd.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <grp.h>
32 #include <sys/param.h>
33 #include <sys/prctl.h>
34 #include <sys/mount.h>
35 #include <sys/socket.h>
36 #include <sys/syscall.h>
37 #include <sys/wait.h>
38 #include <linux/unistd.h>
39 #include <pwd.h>
40
41 #if !HAVE_DECL_PR_CAPBSET_DROP
42 #define PR_CAPBSET_DROP 24
43 #endif
44
45 #include "namespace.h"
46 #include "log.h"
47 #include "attach.h"
48 #include "caps.h"
49 #include "config.h"
50 #include "utils.h"
51 #include "commands.h"
52 #include "cgroup.h"
53 #include "lxclock.h"
54 #include "lsm/lsm.h"
55
56 #if HAVE_SYS_PERSONALITY_H
57 #include <sys/personality.h>
58 #endif
59
60 #ifndef SOCK_CLOEXEC
61 # define SOCK_CLOEXEC 02000000
62 #endif
63
64 lxc_log_define(lxc_attach, lxc);
65
66 static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
67 {
68 struct lxc_proc_context_info *info = calloc(1, sizeof(*info));
69 FILE *proc_file;
70 char proc_fn[MAXPATHLEN];
71 char *line = NULL;
72 size_t line_bufsz = 0;
73 int ret, found;
74
75 if (!info) {
76 SYSERROR("Could not allocate memory.");
77 return NULL;
78 }
79
80 /* read capabilities */
81 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/status", pid);
82
83 proc_file = fopen(proc_fn, "r");
84 if (!proc_file) {
85 SYSERROR("Could not open %s", proc_fn);
86 goto out_error;
87 }
88
89 found = 0;
90 while (getline(&line, &line_bufsz, proc_file) != -1) {
91 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
92 if (ret != EOF && ret > 0) {
93 found = 1;
94 break;
95 }
96 }
97
98 if (line)
99 free(line);
100 fclose(proc_file);
101
102 if (!found) {
103 SYSERROR("Could not read capability bounding set from %s", proc_fn);
104 errno = ENOENT;
105 goto out_error;
106 }
107
108 /* read personality */
109 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/personality", pid);
110
111 proc_file = fopen(proc_fn, "r");
112 if (!proc_file) {
113 SYSERROR("Could not open %s", proc_fn);
114 goto out_error;
115 }
116
117 ret = fscanf(proc_file, "%lx", &info->personality);
118 fclose(proc_file);
119
120 if (ret == EOF || ret == 0) {
121 SYSERROR("Could not read personality from %s", proc_fn);
122 errno = ENOENT;
123 goto out_error;
124 }
125 info->lsm_label = lsm_process_label_get(pid);
126
127 return info;
128
129 out_error:
130 free(info);
131 return NULL;
132 }
133
134 static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
135 {
136 if (ctx->lsm_label)
137 free(ctx->lsm_label);
138 free(ctx);
139 }
140
/*
 * Move the calling process into the namespaces of process `pid`.
 *
 * `which` is a mask of CLONE_NEW* flags selecting the namespaces to enter;
 * -1 selects all of them. All namespace files are opened first and only
 * then entered one by one, in the order of the table below.
 *
 * Returns 0 on success and -1 on failure (with a message logged). Every
 * descriptor opened here is closed again on all paths.
 */
static int lxc_attach_to_ns(pid_t pid, int which)
{
	/* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
	 * the file for user namespaces in /proc/$pid/ns will be called
	 * 'user' once the kernel supports it
	 */
	static const char *const ns[] = { "user", "mnt", "pid", "uts", "ipc", "net" };
	static const int flags[] = {
		CLONE_NEWUSER, CLONE_NEWNS, CLONE_NEWPID, CLONE_NEWUTS, CLONE_NEWIPC,
		CLONE_NEWNET
	};
	/* a true compile-time constant, so fd[] is a plain array, not a VLA */
	enum { NS_COUNT = sizeof(ns) / sizeof(ns[0]) };
	char path[MAXPATHLEN];
	int fd[NS_COUNT];
	int i, j, saved_errno;

	snprintf(path, MAXPATHLEN, "/proc/%d/ns", pid);
	if (access(path, X_OK)) {
		ERROR("Does this kernel version support 'attach' ?");
		return -1;
	}

	for (i = 0; i < NS_COUNT; i++) {
		/* ignore if we are not supposed to attach to that
		 * namespace; -1 marks the slot as "nothing to close"
		 */
		if (which != -1 && !(which & flags[i])) {
			fd[i] = -1;
			continue;
		}

		snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]);
		fd[i] = open(path, O_RDONLY | O_CLOEXEC);
		if (fd[i] < 0) {
			saved_errno = errno;

			/* close all already opened file descriptors before
			 * we return an error, so we don't leak them; skipped
			 * namespaces have no descriptor
			 */
			for (j = 0; j < i; j++) {
				if (fd[j] >= 0)
					close(fd[j]);
			}

			errno = saved_errno;
			SYSERROR("failed to open '%s'", path);
			return -1;
		}
	}

	for (i = 0; i < NS_COUNT; i++) {
		if (fd[i] < 0)
			continue;

		if (setns(fd[i], 0) != 0) {
			saved_errno = errno;

			/* release this descriptor and every one not yet used */
			for (j = i; j < NS_COUNT; j++) {
				if (fd[j] >= 0)
					close(fd[j]);
			}

			errno = saved_errno;
			SYSERROR("failed to set namespace '%s'", ns[i]);
			return -1;
		}

		close(fd[i]);
	}

	return 0;
}
207
/*
 * Give the calling process a private mount namespace and mount fresh
 * instances of /proc and (if it was mounted) /sys inside it.
 *
 * Returns 0 on success, -1 on the first step that fails.
 */
static int lxc_attach_remount_sys_proc(void)
{
	if (unshare(CLONE_NEWNS) < 0) {
		SYSERROR("failed to unshare mount namespace");
		return -1;
	}

	/* /proc is assumed to be mounted already, so replace it */
	if (umount2("/proc", MNT_DETACH) < 0) {
		SYSERROR("failed to unmount /proc");
		return -1;
	}

	if (mount("none", "/proc", "proc", 0, NULL) < 0) {
		SYSERROR("failed to remount /proc");
		return -1;
	}

	/* /sys is optional: EINVAL from umount2() means it is not a mount
	 * point, i.e. it was never mounted, and then there is nothing to
	 * put back either
	 */
	if (umount2("/sys", MNT_DETACH) < 0) {
		if (errno == EINVAL)
			return 0;

		SYSERROR("failed to unmount /sys");
		return -1;
	}

	/* the old /sys is gone, mount a new one */
	if (mount("none", "/sys", "sysfs", 0, NULL) < 0) {
		SYSERROR("failed to remount /sys");
		return -1;
	}

	return 0;
}
250
251 static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
252 {
253 int last_cap = lxc_caps_last_cap();
254 int cap;
255
256 for (cap = 0; cap <= last_cap; cap++) {
257 if (ctx->capability_mask & (1LL << cap))
258 continue;
259
260 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
261 SYSERROR("failed to remove capability id %d", cap);
262 return -1;
263 }
264 }
265
266 return 0;
267 }
268
269 static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep)
270 {
271 if (policy == LXC_ATTACH_CLEAR_ENV) {
272 char **extra_keep_store = NULL;
273 int path_kept = 0;
274
275 if (extra_keep) {
276 size_t count, i;
277
278 for (count = 0; extra_keep[count]; count++);
279
280 extra_keep_store = calloc(count, sizeof(char *));
281 if (!extra_keep_store) {
282 SYSERROR("failed to allocate memory for storing current "
283 "environment variable values that will be kept");
284 return -1;
285 }
286 for (i = 0; i < count; i++) {
287 char *v = getenv(extra_keep[i]);
288 if (v) {
289 extra_keep_store[i] = strdup(v);
290 if (!extra_keep_store[i]) {
291 SYSERROR("failed to allocate memory for storing current "
292 "environment variable values that will be kept");
293 while (i > 0)
294 free(extra_keep_store[--i]);
295 free(extra_keep_store);
296 return -1;
297 }
298 if (strcmp(extra_keep[i], "PATH") == 0)
299 path_kept = 1;
300 }
301 /* calloc sets entire array to zero, so we don't
302 * need an else */
303 }
304 }
305
306 if (clearenv()) {
307 char **p;
308 SYSERROR("failed to clear environment");
309 if (extra_keep_store) {
310 for (p = extra_keep_store; *p; p++)
311 free(*p);
312 free(extra_keep_store);
313 }
314 return -1;
315 }
316
317 if (extra_keep_store) {
318 size_t i;
319 for (i = 0; extra_keep[i]; i++) {
320 if (extra_keep_store[i]) {
321 if (setenv(extra_keep[i], extra_keep_store[i], 1) < 0)
322 SYSERROR("Unable to set environment variable");
323 }
324 free(extra_keep_store[i]);
325 }
326 free(extra_keep_store);
327 }
328
329 /* always set a default path; shells and execlp tend
330 * to be fine without it, but there is a disturbing
331 * number of C programs out there that just assume
332 * that getenv("PATH") is never NULL and then die a
333 * painful segfault death. */
334 if (!path_kept) {
335 #ifdef HAVE_CONFSTR
336 size_t n;
337 char *path_env;
338
339 n = confstr(_CS_PATH, NULL, 0);
340 path_env = malloc(n);
341 if (path_env) {
342 confstr(_CS_PATH, path_env, n);
343 setenv("PATH", path_env, 1);
344 free(path_env);
345 }
346 /* don't error out, this is just an extra service */
347 #else
348 setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
349 #endif
350 }
351 }
352
353 if (putenv("container=lxc")) {
354 SYSERROR("failed to set environment variable");
355 return -1;
356 }
357
358 /* set extra environment variables */
359 if (extra_env) {
360 for (; *extra_env; extra_env++) {
361 /* duplicate the string, just to be on
362 * the safe side, because putenv does not
363 * do it for us */
364 char *p = strdup(*extra_env);
365 /* we just assume the user knows what they
366 * are doing, so we don't do any checks */
367 if (!p) {
368 SYSERROR("failed to allocate memory for additional environment "
369 "variables");
370 return -1;
371 }
372 putenv(p);
373 }
374 }
375
376 return 0;
377 }
378
/*
 * Look up the login shell of `uid` by running "getent passwd <uid>" in a
 * child process and parsing its output.
 *
 * Returns a heap-allocated string the caller must free(), or NULL when the
 * helper could not be run, did not exit with status 0, or printed no
 * well-formed passwd line for `uid`.
 *
 * NOTE(review): fields are split with strtok_r(), which skips empty
 * fields; a passwd line with an empty field before the shell is therefore
 * not matched. Confirm whether that matters for the callers.
 */
static char *lxc_attach_getpwshell(uid_t uid)
{
	pid_t pid;
	int pipes[2];
	int ret;
	int fd;
	char *result = NULL;

	/* we need to fork off a process that runs the
	 * getent program, and we need to capture its
	 * output, so we use a pipe for that purpose
	 */
	ret = pipe(pipes);
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(pipes[0]);
		close(pipes[1]);
		return NULL;
	}

	if (pid) {
		/* parent process */
		FILE *pipe_f;
		char *line = NULL;
		size_t line_bufsz = 0;
		int found = 0;
		int status;

		close(pipes[1]);

		pipe_f = fdopen(pipes[0], "r");
		if (!pipe_f) {
			/* nothing can be read; drop our end of the pipe and
			 * go on to reap the child below
			 */
			close(pipes[0]);
		} else {
			while (getline(&line, &line_bufsz, pipe_f) != -1) {
				char *token;
				char *shell;
				char *saveptr = NULL;
				long value;
				char *endptr = NULL;
				int i;

				/* if we already found something, just continue
				 * to read until the pipe doesn't deliver any
				 * more data, but don't touch the result
				 */
				if (found)
					continue;

				/* trim line on the right hand side */
				for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
					line[i - 1] = '\0';

				/* split into tokens: first user name */
				token = strtok_r(line, ":", &saveptr);
				if (!token)
					continue;
				/* next: dummy password field */
				token = strtok_r(NULL, ":", &saveptr);
				if (!token)
					continue;
				/* next: user id */
				token = strtok_r(NULL, ":", &saveptr);
				value = token ? strtol(token, &endptr, 10) : 0;
				if (!token || !endptr || *endptr || value == LONG_MIN || value == LONG_MAX)
					continue;
				/* dummy sanity check: user id matches */
				if ((uid_t) value != uid)
					continue;
				/* skip fields: gid, gecos, dir, go to next field 'shell' */
				for (i = 0; i < 4; i++) {
					token = strtok_r(NULL, ":", &saveptr);
					if (!token)
						break;
				}
				if (!token)
					continue;
				shell = token;

				/* sanity check that there are no fields after
				 * that; `shell` stays valid because it points
				 * into `line`
				 */
				if (strtok_r(NULL, ":", &saveptr))
					continue;

				/* only copy once the line is known to be good,
				 * so a rejected line never leaves a result
				 * behind
				 */
				result = strdup(shell);
				if (!result)
					continue;

				found = 1;
			}

			free(line);
			fclose(pipe_f);
		}

	again:
		if (waitpid(pid, &status, 0) < 0) {
			if (errno == EINTR)
				goto again;
			free(result);
			return NULL;
		}

		/* some sanity checks: if anything even hinted at going
		 * wrong: we can't be sure we have a valid result, so
		 * we assume we don't
		 */
		if (!found || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
			free(result);
			return NULL;
		}

		return result;
	} else {
		/* child process */
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		close(pipes[0]);

		/* we want to capture stdout; without the redirection the
		 * parent would wait for output that never arrives on the pipe
		 */
		if (dup2(pipes[1], 1) < 0)
			exit(-1);
		close(pipes[1]);

		/* get rid of stdin/stderr, so we try to associate it
		 * with /dev/null
		 */
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			close(0);
			close(2);
		} else {
			dup2(fd, 0);
			dup2(fd, 2);
			close(fd);
		}

		/* finish argument list */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
		if (ret <= 0)
			exit(-1);

		/* try to run getent program */
		(void) execvp("getent", arguments);
		exit(-1);
	}
}
532
/*
 * Read the real uid and gid of pid 1 from /proc/1/status.
 *
 * *init_uid and *init_gid are only written for the values that were
 * actually found; if the file cannot be opened both are left untouched.
 */
static void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid)
{
	char status_path[MAXPATHLEN];
	char *buf = NULL;
	size_t buf_len = 0;
	long parsed = -1;
	uid_t found_uid = (uid_t)-1;
	gid_t found_gid = (gid_t)-1;
	FILE *status;

	/* status file of the init process as seen from here */
	snprintf(status_path, MAXPATHLEN, "/proc/%d/status", 1);

	status = fopen(status_path, "r");
	if (!status)
		return;

	while (getline(&buf, &buf_len, status) != -1) {
		/* each line lists: real, effective, saved set, fs;
		 * only the first (real) id is of interest
		 */
		if (sscanf(buf, "Uid: %ld", &parsed) > 0)
			found_uid = (uid_t) parsed;
		else if (sscanf(buf, "Gid: %ld", &parsed) > 0)
			found_gid = (gid_t) parsed;

		/* stop reading once both ids are known */
		if (found_uid != (uid_t)-1 && found_gid != (gid_t)-1)
			break;
	}

	free(buf);
	fclose(status);

	/* hand back only what was found */
	if (found_uid != (uid_t)-1)
		*init_uid = found_uid;
	if (found_gid != (gid_t)-1)
		*init_gid = found_gid;

	/* TODO: we should also parse supplementary groups and use
	 * setgroups() to set them */
}
579
580 struct attach_clone_payload {
581 int ipc_socket;
582 lxc_attach_options_t* options;
583 struct lxc_proc_context_info* init_ctx;
584 lxc_attach_exec_t exec_function;
585 void* exec_payload;
586 };
587
588 static int attach_child_main(void* data);
589
590 /* help the optimizer along if it doesn't know that exit always exits */
591 #define rexit(c) do { int __c = (c); exit(__c); return __c; } while(0)
592
593 /* define default options if no options are supplied by the user */
594 static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
595
596 int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
597 {
598 int ret, status;
599 pid_t init_pid, pid, attached_pid, expected;
600 struct lxc_proc_context_info *init_ctx;
601 char* cwd;
602 char* new_cwd;
603 int ipc_sockets[2];
604
605 if (!options)
606 options = &attach_static_default_options;
607
608 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
609 if (init_pid < 0) {
610 ERROR("failed to get the init pid");
611 return -1;
612 }
613
614 init_ctx = lxc_proc_get_context_info(init_pid);
615 if (!init_ctx) {
616 ERROR("failed to get context of the init process, pid = %ld", (long)init_pid);
617 return -1;
618 }
619
620 cwd = getcwd(NULL, 0);
621
622 /* determine which namespaces the container was created with
623 * by asking lxc-start, if necessary
624 */
625 if (options->namespaces == -1) {
626 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
627 /* call failed */
628 if (options->namespaces == -1) {
629 ERROR("failed to automatically determine the "
630 "namespaces which the container unshared");
631 free(cwd);
632 lxc_proc_put_context_info(init_ctx);
633 return -1;
634 }
635 }
636
637 /* create a socket pair for IPC communication; set SOCK_CLOEXEC in order
638 * to make sure we don't irritate other threads that want to fork+exec away
639 *
640 * IMPORTANT: if the initial process is multithreaded and another call
641 * just fork()s away without exec'ing directly after, the socket fd will
642 * exist in the forked process from the other thread and any close() in
643 * our own child process will not really cause the socket to close properly,
644 * potentiall causing the parent to hang.
645 *
646 * For this reason, while IPC is still active, we have to use shutdown()
647 * if the child exits prematurely in order to signal that the socket
648 * is closed and cannot assume that the child exiting will automatically
649 * do that.
650 *
651 * IPC mechanism: (X is receiver)
652 * initial process intermediate attached
653 * X <--- send pid of
654 * attached proc,
655 * then exit
656 * send 0 ------------------------------------> X
657 * [do initialization]
658 * X <------------------------------------ send 1
659 * [add to cgroup, ...]
660 * send 2 ------------------------------------> X
661 * close socket close socket
662 * run program
663 */
664 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
665 if (ret < 0) {
666 SYSERROR("could not set up required IPC mechanism for attaching");
667 free(cwd);
668 lxc_proc_put_context_info(init_ctx);
669 return -1;
670 }
671
672 /* create intermediate subprocess, three reasons:
673 * 1. runs all pthread_atfork handlers and the
674 * child will no longer be threaded
675 * (we can't properly setns() in a threaded process)
676 * 2. we can't setns() in the child itself, since
677 * we want to make sure we are properly attached to
678 * the pidns
679 * 3. also, the initial thread has to put the attached
680 * process into the cgroup, which we can only do if
681 * we didn't already setns() (otherwise, user
682 * namespaces will hate us)
683 */
684 pid = fork();
685
686 if (pid < 0) {
687 SYSERROR("failed to create first subprocess");
688 free(cwd);
689 lxc_proc_put_context_info(init_ctx);
690 return -1;
691 }
692
693 if (pid) {
694 pid_t to_cleanup_pid = pid;
695
696 /* inital thread, we close the socket that is for the
697 * subprocesses
698 */
699 close(ipc_sockets[1]);
700 free(cwd);
701
702 /* attach to cgroup, if requested */
703 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
704 if (!cgroup_attach(name, lxcpath, pid))
705 goto cleanup_error;
706 }
707
708 /* Let the child process know to go ahead */
709 status = 0;
710 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
711 if (ret <= 0) {
712 ERROR("error using IPC to notify attached process for initialization (0)");
713 goto cleanup_error;
714 }
715
716 /* get pid from intermediate process */
717 ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
718 if (ret <= 0) {
719 if (ret != 0)
720 ERROR("error using IPC to receive pid of attached process");
721 goto cleanup_error;
722 }
723
724 /* reap intermediate process */
725 ret = wait_for_pid(pid);
726 if (ret < 0)
727 goto cleanup_error;
728
729 /* we will always have to reap the grandchild now */
730 to_cleanup_pid = attached_pid;
731
732 /* tell attached process it may start initializing */
733 status = 0;
734 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
735 if (ret <= 0) {
736 ERROR("error using IPC to notify attached process for initialization (0)");
737 goto cleanup_error;
738 }
739
740 /* wait for the attached process to finish initializing */
741 expected = 1;
742 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
743 if (ret <= 0) {
744 if (ret != 0)
745 ERROR("error using IPC to receive notification from attached process (1)");
746 goto cleanup_error;
747 }
748
749 /* tell attached process we're done */
750 status = 2;
751 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
752 if (ret <= 0) {
753 ERROR("error using IPC to notify attached process for initialization (2)");
754 goto cleanup_error;
755 }
756
757 /* now shut down communication with child, we're done */
758 shutdown(ipc_sockets[0], SHUT_RDWR);
759 close(ipc_sockets[0]);
760 lxc_proc_put_context_info(init_ctx);
761
762 /* we're done, the child process should now execute whatever
763 * it is that the user requested. The parent can now track it
764 * with waitpid() or similar.
765 */
766
767 *attached_process = attached_pid;
768 return 0;
769
770 cleanup_error:
771 /* first shut down the socket, then wait for the pid,
772 * otherwise the pid we're waiting for may never exit
773 */
774 shutdown(ipc_sockets[0], SHUT_RDWR);
775 close(ipc_sockets[0]);
776 if (to_cleanup_pid)
777 (void) wait_for_pid(to_cleanup_pid);
778 lxc_proc_put_context_info(init_ctx);
779 return -1;
780 }
781
782 /* first subprocess begins here, we close the socket that is for the
783 * initial thread
784 */
785 close(ipc_sockets[0]);
786
787 /* Wait for the parent to have setup cgroups */
788 expected = 0;
789 status = -1;
790 ret = lxc_read_nointr_expect(ipc_sockets[1], &status, sizeof(status), &expected);
791 if (ret <= 0) {
792 ERROR("error communicating with child process");
793 shutdown(ipc_sockets[1], SHUT_RDWR);
794 rexit(-1);
795 }
796
797 /* attach now, create another subprocess later, since pid namespaces
798 * only really affect the children of the current process
799 */
800 ret = lxc_attach_to_ns(init_pid, options->namespaces);
801 if (ret < 0) {
802 ERROR("failed to enter the namespace");
803 shutdown(ipc_sockets[1], SHUT_RDWR);
804 rexit(-1);
805 }
806
807 /* attach succeeded, try to cwd */
808 if (options->initial_cwd)
809 new_cwd = options->initial_cwd;
810 else
811 new_cwd = cwd;
812 ret = chdir(new_cwd);
813 if (ret < 0)
814 WARN("could not change directory to '%s'", new_cwd);
815 free(cwd);
816
817 /* now create the real child process */
818 {
819 struct attach_clone_payload payload = {
820 .ipc_socket = ipc_sockets[1],
821 .options = options,
822 .init_ctx = init_ctx,
823 .exec_function = exec_function,
824 .exec_payload = exec_payload
825 };
826 /* We use clone_parent here to make this subprocess a direct child of
827 * the initial process. Then this intermediate process can exit and
828 * the parent can directly track the attached process.
829 */
830 pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
831 }
832
833 /* shouldn't happen, clone() should always return positive pid */
834 if (pid <= 0) {
835 SYSERROR("failed to create subprocess");
836 shutdown(ipc_sockets[1], SHUT_RDWR);
837 rexit(-1);
838 }
839
840 /* tell grandparent the pid of the pid of the newly created child */
841 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
842 if (ret != sizeof(pid)) {
843 /* if this really happens here, this is very unfortunate, since the
844 * parent will not know the pid of the attached process and will
845 * not be able to wait for it (and we won't either due to CLONE_PARENT)
846 * so the parent won't be able to reap it and the attached process
847 * will remain a zombie
848 */
849 ERROR("error using IPC to notify main process of pid of the attached process");
850 shutdown(ipc_sockets[1], SHUT_RDWR);
851 rexit(-1);
852 }
853
854 /* the rest is in the hands of the initial and the attached process */
855 rexit(0);
856 }
857
858 static int attach_child_main(void* data)
859 {
860 struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
861 int ipc_socket = payload->ipc_socket;
862 lxc_attach_options_t* options = payload->options;
863 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
864 #if HAVE_SYS_PERSONALITY_H
865 long new_personality;
866 #endif
867 int ret;
868 int status;
869 int expected;
870 long flags;
871 int fd;
872 uid_t new_uid;
873 gid_t new_gid;
874
875 /* wait for the initial thread to signal us that it's ready
876 * for us to start initializing
877 */
878 expected = 0;
879 status = -1;
880 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
881 if (ret <= 0) {
882 ERROR("error using IPC to receive notification from initial process (0)");
883 shutdown(ipc_socket, SHUT_RDWR);
884 rexit(-1);
885 }
886
887 /* A description of the purpose of this functionality is
888 * provided in the lxc-attach(1) manual page. We have to
889 * remount here and not in the parent process, otherwise
890 * /proc may not properly reflect the new pid namespace.
891 */
892 if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
893 ret = lxc_attach_remount_sys_proc();
894 if (ret < 0) {
895 shutdown(ipc_socket, SHUT_RDWR);
896 rexit(-1);
897 }
898 }
899
900 /* now perform additional attachments*/
901 #if HAVE_SYS_PERSONALITY_H
902 if (options->personality < 0)
903 new_personality = init_ctx->personality;
904 else
905 new_personality = options->personality;
906
907 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
908 ret = personality(new_personality);
909 if (ret < 0) {
910 SYSERROR("could not ensure correct architecture");
911 shutdown(ipc_socket, SHUT_RDWR);
912 rexit(-1);
913 }
914 }
915 #endif
916
917 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
918 ret = lxc_attach_drop_privs(init_ctx);
919 if (ret < 0) {
920 ERROR("could not drop privileges");
921 shutdown(ipc_socket, SHUT_RDWR);
922 rexit(-1);
923 }
924 }
925
926 /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */
927 ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
928 if (ret < 0) {
929 ERROR("could not set initial environment for attached process");
930 shutdown(ipc_socket, SHUT_RDWR);
931 rexit(-1);
932 }
933
934 /* set user / group id */
935 new_uid = 0;
936 new_gid = 0;
937 /* ignore errors, we will fall back to root in that case
938 * (/proc was not mounted etc.)
939 */
940 if (options->namespaces & CLONE_NEWUSER)
941 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
942
943 if (options->uid != (uid_t)-1)
944 new_uid = options->uid;
945 if (options->gid != (gid_t)-1)
946 new_gid = options->gid;
947
948 /* try to set the uid/gid combination */
949 if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) {
950 if (setgid(new_gid) || setgroups(0, NULL)) {
951 SYSERROR("switching to container gid");
952 shutdown(ipc_socket, SHUT_RDWR);
953 rexit(-1);
954 }
955 }
956 if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
957 SYSERROR("switching to container uid");
958 shutdown(ipc_socket, SHUT_RDWR);
959 rexit(-1);
960 }
961
962 /* tell initial process it may now put us into the cgroups */
963 status = 1;
964 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
965 if (ret != sizeof(status)) {
966 ERROR("error using IPC to notify initial process for initialization (1)");
967 shutdown(ipc_socket, SHUT_RDWR);
968 rexit(-1);
969 }
970
971 /* wait for the initial thread to signal us that it has done
972 * everything for us when it comes to cgroups etc.
973 */
974 expected = 2;
975 status = -1;
976 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
977 if (ret <= 0) {
978 ERROR("error using IPC to receive final notification from initial process (2)");
979 shutdown(ipc_socket, SHUT_RDWR);
980 rexit(-1);
981 }
982
983 shutdown(ipc_socket, SHUT_RDWR);
984 close(ipc_socket);
985
986 /* set new apparmor profile/selinux context */
987 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM)) {
988 int on_exec;
989
990 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
991 ret = lsm_process_label_set(init_ctx->lsm_label, 0, on_exec);
992 if (ret < 0) {
993 rexit(-1);
994 }
995 }
996 lxc_proc_put_context_info(init_ctx);
997
998 /* The following is done after the communication socket is
999 * shut down. That way, all errors that might (though
1000 * unlikely) occur up until this point will have their messages
1001 * printed to the original stderr (if logging is so configured)
1002 * and not the fd the user supplied, if any.
1003 */
1004
1005 /* fd handling for stdin, stdout and stderr;
1006 * ignore errors here, user may want to make sure
1007 * the fds are closed, for example */
1008 if (options->stdin_fd >= 0 && options->stdin_fd != 0)
1009 dup2(options->stdin_fd, 0);
1010 if (options->stdout_fd >= 0 && options->stdout_fd != 1)
1011 dup2(options->stdout_fd, 1);
1012 if (options->stderr_fd >= 0 && options->stderr_fd != 2)
1013 dup2(options->stderr_fd, 2);
1014
1015 /* close the old fds */
1016 if (options->stdin_fd > 2)
1017 close(options->stdin_fd);
1018 if (options->stdout_fd > 2)
1019 close(options->stdout_fd);
1020 if (options->stderr_fd > 2)
1021 close(options->stderr_fd);
1022
1023 /* try to remove CLOEXEC flag from stdin/stdout/stderr,
1024 * but also here, ignore errors */
1025 for (fd = 0; fd <= 2; fd++) {
1026 flags = fcntl(fd, F_GETFL);
1027 if (flags < 0)
1028 continue;
1029 if (flags & FD_CLOEXEC) {
1030 if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0) {
1031 SYSERROR("Unable to clear CLOEXEC from fd");
1032 }
1033 }
1034 }
1035
1036 /* we're done, so we can now do whatever the user intended us to do */
1037 rexit(payload->exec_function(payload->exec_payload));
1038 }
1039
1040 int lxc_attach_run_command(void* payload)
1041 {
1042 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1043
1044 execvp(cmd->program, cmd->argv);
1045 SYSERROR("failed to exec '%s'", cmd->program);
1046 return -1;
1047 }
1048
/*
 * Exec function for lxc_attach(): start the login shell of the current
 * user, falling back to /bin/sh when no shell can be determined or it
 * cannot be executed. `payload` is ignored.
 *
 * Only returns (with -1) when neither shell could be executed.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd *passwd;
	char *user_shell;
	char *getent_shell = NULL;

	/* ignore payload parameter */
	(void)payload;

	uid = getuid();
	passwd = getpwuid(uid);

	/* a failed lookup probably happens because of incompatible nss
	 * implementations in host and container (remember, this
	 * code is still using the host's glibc but our mount
	 * namespace is in the container)
	 * we may try to get the information by spawning a
	 * [getent passwd uid] process and parsing the result
	 */
	if (passwd) {
		user_shell = passwd->pw_shell;
	} else {
		getent_shell = lxc_attach_getpwshell(uid);
		user_shell = getent_shell;
	}

	/* the terminating null pointer of a variadic exec call has to be
	 * passed as (char *)NULL
	 */
	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* still here: the copy returned by lxc_attach_getpwshell() is heap
	 * memory we own and no longer need
	 */
	free(getent_shell);

	/* executed if either no passwd entry or execlp fails,
	 * we will fall back on /bin/sh as a default shell
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);
	SYSERROR("failed to exec shell");
	return -1;
}