]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
Merge pull request #1539 from brauner/2017-05-06/fix_abstract_unix_sockets
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
e0732705
CS
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e0732705
CS
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e0732705
CS
22 */
23
24#define _GNU_SOURCE
25#include <unistd.h>
26#include <stdio.h>
27#include <string.h>
28#include <stdlib.h>
2eef2bda 29#include <signal.h>
e0732705
CS
30#include <errno.h>
31#include <fcntl.h>
c476bdce 32#include <grp.h>
e0732705
CS
33#include <sys/param.h>
34#include <sys/prctl.h>
7a0b0b56 35#include <sys/mount.h>
5ec27989 36#include <sys/socket.h>
1ba0013f 37#include <sys/syscall.h>
905022f7 38#include <sys/wait.h>
910bb4fa 39#include <linux/unistd.h>
905022f7 40#include <pwd.h>
e0732705 41
955e2a02 42#ifndef HAVE_DECL_PR_CAPBSET_DROP
e0732705
CS
43#define PR_CAPBSET_DROP 24
44#endif
45
955e2a02
CB
46#ifndef HAVE_DECL_PR_SET_NO_NEW_PRIVS
47#define PR_SET_NO_NEW_PRIVS 38
48#endif
49
50#ifndef HAVE_DECL_PR_GET_NO_NEW_PRIVS
51#define PR_GET_NO_NEW_PRIVS 39
52#endif
53
e0732705
CS
54#include "namespace.h"
55#include "log.h"
81f466d0 56#include "af_unix.h"
e0732705
CS
57#include "attach.h"
58#include "caps.h"
e0732705 59#include "config.h"
6a44839f 60#include "utils.h"
9c4693b8
CS
61#include "commands.h"
62#include "cgroup.h"
025ed0f3 63#include "lxclock.h"
2c4ea790
SH
64#include "conf.h"
65#include "lxcseccomp.h"
66#include <lxc/lxccontainer.h>
fe4de9a6 67#include "lsm/lsm.h"
9b8e3c96 68#include "confile.h"
9c4693b8
CS
69
70#if HAVE_SYS_PERSONALITY_H
71#include <sys/personality.h>
72#endif
e0732705 73
a3da2f3b
SG
74#ifndef SOCK_CLOEXEC
75# define SOCK_CLOEXEC 02000000
76#endif
77
d6a3c917
SG
78#ifndef MS_REC
79#define MS_REC 16384
80#endif
81
82#ifndef MS_SLAVE
83#define MS_SLAVE (1<<19)
84#endif
85
e0732705
CS
86lxc_log_define(lxc_attach, lxc);
87
82b1f317 88/* /proc/pid-to-str/current\0 = (5 + 21 + 7 + 1) */
eab15c1e 89#define __LSMATTRLEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
81f466d0
CB
90static int lsm_openat(int procfd, pid_t pid, int on_exec)
91{
92 int ret = -1;
5c3fcae7 93 int labelfd = -1;
82b1f317 94 const char *name;
81f466d0 95 char path[__LSMATTRLEN];
5c3fcae7
SG
96
97 name = lsm_name();
98
99 if (strcmp(name, "nop") == 0)
81f466d0 100 return 0;
5c3fcae7
SG
101
102 if (strcmp(name, "none") == 0)
81f466d0 103 return 0;
5c3fcae7
SG
104
105 /* We don't support on-exec with AppArmor */
106 if (strcmp(name, "AppArmor") == 0)
107 on_exec = 0;
108
82b1f317 109 if (on_exec)
81f466d0 110 ret = snprintf(path, __LSMATTRLEN, "%d/attr/exec", pid);
82b1f317 111 else
81f466d0 112 ret = snprintf(path, __LSMATTRLEN, "%d/attr/current", pid);
82b1f317
CB
113 if (ret < 0 || ret >= __LSMATTRLEN)
114 return -1;
5c3fcae7 115
82b1f317 116 labelfd = openat(procfd, path, O_RDWR);
5c3fcae7 117 if (labelfd < 0) {
82b1f317 118 SYSERROR("Unable to open file descriptor to set LSM label.");
81f466d0 119 return -1;
5c3fcae7
SG
120 }
121
81f466d0
CB
122 return labelfd;
123}
124
/*
 * Write @lsm_label to the already opened attribute descriptor @lsm_labelfd in
 * the format the active LSM expects ("changeprofile <label>" for AppArmor, the
 * bare label for SELinux). The terminating NUL byte is written as well.
 *
 * Returns 0 on success and -1 on failure. When the active LSM is "nop" or
 * "none" the function returns 0 immediately and leaves @lsm_labelfd untouched;
 * on every other path @lsm_labelfd is closed before returning (unless it is
 * -1).
 */
static int lsm_set_label_at(int lsm_labelfd, int on_exec, char *lsm_label)
{
	const char *lsm = lsm_name();
	char *cmd = NULL;
	int rc = -1;

	if (!strcmp(lsm, "nop") || !strcmp(lsm, "none"))
		return 0;

	/* We don't support on-exec with AppArmor. */
	if (!strcmp(lsm, "AppArmor"))
		on_exec = 0;

	if (!strcmp(lsm, "AppArmor")) {
		int len;

		cmd = malloc(strlen(lsm_label) + strlen("changeprofile ") + 1);
		if (!cmd) {
			SYSERROR("Failed to write apparmor profile.");
			goto done;
		}

		len = sprintf(cmd, "changeprofile %s", lsm_label);
		if (len < 0) {
			SYSERROR("Failed to write apparmor profile.");
			goto done;
		}

		if (write(lsm_labelfd, cmd, len + 1) < 0) {
			SYSERROR("Unable to set LSM label: %s.", cmd);
			goto done;
		}

		INFO("Set LSM label to: %s.", cmd);
	} else if (!strcmp(lsm, "SELinux")) {
		if (write(lsm_labelfd, lsm_label, strlen(lsm_label) + 1) < 0) {
			SYSERROR("Unable to set LSM label: %s.", lsm_label);
			goto done;
		}

		INFO("Set LSM label to: %s.", lsm_label);
	} else {
		ERROR("Unable to restore label for unknown LSM: %s.", lsm);
		goto done;
	}

	rc = 0;

done:
	free(cmd);

	if (lsm_labelfd != -1)
		close(lsm_labelfd);

	return rc;
}
183
8ce83369 184/* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
eab15c1e 185#define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
74a3920a 186static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
e0732705 187{
e0732705 188 FILE *proc_file;
8ce83369
CB
189 char proc_fn[__PROC_STATUS_LEN];
190 bool found;
191 int ret;
460a1cf0 192 char *line = NULL;
e0732705 193 size_t line_bufsz = 0;
8ce83369 194 struct lxc_proc_context_info *info = NULL;
e0732705 195
8ce83369
CB
196 /* Read capabilities. */
197 ret = snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", pid);
198 if (ret < 0 || ret >= __PROC_STATUS_LEN)
199 goto on_error;
e0732705
CS
200
201 proc_file = fopen(proc_fn, "r");
202 if (!proc_file) {
8ce83369
CB
203 SYSERROR("Could not open %s.", proc_fn);
204 goto on_error;
e0732705
CS
205 }
206
8ce83369
CB
207 info = calloc(1, sizeof(*info));
208 if (!info) {
209 SYSERROR("Could not allocate memory.");
210 return NULL;
211 }
212
213 found = false;
e0732705
CS
214 while (getline(&line, &line_bufsz, proc_file) != -1) {
215 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
8ce83369
CB
216 if (ret != EOF && ret == 1) {
217 found = true;
e0732705
CS
218 break;
219 }
220 }
221
f10fad2f 222 free(line);
e0732705
CS
223 fclose(proc_file);
224
225 if (!found) {
8ce83369 226 SYSERROR("Could not read capability bounding set from %s.", proc_fn);
e0732705 227 errno = ENOENT;
8ce83369 228 goto on_error;
e0732705
CS
229 }
230
fe4de9a6 231 info->lsm_label = lsm_process_label_get(pid);
e0732705 232
e0732705
CS
233 return info;
234
8ce83369 235on_error:
460a1cf0 236 free(info);
e0732705
CS
237 return NULL;
238}
239
fe4de9a6
DE
240static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
241{
f10fad2f 242 free(ctx->lsm_label);
2c4ea790
SH
243 if (ctx->container)
244 lxc_container_put(ctx->container);
fe4de9a6
DE
245 free(ctx);
246}
247
74a3920a 248static int lxc_attach_to_ns(pid_t pid, int which)
99d50954 249{
26818618 250 int fd[LXC_NS_MAX];
fc763ab7
CS
251 int i, j, saved_errno;
252
99d50954 253
a052913d 254 if (access("/proc/self/ns", X_OK)) {
8ce83369 255 ERROR("Does this kernel version support namespaces?");
99d50954
CS
256 return -1;
257 }
258
26818618 259 for (i = 0; i < LXC_NS_MAX; i++) {
8ce83369 260 /* Ignore if we are not supposed to attach to that namespace. */
26818618 261 if (which != -1 && !(which & ns_info[i].clone_flag)) {
fc763ab7
CS
262 fd[i] = -1;
263 continue;
264 }
265
26818618 266 fd[i] = lxc_preserve_ns(pid, ns_info[i].proc_name);
99d50954 267 if (fd[i] < 0) {
fc763ab7
CS
268 saved_errno = errno;
269
8ce83369
CB
270 /* Close all already opened file descriptors before we
271 * return an error, so we don't leak them.
fc763ab7
CS
272 */
273 for (j = 0; j < i; j++)
274 close(fd[j]);
275
276 errno = saved_errno;
8ce83369 277 SYSERROR("Failed to open namespace: \"%s\".", ns_info[i].proc_name);
99d50954
CS
278 return -1;
279 }
280 }
281
26818618
CB
282 for (i = 0; i < LXC_NS_MAX; i++) {
283 if (fd[i] < 0)
284 continue;
285
286 if (setns(fd[i], 0) < 0) {
fc763ab7
CS
287 saved_errno = errno;
288
26818618 289 for (j = i; j < LXC_NS_MAX; j++)
fc763ab7
CS
290 close(fd[j]);
291
292 errno = saved_errno;
26818618 293 SYSERROR("Failed to attach to namespace \"%s\".", ns_info[i].proc_name);
99d50954
CS
294 return -1;
295 }
296
26818618
CB
297 DEBUG("Attached to namespace \"%s\".", ns_info[i].proc_name);
298
99d50954
CS
299 close(fd[i]);
300 }
301
302 return 0;
303}
304
/*
 * Give the calling process a private mount namespace and mount fresh
 * instances of /proc and (if it was mounted) /sys in it.
 *
 * Returns 0 on success and -1 on failure. Failing to make / rslave is logged
 * but not treated as fatal.
 */
static int lxc_attach_remount_sys_proc(void)
{
	int ret;

	if (unshare(CLONE_NEWNS) < 0) {
		SYSERROR("Failed to unshare mount namespace.");
		return -1;
	}

	if (detect_shared_rootfs() &&
	    mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
		SYSERROR("Failed to make / rslave.");
		ERROR("Continuing...");
	}

	/* Assume /proc is always mounted, so remount it. */
	if (umount2("/proc", MNT_DETACH) < 0) {
		SYSERROR("Failed to unmount /proc.");
		return -1;
	}

	if (mount("none", "/proc", "proc", 0, NULL) < 0) {
		SYSERROR("Failed to remount /proc.");
		return -1;
	}

	/* /sys is optional: EINVAL from umount2() means it is not a mount
	 * point, in which case there is nothing to remount.
	 */
	ret = umount2("/sys", MNT_DETACH);
	if (ret < 0) {
		if (errno == EINVAL)
			return 0;

		SYSERROR("Failed to unmount /sys.");
		return -1;
	}

	if (ret == 0 && mount("none", "/sys", "sysfs", 0, NULL) < 0) {
		SYSERROR("Failed to remount /sys.");
		return -1;
	}

	return 0;
}
353
74a3920a 354static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
e0732705
CS
355{
356 int last_cap = lxc_caps_last_cap();
357 int cap;
358
359 for (cap = 0; cap <= last_cap; cap++) {
360 if (ctx->capability_mask & (1LL << cap))
361 continue;
362
363 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
8ce83369 364 SYSERROR("Failed to remove capability id %d.", cap);
e0732705
CS
365 return -1;
366 }
367 }
368
369 return 0;
370}
905022f7 371
74a3920a 372static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep)
b3a39ba6 373{
799f96fd 374 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 375 char **extra_keep_store = NULL;
3d5e9f48
CS
376 int path_kept = 0;
377
378 if (extra_keep) {
379 size_t count, i;
380
381 for (count = 0; extra_keep[count]; count++);
382
383 extra_keep_store = calloc(count, sizeof(char *));
384 if (!extra_keep_store) {
8ce83369
CB
385 SYSERROR("Failed to allocate memory for storing current "
386 "environment variable values that will be kept.");
3d5e9f48
CS
387 return -1;
388 }
389 for (i = 0; i < count; i++) {
390 char *v = getenv(extra_keep[i]);
391 if (v) {
392 extra_keep_store[i] = strdup(v);
393 if (!extra_keep_store[i]) {
8ce83369
CB
394 SYSERROR("Failed to allocate memory for storing current "
395 "environment variable values that will be kept.");
3d5e9f48
CS
396 while (i > 0)
397 free(extra_keep_store[--i]);
398 free(extra_keep_store);
399 return -1;
400 }
401 if (strcmp(extra_keep[i], "PATH") == 0)
402 path_kept = 1;
403 }
8ce83369
CB
404 /* Calloc sets entire array to zero, so we don't
405 * need an else.
406 */
3d5e9f48
CS
407 }
408 }
409
799f96fd 410 if (clearenv()) {
a9cab7e3 411 char **p;
8ce83369 412 SYSERROR("Failed to clear environment.");
a9cab7e3
CS
413 if (extra_keep_store) {
414 for (p = extra_keep_store; *p; p++)
415 free(*p);
416 free(extra_keep_store);
417 }
3d5e9f48
CS
418 return -1;
419 }
420
421 if (extra_keep_store) {
422 size_t i;
423 for (i = 0; extra_keep[i]; i++) {
acd4922e
SG
424 if (extra_keep_store[i]) {
425 if (setenv(extra_keep[i], extra_keep_store[i], 1) < 0)
8ce83369 426 SYSERROR("Unable to set environment variable.");
acd4922e 427 }
3d5e9f48
CS
428 free(extra_keep_store[i]);
429 }
430 free(extra_keep_store);
431 }
432
8ce83369
CB
433 /* Always set a default path; shells and execlp tend to be fine
434 * without it, but there is a disturbing number of C programs
435 * out there that just assume that getenv("PATH") is never NULL
436 * and then die a painful segfault death.
437 */
cfa70b88 438 if (!path_kept)
511a6936 439 setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
b3a39ba6
DW
440 }
441
442 if (putenv("container=lxc")) {
8ce83369 443 SYSERROR("Failed to set environment variable.");
b3a39ba6
DW
444 return -1;
445 }
446
8ce83369 447 /* Set extra environment variables. */
3d5e9f48
CS
448 if (extra_env) {
449 for (; *extra_env; extra_env++) {
8ce83369
CB
450 /* Duplicate the string, just to be on the safe side,
451 * because putenv does not do it for us.
452 */
3d5e9f48 453 char *p = strdup(*extra_env);
8ce83369
CB
454 /* We just assume the user knows what they are doing, so
455 * we don't do any checks.
456 */
3d5e9f48 457 if (!p) {
8ce83369
CB
458 SYSERROR("Failed to allocate memory for additional environment "
459 "variables.");
3d5e9f48
CS
460 return -1;
461 }
462 putenv(p);
463 }
464 }
465
b3a39ba6
DW
466 return 0;
467}
468
/*
 * Look up the login shell of @uid by running "getent passwd <uid>" in a child
 * process and parsing the passwd entry it prints.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if the
 * shell could not be determined (getent missing or failing, no entry for
 * @uid, malformed entry, empty shell field, out of memory).
 */
static char *lxc_attach_getpwshell(uid_t uid)
{
	pid_t pid;
	int pipes[2];
	int ret;
	int fd;
	char *result = NULL;

	/* We need to fork off a process that runs the getent program, and we
	 * need to capture its output, so we use a pipe for that purpose.
	 */
	ret = pipe(pipes);
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(pipes[0]);
		close(pipes[1]);
		return NULL;
	}

	if (pid) {
		FILE *pipe_f;
		char *line = NULL;
		size_t line_bufsz = 0;
		int found = 0;
		int status;

		close(pipes[1]);

		pipe_f = fdopen(pipes[0], "r");
		if (!pipe_f) {
			/* Without a stream nothing can be parsed. Close the
			 * read end so the child cannot block on a full pipe,
			 * then reap it.
			 */
			close(pipes[0]);
			goto reap_child;
		}

		while (getline(&line, &line_bufsz, pipe_f) != -1) {
			/* name, passwd, uid, gid, gecos, dir, shell */
			char *fields[7];
			char *cursor;
			char *endptr = NULL;
			long value;
			size_t len;
			int nfields;

			/* If we already found something, just continue to read
			 * until the pipe doesn't deliver any more data, but
			 * don't modify the existing result.
			 */
			if (found)
				continue;

			/* Trim line on the right hand side. */
			for (len = strlen(line);
			     len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r');
			     len--)
				line[len - 1] = '\0';

			/* Split on ':' with strsep() so that empty fields (an
			 * empty GECOS field is common) keep their position;
			 * strtok_r() would silently merge adjacent separators
			 * and shift every following field.
			 */
			cursor = line;
			for (nfields = 0; nfields < 7 && cursor; nfields++)
				fields[nfields] = strsep(&cursor, ":");

			/* A passwd entry has exactly seven fields. */
			if (nfields != 7 || cursor)
				continue;

			/* User name and user id must be present. */
			if (!*fields[0] || !*fields[2])
				continue;

			value = strtol(fields[2], &endptr, 10);
			if (!endptr || *endptr || value == LONG_MIN || value == LONG_MAX)
				continue;

			/* Sanity check: user id matches. */
			if ((uid_t) value != uid)
				continue;

			/* An empty shell field is treated as "no shell". */
			if (!*fields[6])
				continue;

			result = strdup(fields[6]);
			if (result)
				found = 1;
		}

		free(line);
		fclose(pipe_f);

	reap_child:
		while (waitpid(pid, &status, 0) < 0) {
			if (errno == EINTR)
				continue;
			free(result);
			return NULL;
		}

		/* Some sanity checks. If anything even hinted at going wrong,
		 * we can't be sure we have a valid result, so we assume we
		 * don't.
		 */
		if (!found || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
			free(result);
			return NULL;
		}

		return result;
	} else {
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		close(pipes[0]);

		/* We want to capture stdout. Use _exit() in this forked child
		 * so that stdio buffers inherited from the parent are not
		 * flushed a second time.
		 */
		if (dup2(pipes[1], 1) < 0)
			_exit(-1);
		close(pipes[1]);

		/* Get rid of stdin/stderr, so we try to associate it with
		 * /dev/null.
		 */
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			close(0);
			close(2);
		} else {
			dup2(fd, 0);
			dup2(fd, 2);
			close(fd);
		}

		/* Finish argument list. */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
		if (ret <= 0 || (size_t) ret >= sizeof(uid_buf))
			_exit(-1);

		/* Try to run getent program. */
		(void) execvp("getent", arguments);
		_exit(-1);
	}
}
cb3e61fa 616
74a3920a 617static void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid)
cb3e61fa
CS
618{
619 FILE *proc_file;
8ce83369
CB
620 char proc_fn[__PROC_STATUS_LEN];
621 int ret;
cb3e61fa
CS
622 char *line = NULL;
623 size_t line_bufsz = 0;
cb3e61fa
CS
624 long value = -1;
625 uid_t uid = (uid_t)-1;
626 gid_t gid = (gid_t)-1;
627
8ce83369
CB
628 /* Read capabilities. */
629 snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", 1);
cb3e61fa
CS
630
631 proc_file = fopen(proc_fn, "r");
632 if (!proc_file)
633 return;
634
635 while (getline(&line, &line_bufsz, proc_file) != -1) {
8ce83369
CB
636 /* Format is: real, effective, saved set user, fs we only care
637 * about real uid.
cb3e61fa
CS
638 */
639 ret = sscanf(line, "Uid: %ld", &value);
8ce83369 640 if (ret != EOF && ret == 1) {
cb3e61fa
CS
641 uid = (uid_t) value;
642 } else {
643 ret = sscanf(line, "Gid: %ld", &value);
8ce83369 644 if (ret != EOF && ret == 1)
cb3e61fa
CS
645 gid = (gid_t) value;
646 }
647 if (uid != (uid_t)-1 && gid != (gid_t)-1)
648 break;
649 }
650
651 fclose(proc_file);
652 free(line);
653
8ce83369 654 /* Only override arguments if we found something. */
cb3e61fa
CS
655 if (uid != (uid_t)-1)
656 *init_uid = uid;
657 if (gid != (gid_t)-1)
658 *init_gid = gid;
659
660 /* TODO: we should also parse supplementary groups and use
8ce83369
CB
661 * setgroups() to set them.
662 */
cb3e61fa 663}
9c4693b8
CS
664
665struct attach_clone_payload {
666 int ipc_socket;
667 lxc_attach_options_t* options;
668 struct lxc_proc_context_info* init_ctx;
669 lxc_attach_exec_t exec_function;
670 void* exec_payload;
671};
672
673static int attach_child_main(void* data);
674
8ce83369 675/* Help the optimizer along if it doesn't know that exit always exits. */
5dcc1ca6 676#define rexit(c) do { int __c = (c); _exit(__c); return __c; } while(0)
9c4693b8 677
8ce83369 678/* Define default options if no options are supplied by the user. */
9c4693b8
CS
679static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
680
bd4307f0 681static bool fetch_seccomp(struct lxc_container *c,
ff07d7bb 682 lxc_attach_options_t *options)
2c4ea790 683{
bd7b4e28 684 char *path;
2eef2bda 685
bd4307f0
CB
686 if (!(options->namespaces & CLONE_NEWNS) || !(options->attach_flags & LXC_ATTACH_LSM)) {
687 free(c->lxc_conf->seccomp);
688 c->lxc_conf->seccomp = NULL;
2c4ea790 689 return true;
bd4307f0 690 }
bd7b4e28 691
2e812c16 692 /* Remove current setting. */
bd7b4e28 693 if (!c->set_config_item(c, "lxc.seccomp", "")) {
2c4ea790 694 return false;
bd7b4e28
SG
695 }
696
8ce83369 697 /* Fetch the current profile path over the cmd interface. */
bd7b4e28
SG
698 path = c->get_running_config_item(c, "lxc.seccomp");
699 if (!path) {
bd4307f0 700 INFO("Failed to get running config item for lxc.seccomp.");
bd7b4e28
SG
701 return true;
702 }
703
8ce83369 704 /* Copy the value into the new lxc_conf. */
bd7b4e28
SG
705 if (!c->set_config_item(c, "lxc.seccomp", path)) {
706 free(path);
707 return false;
708 }
709 free(path);
710
8ce83369 711 /* Attempt to parse the resulting config. */
2c4ea790 712 if (lxc_read_seccomp_config(c->lxc_conf) < 0) {
8ce83369 713 ERROR("Error reading seccomp policy.");
2c4ea790
SH
714 return false;
715 }
716
2e812c16
CB
717 INFO("Retrieved seccomp policy.");
718 return true;
719}
720
bd4307f0 721static bool no_new_privs(struct lxc_container *c,
2e812c16
CB
722 lxc_attach_options_t *options)
723{
2e812c16
CB
724 char *val;
725
2e812c16
CB
726 /* Remove current setting. */
727 if (!c->set_config_item(c, "lxc.no_new_privs", "")) {
728 return false;
729 }
730
731 /* Retrieve currently active setting. */
732 val = c->get_running_config_item(c, "lxc.no_new_privs");
733 if (!val) {
734 INFO("Failed to get running config item for lxc.no_new_privs.");
735 return false;
736 }
737
738 /* Set currently active setting. */
739 if (!c->set_config_item(c, "lxc.no_new_privs", val)) {
740 free(val);
741 return false;
742 }
743 free(val);
744
2c4ea790
SH
745 return true;
746}
747
9b8e3c96
SH
/*
 * Ask the running container @name (under @lxcpath) for its "lxc.arch" setting
 * and convert it with lxc_config_parse_arch().
 *
 * Returns the parsed value, or -1 if the setting could not be retrieved.
 */
static signed long get_personality(const char *name, const char *lxcpath)
{
	signed long persona = -1;
	char *arch = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);

	if (arch) {
		persona = lxc_config_parse_arch(arch);
		free(arch);
	}

	return persona;
}
759
9c4693b8
CS
760int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
761{
762 int ret, status;
f4364484 763 pid_t init_pid, pid, attached_pid, expected;
9c4693b8
CS
764 struct lxc_proc_context_info *init_ctx;
765 char* cwd;
766 char* new_cwd;
767 int ipc_sockets[2];
9b8e3c96 768 signed long personality;
9c4693b8
CS
769
770 if (!options)
771 options = &attach_static_default_options;
772
773 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
774 if (init_pid < 0) {
8ce83369 775 ERROR("Failed to get init pid.");
9c4693b8
CS
776 return -1;
777 }
778
779 init_ctx = lxc_proc_get_context_info(init_pid);
780 if (!init_ctx) {
8ce83369
CB
781 ERROR("Failed to get context of init process: %ld.",
782 (long)init_pid);
9c4693b8
CS
783 return -1;
784 }
785
9b8e3c96
SH
786 personality = get_personality(name, lxcpath);
787 if (init_ctx->personality < 0) {
8ce83369 788 ERROR("Failed to get personality of the container.");
9b8e3c96
SH
789 lxc_proc_put_context_info(init_ctx);
790 return -1;
791 }
792 init_ctx->personality = personality;
793
ff07d7bb
CB
794 init_ctx->container = lxc_container_new(name, lxcpath);
795 if (!init_ctx->container)
796 return -1;
797
bd4307f0 798 if (!fetch_seccomp(init_ctx->container, options))
8ce83369 799 WARN("Failed to get seccomp policy.");
2c4ea790 800
bd4307f0 801 if (!no_new_privs(init_ctx->container, options))
2e812c16
CB
802 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set.");
803
9c4693b8
CS
804 cwd = getcwd(NULL, 0);
805
8ce83369
CB
806 /* Determine which namespaces the container was created with
807 * by asking lxc-start, if necessary.
9c4693b8
CS
808 */
809 if (options->namespaces == -1) {
810 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
811 /* call failed */
812 if (options->namespaces == -1) {
8ce83369
CB
813 ERROR("Failed to automatically determine the "
814 "namespaces which the container uses.");
9c4693b8 815 free(cwd);
fe4de9a6 816 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
817 return -1;
818 }
819 }
820
8ce83369
CB
821 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
822 * to make sure we don't irritate other threads that want to fork+exec
823 * away
9c4693b8
CS
824 *
825 * IMPORTANT: if the initial process is multithreaded and another call
826 * just fork()s away without exec'ing directly after, the socket fd will
827 * exist in the forked process from the other thread and any close() in
8ce83369
CB
828 * our own child process will not really cause the socket to close
829 * properly, potentially causing the parent to hang.
9c4693b8
CS
830 *
831 * For this reason, while IPC is still active, we have to use shutdown()
8ce83369
CB
832 * if the child exits prematurely in order to signal that the socket is
833 * closed and cannot assume that the child exiting will automatically do
834 * that.
9c4693b8
CS
835 *
836 * IPC mechanism: (X is receiver)
837 * initial process intermediate attached
838 * X <--- send pid of
839 * attached proc,
840 * then exit
841 * send 0 ------------------------------------> X
842 * [do initialization]
843 * X <------------------------------------ send 1
844 * [add to cgroup, ...]
845 * send 2 ------------------------------------> X
81f466d0
CB
846 * [set LXC_ATTACH_NO_NEW_PRIVS]
847 * X <------------------------------------ send 3
848 * [open LSM label fd]
849 * send 4 ------------------------------------> X
850 * [set LSM label]
9c4693b8
CS
851 * close socket close socket
852 * run program
853 */
854 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
855 if (ret < 0) {
8ce83369 856 SYSERROR("Could not set up required IPC mechanism for attaching.");
9c4693b8 857 free(cwd);
fe4de9a6 858 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
859 return -1;
860 }
861
8ce83369
CB
862 /* Create intermediate subprocess, three reasons:
863 * 1. Runs all pthread_atfork handlers and the child will no
864 * longer be threaded (we can't properly setns() in a threaded
865 * process).
866 * 2. We can't setns() in the child itself, since we want to make
867 * sure we are properly attached to the pidns.
868 * 3. Also, the initial thread has to put the attached process
869 * into the cgroup, which we can only do if we didn't already
870 * setns() (otherwise, user namespaces will hate us).
9c4693b8
CS
871 */
872 pid = fork();
873
874 if (pid < 0) {
8ce83369 875 SYSERROR("Failed to create first subprocess.");
9c4693b8 876 free(cwd);
fe4de9a6 877 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
878 return -1;
879 }
880
881 if (pid) {
81f466d0 882 int procfd = -1;
9c4693b8 883 pid_t to_cleanup_pid = pid;
9c4693b8 884
8ce83369
CB
885 /* Initial thread, we close the socket that is for the
886 * subprocesses.
9c4693b8
CS
887 */
888 close(ipc_sockets[1]);
889 free(cwd);
890
8ce83369 891 /* Attach to cgroup, if requested. */
f4364484 892 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
4fb3cba5 893 if (!cgroup_attach(name, lxcpath, pid))
8ce83369 894 goto on_error;
f4364484
SG
895 }
896
c6d09e15
WB
897 /* Setup resource limits */
898 if (!lxc_list_empty(&init_ctx->container->lxc_conf->limits) && setup_resource_limits(&init_ctx->container->lxc_conf->limits, pid)) {
899 goto on_error;
900 }
901
81f466d0
CB
902 /* Open /proc before setns() to the containers namespace so we
903 * don't rely on any information from inside the container.
904 */
905 procfd = open("/proc", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
906 if (procfd < 0) {
907 SYSERROR("Unable to open /proc.");
8ce83369 908 goto on_error;
81f466d0
CB
909 }
910
8ce83369 911 /* Let the child process know to go ahead. */
f4364484
SG
912 status = 0;
913 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
914 if (ret <= 0) {
8ce83369
CB
915 ERROR("Intended to send sequence number 0: %s.",
916 strerror(errno));
917 goto on_error;
f4364484
SG
918 }
919
8ce83369 920 /* Get pid of attached process from intermediate process. */
9c4693b8
CS
921 ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
922 if (ret <= 0) {
923 if (ret != 0)
8ce83369
CB
924 ERROR("Expected to receive pid: %s.", strerror(errno));
925 goto on_error;
9c4693b8
CS
926 }
927
8ce83369 928 /* Ignore SIGINT (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
62183f1a
SH
929 if (options->stdin_fd == 0) {
930 signal(SIGINT, SIG_IGN);
931 signal(SIGQUIT, SIG_IGN);
932 }
2eef2bda 933
8ce83369 934 /* Reap intermediate process. */
9c4693b8
CS
935 ret = wait_for_pid(pid);
936 if (ret < 0)
8ce83369 937 goto on_error;
9c4693b8 938
8ce83369 939 /* We will always have to reap the attached process now. */
9c4693b8
CS
940 to_cleanup_pid = attached_pid;
941
8ce83369 942 /* Tell attached process it may start initializing. */
9c4693b8
CS
943 status = 0;
944 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
945 if (ret <= 0) {
8ce83369
CB
946 ERROR("Intended to send sequence number 0: %s.", strerror(errno));
947 goto on_error;
9c4693b8
CS
948 }
949
8ce83369 950 /* Wait for the attached process to finish initializing. */
9c4693b8
CS
951 expected = 1;
952 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
953 if (ret <= 0) {
954 if (ret != 0)
8ce83369
CB
955 ERROR("Expected to receive sequence number 1: %s.", strerror(errno));
956 goto on_error;
9c4693b8
CS
957 }
958
8ce83369 959 /* Tell attached process we're done. */
9c4693b8
CS
960 status = 2;
961 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
962 if (ret <= 0) {
8ce83369
CB
963 ERROR("Intended to send sequence number 2: %s.", strerror(errno));
964 goto on_error;
9c4693b8
CS
965 }
966
81f466d0
CB
967 /* Wait for the (grand)child to tell us that it's ready to set
968 * up its LSM labels.
969 */
970 expected = 3;
971 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
972 if (ret <= 0) {
8ce83369 973 ERROR("Expected to receive sequence number 3: %s.",
81f466d0 974 strerror(errno));
8ce83369 975 goto on_error;
81f466d0
CB
976 }
977
978 /* Open LSM fd and send it to child. */
979 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM) && init_ctx->lsm_label) {
fad6ef95 980 int on_exec, saved_errno;
1d8e5ca2 981 int labelfd = -1;
81f466d0
CB
982 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
983 /* Open fd for the LSM security module. */
984 labelfd = lsm_openat(procfd, attached_pid, on_exec);
985 if (labelfd < 0)
8ce83369 986 goto on_error;
81f466d0
CB
987
988 /* Send child fd of the LSM security module to write to. */
989 ret = lxc_abstract_unix_send_fd(ipc_sockets[0], labelfd, NULL, 0);
fad6ef95 990 saved_errno = errno;
1d8e5ca2 991 close(labelfd);
81f466d0 992 if (ret <= 0) {
fad6ef95 993 ERROR("Intended to send file descriptor %d: %s.", labelfd, strerror(saved_errno));
8ce83369 994 goto on_error;
81f466d0
CB
995 }
996 }
997
1d8e5ca2
CB
998 if (procfd >= 0)
999 close(procfd);
8ce83369 1000 /* Now shut down communication with child, we're done. */
9c4693b8
CS
1001 shutdown(ipc_sockets[0], SHUT_RDWR);
1002 close(ipc_sockets[0]);
fe4de9a6 1003 lxc_proc_put_context_info(init_ctx);
9c4693b8 1004
8ce83369
CB
1005 /* We're done, the child process should now execute whatever it
1006 * is that the user requested. The parent can now track it with
1007 * waitpid() or similar.
9c4693b8
CS
1008 */
1009
1010 *attached_process = attached_pid;
1011 return 0;
1012
8ce83369
CB
1013 on_error:
1014 /* First shut down the socket, then wait for the pid, otherwise
1015 * the pid we're waiting for may never exit.
9c4693b8 1016 */
81f466d0
CB
1017 if (procfd >= 0)
1018 close(procfd);
9c4693b8
CS
1019 shutdown(ipc_sockets[0], SHUT_RDWR);
1020 close(ipc_sockets[0]);
1021 if (to_cleanup_pid)
1022 (void) wait_for_pid(to_cleanup_pid);
fe4de9a6 1023 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1024 return -1;
1025 }
1026
8ce83369
CB
1027 /* First subprocess begins here, we close the socket that is for the
1028 * initial thread.
9c4693b8
CS
1029 */
1030 close(ipc_sockets[0]);
1031
8ce83369 1032 /* Wait for the parent to have setup cgroups. */
f4364484
SG
1033 expected = 0;
1034 status = -1;
1035 ret = lxc_read_nointr_expect(ipc_sockets[1], &status, sizeof(status), &expected);
1036 if (ret <= 0) {
8ce83369 1037 ERROR("Expected to receive sequence number 0: %s.", strerror(errno));
f4364484
SG
1038 shutdown(ipc_sockets[1], SHUT_RDWR);
1039 rexit(-1);
1040 }
1041
dac862c0 1042 if ((options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) && cgns_supported())
1043 options->namespaces |= CLONE_NEWCGROUP;
fe3c80af 1044
8ce83369
CB
1045 /* Attach now, create another subprocess later, since pid namespaces
1046 * only really affect the children of the current process.
9c4693b8
CS
1047 */
1048 ret = lxc_attach_to_ns(init_pid, options->namespaces);
1049 if (ret < 0) {
8ce83369 1050 ERROR("Failed to enter namespaces.");
9c4693b8
CS
1051 shutdown(ipc_sockets[1], SHUT_RDWR);
1052 rexit(-1);
1053 }
1054
8ce83369 1055 /* Attach succeeded, try to cwd. */
9c4693b8
CS
1056 if (options->initial_cwd)
1057 new_cwd = options->initial_cwd;
1058 else
1059 new_cwd = cwd;
1060 ret = chdir(new_cwd);
1061 if (ret < 0)
8ce83369 1062 WARN("Could not change directory to \"%s\".", new_cwd);
9c4693b8
CS
1063 free(cwd);
1064
8ce83369 1065 /* Now create the real child process. */
9c4693b8
CS
1066 {
1067 struct attach_clone_payload payload = {
1068 .ipc_socket = ipc_sockets[1],
1069 .options = options,
1070 .init_ctx = init_ctx,
1071 .exec_function = exec_function,
5c3fcae7 1072 .exec_payload = exec_payload,
9c4693b8 1073 };
8ce83369
CB
1074 /* We use clone_parent here to make this subprocess a direct
1075 * child of the initial process. Then this intermediate process
1076 * can exit and the parent can directly track the attached
1077 * process.
9c4693b8
CS
1078 */
1079 pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
1080 }
1081
8ce83369 1082 /* Shouldn't happen, clone() should always return positive pid. */
9c4693b8 1083 if (pid <= 0) {
8ce83369 1084 SYSERROR("Failed to create subprocess.");
9c4693b8
CS
1085 shutdown(ipc_sockets[1], SHUT_RDWR);
1086 rexit(-1);
1087 }
1088
8ce83369 1089 /* Tell grandparent the pid of the newly created child. */
9c4693b8
CS
1090 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
1091 if (ret != sizeof(pid)) {
8ce83369
CB
1092 /* If this really happens here, this is very unfortunate, since
1093 * the parent will not know the pid of the attached process and
1094 * will not be able to wait for it (and we won't either due to
1095 * CLONE_PARENT) so the parent won't be able to reap it and the
1096 * attached process will remain a zombie.
9c4693b8 1097 */
8ce83369 1098 ERROR("Intended to send pid %d: %s.", pid, strerror(errno));
9c4693b8
CS
1099 shutdown(ipc_sockets[1], SHUT_RDWR);
1100 rexit(-1);
1101 }
1102
8ce83369 1103 /* The rest is in the hands of the initial and the attached process. */
9c4693b8
CS
1104 rexit(0);
1105}
1106
74a3920a 1107static int attach_child_main(void* data)
9c4693b8
CS
1108{
1109 struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
1110 int ipc_socket = payload->ipc_socket;
1111 lxc_attach_options_t* options = payload->options;
1112 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
1a2e58cf 1113#if HAVE_SYS_PERSONALITY_H
9c4693b8 1114 long new_personality;
1a2e58cf 1115#endif
9c4693b8
CS
1116 int ret;
1117 int status;
1118 int expected;
1119 long flags;
1120 int fd;
81f466d0 1121 int lsm_labelfd;
9c4693b8
CS
1122 uid_t new_uid;
1123 gid_t new_gid;
1124
8ce83369
CB
1125 /* Wait for the initial thread to signal us that it's ready for us to
1126 * start initializing.
9c4693b8
CS
1127 */
1128 expected = 0;
1129 status = -1;
1130 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
1131 if (ret <= 0) {
8ce83369 1132 ERROR("Expected to receive sequence number 0: %s.", strerror(errno));
9c4693b8
CS
1133 shutdown(ipc_socket, SHUT_RDWR);
1134 rexit(-1);
1135 }
1136
8ce83369
CB
1137 /* A description of the purpose of this functionality is provided in the
1138 * lxc-attach(1) manual page. We have to remount here and not in the
1139 * parent process, otherwise /proc may not properly reflect the new pid
1140 * namespace.
9c4693b8
CS
1141 */
1142 if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
1143 ret = lxc_attach_remount_sys_proc();
1144 if (ret < 0) {
1145 shutdown(ipc_socket, SHUT_RDWR);
1146 rexit(-1);
1147 }
1148 }
1149
8ce83369 1150 /* Now perform additional attachments. */
9c4693b8
CS
1151#if HAVE_SYS_PERSONALITY_H
1152 if (options->personality < 0)
1153 new_personality = init_ctx->personality;
1154 else
1155 new_personality = options->personality;
1156
1157 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
1158 ret = personality(new_personality);
1159 if (ret < 0) {
8ce83369 1160 SYSERROR("Could not ensure correct architecture.");
9c4693b8
CS
1161 shutdown(ipc_socket, SHUT_RDWR);
1162 rexit(-1);
1163 }
1164 }
1165#endif
1166
1167 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
1168 ret = lxc_attach_drop_privs(init_ctx);
1169 if (ret < 0) {
8ce83369 1170 ERROR("Could not drop privileges.");
9c4693b8
CS
1171 shutdown(ipc_socket, SHUT_RDWR);
1172 rexit(-1);
1173 }
1174 }
1175
8ce83369
CB
1176 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
1177 * if you want this to be a no-op).
1178 */
9c4693b8
CS
1179 ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
1180 if (ret < 0) {
8ce83369 1181 ERROR("Could not set initial environment for attached process.");
9c4693b8
CS
1182 shutdown(ipc_socket, SHUT_RDWR);
1183 rexit(-1);
1184 }
1185
8ce83369 1186 /* Set {u,g}id. */
9c4693b8
CS
1187 new_uid = 0;
1188 new_gid = 0;
8ce83369
CB
1189 /* Ignore errors, we will fall back to root in that case (/proc was not
1190 * mounted etc.).
9c4693b8
CS
1191 */
1192 if (options->namespaces & CLONE_NEWUSER)
1193 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
1194
1195 if (options->uid != (uid_t)-1)
1196 new_uid = options->uid;
1197 if (options->gid != (gid_t)-1)
1198 new_gid = options->gid;
1199
8ce83369 1200 /* Setup the controlling tty. */
d3b63011 1201 if (options->stdin_fd && isatty(options->stdin_fd)) {
82e28fe0 1202 if (setsid() < 0) {
8ce83369 1203 SYSERROR("Unable to setsid.");
82e28fe0
SG
1204 shutdown(ipc_socket, SHUT_RDWR);
1205 rexit(-1);
1206 }
1207
1208 if (ioctl(options->stdin_fd, TIOCSCTTY, (char *)NULL) < 0) {
8ce83369 1209 SYSERROR("Unable to set TIOCSTTY.");
82e28fe0
SG
1210 shutdown(ipc_socket, SHUT_RDWR);
1211 rexit(-1);
1212 }
1213 }
1214
8ce83369 1215 /* Try to set the {u,g}id combination. */
c476bdce
SH
1216 if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) {
1217 if (setgid(new_gid) || setgroups(0, NULL)) {
8ce83369 1218 SYSERROR("Switching to container gid.");
c476bdce
SH
1219 shutdown(ipc_socket, SHUT_RDWR);
1220 rexit(-1);
1221 }
9c4693b8
CS
1222 }
1223 if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
8ce83369 1224 SYSERROR("Switching to container uid.");
9c4693b8
CS
1225 shutdown(ipc_socket, SHUT_RDWR);
1226 rexit(-1);
1227 }
1228
8ce83369 1229 /* Tell initial process it may now put us into cgroups. */
9c4693b8
CS
1230 status = 1;
1231 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
1232 if (ret != sizeof(status)) {
8ce83369 1233 ERROR("Intended to send sequence number 1: %s.", strerror(errno));
9c4693b8
CS
1234 shutdown(ipc_socket, SHUT_RDWR);
1235 rexit(-1);
1236 }
1237
8ce83369
CB
1238 /* Wait for the initial thread to signal us that it has done everything
1239 * for us when it comes to cgroups etc.
9c4693b8
CS
1240 */
1241 expected = 2;
1242 status = -1;
1243 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
1244 if (ret <= 0) {
8ce83369 1245 ERROR("Expected to receive sequence number 2: %s", strerror(errno));
9c4693b8
CS
1246 shutdown(ipc_socket, SHUT_RDWR);
1247 rexit(-1);
1248 }
1249
2e812c16
CB
1250 if ((init_ctx->container && init_ctx->container->lxc_conf &&
1251 init_ctx->container->lxc_conf->no_new_privs) ||
1252 (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
1253 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1254 SYSERROR("PR_SET_NO_NEW_PRIVS could not be set. "
1255 "Process can use execve() gainable "
1256 "privileges.");
81f466d0 1257 shutdown(ipc_socket, SHUT_RDWR);
2e812c16
CB
1258 rexit(-1);
1259 }
1260 INFO("PR_SET_NO_NEW_PRIVS is set. Process cannot use execve() "
1261 "gainable privileges.");
1262 }
1263
81f466d0
CB
1264 /* Tell the (grand)parent to send us LSM label fd. */
1265 status = 3;
1266 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
1267 if (ret <= 0) {
8ce83369 1268 ERROR("Intended to send sequence number 3: %s.", strerror(errno));
81f466d0
CB
1269 shutdown(ipc_socket, SHUT_RDWR);
1270 rexit(-1);
1271 }
1272
5c3fcae7 1273 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM) && init_ctx->lsm_label) {
72863294 1274 int on_exec;
81f466d0
CB
1275 /* Receive fd for LSM security module. */
1276 ret = lxc_abstract_unix_recv_fd(ipc_socket, &lsm_labelfd, NULL, 0);
1277 if (ret <= 0) {
8ce83369 1278 ERROR("Expected to receive file descriptor: %s.", strerror(errno));
81f466d0
CB
1279 shutdown(ipc_socket, SHUT_RDWR);
1280 rexit(-1);
1281 }
72863294 1282
81f466d0 1283 /* Change into our new LSM profile. */
72863294 1284 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
81f466d0
CB
1285 if (lsm_set_label_at(lsm_labelfd, on_exec, init_ctx->lsm_label) < 0) {
1286 SYSERROR("Failed to set LSM label.");
1287 shutdown(ipc_socket, SHUT_RDWR);
1288 close(lsm_labelfd);
72863294
DE
1289 rexit(-1);
1290 }
81f466d0 1291 close(lsm_labelfd);
72863294 1292 }
81f466d0 1293
2c4ea790 1294 if (init_ctx->container && init_ctx->container->lxc_conf &&
bd4307f0
CB
1295 init_ctx->container->lxc_conf->seccomp &&
1296 (lxc_seccomp_load(init_ctx->container->lxc_conf) != 0)) {
8ce83369 1297 ERROR("Failed to load seccomp policy.");
81f466d0 1298 shutdown(ipc_socket, SHUT_RDWR);
2c4ea790
SH
1299 rexit(-1);
1300 }
81f466d0
CB
1301
1302 shutdown(ipc_socket, SHUT_RDWR);
1303 close(ipc_socket);
fe4de9a6 1304 lxc_proc_put_context_info(init_ctx);
9c4693b8 1305
8ce83369
CB
1306 /* The following is done after the communication socket is shut down.
1307 * That way, all errors that might (though unlikely) occur up until this
1308 * point will have their messages printed to the original stderr (if
1309 * logging is so configured) and not the fd the user supplied, if any.
9c4693b8
CS
1310 */
1311
8ce83369
CB
1312 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
1313 * may want to make sure the fds are closed, for example.
1314 */
9c4693b8
CS
1315 if (options->stdin_fd >= 0 && options->stdin_fd != 0)
1316 dup2(options->stdin_fd, 0);
1317 if (options->stdout_fd >= 0 && options->stdout_fd != 1)
1318 dup2(options->stdout_fd, 1);
1319 if (options->stderr_fd >= 0 && options->stderr_fd != 2)
1320 dup2(options->stderr_fd, 2);
1321
1322 /* close the old fds */
1323 if (options->stdin_fd > 2)
1324 close(options->stdin_fd);
1325 if (options->stdout_fd > 2)
1326 close(options->stdout_fd);
1327 if (options->stderr_fd > 2)
1328 close(options->stderr_fd);
1329
8ce83369
CB
1330 /* Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
1331 * here, ignore errors.
1332 */
9c4693b8
CS
1333 for (fd = 0; fd <= 2; fd++) {
1334 flags = fcntl(fd, F_GETFL);
1335 if (flags < 0)
1336 continue;
26818618
CB
1337 if (flags & FD_CLOEXEC)
1338 if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0)
8ce83369 1339 SYSERROR("Unable to clear FD_CLOEXEC from file descriptor.");
9c4693b8
CS
1340 }
1341
8ce83369 1342 /* We're done, so we can now do whatever the user intended us to do. */
9c4693b8
CS
1343 rexit(payload->exec_function(payload->exec_payload));
1344}
1345
1346int lxc_attach_run_command(void* payload)
1347{
1348 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1349
1350 execvp(cmd->program, cmd->argv);
8ce83369 1351 SYSERROR("Failed to exec \"%s\".", cmd->program);
9c4693b8
CS
1352 return -1;
1353}
1354
/*
 * lxc_attach_run_shell - exec the login shell of the current user.
 *
 * @payload: ignored.
 *
 * The shell is taken from the passwd entry of getuid(). When no entry can be
 * looked up, lxc_attach_getpwshell() is asked instead. If that shell cannot
 * be executed (or none was found) /bin/sh is tried as a fallback.
 *
 * Returns -1 after logging the failure; the return statement is only reached
 * when every exec attempt failed.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd *passwd;
	char *user_shell;

	/* Ignore payload parameter. */
	(void)payload;

	uid = getuid();
	passwd = getpwuid(uid);

	/* This probably happens because of incompatible nss implementations in
	 * host and container (remember, this code is still using the host's
	 * glibc but our mount namespace is in the container) we may try to get
	 * the information by spawning a [getent passwd uid] process and parsing
	 * the result.
	 */
	if (!passwd)
		user_shell = lxc_attach_getpwshell(uid);
	else
		user_shell = passwd->pw_shell;

	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* Executed if either no passwd entry or execlp fails, we will fall back
	 * on /bin/sh as a default shell.
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);
	SYSERROR("Failed to exec shell.");

	/* Only the string obtained from lxc_attach_getpwshell() is ours to
	 * release; passwd->pw_shell belongs to the passwd entry. Released
	 * after logging so errno from the failed exec is what gets reported.
	 * NOTE(review): assumes lxc_attach_getpwshell() returns heap memory
	 * owned by the caller - confirm against its definition.
	 */
	if (!passwd)
		free(user_shell);

	return -1;
}