]>
Commit | Line | Data |
---|---|---|
cc73685d | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
e0732705 | 2 | |
d38dd64a CB |
3 | #ifndef _GNU_SOURCE |
4 | #define _GNU_SOURCE 1 | |
5 | #endif | |
e0732705 CS |
6 | #include <errno.h> |
7 | #include <fcntl.h> | |
c476bdce | 8 | #include <grp.h> |
604ca1c0 | 9 | #include <linux/unistd.h> |
6f4f1937 | 10 | #include <pwd.h> |
0bece477 | 11 | #include <pthread.h> |
6f4f1937 CB |
12 | #include <signal.h> |
13 | #include <stdio.h> | |
14 | #include <stdlib.h> | |
15 | #include <string.h> | |
6f4f1937 | 16 | #include <sys/mount.h> |
e0732705 CS |
17 | #include <sys/param.h> |
18 | #include <sys/prctl.h> | |
5ec27989 | 19 | #include <sys/socket.h> |
1ba0013f | 20 | #include <sys/syscall.h> |
905022f7 | 21 | #include <sys/wait.h> |
604ca1c0 CB |
22 | #include <termios.h> |
23 | #include <unistd.h> | |
6f4f1937 CB |
24 | |
25 | #include <lxc/lxccontainer.h> | |
e0732705 | 26 | |
81f466d0 | 27 | #include "af_unix.h" |
e0732705 CS |
28 | #include "attach.h" |
29 | #include "caps.h" | |
9c4693b8 | 30 | #include "cgroup.h" |
6f4f1937 | 31 | #include "commands.h" |
2c4ea790 | 32 | #include "conf.h" |
6f4f1937 | 33 | #include "config.h" |
9b8e3c96 | 34 | #include "confile.h" |
6f4f1937 CB |
35 | #include "log.h" |
36 | #include "lsm/lsm.h" | |
37 | #include "lxclock.h" | |
38 | #include "lxcseccomp.h" | |
604ca1c0 | 39 | #include "macro.h" |
ba2be1a8 | 40 | #include "mainloop.h" |
cd8f5663 | 41 | #include "memory_utils.h" |
657256e0 | 42 | #include "mount_utils.h" |
6f4f1937 | 43 | #include "namespace.h" |
f40988c7 | 44 | #include "process_utils.h" |
a9f0cecf | 45 | #include "sync.h" |
59524108 | 46 | #include "syscall_wrappers.h" |
0ed9b1bc | 47 | #include "terminal.h" |
6f4f1937 | 48 | #include "utils.h" |
9c4693b8 CS |
49 | |
50 | #if HAVE_SYS_PERSONALITY_H | |
51 | #include <sys/personality.h> | |
52 | #endif | |
e0732705 | 53 | |
ac2cecc4 | 54 | lxc_log_define(attach, lxc); |
e0732705 | 55 | |
ef05d368 CB |
56 | /* Define default options if no options are supplied by the user. */ |
57 | static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT; | |
58 | ||
ab919e5f | 59 | struct attach_context { |
afc691a0 | 60 | unsigned int attach_flags; |
500ed813 | 61 | int init_pid; |
25c659d5 CB |
62 | int dfd_init_pid; |
63 | int dfd_self_pid; | |
9680e7b0 CB |
64 | uid_t init_uid; |
65 | gid_t init_gid; | |
0e304baa CB |
66 | char *lsm_label; |
67 | struct lxc_container *container; | |
68 | signed long personality; | |
69 | unsigned long long capability_mask; | |
70 | int ns_inherited; | |
71 | int ns_fd[LXC_NS_MAX]; | |
72 | struct lsm_ops *lsm_ops; | |
73 | }; | |
74 | ||
d8764025 CB |
75 | static pid_t pidfd_get_pid(int pidfd) |
76 | { | |
77 | __do_free char *line = NULL; | |
78 | __do_fclose FILE *f = NULL; | |
79 | size_t len = 0; | |
80 | char path[STRLITERALLEN("/proc/self/fdinfo/") + | |
81 | INTTYPE_TO_STRLEN(int) + 1 ] = "/proc/self/fdinfo/"; | |
82 | int ret; | |
83 | ||
84 | if (pidfd < 0) | |
85 | return -EBADF; | |
86 | ||
87 | ret = snprintf(path + STRLITERALLEN("/proc/self/fdinfo/"), | |
88 | INTTYPE_TO_STRLEN(int), "%d", pidfd); | |
89 | if (ret < 0 || ret > (size_t)INTTYPE_TO_STRLEN(int)) | |
90 | return ret_errno(EIO); | |
91 | ||
92 | f = fopen_cloexec(path, "re"); | |
93 | if (!f) | |
94 | return -errno; | |
95 | ||
96 | while (getline(&line, &len, f) != -1) { | |
97 | const char *prefix = "Pid:\t"; | |
98 | const size_t prefix_len = STRLITERALLEN("Pid:\t"); | |
99 | int pid = -ESRCH; | |
100 | char *slider = line; | |
101 | ||
102 | if (strncmp(slider, prefix, prefix_len)) | |
103 | continue; | |
104 | ||
105 | slider += prefix_len; | |
106 | slider = lxc_trim_whitespace_in_place(slider); | |
107 | ||
108 | ret = lxc_safe_int(slider, &pid); | |
109 | if (ret) | |
110 | return -ret; | |
111 | ||
112 | return pid; | |
113 | } | |
114 | ||
115 | return ret_errno(ENOENT); | |
116 | } | |
117 | ||
6e36c297 CB |
/* Write @pid to @fd; succeeds only if the complete pid_t was written. */
static inline bool sync_wake_pid(int fd, pid_t pid)
{
	if (lxc_write_nointr(fd, &pid, sizeof(pid_t)) == sizeof(pid_t))
		return true;

	return false;
}
122 | ||
/* Read a pid_t from @fd into @pid; succeeds only if all bytes arrived. */
static inline bool sync_wait_pid(int fd, pid_t *pid)
{
	if (lxc_read_nointr(fd, pid, sizeof(pid_t)) == sizeof(pid_t))
		return true;

	return false;
}
127 | ||
/* Pass the single file descriptor @fd_send across the unix socket @fd. */
static inline bool sync_wake_fd(int fd, int fd_send)
{
	if (lxc_abstract_unix_send_fds(fd, &fd_send, 1, NULL, 0) > 0)
		return true;

	return false;
}
132 | ||
/* Receive a single file descriptor from the unix socket @fd into @fd_recv. */
static inline bool sync_wait_fd(int fd, int *fd_recv)
{
	if (lxc_abstract_unix_recv_fds(fd, fd_recv, 1, NULL, 0) > 0)
		return true;

	return false;
}
137 | ||
afc691a0 CB |
138 | static bool attach_lsm(lxc_attach_options_t *options) |
139 | { | |
140 | return (options->namespaces & CLONE_NEWNS) && | |
141 | (options->attach_flags & (LXC_ATTACH_LSM | LXC_ATTACH_LSM_LABEL)); | |
142 | } | |
143 | ||
9745eb8a CB |
144 | static struct attach_context *alloc_attach_context(void) |
145 | { | |
581b849a CB |
146 | struct attach_context *ctx; |
147 | ||
148 | ctx = zalloc(sizeof(struct attach_context)); | |
149 | if (!ctx) | |
150 | return ret_set_errno(NULL, ENOMEM); | |
151 | ||
152 | ctx->dfd_self_pid = -EBADF; | |
153 | ctx->dfd_init_pid = -EBADF; | |
bac33ebd | 154 | ctx->init_gid = -ESRCH; |
9680e7b0 CB |
155 | ctx->init_uid = 0; |
156 | ctx->init_gid = 0; | |
581b849a CB |
157 | |
158 | for (int i = 0; i < LXC_NS_MAX; i++) | |
159 | ctx->ns_fd[i] = -EBADF; | |
160 | ||
161 | return ctx; | |
9745eb8a CB |
162 | } |
163 | ||
ee142207 CB |
164 | static int get_personality(const char *name, const char *lxcpath, |
165 | signed long *personality) | |
d92c8e40 CB |
166 | { |
167 | __do_free char *p = NULL; | |
ee142207 | 168 | signed long per; |
d92c8e40 CB |
169 | |
170 | p = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath); | |
ee142207 CB |
171 | if (!p) { |
172 | *personality = LXC_ARCH_UNCHANGED; | |
173 | return 0; | |
174 | } | |
d92c8e40 | 175 | |
ee142207 CB |
176 | per = lxc_config_parse_arch(p); |
177 | if (per == LXC_ARCH_UNCHANGED) | |
178 | return ret_errno(EINVAL); | |
179 | ||
180 | *personality = per; | |
181 | return 0; | |
d92c8e40 CB |
182 | } |
183 | ||
9680e7b0 CB |
184 | static int parse_init_status(struct attach_context *ctx, lxc_attach_options_t *options) |
185 | { | |
186 | __do_free char *line = NULL; | |
187 | __do_fclose FILE *f = NULL; | |
188 | size_t len = 0; | |
189 | bool caps_found = false; | |
190 | bool uid_found, gid_found; | |
191 | ||
192 | f = fdopenat(ctx->dfd_init_pid, "status", "re"); | |
193 | if (!f) | |
194 | return -errno; | |
195 | ||
196 | if (options->namespaces & CLONE_NEWUSER) | |
197 | uid_found = gid_found = false; | |
198 | else | |
199 | uid_found = gid_found = true; | |
200 | ||
201 | while (getline(&line, &len, f) != -1) { | |
202 | signed long value = -1; | |
203 | int ret; | |
204 | ||
205 | if (options->namespaces & CLONE_NEWUSER) { | |
206 | /* | |
207 | * Format is: real, effective, saved set user, fs we only care | |
208 | * about real uid. | |
209 | */ | |
210 | ret = sscanf(line, "Uid: %ld", &value); | |
211 | if (ret != EOF && ret == 1) { | |
212 | uid_found = true; | |
213 | ctx->init_uid = (uid_t)value; | |
214 | goto next; | |
215 | } | |
216 | ||
217 | ret = sscanf(line, "Gid: %ld", &value); | |
218 | if (ret != EOF && ret == 1) { | |
219 | gid_found = true; | |
220 | ctx->init_gid = (gid_t)value; | |
221 | goto next; | |
222 | } | |
223 | } | |
224 | ||
225 | ret = sscanf(line, "CapBnd: %llx", &ctx->capability_mask); | |
226 | if (ret != EOF && ret == 1) { | |
227 | caps_found = true; | |
228 | goto next; | |
229 | } | |
230 | ||
231 | next: | |
232 | if (uid_found && gid_found && caps_found) | |
233 | break; | |
234 | ||
235 | } | |
236 | ||
237 | /* | |
238 | * TODO: we should also parse supplementary groups and use | |
239 | * setgroups() to set them. | |
240 | */ | |
241 | ||
242 | return 0; | |
243 | } | |
244 | ||
500ed813 | 245 | static int get_attach_context(struct attach_context *ctx, |
afc691a0 CB |
246 | struct lxc_container *container, |
247 | lxc_attach_options_t *options) | |
e0732705 | 248 | { |
9680e7b0 CB |
249 | __do_close int init_pidfd = -EBADF; |
250 | __do_free char *lsm_label = NULL; | |
6f4f1937 | 251 | int ret; |
c538837d | 252 | char path[LXC_PROC_PID_LEN]; |
e0732705 | 253 | |
500ed813 | 254 | ctx->container = container; |
afc691a0 | 255 | ctx->attach_flags = options->attach_flags; |
500ed813 | 256 | |
d8764025 CB |
257 | init_pidfd = lxc_cmd_get_init_pidfd(container->name, container->config_path); |
258 | if (init_pidfd >= 0) | |
259 | ctx->init_pid = pidfd_get_pid(init_pidfd); | |
260 | else | |
261 | ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path); | |
262 | ||
500ed813 CB |
263 | if (ctx->init_pid < 0) |
264 | return log_error(-1, "Failed to get init pid"); | |
265 | ||
25c659d5 CB |
266 | ret = snprintf(path, sizeof(path), "/proc/%d", lxc_raw_getpid()); |
267 | if (ret < 0 || ret >= sizeof(path)) | |
268 | return ret_errno(EIO); | |
269 | ||
9680e7b0 CB |
270 | ctx->dfd_self_pid = openat(-EBADF, path, O_CLOEXEC | O_NOCTTY | O_NOFOLLOW | O_PATH | O_DIRECTORY); |
271 | if (ctx->dfd_self_pid < 0) | |
25c659d5 CB |
272 | return -errno; |
273 | ||
c538837d CB |
274 | ret = snprintf(path, sizeof(path), "/proc/%d", ctx->init_pid); |
275 | if (ret < 0 || ret >= sizeof(path)) | |
276 | return ret_errno(EIO); | |
e0732705 | 277 | |
9680e7b0 CB |
278 | ctx->dfd_init_pid = openat(-EBADF, path, O_CLOEXEC | O_NOCTTY | O_NOFOLLOW | O_PATH | O_DIRECTORY); |
279 | if (ctx->dfd_init_pid < 0) | |
c538837d CB |
280 | return -errno; |
281 | ||
d8764025 CB |
282 | if (init_pidfd >= 0) { |
283 | ret = lxc_raw_pidfd_send_signal(init_pidfd, 0, NULL, 0); | |
284 | if (ret) | |
285 | return log_error_errno(-errno, errno, "Container process exited or PID has been recycled"); | |
286 | else | |
287 | TRACE("Container process still running and PID was not recycled"); | |
288 | } | |
289 | ||
9680e7b0 CB |
290 | ret = parse_init_status(ctx, options); |
291 | if (ret) | |
292 | return log_error_errno(-errno, errno, "Failed to open parse status file"); | |
e0732705 | 293 | |
4eb19ac0 | 294 | ctx->lsm_ops = lsm_init_static(); |
d701d729 | 295 | |
afc691a0 CB |
296 | if (attach_lsm(options)) { |
297 | if (ctx->attach_flags & LXC_ATTACH_LSM_LABEL) | |
298 | lsm_label = options->lsm_label; | |
299 | else | |
9680e7b0 | 300 | lsm_label = ctx->lsm_ops->process_label_get_at(ctx->lsm_ops, ctx->dfd_init_pid); |
afc691a0 CB |
301 | if (!lsm_label) |
302 | WARN("No security context received"); | |
303 | else | |
304 | INFO("Retrieved security context %s", lsm_label); | |
305 | } | |
9745eb8a | 306 | ctx->ns_inherited = 0; |
e0732705 | 307 | |
ee142207 CB |
308 | ret = get_personality(container->name, container->config_path, &ctx->personality); |
309 | if (ret) | |
310 | return log_error_errno(ret, errno, "Failed to get personality of the container"); | |
d92c8e40 | 311 | |
1874ef74 CB |
312 | if (!ctx->container->lxc_conf) { |
313 | ctx->container->lxc_conf = lxc_conf_init(); | |
314 | if (!ctx->container->lxc_conf) | |
315 | return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate new lxc config"); | |
316 | } | |
317 | ||
afc691a0 | 318 | ctx->lsm_label = move_ptr(lsm_label); |
9745eb8a | 319 | return 0; |
e0732705 CS |
320 | } |
321 | ||
25c659d5 | 322 | static int in_same_namespace(int ns_fd_pid1, int ns_fd_pid2, const char *ns_path) |
299d1198 | 323 | { |
644e7393 | 324 | __do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF; |
3cc629fe | 325 | int ret = -1; |
299d1198 CB |
326 | struct stat ns_st1, ns_st2; |
327 | ||
25c659d5 | 328 | ns_fd1 = openat(ns_fd_pid1, ns_path, O_CLOEXEC | O_NOCTTY | O_RDONLY); |
134284c3 | 329 | if (ns_fd1 < 0) { |
25c659d5 | 330 | /* The kernel does not support this namespace. This is not an error. */ |
134284c3 CB |
331 | if (errno == ENOENT) |
332 | return -EINVAL; | |
333 | ||
3cc629fe | 334 | return -1; |
134284c3 | 335 | } |
299d1198 | 336 | |
25c659d5 | 337 | ns_fd2 = openat(ns_fd_pid2, ns_path, O_CLOEXEC | O_NOCTTY | O_RDONLY); |
21d0acc2 | 338 | if (ns_fd2 < 0) |
3cc629fe | 339 | return -1; |
299d1198 CB |
340 | |
341 | ret = fstat(ns_fd1, &ns_st1); | |
21d0acc2 | 342 | if (ret < 0) |
3cc629fe | 343 | return -1; |
299d1198 CB |
344 | |
345 | ret = fstat(ns_fd2, &ns_st2); | |
21d0acc2 | 346 | if (ret < 0) |
3cc629fe | 347 | return -1; |
299d1198 CB |
348 | |
349 | /* processes are in the same namespace */ | |
25c659d5 CB |
350 | if ((ns_st1.st_dev == ns_st2.st_dev) && |
351 | (ns_st1.st_ino == ns_st2.st_ino)) | |
3cc629fe | 352 | return -EINVAL; |
299d1198 CB |
353 | |
354 | /* processes are in different namespaces */ | |
3cc629fe | 355 | return move_fd(ns_fd2); |
299d1198 CB |
356 | } |
357 | ||
b7873c95 CB |
358 | static int get_attach_context_nsfds(struct attach_context *ctx, |
359 | lxc_attach_options_t *options) | |
360 | { | |
b7873c95 CB |
361 | for (int i = 0; i < LXC_NS_MAX; i++) { |
362 | int j; | |
363 | ||
364 | if (options->namespaces & ns_info[i].clone_flag) | |
25c659d5 | 365 | ctx->ns_fd[i] = openat(ctx->dfd_init_pid, ns_info[i].proc_path, O_CLOEXEC | O_NOCTTY | O_RDONLY); |
b7873c95 | 366 | else if (ctx->ns_inherited & ns_info[i].clone_flag) |
25c659d5 | 367 | ctx->ns_fd[i] = in_same_namespace(ctx->dfd_self_pid, ctx->dfd_init_pid, ns_info[i].proc_path); |
b7873c95 CB |
368 | else |
369 | continue; | |
370 | ||
371 | if (ctx->ns_fd[i] >= 0) | |
372 | continue; | |
373 | ||
374 | if (ctx->ns_fd[i] == -EINVAL) { | |
25c659d5 | 375 | DEBUG("Inheriting %s namespace", ns_info[i].proc_name); |
b7873c95 CB |
376 | ctx->ns_inherited &= ~ns_info[i].clone_flag; |
377 | continue; | |
378 | } | |
379 | ||
380 | /* We failed to preserve the namespace. */ | |
25c659d5 | 381 | SYSERROR("Failed to preserve %s namespace of %d", ns_info[i].proc_name, ctx->init_pid); |
b7873c95 CB |
382 | |
383 | /* Close all already opened file descriptors before we return an | |
384 | * error, so we don't leak them. | |
385 | */ | |
386 | for (j = 0; j < i; j++) | |
387 | close_prot_errno_disarm(ctx->ns_fd[j]); | |
388 | ||
389 | return -1; | |
390 | } | |
391 | ||
392 | return 0; | |
393 | } | |
394 | ||
7e995801 | 395 | static inline void close_nsfds(struct attach_context *ctx) |
b7873c95 CB |
396 | { |
397 | for (int i = 0; i < LXC_NS_MAX; i++) | |
398 | close_prot_errno_disarm(ctx->ns_fd[i]); | |
399 | } | |
400 | ||
401 | static void put_attach_context(struct attach_context *ctx) | |
402 | { | |
c538837d | 403 | if (ctx) { |
afc691a0 CB |
404 | if (!(ctx->attach_flags & LXC_ATTACH_LSM_LABEL)) |
405 | free_disarm(ctx->lsm_label); | |
25c659d5 | 406 | close_prot_errno_disarm(ctx->dfd_init_pid); |
b7873c95 | 407 | |
c538837d CB |
408 | if (ctx->container) { |
409 | lxc_container_put(ctx->container); | |
410 | ctx->container = NULL; | |
411 | } | |
b7873c95 | 412 | |
c538837d CB |
413 | close_nsfds(ctx); |
414 | free(ctx); | |
415 | } | |
b7873c95 CB |
416 | } |
417 | ||
c538837d | 418 | static int attach_context_container(struct attach_context *ctx) |
99d50954 | 419 | { |
ffeeed8b CB |
420 | for (int i = 0; i < LXC_NS_MAX; i++) { |
421 | int ret; | |
99d50954 | 422 | |
877f3a04 | 423 | if (ctx->ns_fd[i] < 0) |
26818618 CB |
424 | continue; |
425 | ||
21d0acc2 | 426 | ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag); |
ffeeed8b | 427 | if (ret < 0) |
39b3b69b CB |
428 | return log_error_errno(-1, errno, |
429 | "Failed to attach to %s namespace of %d", | |
430 | ns_info[i].proc_name, ctx->init_pid); | |
99d50954 | 431 | |
39b3b69b CB |
432 | DEBUG("Attached to %s namespace of %d", |
433 | ns_info[i].proc_name, ctx->init_pid); | |
99d50954 CS |
434 | } |
435 | ||
436 | return 0; | |
437 | } | |
438 | ||
c538837d CB |
439 | /* |
440 | * Place anything in here that needs to be get rid of before we move into the | |
441 | * container's context and fail hard if we can't. | |
442 | */ | |
443 | static bool attach_context_security_barrier(struct attach_context *ctx) | |
444 | { | |
445 | if (ctx) { | |
25c659d5 CB |
446 | if (close(ctx->dfd_self_pid)) |
447 | return false; | |
448 | ctx->dfd_self_pid = -EBADF; | |
449 | ||
450 | if (close(ctx->dfd_init_pid)) | |
c538837d | 451 | return false; |
25c659d5 | 452 | ctx->dfd_init_pid = -EBADF; |
c538837d CB |
453 | } |
454 | ||
455 | return true; | |
456 | } | |
457 | ||
e4103cf6 | 458 | int lxc_attach_remount_sys_proc(void) |
7a0b0b56 CS |
459 | { |
460 | int ret; | |
461 | ||
462 | ret = unshare(CLONE_NEWNS); | |
ffeeed8b CB |
463 | if (ret < 0) |
464 | return log_error_errno(-1, errno, "Failed to unshare mount namespace"); | |
7a0b0b56 | 465 | |
9e61fb1f CB |
466 | if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) |
467 | SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); | |
2c6f3fc9 | 468 | |
8ce83369 | 469 | /* Assume /proc is always mounted, so remount it. */ |
7a0b0b56 | 470 | ret = umount2("/proc", MNT_DETACH); |
ffeeed8b CB |
471 | if (ret < 0) |
472 | return log_error_errno(-1, errno, "Failed to unmount /proc"); | |
7a0b0b56 | 473 | |
657256e0 | 474 | ret = mount_filesystem("proc", "/proc", 0); |
ffeeed8b CB |
475 | if (ret < 0) |
476 | return log_error_errno(-1, errno, "Failed to remount /proc"); | |
7a0b0b56 | 477 | |
ffeeed8b CB |
478 | /* |
479 | * Try to umount /sys. If it's not a mount point, we'll get EINVAL, then | |
8ce83369 | 480 | * we ignore it because it may not have been mounted in the first place. |
7a0b0b56 CS |
481 | */ |
482 | ret = umount2("/sys", MNT_DETACH); | |
ffeeed8b CB |
483 | if (ret < 0 && errno != EINVAL) |
484 | return log_error_errno(-1, errno, "Failed to unmount /sys"); | |
485 | ||
486 | /* Remount it. */ | |
657256e0 | 487 | if (ret == 0 && mount_filesystem("sysfs", "/sys", 0)) |
ffeeed8b | 488 | return log_error_errno(-1, errno, "Failed to remount /sys"); |
7a0b0b56 CS |
489 | |
490 | return 0; | |
491 | } | |
492 | ||
677e1d27 | 493 | static int drop_capabilities(struct attach_context *ctx) |
e0732705 | 494 | { |
ffeeed8b | 495 | int last_cap; |
e0732705 | 496 | |
6f4f1937 | 497 | last_cap = lxc_caps_last_cap(); |
ffeeed8b | 498 | for (int cap = 0; cap <= last_cap; cap++) { |
e0732705 CS |
499 | if (ctx->capability_mask & (1LL << cap)) |
500 | continue; | |
501 | ||
b81689a1 | 502 | if (prctl(PR_CAPBSET_DROP, prctl_arg(cap), prctl_arg(0), |
ffeeed8b CB |
503 | prctl_arg(0), prctl_arg(0))) |
504 | return log_error_errno(-1, errno, "Failed to drop capability %d", cap); | |
ea918412 | 505 | |
94ac256f | 506 | TRACE("Dropped capability %d", cap); |
e0732705 CS |
507 | } |
508 | ||
509 | return 0; | |
510 | } | |
905022f7 | 511 | |
ab919e5f | 512 | static int lxc_attach_set_environment(struct attach_context *ctx, |
7385273f | 513 | enum lxc_attach_env_policy_t policy, |
6f4f1937 | 514 | char **extra_env, char **extra_keep) |
b3a39ba6 | 515 | { |
3d55242a | 516 | int ret; |
7385273f | 517 | struct lxc_list *iterator; |
518 | ||
799f96fd | 519 | if (policy == LXC_ATTACH_CLEAR_ENV) { |
3d5e9f48 | 520 | int path_kept = 0; |
6f4f1937 | 521 | char **extra_keep_store = NULL; |
3d5e9f48 CS |
522 | |
523 | if (extra_keep) { | |
524 | size_t count, i; | |
525 | ||
3d55242a CB |
526 | for (count = 0; extra_keep[count]; count++) |
527 | ; | |
3d5e9f48 | 528 | |
89b7bfe3 | 529 | extra_keep_store = zalloc(count * sizeof(char *)); |
3d55242a | 530 | if (!extra_keep_store) |
3d5e9f48 | 531 | return -1; |
3d55242a | 532 | |
3d5e9f48 CS |
533 | for (i = 0; i < count; i++) { |
534 | char *v = getenv(extra_keep[i]); | |
535 | if (v) { | |
536 | extra_keep_store[i] = strdup(v); | |
537 | if (!extra_keep_store[i]) { | |
3d5e9f48 CS |
538 | while (i > 0) |
539 | free(extra_keep_store[--i]); | |
ea918412 | 540 | |
3d5e9f48 CS |
541 | free(extra_keep_store); |
542 | return -1; | |
543 | } | |
3d55242a | 544 | |
3d5e9f48 CS |
545 | if (strcmp(extra_keep[i], "PATH") == 0) |
546 | path_kept = 1; | |
547 | } | |
3d5e9f48 CS |
548 | } |
549 | } | |
550 | ||
799f96fd | 551 | if (clearenv()) { |
a9cab7e3 | 552 | if (extra_keep_store) { |
3d55242a CB |
553 | char **p; |
554 | ||
a9cab7e3 CS |
555 | for (p = extra_keep_store; *p; p++) |
556 | free(*p); | |
3d55242a | 557 | |
a9cab7e3 CS |
558 | free(extra_keep_store); |
559 | } | |
3d55242a | 560 | |
ffeeed8b | 561 | return log_error(-1, "Failed to clear environment"); |
3d5e9f48 CS |
562 | } |
563 | ||
564 | if (extra_keep_store) { | |
565 | size_t i; | |
6f4f1937 | 566 | |
3d5e9f48 | 567 | for (i = 0; extra_keep[i]; i++) { |
acd4922e | 568 | if (extra_keep_store[i]) { |
3d55242a CB |
569 | ret = setenv(extra_keep[i], extra_keep_store[i], 1); |
570 | if (ret < 0) | |
a24c5678 | 571 | SYSWARN("Failed to set environment variable"); |
acd4922e | 572 | } |
ea918412 | 573 | |
3d5e9f48 CS |
574 | free(extra_keep_store[i]); |
575 | } | |
ea918412 | 576 | |
3d5e9f48 CS |
577 | free(extra_keep_store); |
578 | } | |
579 | ||
8ce83369 CB |
580 | /* Always set a default path; shells and execlp tend to be fine |
581 | * without it, but there is a disturbing number of C programs | |
582 | * out there that just assume that getenv("PATH") is never NULL | |
583 | * and then die a painful segfault death. | |
584 | */ | |
3d55242a CB |
585 | if (!path_kept) { |
586 | ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1); | |
587 | if (ret < 0) | |
a24c5678 | 588 | SYSWARN("Failed to set environment variable"); |
3d55242a | 589 | } |
b3a39ba6 DW |
590 | } |
591 | ||
3d55242a | 592 | ret = putenv("container=lxc"); |
ffeeed8b | 593 | if (ret < 0) |
818a57fc | 594 | return log_warn(-1, "Failed to set environment variable"); |
b3a39ba6 | 595 | |
7385273f | 596 | /* Set container environment variables.*/ |
640952e5 | 597 | if (ctx->container->lxc_conf) { |
ab919e5f | 598 | lxc_list_for_each(iterator, &ctx->container->lxc_conf->environment) { |
3d55242a CB |
599 | char *env_tmp; |
600 | ||
601 | env_tmp = strdup((char *)iterator->elem); | |
602 | if (!env_tmp) | |
7385273f | 603 | return -1; |
7385273f | 604 | |
3d55242a | 605 | ret = putenv(env_tmp); |
ffeeed8b CB |
606 | if (ret < 0) |
607 | return log_error_errno(-1, errno, "Failed to set environment variable: %s", (char *)iterator->elem); | |
7385273f | 608 | } |
609 | } | |
610 | ||
8ce83369 | 611 | /* Set extra environment variables. */ |
3d5e9f48 CS |
612 | if (extra_env) { |
613 | for (; *extra_env; extra_env++) { | |
3d55242a | 614 | char *p; |
ea918412 | 615 | |
8ce83369 CB |
616 | /* We just assume the user knows what they are doing, so |
617 | * we don't do any checks. | |
618 | */ | |
3d55242a CB |
619 | p = strdup(*extra_env); |
620 | if (!p) | |
3d5e9f48 | 621 | return -1; |
3d55242a CB |
622 | |
623 | ret = putenv(p); | |
624 | if (ret < 0) | |
a24c5678 | 625 | SYSWARN("Failed to set environment variable"); |
3d5e9f48 CS |
626 | } |
627 | } | |
628 | ||
b3a39ba6 DW |
629 | return 0; |
630 | } | |
631 | ||
74a3920a | 632 | static char *lxc_attach_getpwshell(uid_t uid) |
905022f7 | 633 | { |
1b9c9f5b | 634 | __do_free char *line = NULL, *result = NULL; |
cd8f5663 | 635 | __do_fclose FILE *pipe_f = NULL; |
6f4f1937 | 636 | int fd, ret; |
905022f7 CS |
637 | pid_t pid; |
638 | int pipes[2]; | |
3fa23ac3 CB |
639 | bool found = false; |
640 | size_t line_bufsz = 0; | |
905022f7 | 641 | |
8ce83369 CB |
642 | /* We need to fork off a process that runs the getent program, and we |
643 | * need to capture its output, so we use a pipe for that purpose. | |
905022f7 | 644 | */ |
3fa23ac3 | 645 | ret = pipe2(pipes, O_CLOEXEC); |
905022f7 CS |
646 | if (ret < 0) |
647 | return NULL; | |
648 | ||
649 | pid = fork(); | |
650 | if (pid < 0) { | |
651 | close(pipes[0]); | |
652 | close(pipes[1]); | |
653 | return NULL; | |
654 | } | |
655 | ||
3fa23ac3 | 656 | if (!pid) { |
905022f7 CS |
657 | char uid_buf[32]; |
658 | char *arguments[] = { | |
659 | "getent", | |
660 | "passwd", | |
661 | uid_buf, | |
662 | NULL | |
663 | }; | |
664 | ||
665 | close(pipes[0]); | |
666 | ||
8ce83369 | 667 | /* We want to capture stdout. */ |
3fa23ac3 | 668 | ret = dup2(pipes[1], STDOUT_FILENO); |
905022f7 | 669 | close(pipes[1]); |
3fa23ac3 | 670 | if (ret < 0) |
ea918412 | 671 | _exit(EXIT_FAILURE); |
905022f7 | 672 | |
8ce83369 CB |
673 | /* Get rid of stdin/stderr, so we try to associate it with |
674 | * /dev/null. | |
905022f7 | 675 | */ |
3fa23ac3 | 676 | fd = open_devnull(); |
905022f7 | 677 | if (fd < 0) { |
3fa23ac3 CB |
678 | close(STDIN_FILENO); |
679 | close(STDERR_FILENO); | |
905022f7 | 680 | } else { |
3fa23ac3 | 681 | (void)dup3(fd, STDIN_FILENO, O_CLOEXEC); |
59f0e209 | 682 | (void)dup3(fd, STDERR_FILENO, O_CLOEXEC); |
905022f7 CS |
683 | close(fd); |
684 | } | |
685 | ||
8ce83369 | 686 | /* Finish argument list. */ |
3fa23ac3 CB |
687 | ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid); |
688 | if (ret <= 0 || ret >= sizeof(uid_buf)) | |
ea918412 | 689 | _exit(EXIT_FAILURE); |
905022f7 | 690 | |
8ce83369 | 691 | /* Try to run getent program. */ |
3fa23ac3 | 692 | (void)execvp("getent", arguments); |
ea918412 | 693 | _exit(EXIT_FAILURE); |
905022f7 | 694 | } |
3fa23ac3 CB |
695 | |
696 | close(pipes[1]); | |
697 | ||
4110345b | 698 | pipe_f = fdopen(pipes[0], "re"); |
cf4026f1 CB |
699 | if (!pipe_f) { |
700 | close(pipes[0]); | |
701 | goto reap_child; | |
702 | } | |
703 | /* Transfer ownership of pipes[0] to pipe_f. */ | |
704 | move_fd(pipes[0]); | |
705 | ||
3fa23ac3 CB |
706 | while (getline(&line, &line_bufsz, pipe_f) != -1) { |
707 | int i; | |
708 | long value; | |
709 | char *token; | |
710 | char *endptr = NULL, *saveptr = NULL; | |
711 | ||
712 | /* If we already found something, just continue to read | |
713 | * until the pipe doesn't deliver any more data, but | |
714 | * don't modify the existing data structure. | |
715 | */ | |
716 | if (found) | |
717 | continue; | |
718 | ||
18d4ffde | 719 | if (!line) |
720 | continue; | |
721 | ||
3fa23ac3 CB |
722 | /* Trim line on the right hand side. */ |
723 | for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i) | |
724 | line[i - 1] = '\0'; | |
725 | ||
726 | /* Split into tokens: first: user name. */ | |
727 | token = strtok_r(line, ":", &saveptr); | |
728 | if (!token) | |
729 | continue; | |
730 | ||
731 | /* next: dummy password field */ | |
732 | token = strtok_r(NULL, ":", &saveptr); | |
733 | if (!token) | |
734 | continue; | |
735 | ||
736 | /* next: user id */ | |
737 | token = strtok_r(NULL, ":", &saveptr); | |
738 | value = token ? strtol(token, &endptr, 10) : 0; | |
739 | if (!token || !endptr || *endptr || value == LONG_MIN || | |
ea918412 | 740 | value == LONG_MAX) |
3fa23ac3 CB |
741 | continue; |
742 | ||
743 | /* dummy sanity check: user id matches */ | |
744 | if ((uid_t)value != uid) | |
745 | continue; | |
746 | ||
747 | /* skip fields: gid, gecos, dir, go to next field 'shell' */ | |
748 | for (i = 0; i < 4; i++) { | |
749 | token = strtok_r(NULL, ":", &saveptr); | |
750 | if (!token) | |
751 | continue; | |
752 | } | |
ea918412 | 753 | |
3fa23ac3 CB |
754 | if (!token) |
755 | continue; | |
ea918412 | 756 | |
1b9c9f5b | 757 | free_disarm(result); |
3fa23ac3 CB |
758 | result = strdup(token); |
759 | ||
760 | /* Sanity check that there are no fields after that. */ | |
761 | token = strtok_r(NULL, ":", &saveptr); | |
762 | if (token) | |
763 | continue; | |
764 | ||
765 | found = true; | |
766 | } | |
ea918412 | 767 | |
cf4026f1 | 768 | reap_child: |
3fa23ac3 | 769 | ret = wait_for_pid(pid); |
1b9c9f5b | 770 | if (ret < 0) |
3fa23ac3 | 771 | return NULL; |
3fa23ac3 | 772 | |
1b9c9f5b | 773 | if (!found) |
3fa23ac3 | 774 | return NULL; |
3fa23ac3 | 775 | |
1b9c9f5b | 776 | return move_ptr(result); |
905022f7 | 777 | } |
cb3e61fa | 778 | |
d4db3d14 | 779 | static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options) |
2c4ea790 | 780 | { |
cd8f5663 | 781 | __do_free char *path = NULL; |
d4db3d14 CB |
782 | int ret; |
783 | bool bret; | |
2eef2bda | 784 | |
afc691a0 | 785 | if (!attach_lsm(options)) { |
cca66e06 | 786 | free_disarm(c->lxc_conf->seccomp.seccomp); |
2c4ea790 | 787 | return true; |
bd4307f0 | 788 | } |
bd7b4e28 | 789 | |
afc691a0 | 790 | /* Remove current setting. */ |
d4db3d14 | 791 | if (!c->set_config_item(c, "lxc.seccomp.profile", "") && |
ea918412 | 792 | !c->set_config_item(c, "lxc.seccomp", "")) |
2c4ea790 | 793 | return false; |
bd7b4e28 | 794 | |
8ce83369 | 795 | /* Fetch the current profile path over the cmd interface. */ |
0b427da0 | 796 | path = c->get_running_config_item(c, "lxc.seccomp.profile"); |
bd7b4e28 | 797 | if (!path) { |
d4db3d14 | 798 | INFO("Failed to retrieve lxc.seccomp.profile"); |
ea918412 | 799 | |
0b427da0 | 800 | path = c->get_running_config_item(c, "lxc.seccomp"); |
cca66e06 CB |
801 | if (!path) |
802 | return log_info(true, "Failed to retrieve lxc.seccomp"); | |
bd7b4e28 SG |
803 | } |
804 | ||
8ce83369 | 805 | /* Copy the value into the new lxc_conf. */ |
d4db3d14 | 806 | bret = c->set_config_item(c, "lxc.seccomp.profile", path); |
d4db3d14 CB |
807 | if (!bret) |
808 | return false; | |
bd7b4e28 | 809 | |
8ce83369 | 810 | /* Attempt to parse the resulting config. */ |
d4db3d14 | 811 | ret = lxc_read_seccomp_config(c->lxc_conf); |
cca66e06 CB |
812 | if (ret < 0) |
813 | return log_error(false, "Failed to retrieve seccomp policy"); | |
2c4ea790 | 814 | |
cca66e06 | 815 | return log_info(true, "Retrieved seccomp policy"); |
2e812c16 CB |
816 | } |
817 | ||
6f4f1937 | 818 | static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options) |
2e812c16 | 819 | { |
cd8f5663 | 820 | __do_free char *val = NULL; |
2e812c16 | 821 | |
2e812c16 | 822 | /* Remove current setting. */ |
02d3b72b CB |
823 | if (!c->set_config_item(c, "lxc.no_new_privs", "")) |
824 | return log_info(false, "Failed to unset lxc.no_new_privs"); | |
2e812c16 CB |
825 | |
826 | /* Retrieve currently active setting. */ | |
827 | val = c->get_running_config_item(c, "lxc.no_new_privs"); | |
02d3b72b CB |
828 | if (!val) |
829 | return log_info(false, "Failed to retrieve lxc.no_new_privs"); | |
2e812c16 CB |
830 | |
831 | /* Set currently active setting. */ | |
cd8f5663 | 832 | return c->set_config_item(c, "lxc.no_new_privs", val); |
2c4ea790 SH |
833 | } |
834 | ||
338b230f | 835 | struct attach_payload { |
a998454a | 836 | int ipc_socket; |
cecf3e83 | 837 | int terminal_pts_fd; |
a998454a | 838 | lxc_attach_options_t *options; |
ab919e5f | 839 | struct attach_context *ctx; |
a998454a CB |
840 | lxc_attach_exec_t exec_function; |
841 | void *exec_payload; | |
842 | }; | |
843 | ||
338b230f | 844 | static void put_attach_payload(struct attach_payload *p) |
ba2be1a8 | 845 | { |
afc691a0 CB |
846 | if (p) { |
847 | close_prot_errno_disarm(p->ipc_socket); | |
848 | close_prot_errno_disarm(p->terminal_pts_fd); | |
dd53c8af | 849 | put_attach_context(p->ctx); |
ab919e5f | 850 | p->ctx = NULL; |
b21da190 | 851 | } |
ba2be1a8 CB |
852 | } |
853 | ||
338b230f | 854 | __noreturn static void do_attach(struct attach_payload *ap) |
a998454a | 855 | { |
afc691a0 CB |
856 | lxc_attach_exec_t attach_function = move_ptr(ap->exec_function); |
857 | void *attach_function_args = move_ptr(ap->exec_payload); | |
427a8067 | 858 | int lsm_fd, ret; |
a998454a CB |
859 | uid_t new_uid; |
860 | gid_t new_gid; | |
936efc72 CB |
861 | uid_t ns_root_uid = 0; |
862 | gid_t ns_root_gid = 0; | |
338b230f CB |
863 | lxc_attach_options_t* options = ap->options; |
864 | struct attach_context *ctx = ap->ctx; | |
ab919e5f | 865 | struct lxc_conf *conf = ctx->container->lxc_conf; |
a998454a CB |
866 | |
867 | /* A description of the purpose of this functionality is provided in the | |
868 | * lxc-attach(1) manual page. We have to remount here and not in the | |
869 | * parent process, otherwise /proc may not properly reflect the new pid | |
870 | * namespace. | |
871 | */ | |
872 | if (!(options->namespaces & CLONE_NEWNS) && | |
873 | (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) { | |
874 | ret = lxc_attach_remount_sys_proc(); | |
b75c344c CB |
875 | if (ret < 0) |
876 | goto on_error; | |
ea918412 | 877 | |
b75c344c | 878 | TRACE("Remounted \"/proc\" and \"/sys\""); |
a998454a CB |
879 | } |
880 | ||
5b514ce3 | 881 | /* Now perform additional attachments. */ |
a998454a | 882 | #if HAVE_SYS_PERSONALITY_H |
a998454a | 883 | if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) { |
b75c344c CB |
884 | long new_personality; |
885 | ||
886 | if (options->personality < 0) | |
ab919e5f | 887 | new_personality = ctx->personality; |
b75c344c CB |
888 | else |
889 | new_personality = options->personality; | |
ea918412 | 890 | |
ee142207 CB |
891 | if (new_personality != LXC_ARCH_UNCHANGED) { |
892 | ret = personality(new_personality); | |
893 | if (ret < 0) | |
894 | goto on_error; | |
ea918412 | 895 | |
ee142207 CB |
896 | TRACE("Set new personality"); |
897 | } | |
a998454a CB |
898 | } |
899 | #endif | |
900 | ||
901 | if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) { | |
677e1d27 | 902 | ret = drop_capabilities(ctx); |
b75c344c CB |
903 | if (ret < 0) |
904 | goto on_error; | |
ea918412 | 905 | |
b75c344c | 906 | TRACE("Dropped capabilities"); |
a998454a CB |
907 | } |
908 | ||
909 | /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) | |
910 | * if you want this to be a no-op). | |
911 | */ | |
ab919e5f | 912 | ret = lxc_attach_set_environment(ctx, |
7385273f | 913 | options->env_policy, |
a998454a CB |
914 | options->extra_env_vars, |
915 | options->extra_keep_env); | |
b75c344c CB |
916 | if (ret < 0) |
917 | goto on_error; | |
ea918412 | 918 | |
b75c344c | 919 | TRACE("Set up environment"); |
a998454a | 920 | |
afc691a0 CB |
921 | /* |
922 | * This remark only affects fully unprivileged containers: | |
57de839f CB |
923 | * Receive fd for LSM security module before we set{g,u}id(). The reason |
924 | * is that on set{g,u}id() the kernel will a) make us undumpable and b) | |
925 | * we will change our effective uid. This means our effective uid will | |
926 | * be different from the effective uid of the process that created us | |
927 | * which means that this processs no longer has capabilities in our | |
928 | * namespace including CAP_SYS_PTRACE. This means we will not be able to | |
929 | * read and /proc/<pid> files for the process anymore when /proc is | |
930 | * mounted with hidepid={1,2}. So let's get the lsm label fd before the | |
931 | * set{g,u}id(). | |
932 | */ | |
afc691a0 | 933 | if (attach_lsm(options) && ctx->lsm_label) { |
f8e88e94 | 934 | if (!sync_wait_fd(ap->ipc_socket, ATTACH_SYNC_LSM(&lsm_fd))) { |
6e36c297 | 935 | SYSERROR("Failed to receive lsm label fd"); |
b75c344c | 936 | goto on_error; |
9044b79e | 937 | } |
938 | ||
57de839f CB |
939 | TRACE("Received LSM label file descriptor %d from parent", lsm_fd); |
940 | } | |
941 | ||
08ea9270 | 942 | if (options->stdin_fd > 0 && isatty(options->stdin_fd)) { |
cd0a2b2f | 943 | ret = lxc_make_controlling_terminal(options->stdin_fd); |
08ea9270 CB |
944 | if (ret < 0) |
945 | goto on_error; | |
946 | } | |
947 | ||
b58214ac CB |
948 | if (!lxc_setgroups(0, NULL) && errno != EPERM) |
949 | goto on_error; | |
950 | ||
936efc72 CB |
951 | if (options->namespaces & CLONE_NEWUSER) { |
952 | /* Check whether nsuid 0 has a mapping. */ | |
953 | ns_root_uid = get_ns_uid(0); | |
ea918412 | 954 | |
936efc72 CB |
955 | /* Check whether nsgid 0 has a mapping. */ |
956 | ns_root_gid = get_ns_gid(0); | |
a998454a | 957 | |
936efc72 CB |
958 | /* If there's no mapping for nsuid 0 try to retrieve the nsuid |
959 | * init was started with. | |
960 | */ | |
961 | if (ns_root_uid == LXC_INVALID_UID) | |
9680e7b0 CB |
962 | ns_root_uid = ctx->init_uid; |
963 | ||
964 | if (ns_root_gid == LXC_INVALID_UID) | |
965 | ns_root_gid = ctx->init_gid; | |
ea918412 | 966 | |
936efc72 CB |
967 | if (ns_root_uid == LXC_INVALID_UID) |
968 | goto on_error; | |
a998454a | 969 | |
464c4611 | 970 | if (!lxc_switch_uid_gid(ns_root_uid, ns_root_gid)) |
b75c344c | 971 | goto on_error; |
a998454a CB |
972 | } |
973 | ||
936efc72 CB |
974 | /* Set {u,g}id. */ |
975 | if (options->uid != LXC_INVALID_UID) | |
976 | new_uid = options->uid; | |
977 | else | |
978 | new_uid = ns_root_uid; | |
979 | ||
980 | if (options->gid != LXC_INVALID_GID) | |
981 | new_gid = options->gid; | |
982 | else | |
983 | new_gid = ns_root_gid; | |
984 | ||
afc691a0 | 985 | if (attach_lsm(options) && ctx->lsm_label) { |
d3ba7c98 | 986 | bool on_exec; |
a998454a CB |
987 | |
988 | /* Change into our new LSM profile. */ | |
d3ba7c98 | 989 | on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false; |
afc691a0 | 990 | ret = ctx->lsm_ops->process_label_set_at(ctx->lsm_ops, lsm_fd, ctx->lsm_label, on_exec); |
cb2420df | 991 | close_prot_errno_disarm(lsm_fd); |
b75c344c CB |
992 | if (ret < 0) |
993 | goto on_error; | |
ea918412 | 994 | |
ab919e5f | 995 | TRACE("Set %s LSM label to \"%s\"", ctx->lsm_ops->name, ctx->lsm_label); |
a998454a CB |
996 | } |
997 | ||
640952e5 | 998 | if (conf->no_new_privs || (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) { |
6ce8e678 AL |
999 | ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0), |
1000 | prctl_arg(0), prctl_arg(0)); | |
1001 | if (ret < 0) | |
1002 | goto on_error; | |
1003 | ||
1004 | TRACE("Set PR_SET_NO_NEW_PRIVS"); | |
1005 | } | |
1006 | ||
640952e5 | 1007 | if (conf->seccomp.seccomp) { |
cdb2a47f | 1008 | ret = lxc_seccomp_load(conf); |
b75c344c CB |
1009 | if (ret < 0) |
1010 | goto on_error; | |
ea918412 | 1011 | |
b75c344c | 1012 | TRACE("Loaded seccomp profile"); |
cdb2a47f | 1013 | |
338b230f | 1014 | ret = lxc_seccomp_send_notifier_fd(&conf->seccomp, ap->ipc_socket); |
c3e3c21a CB |
1015 | if (ret < 0) |
1016 | goto on_error; | |
a998454a | 1017 | } |
ea918412 | 1018 | |
a998454a CB |
1019 | /* The following is done after the communication socket is shut down. |
1020 | * That way, all errors that might (though unlikely) occur up until this | |
1021 | * point will have their messages printed to the original stderr (if | |
1022 | * logging is so configured) and not the fd the user supplied, if any. | |
1023 | */ | |
1024 | ||
1025 | /* Fd handling for stdin, stdout and stderr; ignore errors here, user | |
1026 | * may want to make sure the fds are closed, for example. | |
1027 | */ | |
08ea9270 | 1028 | if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO) |
b4959848 CB |
1029 | if (dup2(options->stdin_fd, STDIN_FILENO)) |
1030 | DEBUG("Failed to replace stdin with %d", options->stdin_fd); | |
08ea9270 CB |
1031 | |
1032 | if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO) | |
b4959848 CB |
1033 | if (dup2(options->stdout_fd, STDOUT_FILENO)) |
1034 | DEBUG("Failed to replace stdout with %d", options->stdin_fd); | |
08ea9270 CB |
1035 | |
1036 | if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO) | |
b4959848 CB |
1037 | if (dup2(options->stderr_fd, STDERR_FILENO)) |
1038 | DEBUG("Failed to replace stderr with %d", options->stdin_fd); | |
a998454a CB |
1039 | |
1040 | /* close the old fds */ | |
08ea9270 | 1041 | if (options->stdin_fd > STDERR_FILENO) |
a998454a | 1042 | close(options->stdin_fd); |
08ea9270 CB |
1043 | |
1044 | if (options->stdout_fd > STDERR_FILENO) | |
a998454a | 1045 | close(options->stdout_fd); |
08ea9270 CB |
1046 | |
1047 | if (options->stderr_fd > STDERR_FILENO) | |
a998454a CB |
1048 | close(options->stderr_fd); |
1049 | ||
427a8067 CB |
1050 | /* |
1051 | * Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also | |
a998454a CB |
1052 | * here, ignore errors. |
1053 | */ | |
427a8067 | 1054 | for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) { |
3f62938a | 1055 | ret = fd_cloexec(fd, false); |
b75c344c CB |
1056 | if (ret < 0) { |
1057 | SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd); | |
1058 | goto on_error; | |
1059 | } | |
a998454a CB |
1060 | } |
1061 | ||
9e84479f | 1062 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { |
338b230f | 1063 | ret = lxc_terminal_prepare_login(ap->terminal_pts_fd); |
ba2be1a8 | 1064 | if (ret < 0) { |
338b230f | 1065 | SYSERROR("Failed to prepare terminal file descriptor %d", ap->terminal_pts_fd); |
ba2be1a8 CB |
1066 | goto on_error; |
1067 | } | |
ea918412 | 1068 | |
338b230f | 1069 | TRACE("Prepared terminal file descriptor %d", ap->terminal_pts_fd); |
ba2be1a8 CB |
1070 | } |
1071 | ||
afc691a0 CB |
1072 | put_attach_payload(ap); |
1073 | ||
936efc72 CB |
1074 | /* Avoid unnecessary syscalls. */ |
1075 | if (new_uid == ns_root_uid) | |
1076 | new_uid = LXC_INVALID_UID; | |
1077 | ||
1078 | if (new_gid == ns_root_gid) | |
1079 | new_gid = LXC_INVALID_GID; | |
c353b0b9 | 1080 | |
6aff5157 | 1081 | /* Make sure that the processes STDIO is correctly owned by the user that we are switching to */ |
c353b0b9 CB |
1082 | ret = fix_stdio_permissions(new_uid); |
1083 | if (ret) | |
a2c26bef | 1084 | INFO("Failed to adjust stdio permissions"); |
936efc72 | 1085 | |
464c4611 | 1086 | if (!lxc_switch_uid_gid(new_uid, new_gid)) |
936efc72 CB |
1087 | goto on_error; |
1088 | ||
a998454a | 1089 | /* We're done, so we can now do whatever the user intended us to do. */ |
afc691a0 | 1090 | _exit(attach_function(attach_function_args)); |
b75c344c CB |
1091 | |
1092 | on_error: | |
dab02267 | 1093 | ERROR("Failed to attach to container"); |
c7ac2e1c | 1094 | _exit(EXIT_FAILURE); |
a998454a CB |
1095 | } |
1096 | ||
/*
 * Initialize @terminal and create a terminal for the attach session.
 *
 * Returns 0 on success and -1 (after logging an error) if
 * lxc_terminal_create() fails.
 */
static int lxc_attach_terminal(const char *name, const char *lxcpath, struct lxc_conf *conf,
			       struct lxc_terminal *terminal)
{
	lxc_terminal_init(terminal);

	if (lxc_terminal_create(name, lxcpath, conf, terminal) < 0)
		return log_error(-1, "Failed to create terminal");

	return 0;
}
1110 | ||
9e84479f CB |
/*
 * Open the mainloop @descr and register the handlers of @terminal with it.
 *
 * Returns 0 on success and -1 on failure. If registering the handlers fails
 * the mainloop is closed again before returning.
 */
static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
					     struct lxc_epoll_descr *descr)
{
	if (lxc_mainloop_open(descr) < 0)
		return log_error(-1, "Failed to create mainloop");

	if (lxc_terminal_mainloop_add(descr, terminal) < 0) {
		/* Don't leave a half-initialized mainloop behind. */
		lxc_mainloop_close(descr);
		return log_error(-1, "Failed to add handlers to mainloop");
	}

	return 0;
}
1128 | ||
36a94ce8 | 1129 | static inline void lxc_attach_terminal_close_ptx(struct lxc_terminal *terminal) |
ba2be1a8 | 1130 | { |
36a94ce8 | 1131 | close_prot_errno_disarm(terminal->ptx); |
ba2be1a8 CB |
1132 | } |
1133 | ||
cecf3e83 | 1134 | static inline void lxc_attach_terminal_close_pts(struct lxc_terminal *terminal) |
ba2be1a8 | 1135 | { |
41808e20 | 1136 | close_prot_errno_disarm(terminal->pty); |
ba2be1a8 CB |
1137 | } |
1138 | ||
9e84479f | 1139 | static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal) |
ba2be1a8 | 1140 | { |
19a3e906 | 1141 | close_prot_errno_disarm(terminal->peer); |
ba2be1a8 CB |
1142 | } |
1143 | ||
9e84479f | 1144 | static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal) |
ba2be1a8 | 1145 | { |
19a3e906 | 1146 | close_prot_errno_disarm(terminal->log_fd); |
ba2be1a8 CB |
1147 | } |
1148 | ||
908fbc1a CB |
1149 | int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, |
1150 | void *exec_payload, lxc_attach_options_t *options, | |
1151 | pid_t *attached_process) | |
9c4693b8 | 1152 | { |
6f9fe5d0 | 1153 | int ret_parent = -1; |
6f9fe5d0 | 1154 | struct lxc_epoll_descr descr = {}; |
a9f0cecf | 1155 | int ret; |
26abd7ea | 1156 | char *name, *lxcpath; |
9c4693b8 | 1157 | int ipc_sockets[2]; |
500ed813 | 1158 | pid_t attached_pid, pid, to_cleanup_pid; |
ab919e5f | 1159 | struct attach_context *ctx; |
9e84479f | 1160 | struct lxc_terminal terminal; |
1cce35e6 | 1161 | struct lxc_conf *conf; |
9c4693b8 | 1162 | |
908fbc1a | 1163 | if (!container) |
540a2f70 | 1164 | return ret_set_errno(-1, EINVAL); |
908fbc1a CB |
1165 | |
1166 | if (!lxc_container_get(container)) | |
540a2f70 | 1167 | return ret_set_errno(-1, EINVAL); |
908fbc1a CB |
1168 | |
1169 | name = container->name; | |
1170 | lxcpath = container->config_path; | |
1171 | ||
afc691a0 | 1172 | if (!options) { |
9c4693b8 | 1173 | options = &attach_static_default_options; |
afc691a0 CB |
1174 | options->lsm_label = NULL; |
1175 | } | |
9c4693b8 | 1176 | |
9745eb8a | 1177 | ctx = alloc_attach_context(); |
ab919e5f | 1178 | if (!ctx) { |
9745eb8a CB |
1179 | lxc_container_put(container); |
1180 | return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate attach context"); | |
1181 | } | |
1182 | ||
afc691a0 | 1183 | ret = get_attach_context(ctx, container, options); |
9745eb8a | 1184 | if (ret) { |
7e995801 | 1185 | put_attach_context(ctx); |
74ce42b5 | 1186 | return log_error(-1, "Failed to get attach context"); |
9c4693b8 CS |
1187 | } |
1188 | ||
ab919e5f | 1189 | conf = ctx->container->lxc_conf; |
ba773996 | 1190 | |
ab919e5f | 1191 | if (!fetch_seccomp(ctx->container, options)) |
ae026f55 | 1192 | WARN("Failed to get seccomp policy"); |
2c4ea790 | 1193 | |
ab919e5f | 1194 | if (!no_new_privs(ctx->container, options)) |
ae026f55 | 1195 | WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set"); |
2e812c16 | 1196 | |
74ce42b5 | 1197 | /* Determine which namespaces the container was created with. */ |
9c4693b8 CS |
1198 | if (options->namespaces == -1) { |
1199 | options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath); | |
9c4693b8 | 1200 | if (options->namespaces == -1) { |
dd53c8af | 1201 | put_attach_context(ctx); |
74ce42b5 | 1202 | return log_error(-1, "Failed to automatically determine the namespaces which the container uses"); |
9c4693b8 | 1203 | } |
877f3a04 | 1204 | |
74ce42b5 | 1205 | for (int i = 0; i < LXC_NS_MAX; i++) { |
877f3a04 CB |
1206 | if (ns_info[i].clone_flag & CLONE_NEWCGROUP) |
1207 | if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) || | |
1208 | !cgns_supported()) | |
1209 | continue; | |
1210 | ||
1211 | if (ns_info[i].clone_flag & options->namespaces) | |
1212 | continue; | |
1213 | ||
ab919e5f | 1214 | ctx->ns_inherited |= ns_info[i].clone_flag; |
877f3a04 CB |
1215 | } |
1216 | } | |
1217 | ||
b7873c95 CB |
1218 | ret = get_attach_context_nsfds(ctx, options); |
1219 | if (ret) { | |
b7873c95 | 1220 | lxc_container_put(container); |
74ce42b5 | 1221 | return log_error(-1, "Failed to get namespace file descriptors"); |
9c4693b8 CS |
1222 | } |
1223 | ||
9e84479f | 1224 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { |
f797f05e | 1225 | ret = lxc_attach_terminal(name, lxcpath, conf, &terminal); |
ba2be1a8 | 1226 | if (ret < 0) { |
dd53c8af | 1227 | put_attach_context(ctx); |
74ce42b5 | 1228 | return log_error(-1, "Failed to setup new terminal"); |
ba2be1a8 CB |
1229 | } |
1230 | ||
9e84479f | 1231 | terminal.log_fd = options->log_fd; |
c948657b | 1232 | } else { |
9e84479f | 1233 | lxc_terminal_init(&terminal); |
ba2be1a8 CB |
1234 | } |
1235 | ||
8ce83369 CB |
1236 | /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order |
1237 | * to make sure we don't irritate other threads that want to fork+exec | |
1238 | * away | |
9c4693b8 CS |
1239 | * |
1240 | * IMPORTANT: if the initial process is multithreaded and another call | |
1241 | * just fork()s away without exec'ing directly after, the socket fd will | |
1242 | * exist in the forked process from the other thread and any close() in | |
8ce83369 | 1243 | * our own child process will not really cause the socket to close |
1d801260 | 1244 | * properly, potentially causing the parent to hang. |
9c4693b8 CS |
1245 | * |
1246 | * For this reason, while IPC is still active, we have to use shutdown() | |
8ce83369 CB |
1247 | * if the child exits prematurely in order to signal that the socket is |
1248 | * closed and cannot assume that the child exiting will automatically do | |
1249 | * that. | |
9c4693b8 CS |
1250 | * |
1251 | * IPC mechanism: (X is receiver) | |
1252 | * initial process intermediate attached | |
1253 | * X <--- send pid of | |
1254 | * attached proc, | |
1255 | * then exit | |
1256 | * send 0 ------------------------------------> X | |
1257 | * [do initialization] | |
1258 | * X <------------------------------------ send 1 | |
1259 | * [add to cgroup, ...] | |
1260 | * send 2 ------------------------------------> X | |
81f466d0 CB |
1261 | * [set LXC_ATTACH_NO_NEW_PRIVS] |
1262 | * X <------------------------------------ send 3 | |
1263 | * [open LSM label fd] | |
1264 | * send 4 ------------------------------------> X | |
1265 | * [set LSM label] | |
9c4693b8 CS |
1266 | * close socket close socket |
1267 | * run program | |
1268 | */ | |
1269 | ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); | |
1270 | if (ret < 0) { | |
dd53c8af | 1271 | put_attach_context(ctx); |
74ce42b5 | 1272 | return log_error_errno(-1, errno, "Could not set up required IPC mechanism for attaching"); |
9c4693b8 CS |
1273 | } |
1274 | ||
e3f0e436 CB |
1275 | /* Create intermediate subprocess, two reasons: |
1276 | * 1. We can't setns() in the child itself, since we want to make | |
8ce83369 | 1277 | * sure we are properly attached to the pidns. |
e3f0e436 | 1278 | * 2. Also, the initial thread has to put the attached process |
8ce83369 CB |
1279 | * into the cgroup, which we can only do if we didn't already |
1280 | * setns() (otherwise, user namespaces will hate us). | |
9c4693b8 CS |
1281 | */ |
1282 | pid = fork(); | |
9c4693b8 | 1283 | if (pid < 0) { |
dd53c8af | 1284 | put_attach_context(ctx); |
74ce42b5 | 1285 | return log_error_errno(-1, errno, "Failed to create first subprocess"); |
9c4693b8 CS |
1286 | } |
1287 | ||
4f25e72f | 1288 | if (pid == 0) { |
26abd7ea | 1289 | char *cwd, *new_cwd; |
a588a482 | 1290 | |
ba2be1a8 | 1291 | /* close unneeded file descriptors */ |
4f25e72f | 1292 | close_prot_errno_disarm(ipc_sockets[0]); |
2202afc9 | 1293 | |
4f25e72f CB |
1294 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { |
1295 | lxc_attach_terminal_close_ptx(&terminal); | |
1296 | lxc_attach_terminal_close_peer(&terminal); | |
1297 | lxc_attach_terminal_close_log(&terminal); | |
f4364484 SG |
1298 | } |
1299 | ||
4f25e72f | 1300 | /* Wait for the parent to have setup cgroups. */ |
6e48e7c5 | 1301 | if (!sync_wait(ipc_sockets[1], ATTACH_SYNC_CGROUP)) { |
4f25e72f | 1302 | shutdown(ipc_sockets[1], SHUT_RDWR); |
dd53c8af | 1303 | put_attach_context(ctx); |
4f25e72f | 1304 | _exit(EXIT_FAILURE); |
bb2ada6f CB |
1305 | } |
1306 | ||
c538837d CB |
1307 | if (!attach_context_security_barrier(ctx)) { |
1308 | shutdown(ipc_sockets[1], SHUT_RDWR); | |
1309 | put_attach_context(ctx); | |
1310 | _exit(EXIT_FAILURE); | |
1311 | } | |
1312 | ||
4f25e72f CB |
1313 | TRACE("Intermediate process starting to initialize"); |
1314 | ||
a588a482 CB |
1315 | cwd = getcwd(NULL, 0); |
1316 | ||
c538837d CB |
1317 | /* |
1318 | * Attach now, create another subprocess later, since pid | |
1319 | * namespaces only really affect the children of the current | |
1320 | * process. | |
1321 | * | |
1322 | * Note that this is a crucial barrier. We're no moving into | |
1323 | * the container's context so we need to make sure to not leak | |
1324 | * anything sensitive. That especially means things such as | |
1325 | * open file descriptors! | |
4f25e72f | 1326 | */ |
c538837d | 1327 | ret = attach_context_container(ctx); |
4f25e72f CB |
1328 | if (ret < 0) { |
1329 | ERROR("Failed to enter namespaces"); | |
1330 | shutdown(ipc_sockets[1], SHUT_RDWR); | |
dd53c8af | 1331 | put_attach_context(ctx); |
4f25e72f | 1332 | _exit(EXIT_FAILURE); |
ba2be1a8 CB |
1333 | } |
1334 | ||
4f25e72f | 1335 | /* close namespace file descriptors */ |
7e995801 | 1336 | close_nsfds(ctx); |
ea918412 | 1337 | |
4f25e72f CB |
1338 | /* Attach succeeded, try to cwd. */ |
1339 | if (options->initial_cwd) | |
1340 | new_cwd = options->initial_cwd; | |
1341 | else | |
1342 | new_cwd = cwd; | |
1343 | if (new_cwd) { | |
1344 | ret = chdir(new_cwd); | |
1345 | if (ret < 0) | |
1346 | WARN("Could not change directory to \"%s\"", new_cwd); | |
ba2be1a8 | 1347 | } |
a588a482 | 1348 | free_disarm(cwd); |
c6d09e15 | 1349 | |
4f25e72f | 1350 | /* Create attached process. */ |
4f25e72f CB |
1351 | pid = lxc_raw_clone(CLONE_PARENT, NULL); |
1352 | if (pid < 0) { | |
1353 | SYSERROR("Failed to clone attached process"); | |
1354 | shutdown(ipc_sockets[1], SHUT_RDWR); | |
dd53c8af | 1355 | put_attach_context(ctx); |
4f25e72f CB |
1356 | _exit(EXIT_FAILURE); |
1357 | } | |
f4364484 | 1358 | |
4f25e72f | 1359 | if (pid == 0) { |
338b230f | 1360 | struct attach_payload ap = { |
a64902ab CB |
1361 | .ipc_socket = ipc_sockets[1], |
1362 | .options = options, | |
1363 | .ctx = ctx, | |
1364 | .terminal_pts_fd = terminal.pty, | |
1365 | .exec_function = exec_function, | |
1366 | .exec_payload = exec_payload, | |
1367 | }; | |
1368 | ||
4f25e72f CB |
1369 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { |
1370 | ret = lxc_terminal_signal_sigmask_safe_blocked(&terminal); | |
1371 | if (ret < 0) { | |
1372 | SYSERROR("Failed to reset signal mask"); | |
1373 | _exit(EXIT_FAILURE); | |
1374 | } | |
1375 | } | |
ea918412 | 1376 | |
a64902ab | 1377 | /* Does not return. */ |
338b230f | 1378 | do_attach(&ap); |
62183f1a | 1379 | } |
2eef2bda | 1380 | |
4f25e72f CB |
1381 | if (options->attach_flags & LXC_ATTACH_TERMINAL) |
1382 | lxc_attach_terminal_close_pts(&terminal); | |
ea918412 | 1383 | |
4f25e72f | 1384 | /* Tell grandparent the pid of the pid of the newly created child. */ |
f8e88e94 | 1385 | if (!sync_wake_pid(ipc_sockets[1], ATTACH_SYNC_PID(pid))) { |
4f25e72f CB |
1386 | /* If this really happens here, this is very unfortunate, since |
1387 | * the parent will not know the pid of the attached process and | |
1388 | * will not be able to wait for it (and we won't either due to | |
1389 | * CLONE_PARENT) so the parent won't be able to reap it and the | |
1390 | * attached process will remain a zombie. | |
1391 | */ | |
1392 | shutdown(ipc_sockets[1], SHUT_RDWR); | |
dd53c8af | 1393 | put_attach_context(ctx); |
4f25e72f CB |
1394 | _exit(EXIT_FAILURE); |
1395 | } | |
9c4693b8 | 1396 | |
4f25e72f | 1397 | TRACE("Sending pid %d of attached process", pid); |
9c4693b8 | 1398 | |
4f25e72f | 1399 | /* The rest is in the hands of the initial and the attached process. */ |
dd53c8af | 1400 | put_attach_context(ctx); |
4f25e72f CB |
1401 | _exit(EXIT_SUCCESS); |
1402 | } | |
6f4f1937 | 1403 | |
4f25e72f | 1404 | to_cleanup_pid = pid; |
ea918412 | 1405 | |
4f25e72f | 1406 | /* close unneeded file descriptors */ |
cb2420df | 1407 | close_prot_errno_disarm(ipc_sockets[1]); |
7e995801 | 1408 | close_nsfds(ctx); |
4f25e72f CB |
1409 | if (options->attach_flags & LXC_ATTACH_TERMINAL) |
1410 | lxc_attach_terminal_close_pts(&terminal); | |
81f466d0 | 1411 | |
4f25e72f CB |
1412 | /* Attach to cgroup, if requested. */ |
1413 | if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { | |
1414 | /* | |
1415 | * If this is the unified hierarchy cgroup_attach() is | |
1416 | * enough. | |
1417 | */ | |
1418 | ret = cgroup_attach(conf, name, lxcpath, pid); | |
1419 | if (ret) { | |
1420 | call_cleaner(cgroup_exit) struct cgroup_ops *cgroup_ops = NULL; | |
9044b79e | 1421 | |
4f25e72f CB |
1422 | cgroup_ops = cgroup_init(conf); |
1423 | if (!cgroup_ops) | |
1424 | goto on_error; | |
9044b79e | 1425 | |
4f25e72f CB |
1426 | if (!cgroup_ops->attach(cgroup_ops, conf, name, lxcpath, pid)) |
1427 | goto on_error; | |
81f466d0 | 1428 | } |
4f25e72f CB |
1429 | TRACE("Moved intermediate process %d into container's cgroups", pid); |
1430 | } | |
81f466d0 | 1431 | |
4f25e72f CB |
1432 | /* Setup /proc limits */ |
1433 | if (!lxc_list_empty(&conf->procs)) { | |
1434 | ret = setup_proc_filesystem(&conf->procs, pid); | |
1435 | if (ret < 0) | |
1436 | goto on_error; | |
4f3b6a85 CB |
1437 | |
1438 | TRACE("Setup /proc/%d settings", pid); | |
4f25e72f | 1439 | } |
cdb2a47f | 1440 | |
4f25e72f CB |
1441 | /* Setup resource limits */ |
1442 | if (!lxc_list_empty(&conf->limits)) { | |
1443 | ret = setup_resource_limits(&conf->limits, pid); | |
1444 | if (ret < 0) | |
1445 | goto on_error; | |
4f3b6a85 CB |
1446 | |
1447 | TRACE("Setup resource limits"); | |
4f25e72f | 1448 | } |
cdb2a47f | 1449 | |
4f25e72f CB |
1450 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { |
1451 | ret = lxc_attach_terminal_mainloop_init(&terminal, &descr); | |
1452 | if (ret < 0) | |
1453 | goto on_error; | |
9c4693b8 | 1454 | |
4f25e72f CB |
1455 | TRACE("Initialized terminal mainloop"); |
1456 | } | |
9c4693b8 | 1457 | |
4f25e72f | 1458 | /* Let the child process know to go ahead. */ |
6e48e7c5 | 1459 | if (!sync_wake(ipc_sockets[0], ATTACH_SYNC_CGROUP)) |
4f25e72f | 1460 | goto close_mainloop; |
ba2be1a8 | 1461 | |
4f25e72f | 1462 | TRACE("Told intermediate process to start initializing"); |
ea918412 | 1463 | |
4f25e72f | 1464 | /* Get pid of attached process from intermediate process. */ |
f8e88e94 | 1465 | if (!sync_wait_pid(ipc_sockets[0], ATTACH_SYNC_PID(&attached_pid))) |
4f25e72f | 1466 | goto close_mainloop; |
ba2be1a8 | 1467 | |
4f25e72f | 1468 | TRACE("Received pid %d of attached process in parent pid namespace", attached_pid); |
ba2be1a8 | 1469 | |
4f25e72f | 1470 | /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */ |
5d2b46fb | 1471 | if (options->stdin_fd == STDIN_FILENO) { |
4f25e72f CB |
1472 | signal(SIGINT, SIG_IGN); |
1473 | signal(SIGQUIT, SIG_IGN); | |
1474 | } | |
ba2be1a8 | 1475 | |
4f25e72f CB |
1476 | /* Reap intermediate process. */ |
1477 | ret = wait_for_pid(pid); | |
1478 | if (ret < 0) | |
1479 | goto close_mainloop; | |
ba2be1a8 | 1480 | |
4f25e72f | 1481 | TRACE("Intermediate process %d exited", pid); |
ea918412 | 1482 | |
4f25e72f CB |
1483 | /* We will always have to reap the attached process now. */ |
1484 | to_cleanup_pid = attached_pid; | |
9c4693b8 | 1485 | |
4f25e72f | 1486 | /* Open LSM fd and send it to child. */ |
afc691a0 | 1487 | if (attach_lsm(options) && ctx->lsm_label) { |
ad001fb6 | 1488 | __do_close int labelfd = -EBADF; |
4f25e72f | 1489 | bool on_exec; |
ea918412 | 1490 | |
4f25e72f | 1491 | on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false; |
afc691a0 | 1492 | labelfd = ctx->lsm_ops->process_label_fd_get(ctx->lsm_ops, attached_pid, on_exec); |
4f25e72f CB |
1493 | if (labelfd < 0) |
1494 | goto close_mainloop; | |
9c4693b8 | 1495 | |
4f25e72f | 1496 | TRACE("Opened LSM label file descriptor %d", labelfd); |
ea918412 | 1497 | |
4f25e72f | 1498 | /* Send child fd of the LSM security module to write to. */ |
f8e88e94 | 1499 | if (!sync_wake_fd(ipc_sockets[0], ATTACH_SYNC_LSM(labelfd))) { |
6e36c297 | 1500 | SYSERROR("Failed to send lsm label fd"); |
4f25e72f CB |
1501 | goto close_mainloop; |
1502 | } | |
1503 | ||
4f25e72f | 1504 | TRACE("Sent LSM label file descriptor %d to child", labelfd); |
9c4693b8 | 1505 | } |
ea918412 | 1506 | |
4f25e72f CB |
1507 | if (conf->seccomp.seccomp) { |
1508 | ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]); | |
1509 | if (ret < 0) | |
1510 | goto close_mainloop; | |
9c4693b8 | 1511 | |
4f25e72f | 1512 | ret = lxc_seccomp_add_notifier(name, lxcpath, &conf->seccomp); |
d6d979bc | 1513 | if (ret < 0) |
4f25e72f | 1514 | goto close_mainloop; |
d6d979bc | 1515 | } |
9c4693b8 | 1516 | |
4f25e72f CB |
1517 | /* We're done, the child process should now execute whatever it |
1518 | * is that the user requested. The parent can now track it with | |
1519 | * waitpid() or similar. | |
1520 | */ | |
9c4693b8 | 1521 | |
4f25e72f | 1522 | *attached_process = attached_pid; |
a998454a | 1523 | |
4f25e72f CB |
1524 | /* Now shut down communication with child, we're done. */ |
1525 | shutdown(ipc_sockets[0], SHUT_RDWR); | |
cb2420df | 1526 | close_prot_errno_disarm(ipc_sockets[0]); |
f157b056 | 1527 | |
4f25e72f CB |
1528 | ret_parent = 0; |
1529 | to_cleanup_pid = -1; | |
ea918412 | 1530 | |
4f25e72f CB |
1531 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { |
1532 | ret = lxc_mainloop(&descr, -1); | |
1533 | if (ret < 0) { | |
1534 | ret_parent = -1; | |
1535 | to_cleanup_pid = attached_pid; | |
1536 | } | |
a998454a | 1537 | } |
ea918412 | 1538 | |
4f25e72f | 1539 | close_mainloop: |
9e84479f | 1540 | if (options->attach_flags & LXC_ATTACH_TERMINAL) |
4f25e72f | 1541 | lxc_mainloop_close(&descr); |
9c4693b8 | 1542 | |
4f25e72f CB |
1543 | on_error: |
1544 | if (ipc_sockets[0] >= 0) { | |
1545 | shutdown(ipc_sockets[0], SHUT_RDWR); | |
cb2420df | 1546 | close_prot_errno_disarm(ipc_sockets[0]); |
9c4693b8 | 1547 | } |
ea918412 | 1548 | |
4f25e72f CB |
1549 | if (to_cleanup_pid > 0) |
1550 | (void)wait_for_pid(to_cleanup_pid); | |
1551 | ||
1552 | if (options->attach_flags & LXC_ATTACH_TERMINAL) { | |
1553 | lxc_terminal_delete(&terminal); | |
1554 | lxc_terminal_conf_free(&terminal); | |
1555 | } | |
9c4693b8 | 1556 | |
dd53c8af | 1557 | put_attach_context(ctx); |
4f25e72f | 1558 | return ret_parent; |
9c4693b8 CS |
1559 | } |
1560 | ||
06346bb0 | 1561 | int lxc_attach_run_command(void *payload) |
9c4693b8 | 1562 | { |
06346bb0 CB |
1563 | int ret = -1; |
1564 | lxc_attach_command_t *cmd = payload; | |
9c4693b8 | 1565 | |
06346bb0 CB |
1566 | ret = execvp(cmd->program, cmd->argv); |
1567 | if (ret < 0) { | |
1568 | switch (errno) { | |
1569 | case ENOEXEC: | |
1570 | ret = 126; | |
cf0fd972 | 1571 | break; |
06346bb0 CB |
1572 | case ENOENT: |
1573 | ret = 127; | |
cf0fd972 | 1574 | break; |
06346bb0 CB |
1575 | } |
1576 | } | |
ea918412 | 1577 | |
c2af3a15 | 1578 | return log_error_errno(ret, errno, "Failed to exec \"%s\"", cmd->program); |
9c4693b8 CS |
1579 | } |
1580 | ||
1581 | int lxc_attach_run_shell(void* payload) | |
1582 | { | |
cd8f5663 | 1583 | __do_free char *buf = NULL; |
9c4693b8 | 1584 | uid_t uid; |
cb7aa5e8 DJ |
1585 | struct passwd pwent; |
1586 | struct passwd *pwentp = NULL; | |
9c4693b8 | 1587 | char *user_shell; |
cb7aa5e8 DJ |
1588 | size_t bufsize; |
1589 | int ret; | |
9c4693b8 | 1590 | |
8ce83369 | 1591 | /* Ignore payload parameter. */ |
9c4693b8 CS |
1592 | (void)payload; |
1593 | ||
1594 | uid = getuid(); | |
cb7aa5e8 DJ |
1595 | |
1596 | bufsize = sysconf(_SC_GETPW_R_SIZE_MAX); | |
1597 | if (bufsize == -1) | |
1598 | bufsize = 1024; | |
1599 | ||
1600 | buf = malloc(bufsize); | |
1601 | if (buf) { | |
1602 | ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp); | |
1603 | if (!pwentp) { | |
1604 | if (ret == 0) | |
ea918412 | 1605 | WARN("Could not find matched password record"); |
cb7aa5e8 DJ |
1606 | |
1607 | WARN("Failed to get password record - %u", uid); | |
1608 | } | |
1609 | } | |
9c4693b8 | 1610 | |
8ce83369 CB |
1611 | /* This probably happens because of incompatible nss implementations in |
1612 | * host and container (remember, this code is still using the host's | |
1613 | * glibc but our mount namespace is in the container) we may try to get | |
1614 | * the information by spawning a [getent passwd uid] process and parsing | |
1615 | * the result. | |
9c4693b8 | 1616 | */ |
cb7aa5e8 | 1617 | if (!pwentp) |
9c4693b8 CS |
1618 | user_shell = lxc_attach_getpwshell(uid); |
1619 | else | |
cb7aa5e8 | 1620 | user_shell = pwent.pw_shell; |
ea918412 | 1621 | |
9c4693b8 | 1622 | if (user_shell) |
acf47e1b | 1623 | execlp(user_shell, user_shell, (char *)NULL); |
9c4693b8 | 1624 | |
8ce83369 CB |
1625 | /* Executed if either no passwd entry or execvp fails, we will fall back |
1626 | * on /bin/sh as a default shell. | |
9c4693b8 | 1627 | */ |
acf47e1b | 1628 | execlp("/bin/sh", "/bin/sh", (char *)NULL); |
ea918412 | 1629 | |
edeb1836 | 1630 | SYSERROR("Failed to execute shell"); |
cb7aa5e8 | 1631 | if (!pwentp) |
edeb1836 | 1632 | free(user_shell); |
ea918412 | 1633 | |
9c4693b8 CS |
1634 | return -1; |
1635 | } |