]> git.proxmox.com Git - mirror_lxc.git/blame_incremental - src/lxc/start.c
lxc_monitord: use lxc_safe_int() && use exit()
[mirror_lxc.git] / src / lxc / start.c
... / ...
CommitLineData
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#define _GNU_SOURCE
25#include "config.h"
26
27#include <alloca.h>
28#include <dirent.h>
29#include <errno.h>
30#include <fcntl.h>
31#include <grp.h>
32#include <poll.h>
33#include <signal.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38#include <sys/file.h>
39#include <sys/mount.h>
40#include <sys/param.h>
41#include <sys/prctl.h>
42#include <sys/socket.h>
43#include <sys/stat.h>
44#include <sys/syscall.h>
45#include <sys/types.h>
46#include <sys/un.h>
47#include <sys/wait.h>
48
49#if HAVE_SYS_CAPABILITY_H
50#include <sys/capability.h>
51#endif
52
53#ifndef HAVE_DECL_PR_CAPBSET_DROP
54#define PR_CAPBSET_DROP 24
55#endif
56
57#ifndef HAVE_DECL_PR_SET_NO_NEW_PRIVS
58#define PR_SET_NO_NEW_PRIVS 38
59#endif
60
61#ifndef HAVE_DECL_PR_GET_NO_NEW_PRIVS
62#define PR_GET_NO_NEW_PRIVS 39
63#endif
64
65#include "af_unix.h"
66#include "bdev.h"
67#include "caps.h"
68#include "cgroup.h"
69#include "commands.h"
70#include "conf.h"
71#include "console.h"
72#include "error.h"
73#include "log.h"
74#include "lxclock.h"
75#include "lxcseccomp.h"
76#include "lxcutmp.h"
77#include "mainloop.h"
78#include "monitor.h"
79#include "namespace.h"
80#include "start.h"
81#include "sync.h"
82#include "utils.h"
83#include "lsm/lsm.h"
84
85lxc_log_define(lxc_start, lxc);
86
87extern void mod_all_rdeps(struct lxc_container *c, bool inc);
88static bool do_destroy_container(struct lxc_conf *conf);
89static int lxc_rmdir_onedev_wrapper(void *data);
90static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
91 const char *name);
92
/* Walk @path one component at a time, starting from the top, and report the
 * first prefix for which access(X_OK) fails. Nothing is printed when every
 * prefix is accessible.
 */
static void print_top_failing_dir(const char *path)
{
	size_t path_len = strlen(path);
	char *scratch = alloca(path_len + 1);
	char *cursor = scratch;
	char *end = scratch + path_len;

	/* Work on a private copy since the string is cut in place below. */
	memcpy(scratch, path, path_len + 1);

	while (cursor < end) {
		char terminator;

		/* Skip the separators in front of the next component ... */
		for (; cursor < end && *cursor == '/'; cursor++)
			;

		/* ... and then the component itself. */
		for (; cursor < end && *cursor != '/'; cursor++)
			;

		/* Temporarily terminate the copy after this component. */
		terminator = *cursor;
		*cursor = '\0';

		if (access(scratch, X_OK) != 0) {
			SYSERROR("Could not access %s. Please grant it x "
				 "access, or add an ACL for the container "
				 "root.", scratch);
			return;
		}

		*cursor = terminator;
	}
}
117
118static void close_ns(int ns_fd[LXC_NS_MAX])
119{
120 int i;
121
122 for (i = 0; i < LXC_NS_MAX; i++) {
123 if (ns_fd[i] > -1) {
124 close(ns_fd[i]);
125 ns_fd[i] = -1;
126 }
127 }
128}
129
130/* preserve_ns: open /proc/@pid/ns/@ns for each namespace specified
131 * in clone_flags.
132 * Return true on success, false on failure.
133 */
134static bool preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags, pid_t pid)
135{
136 int i, ret;
137
138 for (i = 0; i < LXC_NS_MAX; i++)
139 ns_fd[i] = -1;
140
141 ret = lxc_preserve_ns(pid, "");
142 if (ret < 0) {
143 SYSERROR("Kernel does not support attaching to namespaces.");
144 return false;
145 } else {
146 close(ret);
147 }
148
149 for (i = 0; i < LXC_NS_MAX; i++) {
150 if ((clone_flags & ns_info[i].clone_flag) == 0)
151 continue;
152 ns_fd[i] = lxc_preserve_ns(pid, ns_info[i].proc_name);
153 if (ns_fd[i] < 0)
154 goto error;
155 }
156
157 return true;
158
159error:
160 if (errno == ENOENT)
161 SYSERROR("Kernel does not support attaching to %s namespaces.", ns_info[i].proc_name);
162 else
163 SYSERROR("Failed to open file descriptor for %s namespace: %s.", ns_info[i].proc_name, strerror(errno));
164 close_ns(ns_fd);
165 return false;
166}
167
168static int attach_ns(const int ns_fd[LXC_NS_MAX]) {
169 int i;
170
171 for (i = 0; i < LXC_NS_MAX; i++) {
172 if (ns_fd[i] < 0)
173 continue;
174
175 if (setns(ns_fd[i], 0) != 0)
176 goto error;
177 }
178 return 0;
179
180error:
181 SYSERROR("Failed to attach %s namespace.", ns_info[i].proc_name);
182 return -1;
183}
184
/* Return 1 if @fd is one of the standard file descriptors 0, 1 or 2,
 * otherwise 0.
 */
static int match_fd(int fd)
{
	return fd >= 0 && fd <= 2;
}
189
190/* Check for any fds we need to close.
191 * - If fd_to_ignore != -1, then if we find that fd open we will ignore it.
192 * - By default we warn about open fds we find.
193 * - If closeall is true, we will close open fds.
194 * - If lxc-start was passed "-C", then conf->close_all_fds will be true, in
195 * which case we also close all open fds.
196 * - A daemonized container will always pass closeall=true.
197 */
198int lxc_check_inherited(struct lxc_conf *conf, bool closeall, int fd_to_ignore)
199{
200 struct dirent *direntp;
201 int fd, fddir;
202 DIR *dir;
203
204 if (conf && conf->close_all_fds)
205 closeall = true;
206
207restart:
208 dir = opendir("/proc/self/fd");
209 if (!dir) {
210 WARN("Failed to open directory: %m.");
211 return -1;
212 }
213
214 fddir = dirfd(dir);
215
216 while ((direntp = readdir(dir))) {
217 if (!direntp)
218 break;
219
220 if (!strcmp(direntp->d_name, "."))
221 continue;
222
223 if (!strcmp(direntp->d_name, ".."))
224 continue;
225
226 fd = atoi(direntp->d_name);
227
228 if (fd == fddir || fd == lxc_log_fd || fd == fd_to_ignore)
229 continue;
230
231 if (current_config && fd == current_config->logfd)
232 continue;
233
234 if (match_fd(fd))
235 continue;
236
237 if (closeall) {
238 close(fd);
239 closedir(dir);
240 INFO("Closed inherited fd: %d.", fd);
241 goto restart;
242 }
243 WARN("Inherited fd: %d.", fd);
244 }
245
246 /* Only enable syslog at this point to avoid the above logging function
247 * to open a new fd and make the check_inherited function enter an
248 * infinite loop.
249 */
250 lxc_log_enable_syslog();
251
252 closedir(dir); /* cannot fail */
253 return 0;
254}
255
256static int setup_signal_fd(sigset_t *oldmask)
257{
258 sigset_t mask;
259 int fd;
260
261 /* Block everything except serious error signals. */
262 if (sigfillset(&mask) ||
263 sigdelset(&mask, SIGILL) ||
264 sigdelset(&mask, SIGSEGV) ||
265 sigdelset(&mask, SIGBUS) ||
266 sigdelset(&mask, SIGWINCH) ||
267 sigprocmask(SIG_BLOCK, &mask, oldmask)) {
268 SYSERROR("Failed to set signal mask.");
269 return -1;
270 }
271
272 fd = signalfd(-1, &mask, 0);
273 if (fd < 0) {
274 SYSERROR("Failed to create signal file descriptor.");
275 return -1;
276 }
277
278 if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
279 SYSERROR("Failed to set FD_CLOEXEC on the signal file descriptor: %d.", fd);
280 close(fd);
281 return -1;
282 }
283
284 DEBUG("Set SIGCHLD handler with file descriptor: %d.", fd);
285
286 return fd;
287}
288
289static int signal_handler(int fd, uint32_t events, void *data,
290 struct lxc_epoll_descr *descr)
291{
292 struct signalfd_siginfo siginfo;
293 siginfo_t info;
294 int ret;
295 pid_t *pid = data;
296 bool init_died = false;
297
298 ret = read(fd, &siginfo, sizeof(siginfo));
299 if (ret < 0) {
300 ERROR("Failed to read signal info from signal file descriptor: %d.", fd);
301 return -1;
302 }
303
304 if (ret != sizeof(siginfo)) {
305 ERROR("Unexpected size for siginfo struct.");
306 return -1;
307 }
308
309 /* Check whether init is running. */
310 info.si_pid = 0;
311 ret = waitid(P_PID, *pid, &info, WEXITED | WNOWAIT | WNOHANG);
312 if (ret == 0 && info.si_pid == *pid)
313 init_died = true;
314
315 if (siginfo.ssi_signo != SIGCHLD) {
316 kill(*pid, siginfo.ssi_signo);
317 INFO("Forwarded signal %d to pid %d.", siginfo.ssi_signo, *pid);
318 return init_died ? 1 : 0;
319 }
320
321 if (siginfo.ssi_code == CLD_STOPPED) {
322 INFO("Container init process was stopped.");
323 return init_died ? 1 : 0;
324 } else if (siginfo.ssi_code == CLD_CONTINUED) {
325 INFO("Container init process was continued.");
326 return init_died ? 1 : 0;
327 }
328
329 /* More robustness, protect ourself from a SIGCHLD sent
330 * by a process different from the container init.
331 */
332 if (siginfo.ssi_pid != *pid) {
333 WARN("Invalid pid for SIGCHLD. Received pid %d, expected pid %d.", siginfo.ssi_pid, *pid);
334 return init_died ? 1 : 0;
335 }
336
337 DEBUG("Container init process %d exited.", *pid);
338 return 1;
339}
340
341int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
342{
343 handler->state = state;
344 lxc_monitor_send_state(name, state, handler->lxcpath);
345 return 0;
346}
347
348int lxc_poll(const char *name, struct lxc_handler *handler)
349{
350 int sigfd = handler->sigfd;
351 int pid = handler->pid;
352 struct lxc_epoll_descr descr;
353
354 if (lxc_mainloop_open(&descr)) {
355 ERROR("Failed to create LXC mainloop.");
356 goto out_sigfd;
357 }
358
359 if (lxc_mainloop_add_handler(&descr, sigfd, signal_handler, &pid)) {
360 ERROR("Failed to add signal handler with file descriptor %d to LXC mainloop.", sigfd);
361 goto out_mainloop_open;
362 }
363
364 if (lxc_console_mainloop_add(&descr, handler->conf)) {
365 ERROR("Failed to add console handler to LXC mainloop.");
366 goto out_mainloop_open;
367 }
368
369 if (lxc_cmd_mainloop_add(name, &descr, handler)) {
370 ERROR("Failed to add command handler to LXC mainloop.");
371 goto out_mainloop_open;
372 }
373
374 if (handler->conf->need_utmp_watch) {
375 #if HAVE_SYS_CAPABILITY_H
376 if (lxc_utmp_mainloop_add(&descr, handler)) {
377 ERROR("Failed to add utmp handler to LXC mainloop.");
378 goto out_mainloop_open;
379 }
380 #else
381 DEBUG("Not starting utmp handler as CAP_SYS_BOOT cannot be dropped without capabilities support.");
382 #endif
383 }
384
385 return lxc_mainloop(&descr, -1);
386
387out_mainloop_open:
388 lxc_mainloop_close(&descr);
389
390out_sigfd:
391 close(sigfd);
392
393 return -1;
394}
395
396struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char *lxcpath)
397{
398 int i;
399 struct lxc_handler *handler;
400
401 handler = malloc(sizeof(*handler));
402 if (!handler)
403 return NULL;
404
405 memset(handler, 0, sizeof(*handler));
406
407 handler->ttysock[0] = handler->ttysock[1] = -1;
408 handler->conf = conf;
409 handler->lxcpath = lxcpath;
410 handler->pinfd = -1;
411
412 for (i = 0; i < LXC_NS_MAX; i++)
413 handler->nsfd[i] = -1;
414
415 lsm_init();
416
417 handler->name = strdup(name);
418 if (!handler->name) {
419 ERROR("Failed to allocate memory.");
420 goto out_free;
421 }
422
423 if (lxc_cmd_init(name, handler, lxcpath))
424 goto out_free_name;
425
426 if (lxc_read_seccomp_config(conf) != 0) {
427 ERROR("Failed loading seccomp policy.");
428 goto out_close_maincmd_fd;
429 }
430
431 /* Begin by setting the state to STARTING. */
432 if (lxc_set_state(name, handler, STARTING)) {
433 ERROR("Failed to set state for container \"%s\" to \"%s\".", name, lxc_state2str(STARTING));
434 goto out_close_maincmd_fd;
435 }
436
437 /* Start of environment variable setup for hooks. */
438 if (name && setenv("LXC_NAME", name, 1))
439 SYSERROR("Failed to set environment variable: LXC_NAME=%s.", name);
440
441 if (conf->rcfile && setenv("LXC_CONFIG_FILE", conf->rcfile, 1))
442 SYSERROR("Failed to set environment variable: LXC_CONFIG_FILE=%s.", conf->rcfile);
443
444 if (conf->rootfs.mount && setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1))
445 SYSERROR("Failed to set environment variable: LXC_ROOTFS_MOUNT=%s.", conf->rootfs.mount);
446
447 if (conf->rootfs.path && setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1))
448 SYSERROR("Failed to set environment variable: LXC_ROOTFS_PATH=%s.", conf->rootfs.path);
449
450 if (conf->console.path && setenv("LXC_CONSOLE", conf->console.path, 1))
451 SYSERROR("Failed to set environment variable: LXC_CONSOLE=%s.", conf->console.path);
452
453 if (conf->console.log_path && setenv("LXC_CONSOLE_LOGPATH", conf->console.log_path, 1))
454 SYSERROR("Failed to set environment variable: LXC_CONSOLE_LOGPATH=%s.", conf->console.log_path);
455
456 if (setenv("LXC_CGNS_AWARE", "1", 1))
457 SYSERROR("Failed to set environment variable LXC_CGNS_AWARE=1.");
458 /* End of environment variable setup for hooks. */
459
460 if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) {
461 ERROR("Failed to run lxc.hook.pre-start for container \"%s\".", name);
462 goto out_aborting;
463 }
464
465 /* The signal fd has to be created before forking otherwise if the child
466 * process exits before we setup the signal fd, the event will be lost
467 * and the command will be stuck.
468 */
469 handler->sigfd = setup_signal_fd(&handler->oldmask);
470 if (handler->sigfd < 0) {
471 ERROR("Failed to setup SIGCHLD fd handler.");
472 goto out_delete_tty;
473 }
474
475 /* Do this after setting up signals since it might unblock SIGWINCH. */
476 if (lxc_console_create(conf)) {
477 ERROR("Failed to create console for container \"%s\".", name);
478 goto out_restore_sigmask;
479 }
480
481 if (ttys_shift_ids(conf) < 0) {
482 ERROR("Failed to shift tty into container.");
483 goto out_restore_sigmask;
484 }
485
486 INFO("Container \"%s\" is initialized.", name);
487 return handler;
488
489out_restore_sigmask:
490 sigprocmask(SIG_SETMASK, &handler->oldmask, NULL);
491out_delete_tty:
492 lxc_delete_tty(&conf->tty_info);
493out_aborting:
494 lxc_set_state(name, handler, ABORTING);
495out_close_maincmd_fd:
496 close(conf->maincmd_fd);
497 conf->maincmd_fd = -1;
498out_free_name:
499 free(handler->name);
500 handler->name = NULL;
501out_free:
502 free(handler);
503 return NULL;
504}
505
506void lxc_fini(const char *name, struct lxc_handler *handler)
507{
508 int i, rc;
509 pid_t self = getpid();
510 char *namespaces[LXC_NS_MAX+1];
511 size_t namespace_count = 0;
512
513 /* The STOPPING state is there for future cleanup code which can take
514 * awhile.
515 */
516 lxc_set_state(name, handler, STOPPING);
517
518 for (i = 0; i < LXC_NS_MAX; i++) {
519 if (handler->nsfd[i] != -1) {
520 rc = asprintf(&namespaces[namespace_count], "%s:/proc/%d/fd/%d",
521 ns_info[i].proc_name, self, handler->nsfd[i]);
522 if (rc == -1) {
523 SYSERROR("Failed to allocate memory.");
524 break;
525 }
526 ++namespace_count;
527 }
528 }
529 namespaces[namespace_count] = NULL;
530
531 if (handler->conf->reboot && setenv("LXC_TARGET", "reboot", 1))
532 SYSERROR("Failed to set environment variable: LXC_TARGET=reboot.");
533
534 if (!handler->conf->reboot && setenv("LXC_TARGET", "stop", 1))
535 SYSERROR("Failed to set environment variable: LXC_TARGET=stop.");
536
537 if (run_lxc_hooks(name, "stop", handler->conf, handler->lxcpath, namespaces))
538 ERROR("Failed to run lxc.hook.stop for container \"%s\".", name);
539
540 while (namespace_count--)
541 free(namespaces[namespace_count]);
542 for (i = 0; i < LXC_NS_MAX; i++) {
543 if (handler->nsfd[i] != -1) {
544 close(handler->nsfd[i]);
545 handler->nsfd[i] = -1;
546 }
547 }
548
549 if (handler->netnsfd >= 0) {
550 close(handler->netnsfd);
551 handler->netnsfd = -1;
552 }
553
554 lxc_set_state(name, handler, STOPPED);
555
556 if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL)) {
557 ERROR("Failed to run lxc.hook.post-stop for container \"%s\".", name);
558 if (handler->conf->reboot) {
559 WARN("Container will be stopped instead of rebooted.");
560 handler->conf->reboot = 0;
561 if (setenv("LXC_TARGET", "stop", 1))
562 WARN("Failed to set environment variable: LXC_TARGET=stop.");
563 }
564 }
565
566 /* Reset mask set by setup_signal_fd. */
567 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL))
568 WARN("Failed to restore signal mask.");
569
570 lxc_console_delete(&handler->conf->console);
571 lxc_delete_tty(&handler->conf->tty_info);
572 close(handler->conf->maincmd_fd);
573 handler->conf->maincmd_fd = -1;
574 free(handler->name);
575 if (handler->ttysock[0] != -1) {
576 close(handler->ttysock[0]);
577 close(handler->ttysock[1]);
578 }
579
580 if (handler->conf->ephemeral == 1 && handler->conf->reboot != 1)
581 lxc_destroy_container_on_signal(handler, name);
582
583 cgroup_destroy(handler);
584 free(handler);
585}
586
587void lxc_abort(const char *name, struct lxc_handler *handler)
588{
589 int ret, status;
590
591 lxc_set_state(name, handler, ABORTING);
592 if (handler->pid > 0)
593 kill(handler->pid, SIGKILL);
594 while ((ret = waitpid(-1, &status, 0)) > 0) {
595 ;
596 }
597}
598
599#include <sys/reboot.h>
600#include <linux/reboot.h>
601
/* reboot(LINUX_REBOOT_CMD_CAD_ON) will return -EINVAL in a child pid namespace
 * if container reboot support exists. Otherwise, it will either succeed or
 * return -EPERM.
 *
 * @arg points to the reboot command to try. Returns 1 if reboot() failed with
 * EINVAL, 0 in every other case.
 */
static int container_reboot_supported(void *arg)
{
	int cmd = *(int *)arg;

	if (reboot(cmd) != -1)
		return 0;

	return errno == EINVAL ? 1 : 0;
}
616
617static int must_drop_cap_sys_boot(struct lxc_conf *conf)
618{
619 FILE *f;
620 int ret, cmd, v, flags;
621 long stack_size = 4096;
622 void *stack = alloca(stack_size);
623 int status;
624 pid_t pid;
625
626 f = fopen("/proc/sys/kernel/ctrl-alt-del", "r");
627 if (!f) {
628 DEBUG("failed to open /proc/sys/kernel/ctrl-alt-del");
629 return 1;
630 }
631
632 ret = fscanf(f, "%d", &v);
633 fclose(f);
634 if (ret != 1) {
635 DEBUG("Failed to read /proc/sys/kernel/ctrl-alt-del.");
636 return 1;
637 }
638 cmd = v ? LINUX_REBOOT_CMD_CAD_ON : LINUX_REBOOT_CMD_CAD_OFF;
639
640 flags = CLONE_NEWPID | SIGCHLD;
641 if (!lxc_list_empty(&conf->id_map))
642 flags |= CLONE_NEWUSER;
643
644#ifdef __ia64__
645 pid = __clone2(container_reboot_supported, stack, stack_size, flags, &cmd);
646#else
647 stack += stack_size;
648 pid = clone(container_reboot_supported, stack, flags, &cmd);
649#endif
650 if (pid < 0) {
651 if (flags & CLONE_NEWUSER)
652 ERROR("Failed to clone (%#x): %s (includes CLONE_NEWUSER).", flags, strerror(errno));
653 else
654 ERROR("Failed to clone (%#x): %s.", flags, strerror(errno));
655 return -1;
656 }
657 if (wait(&status) < 0) {
658 SYSERROR("Unexpected wait error: %m.");
659 return -1;
660 }
661
662 if (WEXITSTATUS(status) != 1)
663 return 1;
664
665 return 0;
666}
667
668/* netpipe is used in the unprivileged case to transfer the ifindexes from
669 * parent to child
670 */
671static int netpipe = -1;
672
673static inline int count_veths(struct lxc_list *network)
674{
675 struct lxc_list *iterator;
676 struct lxc_netdev *netdev;
677 int count = 0;
678
679 lxc_list_for_each(iterator, network) {
680 netdev = iterator->elem;
681 if (netdev->type != LXC_NET_VETH)
682 continue;
683 count++;
684 }
685 return count;
686}
687
688static int read_unpriv_netifindex(struct lxc_list *network)
689{
690 struct lxc_list *iterator;
691 struct lxc_netdev *netdev;
692
693 if (netpipe == -1)
694 return 0;
695 lxc_list_for_each(iterator, network) {
696 netdev = iterator->elem;
697 if (netdev->type != LXC_NET_VETH)
698 continue;
699 if (!(netdev->name = malloc(IFNAMSIZ))) {
700 ERROR("Out of memory.");
701 close(netpipe);
702 return -1;
703 }
704 if (read(netpipe, netdev->name, IFNAMSIZ) != IFNAMSIZ) {
705 close(netpipe);
706 return -1;
707 }
708 }
709 close(netpipe);
710 return 0;
711}
712
713static int do_start(void *data)
714{
715 struct lxc_list *iterator;
716 struct lxc_handler *handler = data;
717 int devnull_fd = -1, ret;
718 char path[PATH_MAX];
719
720 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
721 SYSERROR("Failed to set signal mask.");
722 return -1;
723 }
724
725 /* This prctl must be before the synchro, so if the parent dies before
726 * we set the parent death signal, we will detect its death with the
727 * synchro right after, otherwise we have a window where the parent can
728 * exit before we set the pdeath signal leading to a unsupervized
729 * container.
730 */
731 if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) {
732 SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL.");
733 return -1;
734 }
735
736 lxc_sync_fini_parent(handler);
737
738 /* Don't leak the pinfd to the container. */
739 if (handler->pinfd >= 0) {
740 close(handler->pinfd);
741 }
742
743 if (lxc_sync_wait_parent(handler, LXC_SYNC_STARTUP))
744 return -1;
745
746 /* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
747 * https://github.com/lxc/lxd/issues/1978.
748 */
749 if ((handler->clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
750 (CLONE_NEWNET | CLONE_NEWUSER)) {
751 ret = unshare(CLONE_NEWNET);
752 if (ret < 0) {
753 SYSERROR("Failed to unshare CLONE_NEWNET.");
754 goto out_warn_father;
755 }
756 INFO("Unshared CLONE_NEWNET.");
757 }
758
759 /* Tell the parent task it can begin to configure the container and wait
760 * for it to finish.
761 */
762 if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
763 return -1;
764
765 if (read_unpriv_netifindex(&handler->conf->network) < 0)
766 goto out_warn_father;
767
768 /* If we are in a new user namespace, become root there to have
769 * privilege over our namespace. When using lxc-execute we default to
770 * root, but this can be overriden using the lxc.init_uid and
771 * lxc.init_gid configuration options.
772 */
773 if (!lxc_list_empty(&handler->conf->id_map)) {
774 gid_t new_gid = 0;
775 if (handler->conf->is_execute && handler->conf->init_gid)
776 new_gid = handler->conf->init_gid;
777
778 uid_t new_uid = 0;
779 if (handler->conf->is_execute && handler->conf->init_uid)
780 new_uid = handler->conf->init_uid;
781
782 NOTICE("Switching to uid=%d and gid=%d in new user namespace.", new_uid, new_gid);
783 if (setgid(new_gid)) {
784 SYSERROR("Failed to setgid().");
785 goto out_warn_father;
786 }
787 if (setuid(new_uid)) {
788 SYSERROR("Failed to setuid().");
789 goto out_warn_father;
790 }
791 if (setgroups(0, NULL)) {
792 SYSERROR("Failed to setgroups().");
793 goto out_warn_father;
794 }
795 }
796
797 if (access(handler->lxcpath, X_OK)) {
798 print_top_failing_dir(handler->lxcpath);
799 goto out_warn_father;
800 }
801
802 #if HAVE_SYS_CAPABILITY_H
803 if (handler->conf->need_utmp_watch) {
804 if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
805 SYSERROR("Failed to remove the CAP_SYS_BOOT capability.");
806 goto out_warn_father;
807 }
808 DEBUG("Dropped the CAP_SYS_BOOT capability.");
809 }
810 #endif
811
812 ret = snprintf(path, sizeof(path), "%s/dev/null", handler->conf->rootfs.mount);
813 if (ret < 0 || ret >= sizeof(path))
814 goto out_warn_father;
815
816 /* In order to checkpoint restore, we need to have everything in the
817 * same mount namespace. However, some containers may not have a
818 * reasonable /dev (in particular, they may not have /dev/null), so we
819 * can't set init's std fds to /dev/null by opening it from inside the
820 * container.
821 *
822 * If that's the case, fall back to using the host's /dev/null. This
823 * means that migration won't work, but at least we won't spew output
824 * where it isn't wanted.
825 */
826 if (handler->backgrounded && !handler->conf->autodev && access(path, F_OK) < 0) {
827 devnull_fd = open_devnull();
828
829 if (devnull_fd < 0)
830 goto out_warn_father;
831 WARN("Using /dev/null from the host for container init's "
832 "standard file descriptors. Migration will not work.");
833 }
834
835 /* Setup the container, ip, names, utsname, ... */
836 if (lxc_setup(handler)) {
837 ERROR("Failed to setup container \"%s\".", handler->name);
838 goto out_warn_father;
839 }
840
841 /* Ask father to setup cgroups and wait for him to finish. */
842 if (lxc_sync_barrier_parent(handler, LXC_SYNC_CGROUP))
843 goto out_error;
844
845 /* Unshare cgroup namespace after we have setup our cgroups. If we do it
846 * earlier we end up with a wrong view of /proc/self/cgroup. For
847 * example, assume we unshare(CLONE_NEWCGROUP) first, and then create
848 * the cgroup for the container, say /sys/fs/cgroup/cpuset/lxc/c, then
849 * /proc/self/cgroup would show us:
850 *
851 * 8:cpuset:/lxc/c
852 *
853 * whereas it should actually show
854 *
855 * 8:cpuset:/
856 */
857 if (cgns_supported()) {
858 if (unshare(CLONE_NEWCGROUP) < 0) {
859 INFO("Failed to unshare CLONE_NEWCGROUP.");
860 goto out_warn_father;
861 }
862 INFO("Unshared CLONE_NEWCGROUP.");
863 }
864
865 /* Set the label to change to when we exec(2) the container's init. */
866 if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0)
867 goto out_warn_father;
868
869 /* Set PR_SET_NO_NEW_PRIVS after we changed the lsm label. If we do it
870 * before we aren't allowed anymore.
871 */
872 if (handler->conf->no_new_privs) {
873 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
874 SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
875 goto out_warn_father;
876 }
877 DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
878 }
879
880 /* Some init's such as busybox will set sane tty settings on stdin,
881 * stdout, stderr which it thinks is the console. We already set them
882 * the way we wanted on the real terminal, and we want init to do its
883 * setup on its console ie. the pty allocated in lxc_console_create() so
884 * make sure that that pty is stdin,stdout,stderr.
885 */
886 if (lxc_console_set_stdfds(handler->conf->console.slave) < 0)
887 goto out_warn_father;
888
889 /* If we mounted a temporary proc, then unmount it now. */
890 tmp_proc_unmount(handler->conf);
891
892 if (lxc_seccomp_load(handler->conf) != 0)
893 goto out_warn_father;
894
895 if (run_lxc_hooks(handler->name, "start", handler->conf, handler->lxcpath, NULL)) {
896 ERROR("Failed to run lxc.hook.start for container \"%s\".", handler->name);
897 goto out_warn_father;
898 }
899
900 /* The clearenv() and putenv() calls have been moved here to allow us to
901 * use environment variables passed to the various hooks, such as the
902 * start hook above. Not all of the variables like CONFIG_PATH or ROOTFS
903 * are valid in this context but others are.
904 */
905 if (clearenv()) {
906 SYSERROR("Failed to clear environment.");
907 /* Don't error out though. */
908 }
909
910 lxc_list_for_each(iterator, &handler->conf->environment) {
911 if (putenv((char *)iterator->elem)) {
912 SYSERROR("Failed to set environment variable: %s.", (char *)iterator->elem);
913 goto out_warn_father;
914 }
915 }
916
917 if (putenv("container=lxc")) {
918 SYSERROR("Failed to set environment variable: container=lxc.");
919 goto out_warn_father;
920 }
921
922 if (handler->conf->pty_names) {
923 if (putenv(handler->conf->pty_names)) {
924 SYSERROR("Failed to set environment variable for container ptys.");
925 goto out_warn_father;
926 }
927 }
928
929 close(handler->sigfd);
930
931 if (devnull_fd < 0) {
932 devnull_fd = open_devnull();
933
934 if (devnull_fd < 0)
935 goto out_warn_father;
936 }
937
938 if (handler->backgrounded && set_stdfds(devnull_fd))
939 goto out_warn_father;
940
941 if (devnull_fd >= 0) {
942 close(devnull_fd);
943 devnull_fd = -1;
944 }
945
946 setsid();
947
948 /* After this call, we are in error because this ops should not return
949 * as it execs.
950 */
951 handler->ops->start(handler, handler->data);
952
953out_warn_father:
954 /* We want the parent to know something went wrong, so we return a
955 * special error code.
956 */
957 lxc_sync_wake_parent(handler, LXC_SYNC_ERROR);
958
959out_error:
960 if (devnull_fd >= 0)
961 close(devnull_fd);
962
963 return -1;
964}
965
966static int save_phys_nics(struct lxc_conf *conf)
967{
968 struct lxc_list *iterator;
969 int am_root = (getuid() == 0);
970
971 if (!am_root)
972 return 0;
973
974 lxc_list_for_each(iterator, &conf->network) {
975 struct lxc_netdev *netdev = iterator->elem;
976
977 if (netdev->type != LXC_NET_PHYS)
978 continue;
979 conf->saved_nics = realloc(conf->saved_nics,
980 (conf->num_savednics+1)*sizeof(struct saved_nic));
981 if (!conf->saved_nics)
982 return -1;
983 conf->saved_nics[conf->num_savednics].ifindex = netdev->ifindex;
984 conf->saved_nics[conf->num_savednics].orig_name = strdup(netdev->link);
985 if (!conf->saved_nics[conf->num_savednics].orig_name)
986 return -1;
987 INFO("Stored saved_nic #%d idx %d name %s.", conf->num_savednics,
988 conf->saved_nics[conf->num_savednics].ifindex,
989 conf->saved_nics[conf->num_savednics].orig_name);
990 conf->num_savednics++;
991 }
992
993 return 0;
994}
995
996static int recv_fd(int sock, int *fd)
997{
998 if (lxc_abstract_unix_recv_fd(sock, fd, NULL, 0) < 0) {
999 SYSERROR("Error receiving tty file descriptor from child process.");
1000 return -1;
1001 }
1002 if (*fd == -1)
1003 return -1;
1004 return 0;
1005}
1006
1007static int recv_ttys_from_child(struct lxc_handler *handler)
1008{
1009 struct lxc_conf *conf = handler->conf;
1010 int i, sock = handler->ttysock[1];
1011 struct lxc_tty_info *tty_info = &conf->tty_info;
1012
1013 if (!conf->tty)
1014 return 0;
1015
1016 tty_info->pty_info = malloc(sizeof(*tty_info->pty_info) * conf->tty);
1017 if (!tty_info->pty_info)
1018 return -1;
1019
1020 for (i = 0; i < conf->tty; i++) {
1021 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
1022 pty_info->busy = 0;
1023 if (recv_fd(sock, &pty_info->slave) < 0 ||
1024 recv_fd(sock, &pty_info->master) < 0) {
1025 ERROR("Error receiving tty info from child process.");
1026 return -1;
1027 }
1028 }
1029 tty_info->nbtty = conf->tty;
1030
1031 return 0;
1032}
1033
1034void resolve_clone_flags(struct lxc_handler *handler)
1035{
1036 handler->clone_flags = CLONE_NEWPID | CLONE_NEWNS;
1037
1038 if (!lxc_list_empty(&handler->conf->id_map))
1039 handler->clone_flags |= CLONE_NEWUSER;
1040
1041 if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1) {
1042 if (!lxc_requests_empty_network(handler))
1043 handler->clone_flags |= CLONE_NEWNET;
1044 } else {
1045 INFO("Inheriting a NET namespace.");
1046 }
1047
1048 if (handler->conf->inherit_ns_fd[LXC_NS_IPC] == -1)
1049 handler->clone_flags |= CLONE_NEWIPC;
1050 else
1051 INFO("Inheriting an IPC namespace.");
1052
1053 if (handler->conf->inherit_ns_fd[LXC_NS_UTS] == -1)
1054 handler->clone_flags |= CLONE_NEWUTS;
1055 else
1056 INFO("Inheriting a UTS namespace.");
1057}
1058
1059static int lxc_spawn(struct lxc_handler *handler)
1060{
1061 int failed_before_rename = 0;
1062 const char *name = handler->name;
1063 bool cgroups_connected = false;
1064 int saved_ns_fd[LXC_NS_MAX];
1065 int preserve_mask = 0, i, flags;
1066 int netpipepair[2], nveths;
1067
1068 netpipe = -1;
1069
1070 for (i = 0; i < LXC_NS_MAX; i++)
1071 if (handler->conf->inherit_ns_fd[i] != -1)
1072 preserve_mask |= ns_info[i].clone_flag;
1073
1074 if (lxc_sync_init(handler))
1075 return -1;
1076
1077 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, handler->ttysock) < 0) {
1078 lxc_sync_fini(handler);
1079 return -1;
1080 }
1081
1082 resolve_clone_flags(handler);
1083
1084 if (handler->clone_flags & CLONE_NEWNET) {
1085 if (!lxc_list_empty(&handler->conf->network)) {
1086
1087 /* Find gateway addresses from the link device, which is
1088 * no longer accessible inside the container. Do this
1089 * before creating network interfaces, since goto
1090 * out_delete_net does not work before lxc_clone.
1091 */
1092 if (lxc_find_gateway_addresses(handler)) {
1093 ERROR("Failed to find gateway addresses.");
1094 lxc_sync_fini(handler);
1095 return -1;
1096 }
1097
1098 /* That should be done before the clone because we will
1099 * fill the netdev index and use them in the child.
1100 */
1101 if (lxc_create_network(handler)) {
1102 ERROR("Failed to create the network.");
1103 lxc_sync_fini(handler);
1104 return -1;
1105 }
1106 }
1107
1108 if (save_phys_nics(handler->conf)) {
1109 ERROR("Failed to save physical nic info.");
1110 goto out_abort;
1111 }
1112 }
1113
1114 if (!cgroup_init(handler)) {
1115 ERROR("Failed initializing cgroup support.");
1116 goto out_delete_net;
1117 }
1118
1119 cgroups_connected = true;
1120
1121 if (!cgroup_create(handler)) {
1122 ERROR("Failed creating cgroups.");
1123 goto out_delete_net;
1124 }
1125
1126 /* If the rootfs is not a blockdev, prevent the container from marking
1127 * it readonly.
1128 * If the container is unprivileged then skip rootfs pinning.
1129 */
1130 if (lxc_list_empty(&handler->conf->id_map)) {
1131 handler->pinfd = pin_rootfs(handler->conf->rootfs.path);
1132 if (handler->pinfd == -1)
1133 INFO("Failed to pin the rootfs for container \"%s\".", handler->name);
1134 }
1135
1136 if (!preserve_ns(saved_ns_fd, preserve_mask, getpid()))
1137 goto out_delete_net;
1138
1139 if (attach_ns(handler->conf->inherit_ns_fd) < 0)
1140 goto out_delete_net;
1141
1142 if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) {
1143 if (pipe(netpipepair) < 0) {
1144 SYSERROR("Failed to create pipe.");
1145 goto out_delete_net;
1146 }
1147 /* Store netpipe in the global var for do_start's use. */
1148 netpipe = netpipepair[0];
1149 }
1150
1151 /* Create a process in a new set of namespaces. */
1152 flags = handler->clone_flags;
1153 if (handler->clone_flags & CLONE_NEWUSER) {
1154 /* If CLONE_NEWUSER and CLONE_NEWNET was requested, we need to
1155 * clone a new user namespace first and only later unshare our
1156 * network namespace to ensure that network devices ownership is
1157 * set up correctly.
1158 */
1159 flags &= ~CLONE_NEWNET;
1160 }
1161 handler->pid = lxc_clone(do_start, handler, flags);
1162 if (handler->pid < 0) {
1163 SYSERROR("Failed to clone a new set of namespaces.");
1164 goto out_delete_net;
1165 }
1166 for (i = 0; i < LXC_NS_MAX; i++)
1167 if (flags & ns_info[i].clone_flag)
1168 INFO("Cloned %s.", ns_info[i].flag_name);
1169
1170 if (!preserve_ns(handler->nsfd, handler->clone_flags | preserve_mask, handler->pid))
1171 INFO("Failed to preserve namespace for lxc.hook.stop.");
1172
1173 if (attach_ns(saved_ns_fd))
1174 WARN("Failed to restore saved namespaces.");
1175
1176 lxc_sync_fini_child(handler);
1177
1178 /* Map the container uids. The container became an invalid userid the
1179 * moment it was cloned with CLONE_NEWUSER. This call doesn't change
1180 * anything immediately, but allows the container to setuid(0) (0 being
1181 * mapped to something else on the host.) later to become a valid uid
1182 * again.
1183 */
1184 if (lxc_map_ids(&handler->conf->id_map, handler->pid)) {
1185 ERROR("Failed to set up id mapping.");
1186 goto out_delete_net;
1187 }
1188
1189 if (lxc_sync_wake_child(handler, LXC_SYNC_STARTUP)) {
1190 failed_before_rename = 1;
1191 goto out_delete_net;
1192 }
1193
1194 if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE)) {
1195 failed_before_rename = 1;
1196 goto out_delete_net;
1197 }
1198
1199 if (!cgroup_create_legacy(handler)) {
1200 ERROR("Failed to setup legacy cgroups for container \"%s\".", name);
1201 goto out_delete_net;
1202 }
1203 if (!cgroup_setup_limits(handler, false)) {
1204 ERROR("Failed to setup cgroup limits for container \"%s\".", name);
1205 goto out_delete_net;
1206 }
1207
1208 if (!cgroup_enter(handler))
1209 goto out_delete_net;
1210
1211 if (!cgroup_chown(handler))
1212 goto out_delete_net;
1213
1214 if (failed_before_rename)
1215 goto out_delete_net;
1216
1217 /* Create the network configuration. */
1218 if (handler->clone_flags & CLONE_NEWNET) {
1219 if (lxc_assign_network(handler->lxcpath, handler->name,
1220 &handler->conf->network, handler->pid)) {
1221 ERROR("Failed to create the configured network.");
1222 goto out_delete_net;
1223 }
1224 }
1225
1226 if (netpipe != -1) {
1227 struct lxc_list *iterator;
1228 struct lxc_netdev *netdev;
1229
1230 close(netpipe);
1231 lxc_list_for_each(iterator, &handler->conf->network) {
1232 netdev = iterator->elem;
1233 if (netdev->type != LXC_NET_VETH)
1234 continue;
1235 if (write(netpipepair[1], netdev->name, IFNAMSIZ) != IFNAMSIZ) {
1236 ERROR("Error writing veth name to container.");
1237 goto out_delete_net;
1238 }
1239 }
1240 close(netpipepair[1]);
1241 }
1242
1243 /* Tell the child to continue its initialization. We'll get
1244 * LXC_SYNC_CGROUP when it is ready for us to setup cgroups.
1245 */
1246 if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
1247 goto out_delete_net;
1248
1249 if (!cgroup_setup_limits(handler, true)) {
1250 ERROR("Failed to setup the devices cgroup for container \"%s\".", name);
1251 goto out_delete_net;
1252 }
1253
1254 cgroup_disconnect();
1255 cgroups_connected = false;
1256
1257 /* Read tty fds allocated by child. */
1258 if (recv_ttys_from_child(handler) < 0) {
1259 ERROR("Failed to receive tty info from child process.");
1260 goto out_delete_net;
1261 }
1262
1263 /* Tell the child to complete its initialization and wait for it to exec
1264 * or return an error. (The child will never return
1265 * LXC_SYNC_POST_CGROUP+1. It will either close the sync pipe, causing
1266 * lxc_sync_barrier_child to return success, or return a different
1267 * value, causing us to error out).
1268 */
1269 if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CGROUP))
1270 return -1;
1271
1272 if (detect_shared_rootfs())
1273 umount2(handler->conf->rootfs.mount, MNT_DETACH);
1274
1275 if (handler->ops->post_start(handler, handler->data))
1276 goto out_abort;
1277
1278 if (lxc_set_state(name, handler, RUNNING)) {
1279 ERROR("Failed to set state for container \"%s\" to \"%s\".", name,
1280 lxc_state2str(RUNNING));
1281 goto out_abort;
1282 }
1283
1284 lxc_sync_fini(handler);
1285 handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
1286
1287 return 0;
1288
1289out_delete_net:
1290 if (cgroups_connected)
1291 cgroup_disconnect();
1292 if (handler->clone_flags & CLONE_NEWNET)
1293 lxc_delete_network(handler);
1294out_abort:
1295 lxc_abort(name, handler);
1296 lxc_sync_fini(handler);
1297 if (handler->pinfd >= 0) {
1298 close(handler->pinfd);
1299 handler->pinfd = -1;
1300 }
1301
1302 return -1;
1303}
1304
1305int __lxc_start(const char *name, struct lxc_conf *conf,
1306 struct lxc_operations* ops, void *data, const char *lxcpath,
1307 bool backgrounded)
1308{
1309 struct lxc_handler *handler;
1310 int err = -1;
1311 int status;
1312 bool removed_all_netdevs = true;
1313
1314 handler = lxc_init(name, conf, lxcpath);
1315 if (!handler) {
1316 ERROR("Failed to initialize container \"%s\".", name);
1317 return -1;
1318 }
1319 handler->ops = ops;
1320 handler->data = data;
1321 handler->backgrounded = backgrounded;
1322 handler->netnsfd = -1;
1323
1324 if (must_drop_cap_sys_boot(handler->conf)) {
1325 #if HAVE_SYS_CAPABILITY_H
1326 DEBUG("Dropping CAP_SYS_BOOT capability.");
1327 #else
1328 DEBUG("Not dropping CAP_SYS_BOOT capability as capabilities aren't supported.");
1329 #endif
1330 } else {
1331 DEBUG("Not dropping CAP_SYS_BOOT or watching utmp.");
1332 handler->conf->need_utmp_watch = 0;
1333 }
1334
1335 if (!attach_block_device(handler->conf)) {
1336 ERROR("Failed to attach block device.");
1337 goto out_fini_nonet;
1338 }
1339
1340 if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
1341 /* If the backing store is a device, mount it here and now. */
1342 if (rootfs_is_blockdev(conf)) {
1343 if (unshare(CLONE_NEWNS) < 0) {
1344 ERROR("Failed to unshare CLONE_NEWNS.");
1345 goto out_fini_nonet;
1346 }
1347 INFO("Unshared CLONE_NEWNS.");
1348
1349 remount_all_slave();
1350 if (do_rootfs_setup(conf, name, lxcpath) < 0) {
1351 ERROR("Error setting up rootfs mount as root before spawn.");
1352 goto out_fini_nonet;
1353 }
1354 INFO("Set up container rootfs as host root.");
1355 }
1356 }
1357
1358 err = lxc_spawn(handler);
1359 if (err) {
1360 ERROR("Failed to spawn container \"%s\".", name);
1361 goto out_detach_blockdev;
1362 }
1363
1364 handler->conf->reboot = 0;
1365
1366 err = lxc_poll(name, handler);
1367 if (err) {
1368 ERROR("LXC mainloop exited with error: %d.", err);
1369 if (handler->netnsfd >= 0) {
1370 close(handler->netnsfd);
1371 handler->netnsfd = -1;
1372 }
1373 goto out_abort;
1374 }
1375
1376 while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR)
1377 continue;
1378
1379 /* If the child process exited but was not signaled, it didn't call
1380 * reboot. This should mean it was an lxc-execute which simply exited.
1381 * In any case, treat it as a 'halt'.
1382 */
1383 if (WIFSIGNALED(status)) {
1384 switch(WTERMSIG(status)) {
1385 case SIGINT: /* halt */
1386 DEBUG("Container \"%s\" is halting.", name);
1387 break;
1388 case SIGHUP: /* reboot */
1389 DEBUG("Container \"%s\" is rebooting.", name);
1390 handler->conf->reboot = 1;
1391 break;
1392 case SIGSYS: /* seccomp */
1393 DEBUG("Container \"%s\" violated its seccomp policy.", name);
1394 break;
1395 default:
1396 DEBUG("Unknown exit status for container \"%s\" init %d.", name, WTERMSIG(status));
1397 break;
1398 }
1399 }
1400
1401 DEBUG("Pushing physical nics back to host namespace");
1402 lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf);
1403
1404 DEBUG("Tearing down virtual network devices used by container \"%s\".", name);
1405 removed_all_netdevs = lxc_delete_network(handler);
1406
1407 if (handler->pinfd >= 0) {
1408 close(handler->pinfd);
1409 handler->pinfd = -1;
1410 }
1411
1412 lxc_monitor_send_exit_code(name, status, handler->lxcpath);
1413 err = lxc_error_set_and_log(handler->pid, status);
1414out_fini:
1415 if (!removed_all_netdevs) {
1416 DEBUG("Failed tearing down network devices used by container. Trying again!");
1417 removed_all_netdevs = lxc_delete_network(handler);
1418 if (!removed_all_netdevs)
1419 DEBUG("Failed tearing down network devices used by container. Not trying again!");
1420 }
1421
1422out_detach_blockdev:
1423 detach_block_device(handler->conf);
1424
1425out_fini_nonet:
1426 lxc_fini(name, handler);
1427 return err;
1428
1429out_abort:
1430 lxc_abort(name, handler);
1431 goto out_fini;
1432}
1433
/* Argument bundle passed as the opaque data pointer to start() and
 * post_start(): the argument vector of the program to run, whose first
 * entry is the program that gets exec'ed.
 */
struct start_args {
	char *const *argv;
};
1437
1438static int start(struct lxc_handler *handler, void* data)
1439{
1440 struct start_args *arg = data;
1441
1442 NOTICE("Exec'ing \"%s\".", arg->argv[0]);
1443
1444 execvp(arg->argv[0], arg->argv);
1445 SYSERROR("Failed to exec \"%s\".", arg->argv[0]);
1446 return 0;
1447}
1448
1449static int post_start(struct lxc_handler *handler, void* data)
1450{
1451 struct start_args *arg = data;
1452
1453 NOTICE("Started \"%s\" with pid \"%d\".", arg->argv[0], handler->pid);
1454 return 0;
1455}
1456
1457static struct lxc_operations start_ops = {
1458 .start = start,
1459 .post_start = post_start
1460};
1461
1462int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf,
1463 const char *lxcpath, bool backgrounded)
1464{
1465 struct start_args start_arg = {
1466 .argv = argv,
1467 };
1468
1469 conf->need_utmp_watch = 1;
1470 return __lxc_start(name, conf, &start_ops, &start_arg, lxcpath, backgrounded);
1471}
1472
1473static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
1474 const char *name)
1475{
1476 char destroy[MAXPATHLEN];
1477 bool bret = true;
1478 int ret = 0;
1479 struct lxc_container *c;
1480 if (handler->conf->rootfs.path && handler->conf->rootfs.mount) {
1481 bret = do_destroy_container(handler->conf);
1482 if (!bret) {
1483 ERROR("Error destroying rootfs for container \"%s\".", name);
1484 return;
1485 }
1486 }
1487 INFO("Destroyed rootfs for container \"%s\".", name);
1488
1489 ret = snprintf(destroy, MAXPATHLEN, "%s/%s", handler->lxcpath, name);
1490 if (ret < 0 || ret >= MAXPATHLEN) {
1491 ERROR("Error destroying directory for container \"%s\".", name);
1492 return;
1493 }
1494
1495 c = lxc_container_new(name, handler->lxcpath);
1496 if (c) {
1497 if (container_disk_lock(c)) {
1498 INFO("Could not update lxc_snapshots file.");
1499 lxc_container_put(c);
1500 } else {
1501 mod_all_rdeps(c, false);
1502 container_disk_unlock(c);
1503 lxc_container_put(c);
1504 }
1505 }
1506
1507 if (am_unpriv())
1508 ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper, destroy);
1509 else
1510 ret = lxc_rmdir_onedev(destroy, NULL);
1511
1512 if (ret < 0) {
1513 ERROR("Error destroying directory for container \"%s\".", name);
1514 return;
1515 }
1516 INFO("Destroyed directory for container \"%s\".", name);
1517}
1518
/* Adapter with an int (*)(void *) signature, as passed to userns_exec_1():
 * @data is the path string to remove; returns lxc_rmdir_onedev()'s result.
 */
static int lxc_rmdir_onedev_wrapper(void *data)
{
	char *path = data;

	return lxc_rmdir_onedev(path, NULL);
}
1524
1525static bool do_destroy_container(struct lxc_conf *conf) {
1526 if (am_unpriv()) {
1527 if (userns_exec_1(conf, bdev_destroy_wrapper, conf) < 0)
1528 return false;
1529 return true;
1530 }
1531 return bdev_destroy(conf);
1532}