]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/start.c
Merge pull request #2643 from brauner/2018-09-23/cgroup_scoping_fixes
[mirror_lxc.git] / src / lxc / start.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 * Serge Hallyn <serge@hallyn.com>
9 * Christian Brauner <christian.brauner@ubuntu.com>
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #ifndef _GNU_SOURCE
27 #define _GNU_SOURCE 1
28 #endif
29 #include <alloca.h>
30 #include <dirent.h>
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <grp.h>
34 #include <poll.h>
35 #include <pthread.h>
36 #include <signal.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/file.h>
41 #include <sys/mount.h>
42 #include <sys/param.h>
43 #include <sys/prctl.h>
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/syscall.h>
47 #include <sys/types.h>
48 #include <sys/un.h>
49 #include <sys/wait.h>
50 #include <unistd.h>
51
52 #include "af_unix.h"
53 #include "caps.h"
54 #include "cgroup.h"
55 #include "commands.h"
56 #include "commands_utils.h"
57 #include "conf.h"
58 #include "config.h"
59 #include "confile_utils.h"
60 #include "error.h"
61 #include "file_utils.h"
62 #include "list.h"
63 #include "log.h"
64 #include "lsm/lsm.h"
65 #include "lxccontainer.h"
66 #include "lxclock.h"
67 #include "lxcseccomp.h"
68 #include "macro.h"
69 #include "mainloop.h"
70 #include "monitor.h"
71 #include "namespace.h"
72 #include "network.h"
73 #include "start.h"
74 #include "storage/storage.h"
75 #include "storage/storage_utils.h"
76 #include "sync.h"
77 #include "terminal.h"
78 #include "utils.h"
79
80 #if HAVE_LIBCAP
81 #include <sys/capability.h>
82 #endif
83
84 #ifndef HAVE_STRLCPY
85 #include "include/strlcpy.h"
86 #endif
87
88 lxc_log_define(start, lxc);
89
90 extern void mod_all_rdeps(struct lxc_container *c, bool inc);
91 static bool do_destroy_container(struct lxc_handler *handler);
92 static int lxc_rmdir_onedev_wrapper(void *data);
93 static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
94 const char *name);
95
/* Walk @path one component at a time and log the first (top-most) prefix for
 * which access(2) with X_OK fails. Nothing is logged when every prefix passes
 * the check.
 */
static void print_top_failing_dir(const char *path)
{
	size_t pathlen = strlen(path);
	char *buf = alloca(pathlen + 1);
	char *cursor = buf;
	char *const end = buf + pathlen;

	/* Work on a private copy since the string is cut up in place below. */
	(void)strlcpy(buf, path, pathlen + 1);

	while (cursor < end) {
		char hold;

		/* Step over the separator(s) in front of the next component ... */
		for (; cursor < end && *cursor == '/'; cursor++)
			;

		/* ... and then over the component itself. */
		for (; cursor < end && *cursor != '/'; cursor++)
			;

		/* Temporarily terminate the copy right behind this component. */
		hold = *cursor;
		*cursor = '\0';

		if (access(buf, X_OK) != 0) {
			SYSERROR("Could not access %s. Please grant it x "
				 "access, or add an ACL for the container "
				 "root", buf);
			return;
		}

		/* Put the original character back and move on. */
		*cursor = hold;
	}
}
129
130 static void lxc_put_nsfds(struct lxc_handler *handler)
131 {
132 int i;
133
134 for (i = 0; i < LXC_NS_MAX; i++) {
135 if (handler->nsfd[i] < 0)
136 continue;
137
138 close(handler->nsfd[i]);
139 handler->nsfd[i] = -EBADF;
140 }
141 }
142
/* Try to get a file descriptor for the @ns namespace of @pid.
 *
 * Returns the file descriptor on success. On failure returns -EOPNOTSUPP when
 * lxc_preserve_ns() failed with ENOENT and -EINVAL for any other error.
 */
static int lxc_try_preserve_ns(const int pid, const char *ns)
{
	int nsfd = lxc_preserve_ns(pid, ns);

	if (nsfd >= 0)
		return nsfd;

	if (errno == ENOENT) {
		SYSWARN("Kernel does not support preserving %s namespaces", ns);
		return -EOPNOTSUPP;
	}

	SYSERROR("Failed to preserve %s namespace", ns);
	return -EINVAL;
}
160
161 /* lxc_try_preserve_namespaces: open /proc/@pid/ns/@ns for each namespace
162 * specified in ns_clone_flags.
163 * Return true on success, false on failure.
164 */
165 static bool lxc_try_preserve_namespaces(struct lxc_handler *handler,
166 int ns_clone_flags, pid_t pid)
167 {
168 int i;
169
170 for (i = 0; i < LXC_NS_MAX; i++)
171 handler->nsfd[i] = -EBADF;
172
173 for (i = 0; i < LXC_NS_MAX; i++) {
174 int fd;
175
176 if ((ns_clone_flags & ns_info[i].clone_flag) == 0)
177 continue;
178
179 fd = lxc_try_preserve_ns(pid, ns_info[i].proc_name);
180 if (fd < 0) {
181 handler->nsfd[i] = -EBADF;
182
183 /* Do not fail to start container on kernels that do
184 * not support interacting with namespaces through
185 * /proc.
186 */
187 if (fd == -EOPNOTSUPP)
188 continue;
189
190 lxc_put_nsfds(handler);
191 return false;
192 }
193
194 handler->nsfd[i] = fd;
195 DEBUG("Preserved %s namespace via fd %d", ns_info[i].proc_name,
196 handler->nsfd[i]);
197 }
198
199 return true;
200 }
201
/* Return true if @fd is one of the three standard file descriptors. */
static inline bool match_stdfds(int fd)
{
	switch (fd) {
	case STDIN_FILENO:
	case STDOUT_FILENO:
	case STDERR_FILENO:
		return true;
	default:
		return false;
	}
}
206
207 int lxc_check_inherited(struct lxc_conf *conf, bool closeall,
208 int *fds_to_ignore, size_t len_fds)
209 {
210 int fd, fddir;
211 size_t i;
212 DIR *dir;
213 struct dirent *direntp;
214
215 if (conf && conf->close_all_fds)
216 closeall = true;
217
218 restart:
219 dir = opendir("/proc/self/fd");
220 if (!dir) {
221 SYSWARN("Failed to open directory");
222 return -1;
223 }
224
225 fddir = dirfd(dir);
226
227 while ((direntp = readdir(dir))) {
228 int ret;
229 struct lxc_list *cur;
230 bool matched = false;
231
232 if (strcmp(direntp->d_name, ".") == 0)
233 continue;
234
235 if (strcmp(direntp->d_name, "..") == 0)
236 continue;
237
238 ret = lxc_safe_int(direntp->d_name, &fd);
239 if (ret < 0) {
240 INFO("Could not parse file descriptor for \"%s\"", direntp->d_name);
241 continue;
242 }
243
244 for (i = 0; i < len_fds; i++)
245 if (fds_to_ignore[i] == fd)
246 break;
247
248 if (fd == fddir || fd == lxc_log_fd ||
249 (i < len_fds && fd == fds_to_ignore[i]))
250 continue;
251
252 /* Keep state clients that wait on reboots. */
253 if (conf) {
254 lxc_list_for_each(cur, &conf->state_clients) {
255 struct lxc_state_client *client = cur->elem;
256
257 if (client->clientfd != fd)
258 continue;
259
260 matched = true;
261 break;
262 }
263 }
264
265 if (matched)
266 continue;
267
268 if (current_config && fd == current_config->logfd)
269 continue;
270
271 if (match_stdfds(fd))
272 continue;
273
274 if (closeall) {
275 close(fd);
276 closedir(dir);
277 INFO("Closed inherited fd %d", fd);
278 goto restart;
279 }
280 WARN("Inherited fd %d", fd);
281 }
282
283 /* Only enable syslog at this point to avoid the above logging function
284 * to open a new fd and make the check_inherited function enter an
285 * infinite loop.
286 */
287 lxc_log_enable_syslog();
288
289 closedir(dir); /* cannot fail */
290 return 0;
291 }
292
293 static int setup_signal_fd(sigset_t *oldmask)
294 {
295 int ret;
296 int sig;
297 sigset_t mask;
298 const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH};
299
300 /* Block everything except serious error signals. */
301 ret = sigfillset(&mask);
302 if (ret < 0)
303 return -EBADF;
304
305 for (sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
306 ret = sigdelset(&mask, signals[sig]);
307 if (ret < 0)
308 return -EBADF;
309 }
310
311 ret = pthread_sigmask(SIG_BLOCK, &mask, oldmask);
312 if (ret < 0) {
313 SYSERROR("Failed to set signal mask");
314 return -EBADF;
315 }
316
317 ret = signalfd(-1, &mask, SFD_CLOEXEC);
318 if (ret < 0) {
319 SYSERROR("Failed to create signal file descriptor");
320 return -EBADF;
321 }
322
323 TRACE("Created signal file descriptor %d", ret);
324
325 return ret;
326 }
327
328 static int signal_handler(int fd, uint32_t events, void *data,
329 struct lxc_epoll_descr *descr)
330 {
331 int ret;
332 siginfo_t info;
333 struct signalfd_siginfo siginfo;
334 struct lxc_handler *hdlr = data;
335
336 ret = lxc_read_nointr(fd, &siginfo, sizeof(siginfo));
337 if (ret < 0) {
338 ERROR("Failed to read signal info from signal file descriptor %d", fd);
339 return LXC_MAINLOOP_ERROR;
340 }
341
342 if (ret != sizeof(siginfo)) {
343 ERROR("Unexpected size for struct signalfd_siginfo");
344 return -EINVAL;
345 }
346
347 /* Check whether init is running. */
348 info.si_pid = 0;
349 ret = waitid(P_PID, hdlr->pid, &info, WEXITED | WNOWAIT | WNOHANG);
350 if (ret == 0 && info.si_pid == hdlr->pid)
351 hdlr->init_died = true;
352
353 /* Try to figure out a reasonable exit status to report. */
354 if (hdlr->init_died) {
355 switch (info.si_code) {
356 case CLD_EXITED:
357 hdlr->exit_status = info.si_status << 8;
358 break;
359 case CLD_KILLED:
360 case CLD_DUMPED:
361 case CLD_STOPPED:
362 hdlr->exit_status = info.si_status << 8 | 0x7f;
363 break;
364 case CLD_CONTINUED:
365 /* Huh? The waitid() told us it's dead *and* continued? */
366 WARN("Init %d dead and continued?", hdlr->pid);
367 hdlr->exit_status = 1;
368 break;
369 default:
370 ERROR("Unknown si_code: %d", info.si_code);
371 hdlr->exit_status = 1;
372 }
373 }
374
375 if (siginfo.ssi_signo == SIGHUP) {
376 kill(hdlr->pid, SIGTERM);
377 INFO("Killing %d since terminal hung up", hdlr->pid);
378 return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
379 }
380
381 if (siginfo.ssi_signo != SIGCHLD) {
382 kill(hdlr->pid, siginfo.ssi_signo);
383 INFO("Forwarded signal %d to pid %d", siginfo.ssi_signo, hdlr->pid);
384 return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
385 }
386
387 /* More robustness, protect ourself from a SIGCHLD sent
388 * by a process different from the container init.
389 */
390 if (siginfo.ssi_pid != hdlr->pid) {
391 NOTICE("Received %d from pid %d instead of container init %d",
392 siginfo.ssi_signo, siginfo.ssi_pid, hdlr->pid);
393 return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
394 }
395
396 if (siginfo.ssi_code == CLD_STOPPED) {
397 INFO("Container init process was stopped");
398 return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
399 } else if (siginfo.ssi_code == CLD_CONTINUED) {
400 INFO("Container init process was continued");
401 return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
402 }
403
404 DEBUG("Container init process %d exited", hdlr->pid);
405 return LXC_MAINLOOP_CLOSE;
406 }
407
408 int lxc_serve_state_clients(const char *name, struct lxc_handler *handler,
409 lxc_state_t state)
410 {
411 size_t retlen;
412 ssize_t ret;
413 struct lxc_list *cur, *next;
414 struct lxc_state_client *client;
415 struct lxc_msg msg = {.type = lxc_msg_state, .value = state};
416
417 if (state == THAWED)
418 handler->state = RUNNING;
419 else
420 handler->state = state;
421
422 TRACE("Set container state to %s", lxc_state2str(state));
423
424 if (lxc_list_empty(&handler->conf->state_clients)) {
425 TRACE("No state clients registered");
426 return 0;
427 }
428
429 retlen = strlcpy(msg.name, name, sizeof(msg.name));
430 if (retlen >= sizeof(msg.name))
431 return -E2BIG;
432
433 lxc_list_for_each_safe(cur, &handler->conf->state_clients, next) {
434 client = cur->elem;
435
436 if (client->states[state] == 0) {
437 TRACE("State %s not registered for state client %d",
438 lxc_state2str(state), client->clientfd);
439 continue;
440 }
441
442 TRACE("Sending state %s to state client %d",
443 lxc_state2str(state), client->clientfd);
444
445 ret = lxc_send_nointr(client->clientfd, &msg, sizeof(msg), MSG_NOSIGNAL);
446 if (ret <= 0)
447 SYSERROR("Failed to send message to client");
448
449 /* kick client from list */
450 lxc_list_del(cur);
451 close(client->clientfd);
452 free(cur->elem);
453 free(cur);
454 }
455
456 return 0;
457 }
458
459 static int lxc_serve_state_socket_pair(const char *name,
460 struct lxc_handler *handler,
461 lxc_state_t state)
462 {
463 ssize_t ret;
464
465 if (!handler->daemonize ||
466 handler->state_socket_pair[1] < 0 ||
467 state == STARTING)
468 return 0;
469
470 /* Close read end of the socket pair. */
471 close(handler->state_socket_pair[0]);
472 handler->state_socket_pair[0] = -1;
473
474 again:
475 ret = lxc_abstract_unix_send_credential(handler->state_socket_pair[1],
476 &(int){state}, sizeof(int));
477 if (ret < 0) {
478 SYSERROR("Failed to send state to %d", handler->state_socket_pair[1]);
479
480 if (errno == EINTR)
481 goto again;
482
483 return -1;
484 }
485
486 if (ret != sizeof(int)) {
487 ERROR("Message too long : %d", handler->state_socket_pair[1]);
488 return -1;
489 }
490
491 TRACE("Sent container state \"%s\" to %d", lxc_state2str(state),
492 handler->state_socket_pair[1]);
493
494 /* Close write end of the socket pair. */
495 close(handler->state_socket_pair[1]);
496 handler->state_socket_pair[1] = -1;
497
498 return 0;
499 }
500
501 int lxc_set_state(const char *name, struct lxc_handler *handler,
502 lxc_state_t state)
503 {
504 int ret;
505
506 ret = lxc_serve_state_socket_pair(name, handler, state);
507 if (ret < 0) {
508 ERROR("Failed to synchronize via anonymous pair of unix sockets");
509 return -1;
510 }
511
512 ret = lxc_serve_state_clients(name, handler, state);
513 if (ret < 0)
514 return -1;
515
516 /* This function will try to connect to the legacy lxc-monitord state
517 * server and only exists for backwards compatibility.
518 */
519 lxc_monitor_send_state(name, state, handler->lxcpath);
520
521 return 0;
522 }
523
524 int lxc_poll(const char *name, struct lxc_handler *handler)
525 {
526 int ret;
527 bool has_console = true;
528 struct lxc_epoll_descr descr, descr_console;
529
530 if (handler->conf->console.path &&
531 strcmp(handler->conf->console.path, "none") == 0)
532 has_console = false;
533
534 ret = lxc_mainloop_open(&descr);
535 if (ret < 0) {
536 ERROR("Failed to create mainloop");
537 goto out_sigfd;
538 }
539
540 if (has_console) {
541 ret = lxc_mainloop_open(&descr_console);
542 if (ret < 0) {
543 ERROR("Failed to create console mainloop");
544 goto out_mainloop;
545 }
546 }
547
548 ret = lxc_mainloop_add_handler(&descr, handler->sigfd, signal_handler, handler);
549 if (ret < 0) {
550 ERROR("Failed to add signal handler for %d to mainloop", handler->sigfd);
551 goto out_mainloop_console;
552 }
553
554 if (has_console) {
555 struct lxc_terminal *console = &handler->conf->console;
556
557 ret = lxc_terminal_mainloop_add(&descr, console);
558 if (ret < 0) {
559 ERROR("Failed to add console handlers to mainloop");
560 goto out_mainloop_console;
561 }
562
563 ret = lxc_terminal_mainloop_add(&descr_console, console);
564 if (ret < 0) {
565 ERROR("Failed to add console handlers to console mainloop");
566 goto out_mainloop_console;
567 }
568
569 handler->conf->console.descr = &descr;
570 }
571
572 ret = lxc_cmd_mainloop_add(name, &descr, handler);
573 if (ret < 0) {
574 ERROR("Failed to add command handler to mainloop");
575 goto out_mainloop_console;
576 }
577
578 TRACE("Mainloop is ready");
579
580 ret = lxc_mainloop(&descr, -1);
581 close(descr.epfd);
582 descr.epfd = -EBADF;
583 if (ret < 0 || !handler->init_died)
584 goto out_mainloop_console;
585
586 if (has_console)
587 ret = lxc_mainloop(&descr_console, 0);
588
589 out_mainloop_console:
590 if (has_console) {
591 lxc_mainloop_close(&descr_console);
592 TRACE("Closed console mainloop");
593 }
594
595 out_mainloop:
596 lxc_mainloop_close(&descr);
597 TRACE("Closed mainloop");
598
599 out_sigfd:
600 close(handler->sigfd);
601 TRACE("Closed signal file descriptor %d", handler->sigfd);
602 handler->sigfd = -EBADF;
603
604 return ret;
605 }
606
607 void lxc_zero_handler(struct lxc_handler *handler)
608 {
609 int i;
610
611 memset(handler, 0, sizeof(struct lxc_handler));
612
613 handler->pinfd = -1;
614
615 handler->sigfd = -1;
616
617 for (i = 0; i < LXC_NS_MAX; i++)
618 handler->nsfd[i] = -1;
619
620 handler->data_sock[0] = -1;
621 handler->data_sock[1] = -1;
622
623 handler->state_socket_pair[0] = -1;
624 handler->state_socket_pair[1] = -1;
625
626 handler->sync_sock[0] = -1;
627 handler->sync_sock[1] = -1;
628 }
629
630 void lxc_free_handler(struct lxc_handler *handler)
631 {
632 if (handler->pinfd >= 0)
633 close(handler->pinfd);
634
635 if (handler->sigfd >= 0)
636 close(handler->sigfd);
637
638 lxc_put_nsfds(handler);
639
640 if (handler->conf && handler->conf->reboot == REBOOT_NONE)
641 if (handler->conf->maincmd_fd >= 0)
642 lxc_abstract_unix_close(handler->conf->maincmd_fd);
643
644 if (handler->state_socket_pair[0] >= 0)
645 close(handler->state_socket_pair[0]);
646
647 if (handler->state_socket_pair[1] >= 0)
648 close(handler->state_socket_pair[1]);
649
650 handler->conf = NULL;
651 free(handler);
652 handler = NULL;
653 }
654
655 struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
656 const char *lxcpath, bool daemonize)
657 {
658 int i, ret;
659 struct lxc_handler *handler;
660
661 handler = malloc(sizeof(*handler));
662 if (!handler)
663 return NULL;
664
665 memset(handler, 0, sizeof(*handler));
666
667 /* Note that am_guest_unpriv() checks the effective uid. We
668 * probably don't care if we are real root only if we are running
669 * as root so this should be fine.
670 */
671 handler->am_root = !am_guest_unpriv();
672 handler->data_sock[0] = handler->data_sock[1] = -1;
673 handler->conf = conf;
674 handler->lxcpath = lxcpath;
675 handler->pinfd = -1;
676 handler->sigfd = -EBADF;
677 handler->init_died = false;
678 handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
679 if (handler->conf->reboot == REBOOT_NONE)
680 lxc_list_init(&handler->conf->state_clients);
681
682 for (i = 0; i < LXC_NS_MAX; i++)
683 handler->nsfd[i] = -1;
684
685 handler->name = name;
686
687 if (daemonize && handler->conf->reboot == REBOOT_NONE) {
688 /* Create socketpair() to synchronize on daemonized startup.
689 * When the container reboots we don't need to synchronize
690 * again currently so don't open another socketpair().
691 */
692 ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
693 handler->state_socket_pair);
694 if (ret < 0) {
695 ERROR("Failed to create anonymous pair of unix sockets");
696 goto on_error;
697 }
698
699 TRACE("Created anonymous pair {%d,%d} of unix sockets",
700 handler->state_socket_pair[0],
701 handler->state_socket_pair[1]);
702 }
703
704 if (handler->conf->reboot == REBOOT_NONE) {
705 handler->conf->maincmd_fd = lxc_cmd_init(name, lxcpath, "command");
706 if (handler->conf->maincmd_fd < 0) {
707 ERROR("Failed to set up command socket");
708 goto on_error;
709 }
710 }
711
712 TRACE("Unix domain socket %d for command server is ready",
713 handler->conf->maincmd_fd);
714
715 return handler;
716
717 on_error:
718 lxc_free_handler(handler);
719
720 return NULL;
721 }
722
723 int lxc_init(const char *name, struct lxc_handler *handler)
724 {
725 int ret;
726 const char *loglevel;
727 struct lxc_conf *conf = handler->conf;
728
729 handler->monitor_pid = lxc_raw_getpid();
730
731 lsm_init();
732 TRACE("Initialized LSM");
733
734 ret = lxc_read_seccomp_config(conf);
735 if (ret < 0) {
736 ERROR("Failed loading seccomp policy");
737 goto out_close_maincmd_fd;
738 }
739 TRACE("Read seccomp policy");
740
741 /* Begin by setting the state to STARTING. */
742 ret = lxc_set_state(name, handler, STARTING);
743 if (ret < 0) {
744 ERROR("Failed to set state to \"%s\"", lxc_state2str(STARTING));
745 goto out_close_maincmd_fd;
746 }
747 TRACE("Set container state to \"STARTING\"");
748
749 /* Start of environment variable setup for hooks. */
750 ret = setenv("LXC_NAME", name, 1);
751 if (ret < 0)
752 SYSERROR("Failed to set environment variable: LXC_NAME=%s", name);
753
754 if (conf->rcfile) {
755 ret = setenv("LXC_CONFIG_FILE", conf->rcfile, 1);
756 if (ret < 0)
757 SYSERROR("Failed to set environment variable: "
758 "LXC_CONFIG_FILE=%s", conf->rcfile);
759 }
760
761 if (conf->rootfs.mount) {
762 ret = setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1);
763 if (ret < 0)
764 SYSERROR("Failed to set environment variable: "
765 "LXC_ROOTFS_MOUNT=%s", conf->rootfs.mount);
766 }
767
768 if (conf->rootfs.path) {
769 ret = setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1);
770 if (ret < 0)
771 SYSERROR("Failed to set environment variable: "
772 "LXC_ROOTFS_PATH=%s", conf->rootfs.path);
773 }
774
775 if (conf->console.path) {
776 ret = setenv("LXC_CONSOLE", conf->console.path, 1);
777 if (ret < 0)
778 SYSERROR("Failed to set environment variable: "
779 "LXC_CONSOLE=%s", conf->console.path);
780 }
781
782 if (conf->console.log_path) {
783 ret = setenv("LXC_CONSOLE_LOGPATH", conf->console.log_path, 1);
784 if (ret < 0)
785 SYSERROR("Failed to set environment variable: "
786 "LXC_CONSOLE_LOGPATH=%s", conf->console.log_path);
787 }
788
789 if (cgns_supported()) {
790 ret = setenv("LXC_CGNS_AWARE", "1", 1);
791 if (ret < 0)
792 SYSERROR("Failed to set environment variable "
793 "LXC_CGNS_AWARE=1");
794 }
795
796 loglevel = lxc_log_priority_to_string(lxc_log_get_level());
797 ret = setenv("LXC_LOG_LEVEL", loglevel, 1);
798 if (ret < 0)
799 SYSERROR("Set environment variable LXC_LOG_LEVEL=%s",
800 loglevel);
801
802 if (conf->hooks_version == 0)
803 ret = setenv("LXC_HOOK_VERSION", "0", 1);
804 else
805 ret = setenv("LXC_HOOK_VERSION", "1", 1);
806 if (ret < 0)
807 SYSERROR("Failed to set environment variable LXC_HOOK_VERSION=%u", conf->hooks_version);
808 /* End of environment variable setup for hooks. */
809
810 TRACE("Set environment variables");
811
812 ret = run_lxc_hooks(name, "pre-start", conf, NULL);
813 if (ret < 0) {
814 ERROR("Failed to run lxc.hook.pre-start for container \"%s\"", name);
815 goto out_aborting;
816 }
817 TRACE("Ran pre-start hooks");
818
819 /* The signal fd has to be created before forking otherwise if the child
820 * process exits before we setup the signal fd, the event will be lost
821 * and the command will be stuck.
822 */
823 handler->sigfd = setup_signal_fd(&handler->oldmask);
824 if (handler->sigfd < 0) {
825 ERROR("Failed to setup SIGCHLD fd handler.");
826 goto out_delete_tty;
827 }
828 TRACE("Set up signal fd");
829
830 /* Do this after setting up signals since it might unblock SIGWINCH. */
831 ret = lxc_terminal_setup(conf);
832 if (ret < 0) {
833 ERROR("Failed to create console");
834 goto out_restore_sigmask;
835 }
836 TRACE("Created console");
837
838 ret = lxc_terminal_map_ids(conf, &conf->console);
839 if (ret < 0) {
840 ERROR("Failed to chown console");
841 goto out_delete_terminal;
842 }
843 TRACE("Chowned console");
844
845 handler->cgroup_ops = cgroup_init(handler->conf);
846 if (!handler->cgroup_ops) {
847 ERROR("Failed to initialize cgroup driver");
848 goto out_delete_terminal;
849 }
850 TRACE("Initialized cgroup driver");
851
852 ret = lsm_process_prepare(conf, handler->lxcpath);
853 if (ret < 0) {
854 ERROR("Failed to initialize LSM");
855 goto out_destroy_cgroups;
856 }
857 TRACE("Initialized LSM");
858
859 INFO("Container \"%s\" is initialized", name);
860 return 0;
861
862 out_destroy_cgroups:
863 handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
864 handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
865
866 out_delete_terminal:
867 lxc_terminal_delete(&handler->conf->console);
868
869 out_restore_sigmask:
870 (void)pthread_sigmask(SIG_SETMASK, &handler->oldmask, NULL);
871
872 out_delete_tty:
873 lxc_delete_tty(&conf->ttys);
874
875 out_aborting:
876 (void)lxc_set_state(name, handler, ABORTING);
877
878 out_close_maincmd_fd:
879 lxc_abstract_unix_close(conf->maincmd_fd);
880 conf->maincmd_fd = -1;
881 return -1;
882 }
883
884 void lxc_fini(const char *name, struct lxc_handler *handler)
885 {
886 int i, ret;
887 pid_t self;
888 struct lxc_list *cur, *next;
889 char *namespaces[LXC_NS_MAX + 1];
890 size_t namespace_count = 0;
891 struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
892
893 /* The STOPPING state is there for future cleanup code which can take
894 * awhile.
895 */
896 lxc_set_state(name, handler, STOPPING);
897
898 self = lxc_raw_getpid();
899 for (i = 0; i < LXC_NS_MAX; i++) {
900 if (handler->nsfd[i] < 0)
901 continue;
902
903 if (handler->conf->hooks_version == 0)
904 ret = asprintf(&namespaces[namespace_count],
905 "%s:/proc/%d/fd/%d", ns_info[i].proc_name,
906 self, handler->nsfd[i]);
907 else
908 ret = asprintf(&namespaces[namespace_count],
909 "/proc/%d/fd/%d", self, handler->nsfd[i]);
910 if (ret == -1) {
911 SYSERROR("Failed to allocate memory");
912 break;
913 }
914
915 if (handler->conf->hooks_version == 0) {
916 namespace_count++;
917 continue;
918 }
919
920 ret = setenv(ns_info[i].env_name, namespaces[namespace_count], 1);
921 if (ret < 0)
922 SYSERROR("Failed to set environment variable %s=%s",
923 ns_info[i].env_name, namespaces[namespace_count]);
924 else
925 TRACE("Set environment variable %s=%s",
926 ns_info[i].env_name, namespaces[namespace_count]);
927
928 namespace_count++;
929 }
930 namespaces[namespace_count] = NULL;
931
932 if (handler->conf->reboot > REBOOT_NONE) {
933 ret = setenv("LXC_TARGET", "reboot", 1);
934 if (ret < 0)
935 SYSERROR("Failed to set environment variable: "
936 "LXC_TARGET=reboot");
937 }
938
939 if (handler->conf->reboot == REBOOT_NONE) {
940 ret = setenv("LXC_TARGET", "stop", 1);
941 if (ret < 0)
942 SYSERROR("Failed to set environment variable: "
943 "LXC_TARGET=stop");
944 }
945
946 if (handler->conf->hooks_version == 0)
947 ret = run_lxc_hooks(name, "stop", handler->conf, namespaces);
948 else
949 ret = run_lxc_hooks(name, "stop", handler->conf, NULL);
950 if (ret < 0)
951 ERROR("Failed to run \"lxc.hook.stop\" hook");
952
953 while (namespace_count--)
954 free(namespaces[namespace_count]);
955
956 lsm_process_cleanup(handler->conf, handler->lxcpath);
957
958 cgroup_ops->payload_destroy(cgroup_ops, handler);
959 cgroup_ops->monitor_destroy(cgroup_ops, handler);
960 cgroup_exit(cgroup_ops);
961
962 if (handler->conf->reboot == REBOOT_NONE) {
963 /* For all new state clients simply close the command socket.
964 * This will inform all state clients that the container is
965 * STOPPED and also prevents a race between a open()/close() on
966 * the command socket causing a new process to get ECONNREFUSED
967 * because we haven't yet closed the command socket.
968 */
969 lxc_abstract_unix_close(handler->conf->maincmd_fd);
970 handler->conf->maincmd_fd = -1;
971 TRACE("Closed command socket");
972
973 /* This function will try to connect to the legacy lxc-monitord
974 * state server and only exists for backwards compatibility.
975 */
976 lxc_monitor_send_state(name, STOPPED, handler->lxcpath);
977
978 /* The command socket is closed so no one can acces the command
979 * socket anymore so there's no need to lock it.
980 */
981 handler->state = STOPPED;
982 TRACE("Set container state to \"STOPPED\"");
983 } else {
984 lxc_set_state(name, handler, STOPPED);
985 }
986
987 ret = run_lxc_hooks(name, "post-stop", handler->conf, NULL);
988 if (ret < 0) {
989 ERROR("Failed to run lxc.hook.post-stop for container \"%s\"", name);
990 if (handler->conf->reboot > REBOOT_NONE) {
991 WARN("Container will be stopped instead of rebooted");
992 handler->conf->reboot = REBOOT_NONE;
993
994 ret = setenv("LXC_TARGET", "stop", 1);
995 if (ret < 0)
996 WARN("Failed to set environment variable: "
997 "LXC_TARGET=stop");
998 }
999 }
1000
1001 /* Reset mask set by setup_signal_fd. */
1002 ret = pthread_sigmask(SIG_SETMASK, &handler->oldmask, NULL);
1003 if (ret < 0)
1004 SYSWARN("Failed to restore signal mask");
1005
1006 lxc_terminal_delete(&handler->conf->console);
1007 lxc_delete_tty(&handler->conf->ttys);
1008
1009 /* The command socket is now closed, no more state clients can register
1010 * themselves from now on. So free the list of state clients.
1011 */
1012 lxc_list_for_each_safe(cur, &handler->conf->state_clients, next) {
1013 struct lxc_state_client *client = cur->elem;
1014
1015 /* Keep state clients that want to be notified about reboots. */
1016 if ((handler->conf->reboot > REBOOT_NONE) &&
1017 (client->states[RUNNING] == 2))
1018 continue;
1019
1020 /* close state client socket */
1021 lxc_list_del(cur);
1022 close(client->clientfd);
1023 free(cur->elem);
1024 free(cur);
1025 }
1026
1027 if (handler->conf->ephemeral == 1 && handler->conf->reboot != REBOOT_REQ)
1028 lxc_destroy_container_on_signal(handler, name);
1029
1030 lxc_free_handler(handler);
1031 }
1032
1033 void lxc_abort(const char *name, struct lxc_handler *handler)
1034 {
1035 int ret, status;
1036
1037 lxc_set_state(name, handler, ABORTING);
1038
1039 if (handler->pid > 0) {
1040 ret = kill(handler->pid, SIGKILL);
1041 if (ret < 0)
1042 SYSERROR("Failed to send SIGKILL to %d", handler->pid);
1043 }
1044
1045 while ((ret = waitpid(-1, &status, 0)) > 0) {
1046 ;
1047 }
1048 }
1049
1050 static int do_start(void *data)
1051 {
1052 int ret;
1053 char path[PATH_MAX];
1054 uid_t new_uid;
1055 gid_t new_gid;
1056 struct lxc_list *iterator;
1057 uid_t nsuid = 0;
1058 gid_t nsgid = 0;
1059 int devnull_fd = -1;
1060 struct lxc_handler *handler = data;
1061
1062 lxc_sync_fini_parent(handler);
1063
1064 /* This prctl must be before the synchro, so if the parent dies before
1065 * we set the parent death signal, we will detect its death with the
1066 * synchro right after, otherwise we have a window where the parent can
1067 * exit before we set the pdeath signal leading to a unsupervized
1068 * container.
1069 */
1070 ret = lxc_set_death_signal(SIGKILL);
1071 if (ret < 0) {
1072 SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
1073 goto out_warn_father;
1074 }
1075
1076 ret = lxc_ambient_caps_up();
1077 if (ret < 0) {
1078 ERROR("Failed to raise ambient capabilities");
1079 goto out_warn_father;
1080 }
1081
1082 ret = pthread_sigmask(SIG_SETMASK, &handler->oldmask, NULL);
1083 if (ret < 0) {
1084 SYSERROR("Failed to set signal mask");
1085 goto out_warn_father;
1086 }
1087
1088 /* Don't leak the pinfd to the container. */
1089 if (handler->pinfd >= 0)
1090 close(handler->pinfd);
1091
1092 ret = lxc_sync_wait_parent(handler, LXC_SYNC_STARTUP);
1093 if (ret < 0)
1094 goto out_warn_father;
1095
1096 /* Unshare CLONE_NEWNET after CLONE_NEWUSER. See
1097 * https://github.com/lxc/lxd/issues/1978.
1098 */
1099 if ((handler->ns_clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
1100 (CLONE_NEWNET | CLONE_NEWUSER)) {
1101 ret = unshare(CLONE_NEWNET);
1102 if (ret < 0) {
1103 SYSERROR("Failed to unshare CLONE_NEWNET");
1104 goto out_warn_father;
1105 }
1106 INFO("Unshared CLONE_NEWNET");
1107 }
1108
1109 /* Tell the parent task it can begin to configure the container and wait
1110 * for it to finish.
1111 */
1112 ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE);
1113 if (ret < 0)
1114 goto out_error;
1115
1116 ret = lxc_network_recv_veth_names_from_parent(handler);
1117 if (ret < 0) {
1118 ERROR("Failed to receive veth names from parent");
1119 goto out_warn_father;
1120 }
1121
1122 /* If we are in a new user namespace, become root there to have
1123 * privilege over our namespace.
1124 */
1125 if (!lxc_list_empty(&handler->conf->id_map)) {
1126 if (!handler->conf->root_nsuid_map)
1127 nsuid = handler->conf->init_uid;
1128
1129 if (!handler->conf->root_nsgid_map)
1130 nsgid = handler->conf->init_gid;
1131
1132 if (!lxc_switch_uid_gid(nsuid, nsgid))
1133 goto out_warn_father;
1134
1135 /* Drop groups only after we switched to a valid gid in the new
1136 * user namespace.
1137 */
1138 if (!lxc_setgroups(0, NULL) &&
1139 (handler->am_root || errno != EPERM))
1140 goto out_warn_father;
1141
1142 ret = prctl(PR_SET_DUMPABLE, prctl_arg(1), prctl_arg(0),
1143 prctl_arg(0), prctl_arg(0));
1144 if (ret < 0)
1145 goto out_warn_father;
1146
1147 /* set{g,u}id() clears deathsignal */
1148 ret = lxc_set_death_signal(SIGKILL);
1149 if (ret < 0) {
1150 SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
1151 goto out_warn_father;
1152 }
1153 }
1154
1155 ret = access(handler->lxcpath, X_OK);
1156 if (ret != 0) {
1157 print_top_failing_dir(handler->lxcpath);
1158 goto out_warn_father;
1159 }
1160
1161 ret = snprintf(path, sizeof(path), "%s/dev/null",
1162 handler->conf->rootfs.mount);
1163 if (ret < 0 || ret >= sizeof(path))
1164 goto out_warn_father;
1165
1166 /* In order to checkpoint restore, we need to have everything in the
1167 * same mount namespace. However, some containers may not have a
1168 * reasonable /dev (in particular, they may not have /dev/null), so we
1169 * can't set init's std fds to /dev/null by opening it from inside the
1170 * container.
1171 *
1172 * If that's the case, fall back to using the host's /dev/null. This
1173 * means that migration won't work, but at least we won't spew output
1174 * where it isn't wanted.
1175 */
1176 if (handler->daemonize && !handler->conf->autodev) {
1177 ret = access(path, F_OK);
1178 if (ret != 0) {
1179 devnull_fd = open_devnull();
1180
1181 if (devnull_fd < 0)
1182 goto out_warn_father;
1183 WARN("Using /dev/null from the host for container "
1184 "init's standard file descriptors. Migration will "
1185 "not work");
1186 }
1187 }
1188
1189 /* Ask father to setup cgroups and wait for him to finish. */
1190 ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CGROUP);
1191 if (ret < 0)
1192 goto out_error;
1193
1194 /* Unshare cgroup namespace after we have setup our cgroups. If we do it
1195 * earlier we end up with a wrong view of /proc/self/cgroup. For
1196 * example, assume we unshare(CLONE_NEWCGROUP) first, and then create
1197 * the cgroup for the container, say /sys/fs/cgroup/cpuset/lxc/c, then
1198 * /proc/self/cgroup would show us:
1199 *
1200 * 8:cpuset:/lxc/c
1201 *
1202 * whereas it should actually show
1203 *
1204 * 8:cpuset:/
1205 */
1206 if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
1207 ret = unshare(CLONE_NEWCGROUP);
1208 if (ret < 0) {
1209 INFO("Failed to unshare CLONE_NEWCGROUP");
1210 goto out_warn_father;
1211 }
1212 INFO("Unshared CLONE_NEWCGROUP");
1213 }
1214
1215 /* Add the requested environment variables to the current environment to
1216 * allow them to be used by the various hooks, such as the start hook
1217 * above.
1218 */
1219 lxc_list_for_each(iterator, &handler->conf->environment) {
1220 ret = putenv((char *)iterator->elem);
1221 if (ret < 0) {
1222 SYSERROR("Failed to set environment variable: %s",
1223 (char *)iterator->elem);
1224 goto out_warn_father;
1225 }
1226 }
1227
1228 /* Setup the container, ip, names, utsname, ... */
1229 ret = lxc_setup(handler);
1230 close(handler->data_sock[1]);
1231 close(handler->data_sock[0]);
1232 if (ret < 0) {
1233 ERROR("Failed to setup container \"%s\"", handler->name);
1234 goto out_warn_father;
1235 }
1236
1237 /* Set the label to change to when we exec(2) the container's init. */
1238 ret = lsm_process_label_set(NULL, handler->conf, true);
1239 if (ret < 0)
1240 goto out_warn_father;
1241
1242 /* Set PR_SET_NO_NEW_PRIVS after we changed the lsm label. If we do it
1243 * before we aren't allowed anymore.
1244 */
1245 if (handler->conf->no_new_privs) {
1246 ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
1247 prctl_arg(0), prctl_arg(0));
1248 if (ret < 0) {
1249 SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block "
1250 "execve() gainable privileges");
1251 goto out_warn_father;
1252 }
1253 DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable "
1254 "privileges");
1255 }
1256
1257 /* Some init's such as busybox will set sane tty settings on stdin,
1258 * stdout, stderr which it thinks is the console. We already set them
1259 * the way we wanted on the real terminal, and we want init to do its
1260 * setup on its console ie. the pty allocated in lxc_terminal_setup() so
1261 * make sure that that pty is stdin,stdout,stderr.
1262 */
1263 if (handler->conf->console.slave >= 0) {
1264 if (handler->daemonize || !handler->conf->is_execute)
1265 ret = set_stdfds(handler->conf->console.slave);
1266 else
1267 ret = lxc_terminal_set_stdfds(handler->conf->console.slave);
1268 if (ret < 0) {
1269 ERROR("Failed to redirect std{in,out,err} to pty file "
1270 "descriptor %d", handler->conf->console.slave);
1271 goto out_warn_father;
1272 }
1273 }
1274
1275 /* If we mounted a temporary proc, then unmount it now. */
1276 tmp_proc_unmount(handler->conf);
1277
1278 ret = lxc_seccomp_load(handler->conf);
1279 if (ret < 0)
1280 goto out_warn_father;
1281
1282 ret = run_lxc_hooks(handler->name, "start", handler->conf, NULL);
1283 if (ret < 0) {
1284 ERROR("Failed to run lxc.hook.start for container \"%s\"",
1285 handler->name);
1286 goto out_warn_father;
1287 }
1288
1289 close(handler->sigfd);
1290
1291 if (handler->conf->console.slave < 0 && handler->daemonize) {
1292 if (devnull_fd < 0) {
1293 devnull_fd = open_devnull();
1294 if (devnull_fd < 0)
1295 goto out_warn_father;
1296 }
1297
1298 ret = set_stdfds(devnull_fd);
1299 if (ret < 0) {
1300 ERROR("Failed to redirect std{in,out,err} to \"/dev/null\"");
1301 goto out_warn_father;
1302 }
1303 }
1304
1305 if (devnull_fd >= 0) {
1306 close(devnull_fd);
1307 devnull_fd = -1;
1308 }
1309
1310 setsid();
1311
1312 if (handler->conf->init_cwd) {
1313 ret = chdir(handler->conf->init_cwd);
1314 if (ret < 0) {
1315 SYSERROR("Could not change directory to \"%s\"",
1316 handler->conf->init_cwd);
1317 goto out_warn_father;
1318 }
1319 }
1320
1321 ret = lxc_sync_barrier_parent(handler, LXC_SYNC_CGROUP_LIMITS);
1322 if (ret < 0)
1323 goto out_warn_father;
1324
1325 /* Reset the environment variables the user requested in a clear
1326 * environment.
1327 */
1328 ret = clearenv();
1329 /* Don't error out though. */
1330 if (ret < 0)
1331 SYSERROR("Failed to clear environment.");
1332
1333 lxc_list_for_each(iterator, &handler->conf->environment) {
1334 ret = putenv((char *)iterator->elem);
1335 if (ret < 0) {
1336 SYSERROR("Failed to set environment variable: %s",
1337 (char *)iterator->elem);
1338 goto out_warn_father;
1339 }
1340 }
1341
1342 ret = putenv("container=lxc");
1343 if (ret < 0) {
1344 SYSERROR("Failed to set environment variable: container=lxc");
1345 goto out_warn_father;
1346 }
1347
1348 if (handler->conf->ttys.tty_names) {
1349 ret = putenv(handler->conf->ttys.tty_names);
1350 if (ret < 0) {
1351 SYSERROR("Failed to set environment variable for container ptys");
1352 goto out_warn_father;
1353 }
1354 }
1355
1356 /* The container has been setup. We can now switch to an unprivileged
1357 * uid/gid.
1358 */
1359 new_uid = handler->conf->init_uid;
1360 new_gid = handler->conf->init_gid;
1361
1362 /* Avoid unnecessary syscalls. */
1363 if (new_uid == nsuid)
1364 new_uid = LXC_INVALID_UID;
1365
1366 if (new_gid == nsgid)
1367 new_gid = LXC_INVALID_GID;
1368
1369 if (!lxc_switch_uid_gid(new_uid, new_gid))
1370 goto out_warn_father;
1371
1372 /* If we are in a new user namespace we already dropped all groups when
1373 * we switched to root in the new user namespace further above. Only
1374 * drop groups if we can, so ensure that we have necessary privilege.
1375 */
1376 if (lxc_list_empty(&handler->conf->id_map))
1377 #if HAVE_LIBCAP
1378 if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE))
1379 #endif
1380 if (!lxc_setgroups(0, NULL))
1381 goto out_warn_father;
1382
1383 ret = lxc_ambient_caps_down();
1384 if (ret < 0) {
1385 ERROR("Failed to clear ambient capabilities");
1386 goto out_warn_father;
1387 }
1388
1389 if (handler->conf->monitor_signal_pdeath != SIGKILL) {
1390 ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath);
1391 if (ret < 0) {
1392 SYSERROR("Failed to set PR_SET_PDEATHSIG to %d",
1393 handler->conf->monitor_signal_pdeath);
1394 goto out_warn_father;
1395 }
1396 }
1397
1398 /* After this call, we are in error because this ops should not return
1399 * as it execs.
1400 */
1401 handler->ops->start(handler, handler->data);
1402
1403 out_warn_father:
1404 /* We want the parent to know something went wrong, so we return a
1405 * special error code.
1406 */
1407 lxc_sync_wake_parent(handler, LXC_SYNC_ERROR);
1408
1409 out_error:
1410 if (devnull_fd >= 0)
1411 close(devnull_fd);
1412
1413 return -1;
1414 }
1415
1416 static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
1417 {
1418 int i;
1419 struct lxc_terminal_info *tty;
1420 int ret = -1;
1421 int sock = handler->data_sock[1];
1422 struct lxc_conf *conf = handler->conf;
1423 struct lxc_tty_info *ttys = &conf->ttys;
1424
1425 if (!conf->ttys.max)
1426 return 0;
1427
1428 ttys->tty = malloc(sizeof(*ttys->tty) * ttys->max);
1429 if (!ttys->tty)
1430 return -1;
1431
1432 for (i = 0; i < conf->ttys.max; i++) {
1433 int ttyfds[2];
1434
1435 ret = lxc_abstract_unix_recv_fds(sock, ttyfds, 2, NULL, 0);
1436 if (ret < 0)
1437 break;
1438
1439 tty = &ttys->tty[i];
1440 tty->busy = 0;
1441 tty->master = ttyfds[0];
1442 tty->slave = ttyfds[1];
1443 TRACE("Received pty with master fd %d and slave fd %d from "
1444 "parent", tty->master, tty->slave);
1445 }
1446
1447 if (ret < 0)
1448 SYSERROR("Failed to receive %zu ttys from child", ttys->max);
1449 else
1450 TRACE("Received %zu ttys from child", ttys->max);
1451
1452 return ret;
1453 }
1454
1455 int resolve_clone_flags(struct lxc_handler *handler)
1456 {
1457 int i;
1458 struct lxc_conf *conf = handler->conf;
1459
1460 for (i = 0; i < LXC_NS_MAX; i++) {
1461 if (conf->ns_keep != 0) {
1462 if ((conf->ns_keep & ns_info[i].clone_flag) == 0)
1463 handler->ns_clone_flags |= ns_info[i].clone_flag;
1464 } else if (conf->ns_clone != 0) {
1465 if ((conf->ns_clone & ns_info[i].clone_flag) > 0)
1466 handler->ns_clone_flags |= ns_info[i].clone_flag;
1467 } else {
1468 if (i == LXC_NS_USER && lxc_list_empty(&handler->conf->id_map))
1469 continue;
1470
1471 if (i == LXC_NS_NET && lxc_requests_empty_network(handler))
1472 continue;
1473
1474 if (i == LXC_NS_CGROUP && !cgns_supported())
1475 continue;
1476
1477 handler->ns_clone_flags |= ns_info[i].clone_flag;
1478 }
1479
1480 if (!conf->ns_share[i])
1481 continue;
1482
1483 handler->ns_clone_flags &= ~ns_info[i].clone_flag;
1484 TRACE("Sharing %s namespace", ns_info[i].proc_name);
1485 }
1486
1487 return 0;
1488 }
1489
1490 /* Note that this function is used with clone(CLONE_VM). Some glibc versions
1491 * used to reset the pid/tid to -1 when CLONE_VM was used without CLONE_THREAD.
1492 * But since the memory between parent and child is shared on CLONE_VM this
1493 * would invalidate the getpid() cache that glibc used to maintain and so
1494 * getpid() in the child would return the parent's pid. This is all fixed in
1495 * newer glibc versions where the getpid() cache is removed and the pid/tid is
1496 * not reset anymore.
1497 * However, if for whatever reason you - dear commiter - somehow need to get the
1498 * pid of the dummy intermediate process for do_share_ns() you need to call
1499 * lxc_raw_getpid(). The next lxc_raw_clone() call does not employ CLONE_VM and
1500 * will be fine.
1501 */
1502 static inline int do_share_ns(void *arg)
1503 {
1504 int i, flags, ret;
1505 struct lxc_handler *handler = arg;
1506
1507 for (i = 0; i < LXC_NS_MAX; i++) {
1508 if (handler->nsfd[i] < 0)
1509 continue;
1510
1511 ret = setns(handler->nsfd[i], 0);
1512 if (ret < 0) {
1513 /*
1514 * Note that joining a user and/or mount namespace
1515 * requires the process is not multithreaded otherwise
1516 * setns() will fail here.
1517 */
1518 SYSERROR("Failed to inherit %s namespace",
1519 ns_info[i].proc_name);
1520 return -1;
1521 }
1522
1523 DEBUG("Inherited %s namespace", ns_info[i].proc_name);
1524 }
1525
1526 flags = handler->ns_on_clone_flags;
1527 flags |= CLONE_PARENT;
1528 handler->pid = lxc_raw_clone_cb(do_start, handler, flags);
1529 if (handler->pid < 0)
1530 return -1;
1531
1532 return 0;
1533 }
1534
1535 static int lxc_setup_shmount(struct lxc_conf *conf)
1536 {
1537 size_t len_cont;
1538 char *full_cont_path;
1539 int ret = -1;
1540
1541 /* Construct the shmount path under the container root. */
1542 len_cont = strlen(conf->rootfs.mount) + 1 + strlen(conf->shmount.path_cont);
1543 /* +1 for the terminating '\0' */
1544 full_cont_path = malloc(len_cont + 1);
1545 if (!full_cont_path) {
1546 SYSERROR("Not enough memory");
1547 return -ENOMEM;
1548 }
1549
1550 ret = snprintf(full_cont_path, len_cont + 1, "%s/%s",
1551 conf->rootfs.mount, conf->shmount.path_cont);
1552 if (ret < 0 || ret >= len_cont + 1) {
1553 SYSERROR("Failed to create filename");
1554 free(full_cont_path);
1555 return -1;
1556 }
1557
1558 /* Check if shmount point is already set up. */
1559 if (is_shared_mountpoint(conf->shmount.path_host)) {
1560 INFO("Path \"%s\" is already MS_SHARED. Reusing",
1561 conf->shmount.path_host);
1562 free(full_cont_path);
1563 return 0;
1564 }
1565
1566 /* Create host and cont mount paths */
1567 ret = mkdir_p(conf->shmount.path_host, 0711);
1568 if (ret < 0 && errno != EEXIST) {
1569 SYSERROR("Failed to create directory \"%s\"",
1570 conf->shmount.path_host);
1571 free(full_cont_path);
1572 return ret;
1573 }
1574
1575 ret = mkdir_p(full_cont_path, 0711);
1576 if (ret < 0 && errno != EEXIST) {
1577 SYSERROR("Failed to create directory \"%s\"", full_cont_path);
1578 free(full_cont_path);
1579 return ret;
1580 }
1581
1582 /* Prepare host mountpoint */
1583 ret = mount("tmpfs", conf->shmount.path_host, "tmpfs", 0,
1584 "size=100k,mode=0711");
1585 if (ret < 0) {
1586 SYSERROR("Failed to mount \"%s\"", conf->shmount.path_host);
1587 free(full_cont_path);
1588 return ret;
1589 }
1590
1591 ret = mount(conf->shmount.path_host, conf->shmount.path_host, "none",
1592 MS_REC | MS_SHARED, "");
1593 if (ret < 0) {
1594 SYSERROR("Failed to make shared \"%s\"", conf->shmount.path_host);
1595 free(full_cont_path);
1596 return ret;
1597 }
1598
1599 INFO("Setup shared mount point \"%s\"", conf->shmount.path_host);
1600 free(full_cont_path);
1601 return 0;
1602 }
1603
1604 /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
1605 * exec()s the requested container binary.
1606 * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
1607 * right here should be double checked if they'd pose a security risk. (For
1608 * example, any {u}mount() operations performed here will be reflected on the
1609 * host!)
1610 */
1611 static int lxc_spawn(struct lxc_handler *handler)
1612 {
1613 int i, ret;
1614 char pidstr[20];
1615 bool wants_to_map_ids;
1616 struct lxc_list *id_map;
1617 const char *name = handler->name;
1618 const char *lxcpath = handler->lxcpath;
1619 bool share_ns = false;
1620 struct lxc_conf *conf = handler->conf;
1621 struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
1622
1623 id_map = &conf->id_map;
1624 wants_to_map_ids = !lxc_list_empty(id_map);
1625
1626 for (i = 0; i < LXC_NS_MAX; i++) {
1627 if (!conf->ns_share[i])
1628 continue;
1629
1630 handler->nsfd[i] = lxc_inherit_namespace(conf->ns_share[i], lxcpath, ns_info[i].proc_name);
1631 if (handler->nsfd[i] < 0)
1632 return -1;
1633
1634 share_ns = true;
1635 }
1636
1637 ret = lxc_sync_init(handler);
1638 if (ret < 0)
1639 return -1;
1640
1641 ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
1642 handler->data_sock);
1643 if (ret < 0)
1644 goto out_sync_fini;
1645
1646 ret = resolve_clone_flags(handler);
1647 if (ret < 0)
1648 goto out_sync_fini;
1649
1650 if (conf->shmount.path_host) {
1651 if (!conf->shmount.path_cont)
1652 goto out_sync_fini;
1653
1654 ret = lxc_setup_shmount(conf);
1655 if (ret < 0) {
1656 ERROR("Failed to setup shared mount point");
1657 goto out_sync_fini;
1658 }
1659 }
1660
1661 if (handler->ns_clone_flags & CLONE_NEWNET) {
1662 if (!lxc_list_empty(&conf->network)) {
1663
1664 /* Find gateway addresses from the link device, which is
1665 * no longer accessible inside the container. Do this
1666 * before creating network interfaces, since goto
1667 * out_delete_net does not work before lxc_clone.
1668 */
1669 ret = lxc_find_gateway_addresses(handler);
1670 if (ret < 0) {
1671 ERROR("Failed to find gateway addresses");
1672 goto out_sync_fini;
1673 }
1674
1675 /* That should be done before the clone because we will
1676 * fill the netdev index and use them in the child.
1677 */
1678 ret = lxc_create_network_priv(handler);
1679 if (ret < 0) {
1680 ERROR("Failed to create the network");
1681 goto out_delete_net;
1682 }
1683 }
1684 }
1685
1686 if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
1687 ERROR("Failed creating cgroups");
1688 goto out_delete_net;
1689 }
1690
1691 /* If the rootfs is not a blockdev, prevent the container from marking
1692 * it readonly.
1693 * If the container is unprivileged then skip rootfs pinning.
1694 */
1695 if (!wants_to_map_ids) {
1696 handler->pinfd = pin_rootfs(conf->rootfs.path);
1697 if (handler->pinfd == -1)
1698 INFO("Failed to pin the rootfs for container \"%s\"", handler->name);
1699 }
1700
1701 /* Create a process in a new set of namespaces. */
1702 handler->ns_on_clone_flags = handler->ns_clone_flags;
1703 if (handler->ns_clone_flags & CLONE_NEWUSER) {
1704 /* If CLONE_NEWUSER and CLONE_NEWNET was requested, we need to
1705 * clone a new user namespace first and only later unshare our
1706 * network namespace to ensure that network devices ownership is
1707 * set up correctly.
1708 */
1709 handler->ns_on_clone_flags &= ~CLONE_NEWNET;
1710 }
1711 /* The cgroup namespace gets unshare()ed not clone()ed. */
1712 handler->ns_on_clone_flags &= ~CLONE_NEWCGROUP;
1713
1714 if (share_ns) {
1715 pid_t attacher_pid;
1716
1717 attacher_pid = lxc_clone(do_share_ns, handler,
1718 CLONE_VFORK | CLONE_VM | CLONE_FILES);
1719 if (attacher_pid < 0) {
1720 SYSERROR(LXC_CLONE_ERROR);
1721 goto out_delete_net;
1722 }
1723
1724 ret = wait_for_pid(attacher_pid);
1725 if (ret < 0) {
1726 SYSERROR("Intermediate process failed");
1727 goto out_delete_net;
1728 }
1729 } else {
1730 handler->pid = lxc_raw_clone_cb(do_start, handler,
1731 handler->ns_on_clone_flags);
1732 }
1733 if (handler->pid < 0) {
1734 SYSERROR(LXC_CLONE_ERROR);
1735 goto out_delete_net;
1736 }
1737 TRACE("Cloned child process %d", handler->pid);
1738
1739 for (i = 0; i < LXC_NS_MAX; i++)
1740 if (handler->ns_on_clone_flags & ns_info[i].clone_flag)
1741 INFO("Cloned %s", ns_info[i].flag_name);
1742
1743 if (!lxc_try_preserve_namespaces(handler, handler->ns_on_clone_flags, handler->pid)) {
1744 ERROR("Failed to preserve cloned namespaces for lxc.hook.stop");
1745 goto out_delete_net;
1746 }
1747
1748 lxc_sync_fini_child(handler);
1749
1750 /* Map the container uids. The container became an invalid userid the
1751 * moment it was cloned with CLONE_NEWUSER. This call doesn't change
1752 * anything immediately, but allows the container to setuid(0) (0 being
1753 * mapped to something else on the host.) later to become a valid uid
1754 * again.
1755 */
1756 if (wants_to_map_ids) {
1757 if (!handler->conf->ns_share[LXC_NS_USER] &&
1758 (handler->conf->ns_keep & CLONE_NEWUSER) == 0) {
1759 ret = lxc_map_ids(id_map, handler->pid);
1760 if (ret < 0) {
1761 ERROR("Failed to set up id mapping.");
1762 goto out_delete_net;
1763 }
1764 }
1765 }
1766
1767 ret = lxc_sync_wake_child(handler, LXC_SYNC_STARTUP);
1768 if (ret < 0)
1769 goto out_delete_net;
1770
1771 ret = lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE);
1772 if (ret < 0)
1773 goto out_delete_net;
1774
1775 if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) {
1776 ERROR("Failed to setup cgroup limits for container \"%s\"", name);
1777 goto out_delete_net;
1778 }
1779
1780 if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
1781 goto out_delete_net;
1782
1783 if (!cgroup_ops->chown(cgroup_ops, handler->conf))
1784 goto out_delete_net;
1785
1786 /* Now we're ready to preserve the network namespace */
1787 ret = lxc_try_preserve_ns(handler->pid, "net");
1788 if (ret < 0) {
1789 if (ret != -EOPNOTSUPP) {
1790 SYSERROR("Failed to preserve net namespace");
1791 goto out_delete_net;
1792 }
1793 } else {
1794 handler->nsfd[LXC_NS_NET] = ret;
1795 DEBUG("Preserved net namespace via fd %d", ret);
1796
1797 ret = lxc_netns_set_nsid(handler->nsfd[LXC_NS_NET]);
1798 if (ret < 0)
1799 SYSERROR("Failed to allocate new network namespace id");
1800 else
1801 TRACE("Allocated new network namespace id");
1802 }
1803
1804 /* Create the network configuration. */
1805 if (handler->ns_clone_flags & CLONE_NEWNET) {
1806 ret = lxc_network_move_created_netdev_priv(handler->lxcpath,
1807 handler->name,
1808 &conf->network,
1809 handler->pid);
1810 if (ret < 0) {
1811 ERROR("Failed to create the configured network");
1812 goto out_delete_net;
1813 }
1814
1815 ret = lxc_create_network_unpriv(handler->lxcpath, handler->name,
1816 &conf->network, handler->pid, conf->hooks_version);
1817 if (ret < 0) {
1818 ERROR("Failed to create the configured network");
1819 goto out_delete_net;
1820 }
1821 }
1822
1823 ret = lxc_network_send_veth_names_to_child(handler);
1824 if (ret < 0) {
1825 ERROR("Failed to send veth names to child");
1826 goto out_delete_net;
1827 }
1828
1829 if (!lxc_list_empty(&conf->procs)) {
1830 ret = setup_proc_filesystem(&conf->procs, handler->pid);
1831 if (ret < 0)
1832 goto out_delete_net;
1833 }
1834
1835 /* Tell the child to continue its initialization. We'll get
1836 * LXC_SYNC_CGROUP when it is ready for us to setup cgroups.
1837 */
1838 ret = lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE);
1839 if (ret < 0)
1840 goto out_delete_net;
1841
1842 if (!lxc_list_empty(&conf->limits)) {
1843 ret = setup_resource_limits(&conf->limits, handler->pid);
1844 if (ret < 0) {
1845 ERROR("Failed to setup resource limits");
1846 goto out_delete_net;
1847 }
1848 }
1849
1850 ret = lxc_sync_barrier_child(handler, LXC_SYNC_CGROUP_UNSHARE);
1851 if (ret < 0)
1852 goto out_delete_net;
1853
1854 if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
1855 ERROR("Failed to setup legacy device cgroup controller limits");
1856 goto out_delete_net;
1857 }
1858 TRACE("Set up legacy device cgroup controller limits");
1859
1860 if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
1861 /* Now we're ready to preserve the cgroup namespace */
1862 ret = lxc_try_preserve_ns(handler->pid, "cgroup");
1863 if (ret < 0) {
1864 if (ret != -EOPNOTSUPP) {
1865 SYSERROR("Failed to preserve cgroup namespace");
1866 goto out_delete_net;
1867 }
1868 } else {
1869 handler->nsfd[LXC_NS_CGROUP] = ret;
1870 DEBUG("Preserved cgroup namespace via fd %d", ret);
1871 }
1872 }
1873
1874 ret = snprintf(pidstr, 20, "%d", handler->pid);
1875 if (ret < 0 || ret >= 20)
1876 goto out_delete_net;
1877
1878 ret = setenv("LXC_PID", pidstr, 1);
1879 if (ret < 0)
1880 SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr);
1881
1882 /* Run any host-side start hooks */
1883 ret = run_lxc_hooks(name, "start-host", conf, NULL);
1884 if (ret < 0) {
1885 ERROR("Failed to run lxc.hook.start-host");
1886 goto out_delete_net;
1887 }
1888
1889 /* Tell the child to complete its initialization and wait for it to exec
1890 * or return an error. (The child will never return
1891 * LXC_SYNC_READY_START+1. It will either close the sync pipe, causing
1892 * lxc_sync_barrier_child to return success, or return a different
1893 * value, causing us to error out).
1894 */
1895 ret = lxc_sync_barrier_child(handler, LXC_SYNC_READY_START);
1896 if (ret < 0)
1897 goto out_delete_net;
1898
1899 ret = lxc_network_recv_name_and_ifindex_from_child(handler);
1900 if (ret < 0) {
1901 ERROR("Failed to receive names and ifindices for network "
1902 "devices from child");
1903 goto out_delete_net;
1904 }
1905
1906 /* Now all networks are created, network devices are moved into place,
1907 * and the correct names and ifindeces in the respective namespaces have
1908 * been recorded. The corresponding structs have now all been filled. So
1909 * log them for debugging purposes.
1910 */
1911 lxc_log_configured_netdevs(conf);
1912
1913 /* Read tty fds allocated by child. */
1914 ret = lxc_recv_ttys_from_child(handler);
1915 if (ret < 0) {
1916 ERROR("Failed to receive tty info from child process");
1917 goto out_delete_net;
1918 }
1919
1920 ret = handler->ops->post_start(handler, handler->data);
1921 if (ret < 0)
1922 goto out_abort;
1923
1924 ret = lxc_set_state(name, handler, RUNNING);
1925 if (ret < 0) {
1926 ERROR("Failed to set state to \"%s\"", lxc_state2str(RUNNING));
1927 goto out_abort;
1928 }
1929
1930 lxc_sync_fini(handler);
1931
1932 return 0;
1933
1934 out_delete_net:
1935 if (handler->ns_clone_flags & CLONE_NEWNET)
1936 lxc_delete_network(handler);
1937
1938 out_abort:
1939 lxc_abort(name, handler);
1940
1941 out_sync_fini:
1942 lxc_sync_fini(handler);
1943 if (handler->pinfd >= 0) {
1944 close(handler->pinfd);
1945 handler->pinfd = -1;
1946 }
1947
1948 return -1;
1949 }
1950
1951 int __lxc_start(const char *name, struct lxc_handler *handler,
1952 struct lxc_operations* ops, void *data, const char *lxcpath,
1953 bool daemonize, int *error_num)
1954 {
1955 int ret, status;
1956 struct lxc_conf *conf = handler->conf;
1957 struct cgroup_ops *cgroup_ops;
1958
1959 ret = lxc_init(name, handler);
1960 if (ret < 0) {
1961 ERROR("Failed to initialize container \"%s\"", name);
1962 return -1;
1963 }
1964 handler->ops = ops;
1965 handler->data = data;
1966 handler->daemonize = daemonize;
1967 cgroup_ops = handler->cgroup_ops;
1968
1969 if (!attach_block_device(handler->conf)) {
1970 ERROR("Failed to attach block device");
1971 goto out_fini_nonet;
1972 }
1973
1974 if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
1975 ERROR("Failed to create monitor cgroup");
1976 goto out_fini_nonet;
1977 }
1978
1979 if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
1980 ERROR("Failed to enter monitor cgroup");
1981 goto out_fini_nonet;
1982 }
1983
1984 if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
1985 /* If the backing store is a device, mount it here and now. */
1986 if (rootfs_is_blockdev(conf)) {
1987 ret = unshare(CLONE_NEWNS);
1988 if (ret < 0) {
1989 ERROR("Failed to unshare CLONE_NEWNS");
1990 goto out_fini_nonet;
1991 }
1992 INFO("Unshared CLONE_NEWNS");
1993
1994 remount_all_slave();
1995 ret = lxc_setup_rootfs_prepare_root(conf, name, lxcpath);
1996 if (ret < 0) {
1997 ERROR("Error setting up rootfs mount as root before spawn");
1998 goto out_fini_nonet;
1999 }
2000 INFO("Set up container rootfs as host root");
2001 }
2002 }
2003
2004 ret = lxc_spawn(handler);
2005 if (ret < 0) {
2006 ERROR("Failed to spawn container \"%s\"", name);
2007 goto out_detach_blockdev;
2008 }
2009 /* close parent side of data socket */
2010 close(handler->data_sock[0]);
2011 handler->data_sock[0] = -1;
2012 close(handler->data_sock[1]);
2013 handler->data_sock[1] = -1;
2014
2015 handler->conf->reboot = REBOOT_NONE;
2016
2017 ret = lxc_poll(name, handler);
2018 if (ret) {
2019 ERROR("LXC mainloop exited with error: %d", ret);
2020 goto out_abort;
2021 }
2022
2023 if (!handler->init_died && handler->pid > 0) {
2024 ERROR("Child process is not killed");
2025 goto out_abort;
2026 }
2027
2028 status = lxc_wait_for_pid_status(handler->pid);
2029 if (status < 0)
2030 SYSERROR("Failed to retrieve status for %d", handler->pid);
2031
2032 /* If the child process exited but was not signaled, it didn't call
2033 * reboot. This should mean it was an lxc-execute which simply exited.
2034 * In any case, treat it as a 'halt'.
2035 */
2036 if (WIFSIGNALED(status)) {
2037 switch(WTERMSIG(status)) {
2038 case SIGINT: /* halt */
2039 DEBUG("Container \"%s\" is halting", name);
2040 break;
2041 case SIGHUP: /* reboot */
2042 DEBUG("Container \"%s\" is rebooting", name);
2043 handler->conf->reboot = REBOOT_REQ;
2044 break;
2045 case SIGSYS: /* seccomp */
2046 DEBUG("Container \"%s\" violated its seccomp policy", name);
2047 break;
2048 default:
2049 DEBUG("Unknown exit status for container \"%s\" init %d", name, WTERMSIG(status));
2050 break;
2051 }
2052 }
2053
2054 ret = lxc_restore_phys_nics_to_netns(handler);
2055 if (ret < 0)
2056 ERROR("Failed to move physical network devices back to parent "
2057 "network namespace");
2058
2059 if (handler->pinfd >= 0) {
2060 close(handler->pinfd);
2061 handler->pinfd = -1;
2062 }
2063
2064 lxc_monitor_send_exit_code(name, status, handler->lxcpath);
2065 lxc_error_set_and_log(handler->pid, status);
2066 if (error_num)
2067 *error_num = handler->exit_status;
2068
2069 out_fini:
2070 lxc_delete_network(handler);
2071
2072 out_detach_blockdev:
2073 detach_block_device(handler->conf);
2074
2075 out_fini_nonet:
2076 lxc_fini(name, handler);
2077 return ret;
2078
2079 out_abort:
2080 lxc_abort(name, handler);
2081 goto out_fini;
2082 }
2083
/* Argument bundle handed to the start()/post_start() callbacks. */
struct start_args {
	/* Argument vector of the program to exec; argv[0] is the program. */
	char *const *argv;
};
2087
2088 static int start(struct lxc_handler *handler, void* data)
2089 {
2090 struct start_args *arg = data;
2091
2092 NOTICE("Exec'ing \"%s\"", arg->argv[0]);
2093
2094 execvp(arg->argv[0], arg->argv);
2095 SYSERROR("Failed to exec \"%s\"", arg->argv[0]);
2096 return 0;
2097 }
2098
2099 static int post_start(struct lxc_handler *handler, void* data)
2100 {
2101 struct start_args *arg = data;
2102
2103 NOTICE("Started \"%s\" with pid \"%d\"", arg->argv[0], handler->pid);
2104 return 0;
2105 }
2106
2107 static struct lxc_operations start_ops = {
2108 .start = start,
2109 .post_start = post_start
2110 };
2111
2112 int lxc_start(const char *name, char *const argv[], struct lxc_handler *handler,
2113 const char *lxcpath, bool daemonize, int *error_num)
2114 {
2115 struct start_args start_arg = {
2116 .argv = argv,
2117 };
2118
2119 TRACE("Doing lxc_start");
2120 return __lxc_start(name, handler, &start_ops, &start_arg, lxcpath, daemonize, error_num);
2121 }
2122
2123 static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
2124 const char *name)
2125 {
2126 char destroy[MAXPATHLEN];
2127 struct lxc_container *c;
2128 int ret = 0;
2129 bool bret = true;
2130
2131 if (handler->conf->rootfs.path && handler->conf->rootfs.mount) {
2132 bret = do_destroy_container(handler);
2133 if (!bret) {
2134 ERROR("Error destroying rootfs for container \"%s\"", name);
2135 return;
2136 }
2137 }
2138 INFO("Destroyed rootfs for container \"%s\"", name);
2139
2140 ret = snprintf(destroy, MAXPATHLEN, "%s/%s", handler->lxcpath, name);
2141 if (ret < 0 || ret >= MAXPATHLEN) {
2142 ERROR("Error destroying directory for container \"%s\"", name);
2143 return;
2144 }
2145
2146 c = lxc_container_new(name, handler->lxcpath);
2147 if (c) {
2148 if (container_disk_lock(c)) {
2149 INFO("Could not update lxc_snapshots file");
2150 lxc_container_put(c);
2151 } else {
2152 mod_all_rdeps(c, false);
2153 container_disk_unlock(c);
2154 lxc_container_put(c);
2155 }
2156 }
2157
2158 if (!handler->am_root)
2159 ret = userns_exec_full(handler->conf, lxc_rmdir_onedev_wrapper,
2160 destroy, "lxc_rmdir_onedev_wrapper");
2161 else
2162 ret = lxc_rmdir_onedev(destroy, NULL);
2163
2164 if (ret < 0) {
2165 ERROR("Error destroying directory for container \"%s\"", name);
2166 return;
2167 }
2168 INFO("Destroyed directory for container \"%s\"", name);
2169 }
2170
/* Adapter that lets lxc_rmdir_onedev() be used as a callback taking a single
 * void pointer: @data is the path to remove.
 */
static int lxc_rmdir_onedev_wrapper(void *data)
{
	char *path = data;

	return lxc_rmdir_onedev(path, NULL);
}
2176
2177 static bool do_destroy_container(struct lxc_handler *handler)
2178 {
2179 int ret;
2180
2181 if (!handler->am_root) {
2182 ret = userns_exec_full(handler->conf, storage_destroy_wrapper,
2183 handler->conf, "storage_destroy_wrapper");
2184 if (ret < 0)
2185 return false;
2186
2187 return true;
2188 }
2189
2190 return storage_destroy(handler->conf);
2191 }