]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/start.c
tty.h: Ship our own minimal openpty.h
[mirror_lxc.git] / src / lxc / start.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
f549edcc
GK
24#include "config.h"
25
0ad19a3f 26#include <stdio.h>
27#undef _GNU_SOURCE
28#include <string.h>
29#include <stdlib.h>
30#include <dirent.h>
31#include <errno.h>
32#include <unistd.h>
33#include <signal.h>
b0a33c1e 34#include <fcntl.h>
35#include <termios.h>
0ad19a3f 36#include <sys/param.h>
37#include <sys/file.h>
f4d507d5 38#include <sys/mount.h>
b4f8660e 39#include <sys/stat.h>
0ad19a3f 40#include <sys/types.h>
8173e600 41#include <sys/socket.h>
0ad19a3f 42#include <sys/prctl.h>
ddceb1f9 43#include <sys/types.h>
42ff343d 44#include <sys/capability.h>
0ad19a3f 45#include <sys/wait.h>
b0a33c1e 46#include <sys/un.h>
47#include <sys/poll.h>
8173e600 48#include <sys/syscall.h>
ff218c25 49
15cd25fd 50#ifdef HAVE_SYS_SIGNALFD_H
8ca61733 51# include <sys/signalfd.h>
ff218c25 52#else
15cd25fd 53/* assume kernel headers are too old */
09d1bd23 54#include <stdint.h>
15cd25fd
DL
/*
 * Local stand-in for the structure normally supplied by <sys/signalfd.h>;
 * it is only compiled when that header is not available. One such record is
 * read from the signal fd per signal. The fields add up to 128 bytes.
 * Only ssi_signo, ssi_code and ssi_pid are consulted in this file.
 */
struct signalfd_siginfo {
	uint32_t ssi_signo;	/* signal number */
	int32_t ssi_errno;
	int32_t ssi_code;	/* signal code, e.g. CLD_STOPPED */
	uint32_t ssi_pid;	/* pid of the sender */
	uint32_t ssi_uid;
	int32_t ssi_fd;
	uint32_t ssi_tid;
	uint32_t ssi_band;
	uint32_t ssi_overrun;
	uint32_t ssi_trapno;
	int32_t ssi_status;
	int32_t ssi_int;
	uint64_t ssi_ptr;
	uint64_t ssi_utime;
	uint64_t ssi_stime;
	uint64_t ssi_addr;
	uint8_t __pad[48];	/* pads the record to its full size */
};
75
8ca61733
MJ
76# ifndef __NR_signalfd4
77/* assume kernel headers are too old */
78# if __i386__
79# define __NR_signalfd4 327
80# elif __x86_64__
81# define __NR_signalfd4 289
bfa38025
MH
82# elif __powerpc__
83# define __NR_signalfd4 313
47f38330
SH
84# elif __s390x__
85# define __NR_signalfd4 322
8ca61733
MJ
86# endif
87#endif
88
89# ifndef __NR_signalfd
90/* assume kernel headers are too old */
91# if __i386__
92# define __NR_signalfd 321
93# elif __x86_64__
94# define __NR_signalfd 282
bfa38025
MH
95# elif __powerpc__
96# define __NR_signalfd 305
47f38330
SH
97# elif __s390x__
98# define __NR_signalfd 316
8ca61733
MJ
99# endif
100#endif
101
/*
 * Fallback signalfd() wrapper, compiled only when <sys/signalfd.h> is
 * not available.
 *
 * The signalfd4 system call is tried first. Only when that call actually
 * failed with ENOSYS, and no flags were requested, is the older flag-less
 * signalfd system call used instead.
 *
 * Returns the new file descriptor, or -1 with errno set by the failing call.
 */
int signalfd(int fd, const sigset_t *mask, int flags)
{
	int retval;

	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);

	/* errno is only meaningful when the call failed: a stale ENOSYS left
	 * behind by earlier code must not trigger a second system call, which
	 * would leak the descriptor obtained just above.
	 */
	if (retval < 0 && errno == ENOSYS && flags == 0)
		retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);

	return retval;
}
ff218c25 111#endif
0ad19a3f 112
656994bb
MH
113#if !HAVE_DECL_PR_CAPBSET_DROP
114#define PR_CAPBSET_DROP 24
115#endif
116
63376d7d
DL
117#include "start.h"
118#include "conf.h"
119#include "log.h"
563f2f2c 120#include "cgroup.h"
e2bcd7db 121#include "error.h"
b0a33c1e 122#include "af_unix.h"
123#include "mainloop.h"
63376d7d 124#include "utils.h"
563f2f2c 125#include "utmp.h"
63376d7d 126#include "monitor.h"
96fa1ff0 127#include "commands.h"
63376d7d 128#include "console.h"
3c22086f 129#include "sync.h"
f549edcc 130#include "namespace.h"
e075f5d9 131#include "apparmor.h"
0d0527a9 132#include "lxcseccomp.h"
8173e600 133#include "caps.h"
36eb9bde
CLG
134
135lxc_log_define(lxc_start, lxc);
136
80090207
CLG
/* Tell whether fd is one of the descriptors 0, 1 or 2.
 * Returns 1 when it is, 0 otherwise.
 */
static int match_fd(int fd)
{
	switch (fd) {
	case 0:
	case 1:
	case 2:
		return 1;
	default:
		return 0;
	}
}
141
b119f362 142int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore)
80090207
CLG
143{
144 struct dirent dirent, *direntp;
145 int fd, fddir;
146 DIR *dir;
80090207 147
b119f362 148restart:
80090207
CLG
149 dir = opendir("/proc/self/fd");
150 if (!dir) {
151 WARN("failed to open directory: %m");
152 return -1;
153 }
154
155 fddir = dirfd(dir);
156
157 while (!readdir_r(dir, &dirent, &direntp)) {
80090207
CLG
158 if (!direntp)
159 break;
160
161 if (!strcmp(direntp->d_name, "."))
162 continue;
163
164 if (!strcmp(direntp->d_name, ".."))
165 continue;
166
167 fd = atoi(direntp->d_name);
168
f2faa8fa 169 if (fd == fddir || fd == lxc_log_fd || fd == fd_to_ignore)
80090207
CLG
170 continue;
171
172 if (match_fd(fd))
173 continue;
80090207 174
b119f362
SH
175 if (conf->close_all_fds) {
176 close(fd);
177 closedir(dir);
178 INFO("closed inherited fd %d", fd);
179 goto restart;
180 }
92c7f629 181 WARN("inherited fd %d", fd);
80090207
CLG
182 }
183
92c7f629
GK
184 closedir(dir); /* cannot fail */
185 return 0;
80090207
CLG
186}
187
83ee7875 188static int setup_signal_fd(sigset_t *oldmask)
b0a33c1e 189{
190 sigset_t mask;
191 int fd;
192
f3304a29
FW
193 /* Block everything except serious error signals */
194 if (sigfillset(&mask) ||
195 sigdelset(&mask, SIGILL) ||
196 sigdelset(&mask, SIGSEGV) ||
197 sigdelset(&mask, SIGBUS) ||
198 sigprocmask(SIG_BLOCK, &mask, oldmask)) {
199 SYSERROR("failed to set signal mask");
b0a33c1e 200 return -1;
201 }
202
203 fd = signalfd(-1, &mask, 0);
204 if (fd < 0) {
36eb9bde 205 SYSERROR("failed to create the signal fd");
b0a33c1e 206 return -1;
207 }
208
209 if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
36eb9bde 210 SYSERROR("failed to set sigfd to close-on-exec");
b0a33c1e 211 close(fd);
212 return -1;
213 }
214
1ac470c0
DL
215 DEBUG("sigchild handler set");
216
b0a33c1e 217 return fd;
218}
219
83ee7875 220static int signal_handler(int fd, void *data,
b0a33c1e 221 struct lxc_epoll_descr *descr)
222{
15cd25fd
DL
223 struct signalfd_siginfo siginfo;
224 int ret;
82d89dce 225 pid_t *pid = data;
15cd25fd
DL
226
227 ret = read(fd, &siginfo, sizeof(siginfo));
228 if (ret < 0) {
f3304a29 229 ERROR("failed to read signal info");
15cd25fd
DL
230 return -1;
231 }
232
233 if (ret != sizeof(siginfo)) {
234 ERROR("unexpected siginfo size");
235 return -1;
236 }
237
f3304a29
FW
238 if (siginfo.ssi_signo != SIGCHLD) {
239 kill(*pid, siginfo.ssi_signo);
240 INFO("forwarded signal %d to pid %d", siginfo.ssi_signo, *pid);
241 return 0;
242 }
243
15cd25fd
DL
244 if (siginfo.ssi_code == CLD_STOPPED ||
245 siginfo.ssi_code == CLD_CONTINUED) {
246 INFO("container init process was stopped/continued");
247 return 0;
248 }
1ac470c0 249
82d89dce
DL
250 /* more robustness, protect ourself from a SIGCHLD sent
251 * by a process different from the container init
252 */
253 if (siginfo.ssi_pid != *pid) {
254 WARN("invalid pid for SIGCHLD");
255 return 0;
256 }
257
15cd25fd 258 DEBUG("container init process exited");
b0a33c1e 259 return 1;
260}
261
0a3ec350
DL
262int lxc_pid_callback(int fd, struct lxc_request *request,
263 struct lxc_handler *handler)
81c75799
DL
264{
265 struct lxc_answer answer;
266 int ret;
267
268 answer.pid = handler->pid;
269 answer.ret = 0;
270
271 ret = send(fd, &answer, sizeof(answer), 0);
272 if (ret < 0) {
273 WARN("failed to send answer to the peer");
274 return -1;
275 }
276
277 if (ret != sizeof(answer)) {
278 ERROR("partial answer sent");
279 return -1;
280 }
281
282 return 0;
283}
284
d5088cf2
CS
285int lxc_clone_flags_callback(int fd, struct lxc_request *request,
286 struct lxc_handler *handler)
287{
288 struct lxc_answer answer;
289 int ret;
290
291 answer.pid = 0;
292 answer.ret = handler->clone_flags;
293
294 ret = send(fd, &answer, sizeof(answer), 0);
295 if (ret < 0) {
296 WARN("failed to send answer to the peer");
297 return -1;
298 }
299
300 if (ret != sizeof(answer)) {
301 ERROR("partial answer sent");
302 return -1;
303 }
304
305 return 0;
306}
307
25c2aca5 308int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
66aeffc7
DL
309{
310 handler->state = state;
311 lxc_monitor_send_state(name, state);
312 return 0;
313}
314
1bc5cc8c 315int lxc_poll(const char *name, struct lxc_handler *handler)
b0a33c1e 316{
ca5f7926
DL
317 int sigfd = handler->sigfd;
318 int pid = handler->pid;
b0a33c1e 319 struct lxc_epoll_descr descr;
320
a9e61274 321 if (lxc_mainloop_open(&descr)) {
36eb9bde 322 ERROR("failed to create mainloop");
50c8bf05 323 goto out_sigfd;
b0a33c1e 324 }
325
83ee7875 326 if (lxc_mainloop_add_handler(&descr, sigfd, signal_handler, &pid)) {
36eb9bde 327 ERROR("failed to add handler for the signal");
b0a33c1e 328 goto out_mainloop_open;
329 }
330
63376d7d
DL
331 if (lxc_console_mainloop_add(&descr, handler)) {
332 ERROR("failed to add console handler to mainloop");
333 goto out_mainloop_open;
334 }
335
563f2f2c
DL
336 if (lxc_command_mainloop_add(name, &descr, handler)) {
337 ERROR("failed to add command handler to mainloop");
96fa1ff0 338 goto out_mainloop_open;
563f2f2c
DL
339 }
340
828695d9
SH
341 if (handler->conf->need_utmp_watch) {
342 if (lxc_utmp_mainloop_add(&descr, handler)) {
343 ERROR("failed to add utmp handler to mainloop");
344 goto out_mainloop_open;
345 }
563f2f2c 346 }
b0a33c1e 347
c3e13372 348 return lxc_mainloop(&descr);
b0a33c1e 349
350out_mainloop_open:
351 lxc_mainloop_close(&descr);
b0a33c1e 352out_sigfd:
353 close(sigfd);
c3e13372 354 return -1;
b0a33c1e 355}
356
4a2ca8b2
SH
357extern int lxc_caps_check(void);
358
fae349da 359struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf)
59eb99ba 360{
3a0f472d
DL
361 struct lxc_handler *handler;
362
4a2ca8b2
SH
363 if (!lxc_caps_check()) {
364 ERROR("Not running with sufficient privilege");
365 return NULL;
366 }
367
3a0f472d
DL
368 handler = malloc(sizeof(*handler));
369 if (!handler)
370 return NULL;
59eb99ba
DL
371
372 memset(handler, 0, sizeof(*handler));
373
fae349da
DL
374 handler->conf = conf;
375
e075f5d9 376 apparmor_handler_init(handler);
3bdf52d7
DL
377 handler->name = strdup(name);
378 if (!handler->name) {
379 ERROR("failed to allocate memory");
380 goto out_free;
381 }
382
d2e30e99
DE
383 if (lxc_command_init(name, handler))
384 goto out_free_name;
385
8f2c3a70
SH
386 if (lxc_read_seccomp_config(conf) != 0) {
387 ERROR("failed loading seccomp policy");
d2e30e99 388 goto out_close_maincmd_fd;
8f2c3a70
SH
389 }
390
0ad19a3f 391 /* Begin the set the state to STARTING*/
25c2aca5 392 if (lxc_set_state(name, handler, STARTING)) {
59eb99ba 393 ERROR("failed to set state '%s'", lxc_state2str(STARTING));
3bdf52d7 394 goto out_free_name;
0ad19a3f 395 }
396
773fb9ca
SH
397 if (run_lxc_hooks(name, "pre-start", conf)) {
398 ERROR("failed to run pre-start hooks for container '%s'.", name);
399 goto out_aborting;
400 }
26ddeedd 401
fae349da 402 if (lxc_create_tty(name, conf)) {
36eb9bde 403 ERROR("failed to create the ttys");
59eb99ba 404 goto out_aborting;
b0a33c1e 405 }
406
1560f6c9 407 if (lxc_create_console(conf)) {
63376d7d
DL
408 ERROR("failed to create console");
409 goto out_delete_tty;
410 }
411
b0a33c1e 412 /* the signal fd has to be created before forking otherwise
413 * if the child process exits before we setup the signal fd,
414 * the event will be lost and the command will be stuck */
83ee7875 415 handler->sigfd = setup_signal_fd(&handler->oldmask);
59eb99ba 416 if (handler->sigfd < 0) {
36eb9bde 417 ERROR("failed to set sigchild fd handler");
63376d7d 418 goto out_delete_console;
b0a33c1e 419 }
420
c3e13372 421 INFO("'%s' is initialized", name);
3a0f472d 422 return handler;
59eb99ba 423
63376d7d
DL
424out_delete_console:
425 lxc_delete_console(&conf->console);
59eb99ba 426out_delete_tty:
fae349da 427 lxc_delete_tty(&conf->tty_info);
59eb99ba 428out_aborting:
25c2aca5 429 lxc_set_state(name, handler, ABORTING);
d2e30e99
DE
430out_close_maincmd_fd:
431 close(conf->maincmd_fd);
432 conf->maincmd_fd = -1;
3bdf52d7
DL
433out_free_name:
434 free(handler->name);
435 handler->name = NULL;
3a0f472d
DL
436out_free:
437 free(handler);
c3e13372 438 return NULL;
59eb99ba
DL
439}
440
1bc5cc8c 441void lxc_fini(const char *name, struct lxc_handler *handler)
59eb99ba
DL
442{
443 /* The STOPPING state is there for future cleanup code
444 * which can take awhile
445 */
25c2aca5
MN
446 lxc_set_state(name, handler, STOPPING);
447 lxc_set_state(name, handler, STOPPED);
59eb99ba 448
773fb9ca
SH
449 if (run_lxc_hooks(name, "post-stop", handler->conf))
450 ERROR("failed to run post-stop hooks for container '%s'.", name);
26ddeedd 451
83ee7875 452 /* reset mask set by setup_signal_fd */
8f64a3f6
MN
453 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL))
454 WARN("failed to restore sigprocmask");
455
63376d7d 456 lxc_delete_console(&handler->conf->console);
b2431939 457 lxc_delete_tty(&handler->conf->tty_info);
d2e30e99
DE
458 close(handler->conf->maincmd_fd);
459 handler->conf->maincmd_fd = -1;
3bdf52d7 460 free(handler->name);
b2431939 461 free(handler);
59eb99ba
DL
462}
463
1bc5cc8c 464void lxc_abort(const char *name, struct lxc_handler *handler)
59eb99ba 465{
25c2aca5 466 lxc_set_state(name, handler, ABORTING);
7d9fb3e9
DL
467 if (handler->pid > 0)
468 kill(handler->pid, SIGKILL);
59eb99ba
DL
469}
470
828695d9
SH
471#include <sys/reboot.h>
472#include <linux/reboot.h>
473
e2fa1520
SH
/*
 * Probe run inside a freshly cloned pid namespace; arg points to the
 * reboot command to try.
 *
 * reboot(LINUX_REBOOT_CMD_CAD_ON) will return -EINVAL in a child pid
 * namespace if container reboot support exists. Otherwise, it will
 * either succeed or return -EPERM.
 *
 * Returns 1 when reboot() failed with EINVAL, 0 in every other case.
 */
static int container_reboot_supported(void *arg)
{
	int *cmd = arg;

	if (reboot(*cmd) != -1)
		return 0;

	return errno == EINVAL ? 1 : 0;
}
489
490static int must_drop_cap_sys_boot(void)
491{
492 FILE *f = fopen("/proc/sys/kernel/ctrl-alt-del", "r");
493 int ret, cmd, v;
494 long stack_size = 4096;
495 void *stack = alloca(stack_size) + stack_size;
496 int status;
497 pid_t pid;
498
499 if (!f) {
500 DEBUG("failed to open /proc/sys/kernel/ctrl-alt-del");
828695d9 501 return 1;
e2fa1520 502 }
828695d9
SH
503
504 ret = fscanf(f, "%d", &v);
505 fclose(f);
e2fa1520
SH
506 if (ret != 1) {
507 DEBUG("Failed to read /proc/sys/kernel/ctrl-alt-del");
828695d9 508 return 1;
e2fa1520
SH
509 }
510 cmd = v ? LINUX_REBOOT_CMD_CAD_ON : LINUX_REBOOT_CMD_CAD_OFF;
511
512 pid = clone(container_reboot_supported, stack, CLONE_NEWPID | SIGCHLD, &cmd);
513 if (pid < 0) {
514 SYSERROR("failed to clone\n");
515 return -1;
516 }
517 if (wait(&status) < 0) {
518 SYSERROR("unexpected wait error: %m\n");
519 return -1;
520 }
521
522 if (WEXITSTATUS(status) != 1)
828695d9 523 return 1;
e2fa1520 524
828695d9
SH
525 return 0;
526}
527
ffe1e01a 528static int do_start(void *data)
50e98013 529{
23c53af9 530 struct lxc_handler *handler = data;
50e98013
DL
531
532 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
533 SYSERROR("failed to set sigprocmask");
9d7f9e52 534 return -1;
50e98013
DL
535 }
536
743ecd2e
DL
537 /* This prctl must be before the synchro, so if the parent
538 * dies before we set the parent death signal, we will detect
539 * its death with the synchro right after, otherwise we have
540 * a window where the parent can exit before we set the pdeath
541 * signal leading to a unsupervized container.
542 */
543 if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) {
544 SYSERROR("failed to set pdeath signal");
545 return -1;
546 }
547
3c22086f 548 lxc_sync_fini_parent(handler);
50e98013 549
3c22086f
CLG
550 /* Tell the parent task it can begin to configure the
551 * container and wait for it to finish
552 */
553 if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
9d7f9e52 554 return -1;
50e98013 555
69182a31 556 if (handler->conf->need_utmp_watch) {
828695d9
SH
557 if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
558 SYSERROR("failed to remove CAP_SYS_BOOT capability");
559 return -1;
560 }
e2fa1520 561 DEBUG("Dropped cap_sys_boot\n");
e2fa1520
SH
562 }
563
564 /* Setup the container, ip, names, utsname, ... */
565 if (lxc_setup(handler->name, handler->conf)) {
566 ERROR("failed to setup the container");
567 goto out_warn_father;
568 }
50e98013 569
e075f5d9
SH
570 if (apparmor_load(handler) < 0)
571 goto out_warn_father;
572
8f2c3a70
SH
573 if (lxc_seccomp_load(handler->conf) != 0)
574 goto out_warn_father;
575
773fb9ca
SH
576 if (run_lxc_hooks(handler->name, "start", handler->conf)) {
577 ERROR("failed to run start hooks for container '%s'.", handler->name);
578 goto out_warn_father;
579 }
fc25b815 580
773fb9ca 581 close(handler->sigfd);
26ddeedd 582
e6126dbe
MN
583 /* after this call, we are in error because this
584 * ops should not return as it execs */
23c53af9 585 if (handler->ops->start(handler, handler->data))
e6126dbe 586 return -1;
50e98013
DL
587
588out_warn_father:
3c22086f 589 lxc_sync_wake_parent(handler, LXC_SYNC_POST_CONFIGURE);
50e98013
DL
590 return -1;
591}
592
7b35f3d6
SH
593int save_phys_nics(struct lxc_conf *conf)
594{
595 struct lxc_list *iterator;
596
597 lxc_list_for_each(iterator, &conf->network) {
598 struct lxc_netdev *netdev = iterator->elem;
599
600 if (netdev->type != LXC_NET_PHYS)
601 continue;
602 conf->saved_nics = realloc(conf->saved_nics,
603 (conf->num_savednics+1)*sizeof(struct saved_nic));
604 if (!conf->saved_nics) {
605 SYSERROR("failed to allocate memory");
606 return -1;
607 }
608 conf->saved_nics[conf->num_savednics].ifindex = netdev->ifindex;
609 conf->saved_nics[conf->num_savednics].orig_name = strdup(netdev->link);
610 if (!conf->saved_nics[conf->num_savednics].orig_name) {
611 SYSERROR("failed to allocate memory");
612 return -1;
613 }
614 INFO("stored saved_nic #%d idx %d name %s\n", conf->num_savednics,
615 conf->saved_nics[conf->num_savednics].ifindex,
616 conf->saved_nics[conf->num_savednics].orig_name);
617 conf->num_savednics++;
618 }
619
620 return 0;
621}
622
623
23c53af9 624int lxc_spawn(struct lxc_handler *handler)
59eb99ba 625{
99a6af52 626 int failed_before_rename = 0;
ffe1e01a 627 const char *name = handler->name;
0c547523 628 int pinfd;
50e98013 629
3c22086f 630 if (lxc_sync_init(handler))
9d7f9e52 631 return -1;
0ad19a3f 632
d5088cf2 633 handler->clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
fae349da 634 if (!lxc_list_empty(&handler->conf->network)) {
82d5ae15 635
d5088cf2 636 handler->clone_flags |= CLONE_NEWNET;
0ad19a3f 637
19a26f82
MK
638 /* Find gateway addresses from the link device, which is
639 * no longer accessible inside the container. Do this
640 * before creating network interfaces, since goto
641 * out_delete_net does not work before lxc_clone. */
642 if (lxc_find_gateway_addresses(handler)) {
643 ERROR("failed to find gateway addresses");
644 lxc_sync_fini(handler);
645 return -1;
646 }
647
82d5ae15
DL
648 /* that should be done before the clone because we will
649 * fill the netdev index and use them in the child
650 */
e3b4c4c4 651 if (lxc_create_network(handler)) {
82d5ae15 652 ERROR("failed to create the network");
3c22086f 653 lxc_sync_fini(handler);
32e1c760 654 return -1;
82d5ae15
DL
655 }
656 }
657
7b35f3d6
SH
658 if (save_phys_nics(handler->conf)) {
659 ERROR("failed to save physical nic info");
660 goto out_abort;
661 }
662
0c547523
SH
663 /*
664 * if the rootfs is not a blockdev, prevent the container from
665 * marking it readonly.
666 */
667
668 pinfd = pin_rootfs(handler->conf->rootfs.path);
669 if (pinfd == -1) {
670 ERROR("failed to pin the container's rootfs");
671 goto out_abort;
672 }
673
0ad19a3f 674 /* Create a process in a new set of namespaces */
d5088cf2 675 handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
59eb99ba 676 if (handler->pid < 0) {
36eb9bde 677 SYSERROR("failed to fork into a new namespace");
7fef7a06 678 goto out_delete_net;
0ad19a3f 679 }
680
3c22086f
CLG
681 lxc_sync_fini_child(handler);
682
683 if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
99a6af52 684 failed_before_rename = 1;
0ad19a3f 685
36b86299 686 if (lxc_cgroup_create(name, handler->pid))
7fef7a06 687 goto out_delete_net;
218d4250 688
99a6af52
MN
689 if (failed_before_rename)
690 goto out_delete_net;
691
0ad19a3f 692 /* Create the network configuration */
d5088cf2 693 if (handler->clone_flags & CLONE_NEWNET) {
fae349da 694 if (lxc_assign_network(&handler->conf->network, handler->pid)) {
82d5ae15 695 ERROR("failed to create the configured network");
7fef7a06 696 goto out_delete_net;
82d5ae15 697 }
0ad19a3f 698 }
699
3c22086f
CLG
700 /* Tell the child to continue its initialization and wait for
701 * it to exec or return an error
702 */
703 if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
704 return -1;
0ad19a3f 705
cc28d0b0
SH
706 if (detect_shared_rootfs())
707 umount2(handler->conf->rootfs.mount, MNT_DETACH);
708
709 if (setup_cgroup(name, &handler->conf->cgroup)) {
710 ERROR("failed to setup the cgroups for '%s'", name);
711 goto out_delete_net;
712 }
713
23c53af9 714 if (handler->ops->post_start(handler, handler->data))
e6126dbe
MN
715 goto out_abort;
716
25c2aca5 717 if (lxc_set_state(name, handler, RUNNING)) {
59eb99ba
DL
718 ERROR("failed to set state to %s",
719 lxc_state2str(RUNNING));
720 goto out_abort;
3f21c114 721 }
22ebac19 722
3c22086f 723 lxc_sync_fini(handler);
0c547523
SH
724
725 if (pinfd >= 0)
726 close(pinfd);
727
e6126dbe 728 return 0;
1ac470c0 729
7fef7a06 730out_delete_net:
d5088cf2 731 if (handler->clone_flags & CLONE_NEWNET)
74a2b586 732 lxc_delete_network(handler);
59eb99ba
DL
733out_abort:
734 lxc_abort(name, handler);
3c22086f 735 lxc_sync_fini(handler);
b79fcd86 736 return -1;
59eb99ba 737}
0ad19a3f 738
ee70bf78
CLG
739int __lxc_start(const char *name, struct lxc_conf *conf,
740 struct lxc_operations* ops, void *data)
59eb99ba 741{
3a0f472d 742 struct lxc_handler *handler;
e043236e 743 int err = -1;
59eb99ba 744 int status;
80090207 745
fae349da 746 handler = lxc_init(name, conf);
3a0f472d 747 if (!handler) {
59eb99ba 748 ERROR("failed to initialize the container");
66aeffc7 749 return -1;
0ad19a3f 750 }
ee70bf78
CLG
751 handler->ops = ops;
752 handler->data = data;
e6126dbe 753
69182a31 754 if (must_drop_cap_sys_boot()) {
f51db2b3 755 DEBUG("Dropping cap_sys_boot\n");
69182a31
SH
756 } else {
757 DEBUG("Not dropping cap_sys_boot or watching utmp\n");
758 handler->conf->need_utmp_watch = 0;
759 }
760
23c53af9 761 err = lxc_spawn(handler);
59eb99ba 762 if (err) {
ee70bf78 763 ERROR("failed to spawn '%s'", name);
74a2b586 764 goto out_fini_nonet;
0ad19a3f 765 }
766
3a0f472d 767 err = lxc_poll(name, handler);
e043236e 768 if (err) {
59eb99ba
DL
769 ERROR("mainloop exited with an error");
770 goto out_abort;
771 }
0ad19a3f 772
3a0f472d 773 while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR)
1bc5cc8c 774 continue;
e043236e 775
8b004f07
SH
776 /*
777 * If the child process exited but was not signaled,
778 * it didn't call reboot. This should mean it was an
779 * lxc-execute which simply exited. In any case, treat
780 * it as a 'halt'
781 */
782 if (WIFSIGNALED(status)) {
783 switch(WTERMSIG(status)) {
784 case SIGINT: /* halt */
785 DEBUG("Container halting");
786 break;
787 case SIGHUP: /* reboot */
788 DEBUG("Container rebooting");
789 handler->conf->reboot = 1;
790 break;
791 default:
792 DEBUG("unknown exit status for init: %d\n", WTERMSIG(status));
793 break;
794 }
828695d9
SH
795 }
796
7b35f3d6
SH
797 lxc_rename_phys_nics_on_shutdown(handler->conf);
798
3a0f472d 799 err = lxc_error_set_and_log(handler->pid, status);
9d7f9e52 800out_fini:
74a2b586
JK
801 lxc_delete_network(handler);
802
803out_fini_nonet:
36b86299 804 lxc_cgroup_destroy(name);
3a0f472d 805 lxc_fini(name, handler);
0ad19a3f 806 return err;
807
59eb99ba 808out_abort:
3a0f472d 809 lxc_abort(name, handler);
9d7f9e52 810 goto out_fini;
0ad19a3f 811}
ee70bf78
CLG
812
/* Private data handed to the start/post_start operations of lxc_start(). */
struct start_args {
	char *const *argv;	/* argument vector, argv[0] is what gets exec'ed */
};
816
817static int start(struct lxc_handler *handler, void* data)
818{
819 struct start_args *arg = data;
820
821 NOTICE("exec'ing '%s'", arg->argv[0]);
822
823 execvp(arg->argv[0], arg->argv);
824 SYSERROR("failed to exec %s", arg->argv[0]);
825 return 0;
826}
827
828static int post_start(struct lxc_handler *handler, void* data)
829{
830 struct start_args *arg = data;
831
832 NOTICE("'%s' started with pid '%d'", arg->argv[0], handler->pid);
833 return 0;
834}
835
836static struct lxc_operations start_ops = {
837 .start = start,
838 .post_start = post_start
839};
840
841int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf)
842{
843 struct start_args start_arg = {
844 .argv = argv,
845 };
846
b119f362 847 if (lxc_check_inherited(conf, -1))
ee70bf78
CLG
848 return -1;
849
828695d9 850 conf->need_utmp_watch = 1;
ee70bf78
CLG
851 return __lxc_start(name, conf, &start_ops, &start_arg);
852}