]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/start.c
055d381f644644a24af763b341986de1a05135b6
2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "../config.h"
35 #include <namespace.h>
36 #include <sys/param.h>
38 #include <sys/mount.h>
39 #include <sys/types.h>
40 #include <sys/prctl.h>
41 #include <sys/types.h>
42 #include <sys/capability.h>
47 #ifdef HAVE_SYS_SIGNALFD_H
48 # include <sys/signalfd.h>
50 # ifndef __NR_signalfd4
51 /* assume kernel headers are too old */
53 # define __NR_signalfd4 327
55 # define __NR_signalfd4 289
57 # define __NR_signalfd4 313
59 # define __NR_signalfd4 322
63 # ifndef __NR_signalfd
64 /* assume kernel headers are too old */
66 # define __NR_signalfd 321
68 # define __NR_signalfd 282
70 # define __NR_signalfd 305
72 # define __NR_signalfd 316
/*
 * Fallback signalfd() for C libraries that do not provide one.
 *
 * fd    - existing signalfd to update, or -1 to create a new one
 * mask  - set of signals to be delivered through the descriptor
 * flags - signalfd4() flags; the legacy syscall has no flags argument,
 *         so it is only used as a fallback when flags is 0
 *
 * Returns the file descriptor on success, -1 on error with errno set by
 * the failing syscall.
 */
int signalfd(int fd, const sigset_t *mask, int flags)
{
	int retval;

	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);

	/*
	 * errno is only meaningful when the call failed: a successful
	 * syscall leaves it untouched, so a stale ENOSYS from an earlier,
	 * unrelated failure must not trigger the fallback (that would
	 * create a second descriptor and leak the first one).
	 */
	if (retval < 0 && errno == ENOSYS && flags == 0)
		retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);

	return retval;
}
87 #if !HAVE_DECL_PR_CAPBSET_DROP
88 #define PR_CAPBSET_DROP 24
98 lxc_log_define(lxc_start
, lxc
);
100 LXC_TTY_HANDLER(SIGINT
);
101 LXC_TTY_HANDLER(SIGQUIT
);
103 static int setup_sigchld_fd(sigset_t
*oldmask
)
108 if (sigprocmask(SIG_BLOCK
, NULL
, &mask
)) {
109 SYSERROR("failed to get mask signal");
113 if (sigaddset(&mask
, SIGCHLD
) || sigprocmask(SIG_BLOCK
, &mask
, oldmask
)) {
114 SYSERROR("failed to set mask signal");
118 fd
= signalfd(-1, &mask
, 0);
120 SYSERROR("failed to create the signal fd");
124 if (fcntl(fd
, F_SETFD
, FD_CLOEXEC
)) {
125 SYSERROR("failed to set sigfd to close-on-exec");
130 DEBUG("sigchild handler set");
/*
 * Open the unix socket on which tty requests for container 'name' are
 * received.
 *
 * The socket address is the container name preceded by a NUL byte in
 * sun_path (Linux abstract-namespace style; how lxc_af_unix_open()
 * interprets the address is not visible here).
 *
 * name  - container name, used as the socket address
 * ttyfd - out: the listening descriptor, marked close-on-exec
 *
 * Returns 0 on success, -1 on failure (name too long, socket creation
 * failure, or fcntl failure).
 */
static int setup_tty_service(const char *name, int *ttyfd)
{
	int fd;
	struct sockaddr_un addr = { 0 };
	char *offset = &addr.sun_path[1];
	size_t len = strlen(name);

	/*
	 * sun_path is a fixed-size array and its first byte is reserved
	 * for the leading NUL, so the name and its terminator must fit in
	 * the remaining sizeof(sun_path) - 1 bytes. Refuse longer names
	 * instead of overflowing the buffer.
	 */
	if (len >= sizeof(addr.sun_path) - 1) {
		ERROR("container name '%s' is too long for the tty service",
		      name);
		return -1;
	}

	memcpy(offset, name, len + 1);
	addr.sun_path[0] = '\0';

	fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
		SYSERROR("failed to close-on-exec flag");
		close(fd);
		return -1;
	}

	*ttyfd = fd;

	return 0;
}
159 static int sigchld_handler(int fd
, void *data
,
160 struct lxc_epoll_descr
*descr
)
162 DEBUG("child exited");
167 static int ttyclient_handler(int fd
, void *data
,
168 struct lxc_epoll_descr
*descr
)
171 struct lxc_tty_info
*tty_info
= data
;
173 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
175 if (tty_info
->pty_info
[i
].busy
!= fd
)
178 lxc_mainloop_del_handler(descr
, fd
);
179 tty_info
->pty_info
[i
].busy
= 0;
186 static int ttyservice_handler(int fd
, void *data
,
187 struct lxc_epoll_descr
*descr
)
189 int conn
, ttynum
, val
= 1, ret
= -1;
190 struct lxc_tty_info
*tty_info
= data
;
192 conn
= accept(fd
, NULL
, 0);
194 SYSERROR("failed to accept tty client");
198 if (setsockopt(conn
, SOL_SOCKET
, SO_PASSCRED
, &val
, sizeof(val
))) {
199 SYSERROR("failed to enable credential on socket");
203 if (lxc_af_unix_rcv_credential(conn
, &ttynum
, sizeof(ttynum
)))
207 if (ttynum
> tty_info
->nbtty
)
210 if (tty_info
->pty_info
[ttynum
- 1].busy
)
216 /* fixup index tty1 => [0] */
218 ttynum
<= tty_info
->nbtty
&& tty_info
->pty_info
[ttynum
- 1].busy
;
221 /* we didn't find any available slot for tty */
222 if (ttynum
> tty_info
->nbtty
)
226 if (lxc_af_unix_send_fd(conn
, tty_info
->pty_info
[ttynum
- 1].master
,
227 &ttynum
, sizeof(ttynum
)) < 0) {
228 ERROR("failed to send tty to client");
232 if (lxc_mainloop_add_handler(descr
, conn
,
233 ttyclient_handler
, tty_info
)) {
234 ERROR("failed to add tty client handler");
238 tty_info
->pty_info
[ttynum
- 1].busy
= conn
;
248 int lxc_poll(const char *name
, struct lxc_handler
*handler
)
250 int sigfd
= handler
->sigfd
;
251 int pid
= handler
->pid
;
252 const struct lxc_tty_info
*tty_info
= &handler
->tty_info
;
254 int nfds
, ttyfd
= -1, ret
= -1;
255 struct lxc_epoll_descr descr
;
257 if (tty_info
->nbtty
&& setup_tty_service(name
, &ttyfd
)) {
258 ERROR("failed to create the tty service point");
262 /* sigfd + nb tty + tty service
263 * if tty is enabled */
264 nfds
= tty_info
->nbtty
+ 1 + tty_info
->nbtty
? 1 : 0;
266 if (lxc_mainloop_open(nfds
, &descr
)) {
267 ERROR("failed to create mainloop");
271 if (lxc_mainloop_add_handler(&descr
, sigfd
, sigchld_handler
, &pid
)) {
272 ERROR("failed to add handler for the signal");
273 goto out_mainloop_open
;
276 if (tty_info
->nbtty
) {
277 if (lxc_mainloop_add_handler(&descr
, ttyfd
,
280 ERROR("failed to add handler for the tty");
281 goto out_mainloop_open
;
285 ret
= lxc_mainloop(&descr
);
291 lxc_mainloop_close(&descr
);
299 static int save_init_pid(const char *name
, pid_t pid
)
301 char init
[MAXPATHLEN
];
305 snprintf(init
, MAXPATHLEN
, LXCPATH
"/%s/init", name
);
307 if (!asprintf(&val
, "%d\n", pid
)) {
308 SYSERROR("failed to allocate memory");
312 fd
= open(init
, O_WRONLY
|O_CREAT
|O_TRUNC
, S_IRUSR
|S_IWUSR
);
314 SYSERROR("failed to open '%s'", init
);
318 if (write(fd
, val
, strlen(val
)) < 0) {
319 SYSERROR("failed to write the init pid");
333 static void remove_init_pid(const char *name
, pid_t pid
)
335 char init
[MAXPATHLEN
];
337 snprintf(init
, MAXPATHLEN
, LXCPATH
"/%s/init", name
);
/*
 * Resolve the path behind an open file descriptor by reading the
 * /proc/self/fd/<fd> symbolic link.
 *
 * fd   - descriptor to look up
 * name - out: buffer receiving the NUL-terminated path
 * size - size of 'name' in bytes, terminator included
 *
 * A path longer than size - 1 bytes is truncated.
 *
 * Returns 0 on success, -1 when the link cannot be read, is empty, or
 * 'size' is 0.
 */
static int fdname(int fd, char *name, size_t size)
{
	char path[MAXPATHLEN];
	ssize_t len;

	/* no room even for the terminator */
	if (size == 0)
		return -1;

	snprintf(path, MAXPATHLEN, "/proc/self/fd/%d", fd);

	/*
	 * readlink() does not NUL-terminate its result, so keep one byte
	 * in reserve and terminate the string ourselves.
	 */
	len = readlink(path, name, size - 1);
	if (len <= 0)
		return -1;

	name[len] = '\0';

	return 0;
}
355 static int console_init(char *console
, size_t size
)
360 for (i
= 0; i
< 3; i
++) {
364 if (ttyname_r(i
, console
, size
)) {
365 SYSERROR("failed to retrieve tty name");
372 if (!fstat(0, &stat
)) {
373 if (S_ISREG(stat
.st_mode
) || S_ISCHR(stat
.st_mode
) ||
374 S_ISFIFO(stat
.st_mode
) || S_ISLNK(stat
.st_mode
))
375 return fdname(0, console
, size
);
380 DEBUG("console initialized");
385 struct lxc_handler
*lxc_init(const char *name
)
387 struct lxc_handler
*handler
;
389 handler
= malloc(sizeof(*handler
));
393 memset(handler
, 0, sizeof(*handler
));
395 handler
->lock
= lxc_get_lock(name
);
396 if (handler
->lock
< 0)
399 /* Begin the set the state to STARTING*/
400 if (lxc_setstate(name
, STARTING
)) {
401 ERROR("failed to set state '%s'", lxc_state2str(STARTING
));
405 if (console_init(handler
->tty
, sizeof(handler
->tty
))) {
406 ERROR("failed to initialize the console");
410 if (lxc_create_tty(name
, &handler
->tty_info
)) {
411 ERROR("failed to create the ttys");
415 /* the signal fd has to be created before forking otherwise
416 * if the child process exits before we setup the signal fd,
417 * the event will be lost and the command will be stuck */
418 handler
->sigfd
= setup_sigchld_fd(&handler
->oldmask
);
419 if (handler
->sigfd
< 0) {
420 ERROR("failed to set sigchild fd handler");
424 /* Avoid signals from terminal */
425 LXC_TTY_ADD_HANDLER(SIGINT
);
426 LXC_TTY_ADD_HANDLER(SIGQUIT
);
430 INFO("'%s' is initialized", name
);
435 lxc_delete_tty(&handler
->tty_info
);
437 lxc_setstate(name
, ABORTING
);
439 lxc_put_lock(handler
->lock
);
446 void lxc_fini(const char *name
, struct lxc_handler
*handler
)
448 /* The STOPPING state is there for future cleanup code
449 * which can take awhile
451 lxc_setstate(name
, STOPPING
);
452 lxc_setstate(name
, STOPPED
);
453 lxc_unlink_nsgroup(name
);
456 remove_init_pid(name
, handler
->pid
);
457 lxc_delete_tty(&handler
->tty_info
);
458 lxc_put_lock(handler
->lock
);
462 LXC_TTY_DEL_HANDLER(SIGQUIT
);
463 LXC_TTY_DEL_HANDLER(SIGINT
);
466 void lxc_abort(const char *name
, struct lxc_handler
*handler
)
468 lxc_setstate(name
, ABORTING
);
469 kill(handler
->pid
, SIGKILL
);
475 struct lxc_handler
*handler
;
479 static int do_start(void *arg
)
481 struct start_arg
*start_arg
= arg
;
482 struct lxc_handler
*handler
= start_arg
->handler
;
483 const char *name
= start_arg
->name
;
484 char *const *argv
= start_arg
->argv
;
485 int *sv
= start_arg
->sv
;
488 if (sigprocmask(SIG_SETMASK
, &handler
->oldmask
, NULL
)) {
489 SYSERROR("failed to set sigprocmask");
495 /* Be sure we don't inherit this after the exec */
496 fcntl(sv
[0], F_SETFD
, FD_CLOEXEC
);
498 /* Tell our father he can begin to configure the container */
499 if (write(sv
[0], &sync
, sizeof(sync
)) < 0) {
500 SYSERROR("failed to write socket");
504 /* Wait for the father to finish the configuration */
505 if (read(sv
[0], &sync
, sizeof(sync
)) < 0) {
506 SYSERROR("failed to read socket");
510 /* Setup the container, ip, names, utsname, ... */
511 if (lxc_setup(name
, handler
->tty
, &handler
->tty_info
)) {
512 ERROR("failed to setup the container");
513 goto out_warn_father
;
516 if (prctl(PR_CAPBSET_DROP
, CAP_SYS_BOOT
, 0, 0, 0)) {
517 SYSERROR("failed to remove CAP_SYS_BOOT capability");
521 NOTICE("exec'ing '%s'", argv
[0]);
523 execvp(argv
[0], argv
);
524 SYSERROR("failed to exec %s", argv
[0]);
527 /* If the exec fails, tell that to our father */
528 if (write(sv
[0], &err
, sizeof(err
)) < 0)
529 SYSERROR("failed to write the socket");
534 int lxc_spawn(const char *name
, struct lxc_handler
*handler
, char *const argv
[])
540 struct start_arg start_arg
= {
547 /* Synchro socketpair */
548 if (socketpair(AF_LOCAL
, SOCK_STREAM
, 0, sv
)) {
549 SYSERROR("failed to create communication socketpair");
553 clone_flags
= CLONE_NEWUTS
|CLONE_NEWPID
|CLONE_NEWIPC
|CLONE_NEWNS
;
554 if (conf_has_network(name
))
555 clone_flags
|= CLONE_NEWNET
;
557 /* Create a process in a new set of namespaces */
558 handler
->pid
= lxc_clone(do_start
, &start_arg
, clone_flags
);
559 if (handler
->pid
< 0) {
560 SYSERROR("failed to fork into a new namespace");
566 /* Wait for the child to be ready */
567 if (read(sv
[1], &sync
, sizeof(sync
)) < 0) {
568 SYSERROR("failed to read the socket");
572 if (lxc_rename_nsgroup(name
, handler
->pid
) || lxc_link_nsgroup(name
))
575 /* Create the network configuration */
576 if (clone_flags
& CLONE_NEWNET
&&
577 conf_create_network(name
, handler
->pid
)) {
578 ERROR("failed to create the configured network");
582 /* Tell the child to continue its initialization */
583 if (write(sv
[1], &sync
, sizeof(sync
)) < 0) {
584 SYSERROR("failed to write the socket");
588 /* Wait for the child to exec or returning an error */
589 if (read(sv
[1], &sync
, sizeof(sync
)) < 0) {
590 ERROR("failed to read the socket");
594 if (save_init_pid(name
, handler
->pid
)) {
595 ERROR("failed to save the init pid info");
599 if (lxc_setstate(name
, RUNNING
)) {
600 ERROR("failed to set state to %s",
601 lxc_state2str(RUNNING
));
607 NOTICE("'%s' started with pid '%d'", argv
[0], handler
->pid
);
616 lxc_abort(name
, handler
);
620 int lxc_start(const char *name
, char *const argv
[])
622 struct lxc_handler
*handler
;
626 handler
= lxc_init(name
);
628 ERROR("failed to initialize the container");
632 err
= lxc_spawn(name
, handler
, argv
);
634 ERROR("failed to spawn '%s'", argv
[0]);
638 err
= lxc_close_all_inherited_fd();
640 ERROR("unable to close inherited fds");
644 err
= lxc_poll(name
, handler
);
646 ERROR("mainloop exited with an error");
650 while (waitpid(handler
->pid
, &status
, 0) < 0 && errno
== EINTR
)
653 err
= lxc_error_set_and_log(handler
->pid
, status
);
655 lxc_fini(name
, handler
);
659 lxc_abort(name
, handler
);