]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/start.c
2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 #include "../config.h"
35 #include <sys/param.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
39 #include <sys/prctl.h>
40 #include <sys/capability.h>
45 #ifdef HAVE_SYS_SIGNALFD_H
46 # include <sys/signalfd.h>
48 # ifndef __NR_signalfd4
49 /* assume kernel headers are too old */
51 # define __NR_signalfd4 327
53 # define __NR_signalfd4 289
55 # define __NR_signalfd4 313
57 # define __NR_signalfd4 322
61 # ifndef __NR_signalfd
62 /* assume kernel headers are too old */
64 # define __NR_signalfd 321
66 # define __NR_signalfd 282
68 # define __NR_signalfd 305
70 # define __NR_signalfd 316
74 int signalfd(int fd
, const sigset_t
*mask
, int flags
)
78 retval
= syscall (__NR_signalfd4
, fd
, mask
, _NSIG
/ 8, flags
);
79 if (errno
== ENOSYS
&& flags
== 0)
80 retval
= syscall (__NR_signalfd
, fd
, mask
, _NSIG
/ 8);
85 #if !HAVE_DECL_PR_CAPBSET_DROP
86 #define PR_CAPBSET_DROP 24
96 lxc_log_define(lxc_start
, lxc
);
99 LXC_TTY_HANDLER(SIGINT
);
100 LXC_TTY_HANDLER(SIGQUIT
);
102 static int setup_sigchld_fd(sigset_t
*oldmask
)
107 if (sigprocmask(SIG_BLOCK
, NULL
, &mask
)) {
108 SYSERROR("failed to get mask signal");
112 if (sigaddset(&mask
, SIGCHLD
) || sigprocmask(SIG_BLOCK
, &mask
, oldmask
)) {
113 SYSERROR("failed to set mask signal");
117 fd
= signalfd(-1, &mask
, 0);
119 SYSERROR("failed to create the signal fd");
123 if (fcntl(fd
, F_SETFD
, FD_CLOEXEC
)) {
124 SYSERROR("failed to set sigfd to close-on-exec");
/*
 * Open the unix socket on which tty requests for container "name" are
 * served.
 *
 * The socket path is built as a leading NUL byte followed by the
 * container name (the Linux abstract-socket convention described in
 * unix(7); how lxc_af_unix_open() interprets the path is not visible
 * in this file).  The descriptor is marked close-on-exec.
 *
 * name  - container name, used as the socket name
 * ttyfd - receives the listening descriptor on success
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_tty_service(const char *name, int *ttyfd)
{
	struct sockaddr_un addr = { 0 };
	char *offset = &addr.sun_path[1];
	size_t len = strlen(name);
	int fd;

	/* One byte of sun_path is taken by the leading NUL and one by the
	 * terminator; an unchecked copy of a longer name would write past
	 * the end of addr. */
	if (len >= sizeof(addr.sun_path) - 1) {
		ERROR("container name '%s' is too long for the tty socket",
		      name);
		return -1;
	}

	memcpy(offset, name, len + 1);
	addr.sun_path[0] = '\0';

	fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
		SYSERROR("failed to close-on-exec flag");
		close(fd);
		return -1;
	}

	*ttyfd = fd;

	return 0;
}
156 static int sigchld_handler(int fd
, void *data
,
157 struct lxc_epoll_descr
*descr
)
161 waitpid(*pid
, NULL
, 0);
166 static int ttyclient_handler(int fd
, void *data
,
167 struct lxc_epoll_descr
*descr
)
170 struct lxc_tty_info
*tty_info
= data
;
172 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
174 if (tty_info
->pty_info
[i
].busy
!= fd
)
177 lxc_mainloop_del_handler(descr
, fd
);
178 tty_info
->pty_info
[i
].busy
= 0;
185 static int ttyservice_handler(int fd
, void *data
,
186 struct lxc_epoll_descr
*descr
)
188 int conn
, ttynum
, val
= 1, ret
= -1;
189 struct lxc_tty_info
*tty_info
= data
;
191 conn
= accept(fd
, NULL
, 0);
193 SYSERROR("failed to accept tty client");
197 if (setsockopt(conn
, SOL_SOCKET
, SO_PASSCRED
, &val
, sizeof(val
))) {
198 SYSERROR("failed to enable credential on socket");
202 if (lxc_af_unix_rcv_credential(conn
, &ttynum
, sizeof(ttynum
)))
205 if (ttynum
<= 0 || ttynum
> tty_info
->nbtty
)
208 /* fixup index array (eg. tty1 is index 0) */
211 if (tty_info
->pty_info
[ttynum
].busy
)
214 if (lxc_af_unix_send_fd(conn
, tty_info
->pty_info
[ttynum
].master
,
216 ERROR("failed to send tty to client");
220 if (lxc_mainloop_add_handler(descr
, conn
,
221 ttyclient_handler
, tty_info
)) {
222 ERROR("failed to add tty client handler");
226 tty_info
->pty_info
[ttynum
].busy
= conn
;
237 static int mainloop(const char *name
, pid_t pid
, int sigfd
,
238 const struct lxc_tty_info
*tty_info
)
240 int nfds
, ttyfd
= -1, ret
= -1;
241 struct lxc_epoll_descr descr
;
243 if (tty_info
->nbtty
&& setup_tty_service(name
, &ttyfd
)) {
244 ERROR("failed to create the tty service point");
248 /* sigfd + nb tty + tty service
249 * if tty is enabled */
250 nfds
= tty_info
->nbtty
+ 1 + tty_info
->nbtty
? 1 : 0;
252 if (lxc_mainloop_open(nfds
, &descr
)) {
253 ERROR("failed to create mainloop");
257 if (lxc_mainloop_add_handler(&descr
, sigfd
, sigchld_handler
, &pid
)) {
258 ERROR("failed to add handler for the signal");
259 goto out_mainloop_open
;
262 if (tty_info
->nbtty
) {
263 if (lxc_mainloop_add_handler(&descr
, ttyfd
,
266 ERROR("failed to add handler for the tty");
267 goto out_mainloop_open
;
271 ret
= lxc_mainloop(&descr
);
277 lxc_mainloop_close(&descr
);
285 int lxc_start(const char *name
, char *argv
[])
287 struct lxc_tty_info tty_info
= { 0 };
289 char init
[MAXPATHLEN
];
290 char tty
[MAXPATHLEN
];
292 int fd
, sigfd
, lock
, sv
[2], sync
= 0, err
= -LXC_ERROR_INTERNAL
;
296 lock
= lxc_get_lock(name
);
300 /* Begin the set the state to STARTING*/
301 if (lxc_setstate(name
, STARTING
)) {
302 ERROR("failed to set state '%s'",
303 lxc_state2str(STARTING
));
307 /* If we are not attached to a tty, disable it */
308 if (ttyname_r(0, tty
, sizeof(tty
)))
311 if (lxc_create_tty(name
, &tty_info
)) {
312 ERROR("failed to create the ttys");
316 /* the signal fd has to be created before forking otherwise
317 * if the child process exits before we setup the signal fd,
318 * the event will be lost and the command will be stuck */
319 sigfd
= setup_sigchld_fd(&oldmask
);
321 ERROR("failed to set sigchild fd handler");
325 /* Synchro socketpair */
326 if (socketpair(AF_LOCAL
, SOCK_STREAM
, 0, sv
)) {
327 SYSERROR("failed to create communication socketpair");
331 /* Avoid signals from terminal */
332 LXC_TTY_ADD_HANDLER(SIGINT
);
333 LXC_TTY_ADD_HANDLER(SIGQUIT
);
335 clone_flags
= CLONE_NEWPID
|CLONE_NEWIPC
|CLONE_NEWNS
;
336 if (conf_has_utsname(name
))
337 clone_flags
|= CLONE_NEWUTS
;
338 if (conf_has_network(name
))
339 clone_flags
|= CLONE_NEWNET
;
341 /* Create a process in a new set of namespaces */
342 pid
= fork_ns(clone_flags
);
344 SYSERROR("failed to fork into a new namespace");
350 if (sigprocmask(SIG_SETMASK
, &oldmask
, NULL
)) {
351 SYSERROR("failed to set sigprocmask");
357 /* Be sure we don't inherit this after the exec */
358 fcntl(sv
[0], F_SETFD
, FD_CLOEXEC
);
360 /* Tell our father he can begin to configure the container */
361 if (write(sv
[0], &sync
, sizeof(sync
)) < 0) {
362 SYSERROR("failed to write socket");
366 /* Wait for the father to finish the configuration */
367 if (read(sv
[0], &sync
, sizeof(sync
)) < 0) {
368 SYSERROR("failed to read socket");
372 /* Setup the container, ip, names, utsname, ... */
373 err
= lxc_setup(name
, tty
, &tty_info
);
375 ERROR("failed to setup the container");
376 if (write(sv
[0], &err
, sizeof(err
)) < 0)
377 SYSERROR("failed to write the socket");
381 if (prctl(PR_CAPBSET_DROP
, CAP_SYS_BOOT
, 0, 0, 0)) {
382 SYSERROR("failed to remove CAP_SYS_BOOT capability");
386 execvp(argv
[0], argv
);
387 SYSERROR("failed to exec %s", argv
[0]);
389 err
= LXC_ERROR_WRONG_COMMAND
;
390 /* If the exec fails, tell that to our father */
391 if (write(sv
[0], &err
, sizeof(err
)) < 0)
392 SYSERROR("failed to write the socket");
400 /* Wait for the child to be ready */
401 if (read(sv
[1], &sync
, sizeof(sync
)) < 0) {
402 SYSERROR("failed to read the socket");
406 if (lxc_link_nsgroup(name
, pid
))
407 WARN("cgroupfs not found: cgroup disabled");
409 /* Create the network configuration */
410 if (clone_flags
& CLONE_NEWNET
&& conf_create_network(name
, pid
)) {
411 ERROR("failed to create the configured network");
412 goto err_create_network
;
415 /* Tell the child to continue its initialization */
416 if (write(sv
[1], &sync
, sizeof(sync
)) < 0) {
417 SYSERROR("failed to write the socket");
421 /* Wait for the child to exec or returning an error */
422 err
= read(sv
[1], &sync
, sizeof(sync
));
424 ERROR("failed to read the socket");
430 waitpid(pid
, NULL
, 0);
431 goto err_child_failed
;
434 if (!asprintf(&val
, "%d\n", pid
)) {
435 SYSERROR("failed to allocate memory");
436 goto err_child_failed
;
439 snprintf(init
, MAXPATHLEN
, LXCPATH
"/%s/init", name
);
441 fd
= open(init
, O_WRONLY
|O_CREAT
|O_TRUNC
, S_IRUSR
|S_IWUSR
);
443 SYSERROR("failed to open '%s'", init
);
447 if (write(fd
, val
, strlen(val
)) < 0) {
448 SYSERROR("failed to write the init pid");
454 if (lxc_setstate(name
, RUNNING
)) {
455 ERROR("failed to set state to %s",
456 lxc_state2str(RUNNING
));
457 goto err_state_failed
;
460 if (mainloop(name
, pid
, sigfd
, &tty_info
)) {
461 ERROR("mainloop exited with an error");
462 goto err_mailoop_failed
;
465 if (lxc_setstate(name
, STOPPING
))
466 ERROR("failed to set state %s", lxc_state2str(STOPPING
));
468 if (clone_flags
& CLONE_NEWNET
&& conf_destroy_network(name
))
469 ERROR("failed to destroy the network");
473 if (lxc_setstate(name
, STOPPED
))
474 ERROR("failed to set state %s", lxc_state2str(STOPPED
));
476 lxc_delete_tty(&tty_info
);
477 lxc_unlink_nsgroup(name
);
481 LXC_TTY_DEL_HANDLER(SIGQUIT
);
482 LXC_TTY_DEL_HANDLER(SIGINT
);
493 if (clone_flags
& CLONE_NEWNET
)
494 conf_destroy_network(name
);
498 if (lxc_setstate(name
, ABORTING
))
499 ERROR("failed to set state %s", lxc_state2str(STOPPED
));