]> git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/start.c
encapsulate the lxc code
[mirror_lxc.git] / src / lxc / start.c
index c539fa94e8c278ec30f22c9546d9ecebd503740e..2b75c384b82ba0db5c35c02eee38675ef98a67c4 100644 (file)
@@ -21,7 +21,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
-#define _GNU_SOURCE
+#include "../config.h"
 #include <stdio.h>
 #undef _GNU_SOURCE
 #include <string.h>
@@ -30,6 +30,8 @@
 #include <errno.h>
 #include <unistd.h>
 #include <signal.h>
+#include <fcntl.h>
+#include <termios.h>
 #include <sys/param.h>
 #include <sys/file.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sys/capability.h>
 #include <sys/wait.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+
+#ifdef HAVE_SYS_SIGNALFD_H 
+#  include <sys/signalfd.h>
+#else
+#  ifndef __NR_signalfd4
+/* assume kernel headers are too old */
+#    if __i386__
+#      define __NR_signalfd4 327
+#    elif __x86_64__
+#      define __NR_signalfd4 289
+#    elif __powerpc__
+#      define __NR_signalfd4 313
+#    elif __s390x__
+#      define __NR_signalfd4 322
+#    endif
+#endif
+
+#  ifndef __NR_signalfd
+/* assume kernel headers are too old */
+#    if __i386__
+#      define __NR_signalfd 321
+#    elif __x86_64__
+#      define __NR_signalfd 282
+#    elif __powerpc__
+#      define __NR_signalfd 305
+#    elif __s390x__
+#      define __NR_signalfd 316
+#    endif
+#endif
+
+int signalfd(int fd, const sigset_t *mask, int flags)
+{
+       int retval;
+
+       retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags);
+       if (errno == ENOSYS && flags == 0)
+               retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8);
+       return retval;
+}
+#endif
+
+#if !HAVE_DECL_PR_CAPBSET_DROP
+#define PR_CAPBSET_DROP 24
+#endif
 
 #include "error.h"
+#include "af_unix.h"
+#include "mainloop.h"
 
 #include <lxc/lxc.h>
+#include <lxc/log.h>
+
+lxc_log_define(lxc_start, lxc);
 
 LXC_TTY_HANDLER(SIGINT);
 LXC_TTY_HANDLER(SIGQUIT);
 
-int lxc_start(const char *name, char *argv[])
+struct lxc_handler {
+       int sigfd;
+       int lock;
+       pid_t pid;
+       char tty[MAXPATHLEN];
+       sigset_t oldmask;
+       struct lxc_tty_info tty_info;
+};
+
+static int setup_sigchld_fd(sigset_t *oldmask)
+{
+       sigset_t mask;
+       int fd;
+
+       if (sigprocmask(SIG_BLOCK, NULL, &mask)) {
+               SYSERROR("failed to get mask signal");
+               return -1;
+       }
+
+       if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) {
+               SYSERROR("failed to set mask signal");
+               return -1;
+       }
+
+       fd = signalfd(-1, &mask, 0);
+       if (fd < 0) {
+               SYSERROR("failed to create the signal fd");
+               return -1;
+       }
+
+       if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
+               SYSERROR("failed to set sigfd to close-on-exec");
+               close(fd);
+               return -1;
+       }
+
+       return fd;
+}
+
+static int setup_tty_service(const char *name, int *ttyfd)
+{
+       int fd;
+       struct sockaddr_un addr = { 0 };
+       char *offset = &addr.sun_path[1];
+       
+       strcpy(offset, name);
+       addr.sun_path[0] = '\0';
+
+       fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, 0);
+       if (fd < 0)
+               return -1;
+
+       if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
+               SYSERROR("failed to close-on-exec flag");
+               close(fd);
+               return -1;
+       }
+
+       *ttyfd = fd;
+
+       return 0;
+}
+
+static int sigchld_handler(int fd, void *data, 
+                          struct lxc_epoll_descr *descr)
+{
+       return 1;
+}
+
+static int ttyclient_handler(int fd, void *data,
+                            struct lxc_epoll_descr *descr)
+{
+       int i;
+       struct lxc_tty_info *tty_info = data;
+
+       for (i = 0; i < tty_info->nbtty; i++) {
+
+               if (tty_info->pty_info[i].busy != fd)
+                       continue;
+
+               lxc_mainloop_del_handler(descr, fd);
+               tty_info->pty_info[i].busy = 0;
+               close(fd);
+       }
+
+       return 0;
+}
+
+static int ttyservice_handler(int fd, void *data,
+                             struct lxc_epoll_descr *descr)
+{
+       int conn, ttynum, val = 1, ret = -1;
+       struct lxc_tty_info *tty_info = data;
+       
+       conn = accept(fd, NULL, 0);
+       if (conn < 0) {
+               SYSERROR("failed to accept tty client");
+               return -1;
+       }
+       
+       if (setsockopt(conn, SOL_SOCKET, SO_PASSCRED, &val, sizeof(val))) {
+               SYSERROR("failed to enable credential on socket");
+               goto out_close;
+       }
+
+       if (lxc_af_unix_rcv_credential(conn, &ttynum, sizeof(ttynum)))
+               goto out_close;
+
+       if (ttynum <= 0 || ttynum > tty_info->nbtty)
+               goto out_close;
+
+       /* fixup index array (eg. tty1 is index 0) */
+       ttynum--;
+
+       if (tty_info->pty_info[ttynum].busy)
+               goto out_close;
+
+       if (lxc_af_unix_send_fd(conn, tty_info->pty_info[ttynum].master, 
+                               NULL, 0) < 0) {
+               ERROR("failed to send tty to client");
+               goto out_close;
+       }
+
+       if (lxc_mainloop_add_handler(descr, conn, 
+                                    ttyclient_handler, tty_info)) {
+               ERROR("failed to add tty client handler");
+               goto out_close;
+       }
+
+       tty_info->pty_info[ttynum].busy = conn;
+
+       ret = 0;
+
+out:
+       return ret;
+out_close:
+       close(conn);
+       goto out;
+}
+
+static int lxc_poll(const char *name, struct lxc_handler *handler)
+{
+       int sigfd = handler->sigfd;
+       int pid = handler->pid;
+       const struct lxc_tty_info *tty_info = &handler->tty_info;
+
+       int nfds, ttyfd = -1, ret = -1;
+       struct lxc_epoll_descr descr;
+
+       if (tty_info->nbtty && setup_tty_service(name, &ttyfd)) {
+               ERROR("failed to create the tty service point");
+               goto out_sigfd;
+       }
+
+       /* sigfd + nb tty + tty service 
+        * if tty is enabled */
+       nfds = tty_info->nbtty + 1 + tty_info->nbtty ? 1 : 0;
+
+       if (lxc_mainloop_open(nfds, &descr)) {
+               ERROR("failed to create mainloop");
+               goto out_ttyfd;
+       }
+
+       if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) {
+               ERROR("failed to add handler for the signal");
+               goto out_mainloop_open;
+       }
+
+       if (tty_info->nbtty) {
+               if (lxc_mainloop_add_handler(&descr, ttyfd, 
+                                            ttyservice_handler, 
+                                            (void *)tty_info)) {
+                       ERROR("failed to add handler for the tty");
+                       goto out_mainloop_open;
+               }
+       }
+
+       ret = lxc_mainloop(&descr);
+
+out:
+       return ret;
+
+out_mainloop_open:
+       lxc_mainloop_close(&descr);
+out_ttyfd:
+       close(ttyfd);
+out_sigfd:
+       close(sigfd);
+       goto out;
+}
+
+static int save_init_pid(const char *name, pid_t pid)
 {
        char init[MAXPATHLEN];
-       char tty[MAXPATHLEN];
-       char *val = NULL;
-       int fd, lock, sv[2], sync = 0, err = -LXC_ERROR_INTERNAL;
-       pid_t pid;
-       int clone_flags;
+       char *val;
+       int fd, err = -1;
 
-       lock = lxc_get_lock(name);
-       if (lock < 0) {
-               if (lock == -EWOULDBLOCK)
-                       return -LXC_ERROR_BUSY;
-               if (lock == -ENOENT)
-                       return -LXC_ERROR_NOT_FOUND;
-               return -LXC_ERROR_LOCK;
+       snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
+
+       if (!asprintf(&val, "%d\n", pid)) {
+               SYSERROR("failed to allocate memory");
+               goto out;
        }
 
+       fd = open(init, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
+       if (fd < 0) {
+               SYSERROR("failed to open '%s'", init);
+               goto out_free;
+       }
+
+       if (write(fd, val, strlen(val)) < 0) {
+               SYSERROR("failed to write the init pid");
+               goto out_close;
+       }
+
+       err = 0;
+
+out_close:
+       close(fd);
+out_free:
+       free(val);
+out:
+       return err;
+}
+
+static void remove_init_pid(const char *name, pid_t pid)
+{
+       char init[MAXPATHLEN];
+
+       snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
+       unlink(init);
+}
+
+static int lxc_init(const char *name, struct lxc_handler *handler)
+{
+       int err = -1;
+
+       memset(handler, 0, sizeof(*handler));
+
+       handler->lock = lxc_get_lock(name);
+       if (handler->lock < 0)
+               goto out;
+
        /* Begin the set the state to STARTING*/
        if (lxc_setstate(name, STARTING)) {
-               lxc_log_error("failed to set state '%s'", 
-                             lxc_state2str(STARTING));
-               goto out;
+               ERROR("failed to set state '%s'", lxc_state2str(STARTING));
+               goto out_put_lock;
        }
 
        /* If we are not attached to a tty, disable it */
-       if (ttyname_r(0, tty, sizeof(tty))) 
-               tty[0] = '\0';
+       if (ttyname_r(0, handler->tty, sizeof(handler->tty)))
+               handler->tty[0] = '\0';
 
-       /* Synchro socketpair */
-       if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
-               lxc_log_syserror("failed to create communication socketpair");
-               goto out;
+       if (lxc_create_tty(name, &handler->tty_info)) {
+               ERROR("failed to create the ttys");
+               goto out_aborting;
+       }
+
+       /* the signal fd has to be created before forking otherwise
+        * if the child process exits before we setup the signal fd,
+        * the event will be lost and the command will be stuck */
+       handler->sigfd = setup_sigchld_fd(&handler->oldmask);
+       if (handler->sigfd < 0) {
+               ERROR("failed to set sigchild fd handler");
+               goto out_delete_tty;
        }
 
        /* Avoid signals from terminal */
        LXC_TTY_ADD_HANDLER(SIGINT);
        LXC_TTY_ADD_HANDLER(SIGQUIT);
 
+       err = 0;
+out:
+       return err;
+
+out_delete_tty:
+       lxc_delete_tty(&handler->tty_info);
+out_aborting:
+       lxc_setstate(name, ABORTING);
+out_put_lock:
+       lxc_put_lock(handler->lock);
+       goto out;
+}
+
+static void lxc_fini(const char *name, struct lxc_handler *handler)
+{
+       /* The STOPPING state is there for future cleanup code
+        * which can take awhile
+        */
+       lxc_setstate(name, STOPPING);
+
+       lxc_setstate(name, STOPPED);
+
+       remove_init_pid(name, handler->pid);
+
+       lxc_delete_tty(&handler->tty_info);
+
+       lxc_unlink_nsgroup(name);
+
+       lxc_put_lock(handler->lock);
+
+       LXC_TTY_DEL_HANDLER(SIGQUIT);
+       LXC_TTY_DEL_HANDLER(SIGINT);
+}
+
+static void lxc_abort(const char *name, struct lxc_handler *handler)
+{
+       lxc_setstate(name, ABORTING);
+       kill(handler->pid, SIGKILL);
+}
+
+static int lxc_spawn(const char *name, struct lxc_handler *handler, char *argv[])
+{
+       int sv[2];
+       int clone_flags;
+       int err = -1, sync;
+
+       /* Synchro socketpair */
+       if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
+               SYSERROR("failed to create communication socketpair");
+               goto out;
+       }
+
        clone_flags = CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
        if (conf_has_utsname(name))
                clone_flags |= CLONE_NEWUTS;
@@ -91,13 +428,18 @@ int lxc_start(const char *name, char *argv[])
                clone_flags |= CLONE_NEWNET;
 
        /* Create a process in a new set of namespaces */
-       pid = fork_ns(clone_flags);
-       if (pid < 0) {
-               lxc_log_syserror("failed to fork into a new namespace");
-               goto err_fork_ns;
+       handler->pid = fork_ns(clone_flags);
+       if (handler->pid < 0) {
+               SYSERROR("failed to fork into a new namespace");
+               goto out_close;
        }
 
-       if (!pid) {
+       if (!handler->pid) {
+
+               if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
+                       SYSERROR("failed to set sigprocmask");
+                       return -1;
+               }
 
                close(sv[1]);
 
@@ -106,37 +448,37 @@ int lxc_start(const char *name, char *argv[])
                
                /* Tell our father he can begin to configure the container */
                if (write(sv[0], &sync, sizeof(sync)) < 0) {
-                       lxc_log_syserror("failed to write socket");
+                       SYSERROR("failed to write socket");
                        goto out_child;
                }
 
                /* Wait for the father to finish the configuration */
                if (read(sv[0], &sync, sizeof(sync)) < 0) {
-                       lxc_log_syserror("failed to read socket");
+                       SYSERROR("failed to read socket");
                        goto out_child;
                }
 
                /* Setup the container, ip, names, utsname, ... */
-               err = lxc_setup(name, tty);
+               err = lxc_setup(name, handler->tty, &handler->tty_info);
                if (err) {
-                       lxc_log_error("failed to setup the container");
+                       ERROR("failed to setup the container");
                        if (write(sv[0], &err, sizeof(err)) < 0)
-                               lxc_log_syserror("failed to write the socket");
+                               SYSERROR("failed to write the socket");
                        goto out_child;
                }
 
                if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
-                       lxc_log_syserror("failed to remove CAP_SYS_BOOT capability");
+                       SYSERROR("failed to remove CAP_SYS_BOOT capability");
                        goto out_child;
                }
 
                execvp(argv[0], argv);
-               lxc_log_syserror("failed to exec %s", argv[0]);
+               SYSERROR("failed to exec %s", argv[0]);
 
                err = LXC_ERROR_WRONG_COMMAND;
                /* If the exec fails, tell that to our father */
                if (write(sv[0], &err, sizeof(err)) < 0)
-                       lxc_log_syserror("failed to write the socket");
+                       SYSERROR("failed to write the socket");
                
        out_child:
                exit(err);
@@ -146,107 +488,86 @@ int lxc_start(const char *name, char *argv[])
        
        /* Wait for the child to be ready */
        if (read(sv[1], &sync, sizeof(sync)) < 0) {
-               lxc_log_syserror("failed to read the socket");
-               goto err_pipe_read;
+               SYSERROR("failed to read the socket");
+               goto out_abort;
        }
 
-       if (lxc_link_nsgroup(name, pid))
-               lxc_log_warning("cgroupfs not found: cgroup disabled");
+       if (lxc_link_nsgroup(name, handler->pid))
+               WARN("cgroupfs not found: cgroup disabled");
 
        /* Create the network configuration */
-       if (clone_flags & CLONE_NEWNET && conf_create_network(name, pid)) {
-               lxc_log_error("failed to create the configured network");
-               goto err_create_network;
+       if (clone_flags & CLONE_NEWNET && conf_create_network(name, handler->pid)) {
+               ERROR("failed to create the configured network");
+               goto out_abort;
        }
 
        /* Tell the child to continue its initialization */
        if (write(sv[1], &sync, sizeof(sync)) < 0) {
-               lxc_log_syserror("failed to write the socket");
-               goto err_pipe_write;
+               SYSERROR("failed to write the socket");
+               goto out_abort;
        }
 
        /* Wait for the child to exec or returning an error */
        err = read(sv[1], &sync, sizeof(sync));
        if (err < 0) {
-               lxc_log_error("failed to read the socket");
-               goto err_pipe_read2;
+               ERROR("failed to read the socket");
+               goto out_abort;
        }
 
-       if (err > 0) {
-               err = sync;
-               waitpid(pid, NULL, 0);
-               goto err_child_failed;
+       if (save_init_pid(name, handler->pid)) {
+               ERROR("failed to save the init pid info");
+               goto out_abort;
        }
 
-       asprintf(&val, "%d\n", pid);
+       if (lxc_setstate(name, RUNNING)) {
+               ERROR("failed to set state to %s",
+                             lxc_state2str(RUNNING));
+               goto out_abort;
+       }
 
-       snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
+       err = 0;
 
-       fd = open(init, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
-       if (fd < 0) {
-               lxc_log_syserror("failed to open '%s'", init);
-               goto err_write;
-       }
-       
-       if (write(fd, val, strlen(val)) < 0) {
-               lxc_log_syserror("failed to write the init pid");
-               goto err_write;
-       }
+out_close:
+       close(sv[0]);
+       close(sv[1]);
+out:
+       return err;
 
-       close(fd);
+out_abort:
+       lxc_abort(name, handler);
+       goto out_close;
+}
 
-       if (lxc_setstate(name, RUNNING)) {
-               lxc_log_error("failed to set state to %s", 
-                             lxc_state2str(RUNNING));
-               goto err_state_failed;
+int lxc_start(const char *name, char *argv[])
+{
+       struct lxc_handler handler = { 0 };
+       int err = -LXC_ERROR_INTERNAL;
+       int status;
+
+       if (lxc_init(name, &handler)) {
+               ERROR("failed to initialize the container");
+               goto out;
        }
 
-wait_again:
-       if (waitpid(pid, NULL, 0) < 0) {
-               if (errno == EINTR) 
-                       goto wait_again;
-               lxc_log_syserror("failed to wait the pid %d", pid);
-               goto err_waitpid_failed;
+       err = lxc_spawn(name, &handler, argv);
+       if (err) {
+               ERROR("failed to spawn '%s'", argv[0]);
+               goto out;
        }
 
-       if (lxc_setstate(name, STOPPING))
-               lxc_log_error("failed to set state %s", lxc_state2str(STOPPING));
+       if (lxc_poll(name, &handler)) {
+               ERROR("mainloop exited with an error");
+               goto out_abort;
+       }
 
-       if (clone_flags & CLONE_NEWNET && conf_destroy_network(name))
-               lxc_log_error("failed to destroy the network");
+       waitpid(handler.pid, &status, 0);
 
        err = 0;
 out:
-       if (lxc_setstate(name, STOPPED))
-               lxc_log_error("failed to set state %s", lxc_state2str(STOPPED));
-
-       lxc_unlink_nsgroup(name);
-       unlink(init);
-       free(val);
-       lxc_put_lock(lock);
-       LXC_TTY_DEL_HANDLER(SIGQUIT);
-       LXC_TTY_DEL_HANDLER(SIGINT);
-
+       lxc_fini(name, &handler);
        return err;
 
-err_write:
-       close(fd);
-
-err_state_failed:
-err_child_failed:
-err_pipe_read2:
-err_pipe_write:
-       if (clone_flags & CLONE_NEWNET)
-               conf_destroy_network(name);
-err_create_network:
-err_pipe_read:
-err_waitpid_failed:
-       if (lxc_setstate(name, ABORTING))
-               lxc_log_error("failed to set state %s", lxc_state2str(STOPPED));
-
-       kill(pid, SIGKILL);
-err_fork_ns:
-       close(sv[0]);
-       close(sv[1]);
+out_abort:
+       lxc_abort(name, &handler);
        goto out;
 }