]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/start.c
055d381f644644a24af763b341986de1a05135b6
[mirror_lxc.git] / src / lxc / start.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24 #include "../config.h"
25 #include <stdio.h>
26 #undef _GNU_SOURCE
27 #include <string.h>
28 #include <stdlib.h>
29 #include <dirent.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <signal.h>
33 #include <fcntl.h>
34 #include <termios.h>
35 #include <namespace.h>
36 #include <sys/param.h>
37 #include <sys/file.h>
38 #include <sys/mount.h>
39 #include <sys/types.h>
40 #include <sys/prctl.h>
41 #include <sys/types.h>
42 #include <sys/capability.h>
43 #include <sys/wait.h>
44 #include <sys/un.h>
45 #include <sys/poll.h>
46
47 #ifdef HAVE_SYS_SIGNALFD_H
48 # include <sys/signalfd.h>
49 #else
50 # ifndef __NR_signalfd4
51 /* assume kernel headers are too old */
52 # if __i386__
53 # define __NR_signalfd4 327
54 # elif __x86_64__
55 # define __NR_signalfd4 289
56 # elif __powerpc__
57 # define __NR_signalfd4 313
58 # elif __s390x__
59 # define __NR_signalfd4 322
60 # endif
61 #endif
62
63 # ifndef __NR_signalfd
64 /* assume kernel headers are too old */
65 # if __i386__
66 # define __NR_signalfd 321
67 # elif __x86_64__
68 # define __NR_signalfd 282
69 # elif __powerpc__
70 # define __NR_signalfd 305
71 # elif __s390x__
72 # define __NR_signalfd 316
73 # endif
74 #endif
75
76 int signalfd(int fd, const sigset_t *mask, int flags)
77 {
78 int retval;
79
80 retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags);
81 if (errno == ENOSYS && flags == 0)
82 retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8);
83 return retval;
84 }
85 #endif
86
87 #if !HAVE_DECL_PR_CAPBSET_DROP
88 #define PR_CAPBSET_DROP 24
89 #endif
90
91 #include "error.h"
92 #include "af_unix.h"
93 #include "mainloop.h"
94
95 #include <lxc/lxc.h>
96 #include <lxc/log.h>
97
98 lxc_log_define(lxc_start, lxc);
99
100 LXC_TTY_HANDLER(SIGINT);
101 LXC_TTY_HANDLER(SIGQUIT);
102
103 static int setup_sigchld_fd(sigset_t *oldmask)
104 {
105 sigset_t mask;
106 int fd;
107
108 if (sigprocmask(SIG_BLOCK, NULL, &mask)) {
109 SYSERROR("failed to get mask signal");
110 return -1;
111 }
112
113 if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) {
114 SYSERROR("failed to set mask signal");
115 return -1;
116 }
117
118 fd = signalfd(-1, &mask, 0);
119 if (fd < 0) {
120 SYSERROR("failed to create the signal fd");
121 return -1;
122 }
123
124 if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
125 SYSERROR("failed to set sigfd to close-on-exec");
126 close(fd);
127 return -1;
128 }
129
130 DEBUG("sigchild handler set");
131
132 return fd;
133 }
134
/*
 * Open the unix socket on which tty requests for the container are
 * accepted, and mark it close-on-exec.
 *
 * name  : container name, used as the socket name
 * ttyfd : receives the socket on success
 *
 * The name is stored after a leading NUL byte in sun_path (the Linux
 * abstract-socket layout; presumably lxc_af_unix_open() reads the name
 * starting at path + 1 -- confirm in af_unix.c).
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_tty_service(const char *name, int *ttyfd)
{
	int fd;
	struct sockaddr_un addr = { 0 };
	char *offset = &addr.sun_path[1];

	/* one byte for the leading NUL, one for the terminator: refuse
	 * names that would overflow sun_path instead of smashing the stack */
	if (strlen(name) >= sizeof(addr.sun_path) - 1) {
		ERROR("name '%s' is too long for the tty service socket", name);
		return -1;
	}

	strcpy(offset, name);
	addr.sun_path[0] = '\0';

	fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
		SYSERROR("failed to close-on-exec flag");
		close(fd);
		return -1;
	}

	*ttyfd = fd;

	return 0;
}
158
/*
 * Mainloop callback registered on the SIGCHLD signal fd.
 *
 * None of the arguments are used: the callback only logs the event and
 * returns 1 (presumably a non-zero return tells lxc_mainloop() to stop;
 * confirm in mainloop.c).
 */
static int sigchld_handler(int fd, void *data,
			   struct lxc_epoll_descr *descr)
{
	DEBUG("child exited");
	return 1;
}
166
167 static int ttyclient_handler(int fd, void *data,
168 struct lxc_epoll_descr *descr)
169 {
170 int i;
171 struct lxc_tty_info *tty_info = data;
172
173 for (i = 0; i < tty_info->nbtty; i++) {
174
175 if (tty_info->pty_info[i].busy != fd)
176 continue;
177
178 lxc_mainloop_del_handler(descr, fd);
179 tty_info->pty_info[i].busy = 0;
180 close(fd);
181 }
182
183 return 0;
184 }
185
186 static int ttyservice_handler(int fd, void *data,
187 struct lxc_epoll_descr *descr)
188 {
189 int conn, ttynum, val = 1, ret = -1;
190 struct lxc_tty_info *tty_info = data;
191
192 conn = accept(fd, NULL, 0);
193 if (conn < 0) {
194 SYSERROR("failed to accept tty client");
195 return -1;
196 }
197
198 if (setsockopt(conn, SOL_SOCKET, SO_PASSCRED, &val, sizeof(val))) {
199 SYSERROR("failed to enable credential on socket");
200 goto out_close;
201 }
202
203 if (lxc_af_unix_rcv_credential(conn, &ttynum, sizeof(ttynum)))
204 goto out_close;
205
206 if (ttynum > 0) {
207 if (ttynum > tty_info->nbtty)
208 goto out_close;
209
210 if (tty_info->pty_info[ttynum - 1].busy)
211 goto out_close;
212
213 goto out_send;
214 }
215
216 /* fixup index tty1 => [0] */
217 for (ttynum = 1;
218 ttynum <= tty_info->nbtty && tty_info->pty_info[ttynum - 1].busy;
219 ttynum++);
220
221 /* we didn't find any available slot for tty */
222 if (ttynum > tty_info->nbtty)
223 goto out_close;
224
225 out_send:
226 if (lxc_af_unix_send_fd(conn, tty_info->pty_info[ttynum - 1].master,
227 &ttynum, sizeof(ttynum)) < 0) {
228 ERROR("failed to send tty to client");
229 goto out_close;
230 }
231
232 if (lxc_mainloop_add_handler(descr, conn,
233 ttyclient_handler, tty_info)) {
234 ERROR("failed to add tty client handler");
235 goto out_close;
236 }
237
238 tty_info->pty_info[ttynum - 1].busy = conn;
239
240 ret = 0;
241 out:
242 return ret;
243 out_close:
244 close(conn);
245 goto out;
246 }
247
248 int lxc_poll(const char *name, struct lxc_handler *handler)
249 {
250 int sigfd = handler->sigfd;
251 int pid = handler->pid;
252 const struct lxc_tty_info *tty_info = &handler->tty_info;
253
254 int nfds, ttyfd = -1, ret = -1;
255 struct lxc_epoll_descr descr;
256
257 if (tty_info->nbtty && setup_tty_service(name, &ttyfd)) {
258 ERROR("failed to create the tty service point");
259 goto out_sigfd;
260 }
261
262 /* sigfd + nb tty + tty service
263 * if tty is enabled */
264 nfds = tty_info->nbtty + 1 + tty_info->nbtty ? 1 : 0;
265
266 if (lxc_mainloop_open(nfds, &descr)) {
267 ERROR("failed to create mainloop");
268 goto out_ttyfd;
269 }
270
271 if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) {
272 ERROR("failed to add handler for the signal");
273 goto out_mainloop_open;
274 }
275
276 if (tty_info->nbtty) {
277 if (lxc_mainloop_add_handler(&descr, ttyfd,
278 ttyservice_handler,
279 (void *)tty_info)) {
280 ERROR("failed to add handler for the tty");
281 goto out_mainloop_open;
282 }
283 }
284
285 ret = lxc_mainloop(&descr);
286
287 out:
288 return ret;
289
290 out_mainloop_open:
291 lxc_mainloop_close(&descr);
292 out_ttyfd:
293 close(ttyfd);
294 out_sigfd:
295 close(sigfd);
296 goto out;
297 }
298
299 static int save_init_pid(const char *name, pid_t pid)
300 {
301 char init[MAXPATHLEN];
302 char *val;
303 int fd, err = -1;
304
305 snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
306
307 if (!asprintf(&val, "%d\n", pid)) {
308 SYSERROR("failed to allocate memory");
309 goto out;
310 }
311
312 fd = open(init, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
313 if (fd < 0) {
314 SYSERROR("failed to open '%s'", init);
315 goto out_free;
316 }
317
318 if (write(fd, val, strlen(val)) < 0) {
319 SYSERROR("failed to write the init pid");
320 goto out_close;
321 }
322
323 err = 0;
324
325 out_close:
326 close(fd);
327 out_free:
328 free(val);
329 out:
330 return err;
331 }
332
333 static void remove_init_pid(const char *name, pid_t pid)
334 {
335 char init[MAXPATHLEN];
336
337 snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
338 unlink(init);
339 }
340
/*
 * Resolve the path behind an open file descriptor by reading the
 * /proc/self/fd/<fd> symbolic link.
 *
 * fd   : descriptor to look up
 * name : buffer receiving the NUL-terminated path
 * size : size of 'name' in bytes
 *
 * A path longer than size - 1 bytes is silently truncated, which is how
 * readlink() behaves.
 *
 * Returns 0 on success, -1 when the link cannot be read, is empty, or
 * 'size' is 0.
 */
static int fdname(int fd, char *name, size_t size)
{
	char path[MAXPATHLEN];
	ssize_t len;

	if (!size)
		return -1;

	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);

	/* readlink() does not append a terminator: keep one byte for it */
	len = readlink(path, name, size - 1);
	if (len <= 0)
		return -1;

	/* terminate the result buffer, not the /proc path */
	name[len] = '\0';

	return 0;
}
354
/*
 * Work out what the container console should be attached to.
 *
 * console : buffer receiving the console path
 * size    : size of 'console' in bytes
 *
 * The first of descriptors 0, 1, 2 that is a terminal gives the console
 * name.  Otherwise, if descriptor 0 is a regular file, character
 * device, fifo or link, the path behind it is used.  Failing both, the
 * console is set to the empty string.
 *
 * Returns 0 on success, -1 on failure.
 */
static int console_init(char *console, size_t size)
{
	struct stat st;
	int fd;

	for (fd = 0; fd < 3; fd++) {
		if (isatty(fd)) {
			if (ttyname_r(fd, console, size) == 0)
				return 0;

			SYSERROR("failed to retrieve tty name");
			return -1;
		}
	}

	if (fstat(0, &st) == 0 &&
	    (S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) ||
	     S_ISFIFO(st.st_mode) || S_ISLNK(st.st_mode)))
		return fdname(0, console, size);

	console[0] = '\0';

	DEBUG("console initialized");

	return 0;
}
384
385 struct lxc_handler *lxc_init(const char *name)
386 {
387 struct lxc_handler *handler;
388
389 handler = malloc(sizeof(*handler));
390 if (!handler)
391 return NULL;
392
393 memset(handler, 0, sizeof(*handler));
394
395 handler->lock = lxc_get_lock(name);
396 if (handler->lock < 0)
397 goto out_free;
398
399 /* Begin the set the state to STARTING*/
400 if (lxc_setstate(name, STARTING)) {
401 ERROR("failed to set state '%s'", lxc_state2str(STARTING));
402 goto out_put_lock;
403 }
404
405 if (console_init(handler->tty, sizeof(handler->tty))) {
406 ERROR("failed to initialize the console");
407 goto out_aborting;
408 }
409
410 if (lxc_create_tty(name, &handler->tty_info)) {
411 ERROR("failed to create the ttys");
412 goto out_aborting;
413 }
414
415 /* the signal fd has to be created before forking otherwise
416 * if the child process exits before we setup the signal fd,
417 * the event will be lost and the command will be stuck */
418 handler->sigfd = setup_sigchld_fd(&handler->oldmask);
419 if (handler->sigfd < 0) {
420 ERROR("failed to set sigchild fd handler");
421 goto out_delete_tty;
422 }
423
424 /* Avoid signals from terminal */
425 LXC_TTY_ADD_HANDLER(SIGINT);
426 LXC_TTY_ADD_HANDLER(SIGQUIT);
427
428 out:
429 if (handler)
430 INFO("'%s' is initialized", name);
431
432 return handler;
433
434 out_delete_tty:
435 lxc_delete_tty(&handler->tty_info);
436 out_aborting:
437 lxc_setstate(name, ABORTING);
438 out_put_lock:
439 lxc_put_lock(handler->lock);
440 out_free:
441 free(handler);
442 handler = NULL;
443 goto out;
444 }
445
446 void lxc_fini(const char *name, struct lxc_handler *handler)
447 {
448 /* The STOPPING state is there for future cleanup code
449 * which can take awhile
450 */
451 lxc_setstate(name, STOPPING);
452 lxc_setstate(name, STOPPED);
453 lxc_unlink_nsgroup(name);
454
455 if (handler) {
456 remove_init_pid(name, handler->pid);
457 lxc_delete_tty(&handler->tty_info);
458 lxc_put_lock(handler->lock);
459 free(handler);
460 }
461
462 LXC_TTY_DEL_HANDLER(SIGQUIT);
463 LXC_TTY_DEL_HANDLER(SIGINT);
464 }
465
466 void lxc_abort(const char *name, struct lxc_handler *handler)
467 {
468 lxc_setstate(name, ABORTING);
469 kill(handler->pid, SIGKILL);
470 }
471
/*
 * Arguments handed by lxc_spawn() to do_start() through the single
 * 'void *' parameter of the clone callback.
 */
struct start_arg {
	const char *name;		/* container name */
	char *const *argv;		/* command to exec; argv[0] is the program */
	struct lxc_handler *handler;	/* handler of the container being started */
	int *sv;			/* synchronisation socketpair: [0] is the
					 * child end, [1] the parent end */
};
478
479 static int do_start(void *arg)
480 {
481 struct start_arg *start_arg = arg;
482 struct lxc_handler *handler = start_arg->handler;
483 const char *name = start_arg->name;
484 char *const *argv = start_arg->argv;
485 int *sv = start_arg->sv;
486 int err = -1, sync;
487
488 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
489 SYSERROR("failed to set sigprocmask");
490 goto out_child;
491 }
492
493 close(sv[1]);
494
495 /* Be sure we don't inherit this after the exec */
496 fcntl(sv[0], F_SETFD, FD_CLOEXEC);
497
498 /* Tell our father he can begin to configure the container */
499 if (write(sv[0], &sync, sizeof(sync)) < 0) {
500 SYSERROR("failed to write socket");
501 goto out_child;
502 }
503
504 /* Wait for the father to finish the configuration */
505 if (read(sv[0], &sync, sizeof(sync)) < 0) {
506 SYSERROR("failed to read socket");
507 goto out_child;
508 }
509
510 /* Setup the container, ip, names, utsname, ... */
511 if (lxc_setup(name, handler->tty, &handler->tty_info)) {
512 ERROR("failed to setup the container");
513 goto out_warn_father;
514 }
515
516 if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
517 SYSERROR("failed to remove CAP_SYS_BOOT capability");
518 goto out_child;
519 }
520
521 NOTICE("exec'ing '%s'", argv[0]);
522
523 execvp(argv[0], argv);
524 SYSERROR("failed to exec %s", argv[0]);
525
526 out_warn_father:
527 /* If the exec fails, tell that to our father */
528 if (write(sv[0], &err, sizeof(err)) < 0)
529 SYSERROR("failed to write the socket");
530 out_child:
531 return -1;
532 }
533
534 int lxc_spawn(const char *name, struct lxc_handler *handler, char *const argv[])
535 {
536 int sv[2];
537 int clone_flags;
538 int err = -1, sync;
539
540 struct start_arg start_arg = {
541 .name = name,
542 .argv = argv,
543 .handler = handler,
544 .sv = sv,
545 };
546
547 /* Synchro socketpair */
548 if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
549 SYSERROR("failed to create communication socketpair");
550 goto out;
551 }
552
553 clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
554 if (conf_has_network(name))
555 clone_flags |= CLONE_NEWNET;
556
557 /* Create a process in a new set of namespaces */
558 handler->pid = lxc_clone(do_start, &start_arg, clone_flags);
559 if (handler->pid < 0) {
560 SYSERROR("failed to fork into a new namespace");
561 goto out_close;
562 }
563
564 close(sv[0]);
565
566 /* Wait for the child to be ready */
567 if (read(sv[1], &sync, sizeof(sync)) < 0) {
568 SYSERROR("failed to read the socket");
569 goto out_abort;
570 }
571
572 if (lxc_rename_nsgroup(name, handler->pid) || lxc_link_nsgroup(name))
573 goto out_abort;
574
575 /* Create the network configuration */
576 if (clone_flags & CLONE_NEWNET &&
577 conf_create_network(name, handler->pid)) {
578 ERROR("failed to create the configured network");
579 goto out_abort;
580 }
581
582 /* Tell the child to continue its initialization */
583 if (write(sv[1], &sync, sizeof(sync)) < 0) {
584 SYSERROR("failed to write the socket");
585 goto out_abort;
586 }
587
588 /* Wait for the child to exec or returning an error */
589 if (read(sv[1], &sync, sizeof(sync)) < 0) {
590 ERROR("failed to read the socket");
591 goto out_abort;
592 }
593
594 if (save_init_pid(name, handler->pid)) {
595 ERROR("failed to save the init pid info");
596 goto out_abort;
597 }
598
599 if (lxc_setstate(name, RUNNING)) {
600 ERROR("failed to set state to %s",
601 lxc_state2str(RUNNING));
602 goto out_abort;
603 }
604
605 err = 0;
606
607 NOTICE("'%s' started with pid '%d'", argv[0], handler->pid);
608
609 out_close:
610 close(sv[0]);
611 close(sv[1]);
612 out:
613 return err;
614
615 out_abort:
616 lxc_abort(name, handler);
617 goto out_close;
618 }
619
620 int lxc_start(const char *name, char *const argv[])
621 {
622 struct lxc_handler *handler;
623 int err = -1;
624 int status;
625
626 handler = lxc_init(name);
627 if (!handler) {
628 ERROR("failed to initialize the container");
629 goto out;
630 }
631
632 err = lxc_spawn(name, handler, argv);
633 if (err) {
634 ERROR("failed to spawn '%s'", argv[0]);
635 goto out;
636 }
637
638 err = lxc_close_all_inherited_fd();
639 if (err) {
640 ERROR("unable to close inherited fds");
641 goto out_abort;
642 }
643
644 err = lxc_poll(name, handler);
645 if (err) {
646 ERROR("mainloop exited with an error");
647 goto out_abort;
648 }
649
650 while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR)
651 continue;
652
653 err = lxc_error_set_and_log(handler->pid, status);
654 out:
655 lxc_fini(name, handler);
656 return err;
657
658 out_abort:
659 lxc_abort(name, handler);
660 goto out;
661 }