]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/start.c
encapsulate the lxc code
[mirror_lxc.git] / src / lxc / start.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
ff218c25 24#include "../config.h"
0ad19a3f 25#include <stdio.h>
26#undef _GNU_SOURCE
27#include <string.h>
28#include <stdlib.h>
29#include <dirent.h>
30#include <errno.h>
31#include <unistd.h>
32#include <signal.h>
b0a33c1e 33#include <fcntl.h>
34#include <termios.h>
0ad19a3f 35#include <sys/param.h>
36#include <sys/file.h>
f4d507d5 37#include <sys/mount.h>
0ad19a3f 38#include <sys/types.h>
0ad19a3f 39#include <sys/prctl.h>
42ff343d 40#include <sys/capability.h>
0ad19a3f 41#include <sys/wait.h>
b0a33c1e 42#include <sys/un.h>
43#include <sys/poll.h>
ff218c25 44
45#ifdef HAVE_SYS_SIGNALFD_H
8ca61733 46# include <sys/signalfd.h>
ff218c25 47#else
8ca61733
MJ
48# ifndef __NR_signalfd4
49/* assume kernel headers are too old */
50# if __i386__
51# define __NR_signalfd4 327
52# elif __x86_64__
53# define __NR_signalfd4 289
bfa38025
MH
54# elif __powerpc__
55# define __NR_signalfd4 313
47f38330
SH
56# elif __s390x__
57# define __NR_signalfd4 322
8ca61733
MJ
58# endif
59#endif
60
61# ifndef __NR_signalfd
62/* assume kernel headers are too old */
63# if __i386__
64# define __NR_signalfd 321
65# elif __x86_64__
66# define __NR_signalfd 282
bfa38025
MH
67# elif __powerpc__
68# define __NR_signalfd 305
47f38330
SH
69# elif __s390x__
70# define __NR_signalfd 316
8ca61733
MJ
71# endif
72#endif
73
74int signalfd(int fd, const sigset_t *mask, int flags)
75{
76 int retval;
77
78 retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags);
79 if (errno == ENOSYS && flags == 0)
80 retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8);
81 return retval;
82}
ff218c25 83#endif
0ad19a3f 84
656994bb
MH
85#if !HAVE_DECL_PR_CAPBSET_DROP
86#define PR_CAPBSET_DROP 24
87#endif
88
e2bcd7db 89#include "error.h"
b0a33c1e 90#include "af_unix.h"
91#include "mainloop.h"
e2bcd7db 92
b113348e 93#include <lxc/lxc.h>
36eb9bde
CLG
94#include <lxc/log.h>
95
96lxc_log_define(lxc_start, lxc);
97
0ad19a3f 98LXC_TTY_HANDLER(SIGINT);
99LXC_TTY_HANDLER(SIGQUIT);
100
ca5f7926
DL
101struct lxc_handler {
102 int sigfd;
59eb99ba 103 int lock;
ca5f7926 104 pid_t pid;
59eb99ba
DL
105 char tty[MAXPATHLEN];
106 sigset_t oldmask;
ca5f7926
DL
107 struct lxc_tty_info tty_info;
108};
109
b0a33c1e 110static int setup_sigchld_fd(sigset_t *oldmask)
111{
112 sigset_t mask;
113 int fd;
114
115 if (sigprocmask(SIG_BLOCK, NULL, &mask)) {
36eb9bde 116 SYSERROR("failed to get mask signal");
b0a33c1e 117 return -1;
118 }
119
120 if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) {
36eb9bde 121 SYSERROR("failed to set mask signal");
b0a33c1e 122 return -1;
123 }
124
125 fd = signalfd(-1, &mask, 0);
126 if (fd < 0) {
36eb9bde 127 SYSERROR("failed to create the signal fd");
b0a33c1e 128 return -1;
129 }
130
131 if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
36eb9bde 132 SYSERROR("failed to set sigfd to close-on-exec");
b0a33c1e 133 close(fd);
134 return -1;
135 }
136
137 return fd;
138}
139
/*
 * Open the socket on which tty requests for container 'name' arrive.
 *
 * The address handed to lxc_af_unix_open() is a NUL byte followed by
 * the container name (the layout Linux uses for abstract unix socket
 * addresses; how lxc_af_unix_open() interprets it is not visible here).
 *
 * name  : container name, used as the socket address
 * ttyfd : receives the close-on-exec socket descriptor on success
 *
 * Returns 0 on success, -1 on error (including a name that does not
 * fit in the socket address).
 */
static int setup_tty_service(const char *name, int *ttyfd)
{
	struct sockaddr_un addr = { 0 };
	size_t len = strlen(name);
	int fd;

	/* one byte is taken by the leading NUL and one by the terminator:
	 * refuse anything longer instead of overflowing sun_path */
	if (len >= sizeof(addr.sun_path) - 1) {
		ERROR("tty service name '%s' is too long", name);
		return -1;
	}

	/* addr is zero-initialized, so sun_path[0] is already '\0' */
	memcpy(&addr.sun_path[1], name, len + 1);

	fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
		SYSERROR("failed to close-on-exec flag");
		close(fd);
		return -1;
	}

	*ttyfd = fd;

	return 0;
}
163
/*
 * Mainloop callback registered on the signal descriptor by lxc_poll().
 *
 * None of the arguments are looked at and nothing is read from 'fd';
 * the callback unconditionally returns 1.
 * NOTE(review): presumably a positive return asks lxc_mainloop() to
 * stop -- confirm against the mainloop implementation.
 */
static int sigchld_handler(int fd, void *data,
			   struct lxc_epoll_descr *descr)
{
	const int stop_mainloop = 1;

	return stop_mainloop;
}
169
170static int ttyclient_handler(int fd, void *data,
171 struct lxc_epoll_descr *descr)
172{
173 int i;
174 struct lxc_tty_info *tty_info = data;
175
176 for (i = 0; i < tty_info->nbtty; i++) {
177
178 if (tty_info->pty_info[i].busy != fd)
179 continue;
180
181 lxc_mainloop_del_handler(descr, fd);
182 tty_info->pty_info[i].busy = 0;
183 close(fd);
184 }
185
186 return 0;
187}
188
189static int ttyservice_handler(int fd, void *data,
190 struct lxc_epoll_descr *descr)
191{
192 int conn, ttynum, val = 1, ret = -1;
193 struct lxc_tty_info *tty_info = data;
194
195 conn = accept(fd, NULL, 0);
196 if (conn < 0) {
36eb9bde 197 SYSERROR("failed to accept tty client");
b0a33c1e 198 return -1;
199 }
200
201 if (setsockopt(conn, SOL_SOCKET, SO_PASSCRED, &val, sizeof(val))) {
36eb9bde 202 SYSERROR("failed to enable credential on socket");
b0a33c1e 203 goto out_close;
204 }
205
206 if (lxc_af_unix_rcv_credential(conn, &ttynum, sizeof(ttynum)))
207 goto out_close;
208
209 if (ttynum <= 0 || ttynum > tty_info->nbtty)
210 goto out_close;
211
212 /* fixup index array (eg. tty1 is index 0) */
213 ttynum--;
214
215 if (tty_info->pty_info[ttynum].busy)
216 goto out_close;
217
218 if (lxc_af_unix_send_fd(conn, tty_info->pty_info[ttynum].master,
219 NULL, 0) < 0) {
36eb9bde 220 ERROR("failed to send tty to client");
b0a33c1e 221 goto out_close;
222 }
223
224 if (lxc_mainloop_add_handler(descr, conn,
225 ttyclient_handler, tty_info)) {
36eb9bde 226 ERROR("failed to add tty client handler");
b0a33c1e 227 goto out_close;
228 }
229
230 tty_info->pty_info[ttynum].busy = conn;
231
232 ret = 0;
233
234out:
235 return ret;
236out_close:
237 close(conn);
238 goto out;
239}
240
59eb99ba 241static int lxc_poll(const char *name, struct lxc_handler *handler)
b0a33c1e 242{
ca5f7926
DL
243 int sigfd = handler->sigfd;
244 int pid = handler->pid;
245 const struct lxc_tty_info *tty_info = &handler->tty_info;
246
b0a33c1e 247 int nfds, ttyfd = -1, ret = -1;
248 struct lxc_epoll_descr descr;
249
250 if (tty_info->nbtty && setup_tty_service(name, &ttyfd)) {
36eb9bde 251 ERROR("failed to create the tty service point");
b0a33c1e 252 goto out_sigfd;
253 }
254
255 /* sigfd + nb tty + tty service
256 * if tty is enabled */
257 nfds = tty_info->nbtty + 1 + tty_info->nbtty ? 1 : 0;
258
259 if (lxc_mainloop_open(nfds, &descr)) {
36eb9bde 260 ERROR("failed to create mainloop");
b0a33c1e 261 goto out_ttyfd;
262 }
263
264 if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) {
36eb9bde 265 ERROR("failed to add handler for the signal");
b0a33c1e 266 goto out_mainloop_open;
267 }
268
269 if (tty_info->nbtty) {
270 if (lxc_mainloop_add_handler(&descr, ttyfd,
271 ttyservice_handler,
272 (void *)tty_info)) {
36eb9bde 273 ERROR("failed to add handler for the tty");
b0a33c1e 274 goto out_mainloop_open;
275 }
276 }
277
278 ret = lxc_mainloop(&descr);
279
280out:
281 return ret;
282
283out_mainloop_open:
284 lxc_mainloop_close(&descr);
285out_ttyfd:
286 close(ttyfd);
287out_sigfd:
288 close(sigfd);
289 goto out;
290}
291
59eb99ba 292static int save_init_pid(const char *name, pid_t pid)
0ad19a3f 293{
22ebac19 294 char init[MAXPATHLEN];
59eb99ba
DL
295 char *val;
296 int fd, err = -1;
297
298 snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
299
300 if (!asprintf(&val, "%d\n", pid)) {
301 SYSERROR("failed to allocate memory");
302 goto out;
303 }
304
305 fd = open(init, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
306 if (fd < 0) {
307 SYSERROR("failed to open '%s'", init);
308 goto out_free;
309 }
310
311 if (write(fd, val, strlen(val)) < 0) {
312 SYSERROR("failed to write the init pid");
313 goto out_close;
314 }
f4d507d5 315
59eb99ba
DL
316 err = 0;
317
318out_close:
319 close(fd);
320out_free:
321 free(val);
322out:
323 return err;
324}
325
326static void remove_init_pid(const char *name, pid_t pid)
327{
328 char init[MAXPATHLEN];
329
330 snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
331 unlink(init);
332}
333
334static int lxc_init(const char *name, struct lxc_handler *handler)
335{
336 int err = -1;
337
338 memset(handler, 0, sizeof(*handler));
339
340 handler->lock = lxc_get_lock(name);
341 if (handler->lock < 0)
342 goto out;
0ad19a3f 343
0ad19a3f 344 /* Begin the set the state to STARTING*/
345 if (lxc_setstate(name, STARTING)) {
59eb99ba
DL
346 ERROR("failed to set state '%s'", lxc_state2str(STARTING));
347 goto out_put_lock;
0ad19a3f 348 }
349
caf249f4 350 /* If we are not attached to a tty, disable it */
59eb99ba
DL
351 if (ttyname_r(0, handler->tty, sizeof(handler->tty)))
352 handler->tty[0] = '\0';
939229eb 353
59eb99ba 354 if (lxc_create_tty(name, &handler->tty_info)) {
36eb9bde 355 ERROR("failed to create the ttys");
59eb99ba 356 goto out_aborting;
b0a33c1e 357 }
358
359 /* the signal fd has to be created before forking otherwise
360 * if the child process exits before we setup the signal fd,
361 * the event will be lost and the command will be stuck */
59eb99ba
DL
362 handler->sigfd = setup_sigchld_fd(&handler->oldmask);
363 if (handler->sigfd < 0) {
36eb9bde 364 ERROR("failed to set sigchild fd handler");
59eb99ba 365 goto out_delete_tty;
b0a33c1e 366 }
367
59eb99ba
DL
368 /* Avoid signals from terminal */
369 LXC_TTY_ADD_HANDLER(SIGINT);
370 LXC_TTY_ADD_HANDLER(SIGQUIT);
371
372 err = 0;
373out:
374 return err;
375
376out_delete_tty:
377 lxc_delete_tty(&handler->tty_info);
378out_aborting:
379 lxc_setstate(name, ABORTING);
380out_put_lock:
381 lxc_put_lock(handler->lock);
382 goto out;
383}
384
385static void lxc_fini(const char *name, struct lxc_handler *handler)
386{
387 /* The STOPPING state is there for future cleanup code
388 * which can take awhile
389 */
390 lxc_setstate(name, STOPPING);
391
392 lxc_setstate(name, STOPPED);
393
394 remove_init_pid(name, handler->pid);
395
396 lxc_delete_tty(&handler->tty_info);
397
398 lxc_unlink_nsgroup(name);
399
400 lxc_put_lock(handler->lock);
401
402 LXC_TTY_DEL_HANDLER(SIGQUIT);
403 LXC_TTY_DEL_HANDLER(SIGINT);
404}
405
406static void lxc_abort(const char *name, struct lxc_handler *handler)
407{
408 lxc_setstate(name, ABORTING);
409 kill(handler->pid, SIGKILL);
410}
411
412static int lxc_spawn(const char *name, struct lxc_handler *handler, char *argv[])
413{
414 int sv[2];
415 int clone_flags;
416 int err = -1, sync;
417
0ad19a3f 418 /* Synchro socketpair */
419 if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
36eb9bde 420 SYSERROR("failed to create communication socketpair");
f4d507d5 421 goto out;
0ad19a3f 422 }
423
f4d507d5 424 clone_flags = CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
0ad19a3f 425 if (conf_has_utsname(name))
426 clone_flags |= CLONE_NEWUTS;
427 if (conf_has_network(name))
428 clone_flags |= CLONE_NEWNET;
429
430 /* Create a process in a new set of namespaces */
59eb99ba
DL
431 handler->pid = fork_ns(clone_flags);
432 if (handler->pid < 0) {
36eb9bde 433 SYSERROR("failed to fork into a new namespace");
59eb99ba 434 goto out_close;
0ad19a3f 435 }
436
59eb99ba 437 if (!handler->pid) {
0ad19a3f 438
59eb99ba 439 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
36eb9bde 440 SYSERROR("failed to set sigprocmask");
b0a33c1e 441 return -1;
442 }
443
0ad19a3f 444 close(sv[1]);
445
446 /* Be sure we don't inherit this after the exec */
447 fcntl(sv[0], F_SETFD, FD_CLOEXEC);
448
449 /* Tell our father he can begin to configure the container */
450 if (write(sv[0], &sync, sizeof(sync)) < 0) {
36eb9bde 451 SYSERROR("failed to write socket");
57545890 452 goto out_child;
0ad19a3f 453 }
454
455 /* Wait for the father to finish the configuration */
456 if (read(sv[0], &sync, sizeof(sync)) < 0) {
36eb9bde 457 SYSERROR("failed to read socket");
57545890 458 goto out_child;
0ad19a3f 459 }
460
461 /* Setup the container, ip, names, utsname, ... */
59eb99ba 462 err = lxc_setup(name, handler->tty, &handler->tty_info);
e5bda9ee 463 if (err) {
36eb9bde 464 ERROR("failed to setup the container");
e5bda9ee 465 if (write(sv[0], &err, sizeof(err)) < 0)
36eb9bde 466 SYSERROR("failed to write the socket");
57545890 467 goto out_child;
0ad19a3f 468 }
469
42ff343d 470 if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
36eb9bde 471 SYSERROR("failed to remove CAP_SYS_BOOT capability");
42ff343d 472 goto out_child;
473 }
474
0ad19a3f 475 execvp(argv[0], argv);
36eb9bde 476 SYSERROR("failed to exec %s", argv[0]);
0ad19a3f 477
b3223262 478 err = LXC_ERROR_WRONG_COMMAND;
0ad19a3f 479 /* If the exec fails, tell that to our father */
e5bda9ee 480 if (write(sv[0], &err, sizeof(err)) < 0)
36eb9bde 481 SYSERROR("failed to write the socket");
0ad19a3f 482
57545890 483 out_child:
e5bda9ee 484 exit(err);
0ad19a3f 485 }
486
487 close(sv[0]);
488
489 /* Wait for the child to be ready */
490 if (read(sv[1], &sync, sizeof(sync)) < 0) {
36eb9bde 491 SYSERROR("failed to read the socket");
59eb99ba 492 goto out_abort;
0ad19a3f 493 }
494
59eb99ba 495 if (lxc_link_nsgroup(name, handler->pid))
36eb9bde 496 WARN("cgroupfs not found: cgroup disabled");
218d4250 497
0ad19a3f 498 /* Create the network configuration */
59eb99ba 499 if (clone_flags & CLONE_NEWNET && conf_create_network(name, handler->pid)) {
36eb9bde 500 ERROR("failed to create the configured network");
59eb99ba 501 goto out_abort;
0ad19a3f 502 }
503
504 /* Tell the child to continue its initialization */
505 if (write(sv[1], &sync, sizeof(sync)) < 0) {
36eb9bde 506 SYSERROR("failed to write the socket");
59eb99ba 507 goto out_abort;
0ad19a3f 508 }
509
510 /* Wait for the child to exec or returning an error */
511 err = read(sv[1], &sync, sizeof(sync));
512 if (err < 0) {
36eb9bde 513 ERROR("failed to read the socket");
59eb99ba 514 goto out_abort;
0ad19a3f 515 }
516
59eb99ba
DL
517 if (save_init_pid(name, handler->pid)) {
518 ERROR("failed to save the init pid info");
519 goto out_abort;
0ad19a3f 520 }
521
59eb99ba
DL
522 if (lxc_setstate(name, RUNNING)) {
523 ERROR("failed to set state to %s",
524 lxc_state2str(RUNNING));
525 goto out_abort;
3f21c114 526 }
22ebac19 527
59eb99ba 528 err = 0;
22ebac19 529
59eb99ba
DL
530out_close:
531 close(sv[0]);
532 close(sv[1]);
533out:
534 return err;
0ad19a3f 535
59eb99ba
DL
536out_abort:
537 lxc_abort(name, handler);
538 goto out_close;
539}
0ad19a3f 540
59eb99ba
DL
541int lxc_start(const char *name, char *argv[])
542{
543 struct lxc_handler handler = { 0 };
544 int err = -LXC_ERROR_INTERNAL;
545 int status;
546
547 if (lxc_init(name, &handler)) {
548 ERROR("failed to initialize the container");
549 goto out;
0ad19a3f 550 }
551
59eb99ba
DL
552 err = lxc_spawn(name, &handler, argv);
553 if (err) {
554 ERROR("failed to spawn '%s'", argv[0]);
555 goto out;
0ad19a3f 556 }
557
59eb99ba
DL
558 if (lxc_poll(name, &handler)) {
559 ERROR("mainloop exited with an error");
560 goto out_abort;
561 }
0ad19a3f 562
59eb99ba 563 waitpid(handler.pid, &status, 0);
0ad19a3f 564
565 err = 0;
566out:
59eb99ba 567 lxc_fini(name, &handler);
0ad19a3f 568 return err;
569
59eb99ba
DL
570out_abort:
571 lxc_abort(name, &handler);
0ad19a3f 572 goto out;
573}