]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/start.c
remove the LXCPATH/<name>/nsgroup file
[mirror_lxc.git] / src / lxc / start.c
CommitLineData
/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2008
 *
 * Authors:
 * Daniel Lezcano <dlezcano at fr.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
23
ff218c25 24#include "../config.h"
0ad19a3f 25#include <stdio.h>
26#undef _GNU_SOURCE
27#include <string.h>
28#include <stdlib.h>
29#include <dirent.h>
30#include <errno.h>
31#include <unistd.h>
32#include <signal.h>
b0a33c1e 33#include <fcntl.h>
34#include <termios.h>
50e98013 35#include <namespace.h>
0ad19a3f 36#include <sys/param.h>
37#include <sys/file.h>
f4d507d5 38#include <sys/mount.h>
0ad19a3f 39#include <sys/types.h>
0ad19a3f 40#include <sys/prctl.h>
ddceb1f9 41#include <sys/types.h>
42ff343d 42#include <sys/capability.h>
0ad19a3f 43#include <sys/wait.h>
b0a33c1e 44#include <sys/un.h>
45#include <sys/poll.h>
ff218c25 46
47#ifdef HAVE_SYS_SIGNALFD_H
8ca61733 48# include <sys/signalfd.h>
ff218c25 49#else
8ca61733
MJ
/*
 * Fallback syscall numbers for signalfd4() and signalfd(), used only when
 * the headers included so far did not define them (kernel headers assumed
 * to be too old). Architectures not listed below get no definition.
 */
# ifndef __NR_signalfd4
#  if __i386__
#   define __NR_signalfd4 327
#  elif __x86_64__
#   define __NR_signalfd4 289
#  elif __powerpc__
#   define __NR_signalfd4 313
#  elif __s390x__
#   define __NR_signalfd4 322
#  endif
# endif

# ifndef __NR_signalfd
#  if __i386__
#   define __NR_signalfd 321
#  elif __x86_64__
#   define __NR_signalfd 282
#  elif __powerpc__
#   define __NR_signalfd 305
#  elif __s390x__
#   define __NR_signalfd 316
#  endif
# endif
75
76int signalfd(int fd, const sigset_t *mask, int flags)
77{
78 int retval;
79
80 retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags);
81 if (errno == ENOSYS && flags == 0)
82 retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8);
83 return retval;
84}
ff218c25 85#endif
0ad19a3f 86
656994bb
MH
/* Provide the prctl() option number when the build configuration reports
 * that the system headers do not declare PR_CAPBSET_DROP. */
#if !HAVE_DECL_PR_CAPBSET_DROP
# define PR_CAPBSET_DROP 24
#endif
90
e2bcd7db 91#include "error.h"
b0a33c1e 92#include "af_unix.h"
93#include "mainloop.h"
96fa1ff0 94#include "commands.h"
e2bcd7db 95
b113348e 96#include <lxc/lxc.h>
36eb9bde
CLG
97#include <lxc/log.h>
98
99lxc_log_define(lxc_start, lxc);
100
0ad19a3f 101LXC_TTY_HANDLER(SIGINT);
102LXC_TTY_HANDLER(SIGQUIT);
103
b0a33c1e 104static int setup_sigchld_fd(sigset_t *oldmask)
105{
106 sigset_t mask;
107 int fd;
108
109 if (sigprocmask(SIG_BLOCK, NULL, &mask)) {
36eb9bde 110 SYSERROR("failed to get mask signal");
b0a33c1e 111 return -1;
112 }
113
114 if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) {
36eb9bde 115 SYSERROR("failed to set mask signal");
b0a33c1e 116 return -1;
117 }
118
119 fd = signalfd(-1, &mask, 0);
120 if (fd < 0) {
36eb9bde 121 SYSERROR("failed to create the signal fd");
b0a33c1e 122 return -1;
123 }
124
125 if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
36eb9bde 126 SYSERROR("failed to set sigfd to close-on-exec");
b0a33c1e 127 close(fd);
128 return -1;
129 }
130
1ac470c0
DL
131 DEBUG("sigchild handler set");
132
b0a33c1e 133 return fd;
134}
135
/*
 * Mainloop callback registered on the signal file descriptor by lxc_poll().
 * It only logs the event and returns 1; none of its arguments are used.
 * NOTE(review): the non-zero return presumably makes lxc_mainloop() stop -
 * confirm in mainloop.c.
 */
static int sigchld_handler(int fd, void *data,
			   struct lxc_epoll_descr *descr)
{
	DEBUG("child exited");
	return 1;
}
143
66aeffc7
DL
144static int set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
145{
146 handler->state = state;
147 lxc_monitor_send_state(name, state);
148 return 0;
149}
150
1bc5cc8c 151int lxc_poll(const char *name, struct lxc_handler *handler)
b0a33c1e 152{
ca5f7926
DL
153 int sigfd = handler->sigfd;
154 int pid = handler->pid;
a9e61274 155 int ret = -1;
b0a33c1e 156 struct lxc_epoll_descr descr;
157
a9e61274 158 if (lxc_mainloop_open(&descr)) {
36eb9bde 159 ERROR("failed to create mainloop");
50c8bf05 160 goto out_sigfd;
b0a33c1e 161 }
162
163 if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) {
36eb9bde 164 ERROR("failed to add handler for the signal");
b0a33c1e 165 goto out_mainloop_open;
166 }
167
724e753c 168 if (lxc_command_mainloop_add(name, &descr, handler))
96fa1ff0 169 goto out_mainloop_open;
b0a33c1e 170
171 ret = lxc_mainloop(&descr);
172
173out:
174 return ret;
175
176out_mainloop_open:
177 lxc_mainloop_close(&descr);
b0a33c1e 178out_sigfd:
179 close(sigfd);
180 goto out;
181}
182
59eb99ba
DL
183static void remove_init_pid(const char *name, pid_t pid)
184{
185 char init[MAXPATHLEN];
186
187 snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
188 unlink(init);
189}
190
b8f57738
DL
/*
 * Resolve the target of /proc/self/fd/<fd> into a NUL-terminated string.
 *
 * @fd:   file descriptor to look up
 * @name: buffer receiving the link target
 * @size: size of 'name' in bytes, terminating NUL included
 *
 * Returns 0 on success, -1 when the buffer is unusable or readlink() fails
 * or yields an empty result. A target longer than size - 1 bytes is
 * truncated by readlink(); the result is still NUL-terminated.
 */
static int fdname(int fd, char *name, size_t size)
{
	char path[MAXPATHLEN];
	ssize_t len;

	/* Need room for at least the terminating NUL */
	if (!name || size == 0)
		return -1;

	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);

	/* readlink() never NUL-terminates: keep the last byte for that */
	len = readlink(path, name, size - 1);
	if (len <= 0)
		return -1;

	/* Terminate the caller's buffer, not the /proc path */
	name[len] = '\0';

	return 0;
}
204
/*
 * Work out what the container console should be attached to.
 *
 * The first of file descriptors 0, 1 and 2 that is a terminal supplies its
 * tty name. Otherwise, if fd 0 passes the file-type test below, the name is
 * taken from fdname(0). In every other case 'console' is set to the empty
 * string.
 *
 * @console: buffer receiving the name
 * @size:    size of 'console' in bytes
 *
 * Returns 0 on success, -1 if the tty name cannot be retrieved, or the
 * result of fdname() when that path is taken.
 */
static int console_init(char *console, size_t size)
{
	struct stat st;
	int fd = 0;

	/* Locate the first standard descriptor that is a terminal */
	while (fd < 3 && !isatty(fd))
		fd++;

	if (fd < 3) {
		if (ttyname_r(fd, console, size)) {
			SYSERROR("failed to retrieve tty name");
			return -1;
		}

		return 0;
	}

	/* No terminal: fall back on whatever stdin is connected to */
	if (fstat(0, &st) == 0) {
		mode_t mode = st.st_mode;

		if (S_ISREG(mode) || S_ISCHR(mode) ||
		    S_ISFIFO(mode) || S_ISLNK(mode))
			return fdname(0, console, size);
	}

	console[0] = '\0';

	DEBUG("console initialized");

	return 0;
}
234
3a0f472d 235struct lxc_handler *lxc_init(const char *name)
59eb99ba 236{
3a0f472d
DL
237 struct lxc_handler *handler;
238
239 handler = malloc(sizeof(*handler));
240 if (!handler)
241 return NULL;
59eb99ba
DL
242
243 memset(handler, 0, sizeof(*handler));
244
245 handler->lock = lxc_get_lock(name);
246 if (handler->lock < 0)
3a0f472d 247 goto out_free;
0ad19a3f 248
0ad19a3f 249 /* Begin the set the state to STARTING*/
66aeffc7 250 if (set_state(name, handler, STARTING)) {
59eb99ba
DL
251 ERROR("failed to set state '%s'", lxc_state2str(STARTING));
252 goto out_put_lock;
0ad19a3f 253 }
254
b8f57738
DL
255 if (console_init(handler->tty, sizeof(handler->tty))) {
256 ERROR("failed to initialize the console");
257 goto out_aborting;
258 }
939229eb 259
59eb99ba 260 if (lxc_create_tty(name, &handler->tty_info)) {
36eb9bde 261 ERROR("failed to create the ttys");
59eb99ba 262 goto out_aborting;
b0a33c1e 263 }
264
265 /* the signal fd has to be created before forking otherwise
266 * if the child process exits before we setup the signal fd,
267 * the event will be lost and the command will be stuck */
59eb99ba
DL
268 handler->sigfd = setup_sigchld_fd(&handler->oldmask);
269 if (handler->sigfd < 0) {
36eb9bde 270 ERROR("failed to set sigchild fd handler");
59eb99ba 271 goto out_delete_tty;
b0a33c1e 272 }
273
59eb99ba
DL
274 /* Avoid signals from terminal */
275 LXC_TTY_ADD_HANDLER(SIGINT);
276 LXC_TTY_ADD_HANDLER(SIGQUIT);
277
59eb99ba 278out:
1ac470c0
DL
279 if (handler)
280 INFO("'%s' is initialized", name);
281
3a0f472d 282 return handler;
59eb99ba
DL
283
284out_delete_tty:
285 lxc_delete_tty(&handler->tty_info);
286out_aborting:
66aeffc7 287 set_state(name, handler, ABORTING);
59eb99ba
DL
288out_put_lock:
289 lxc_put_lock(handler->lock);
3a0f472d
DL
290out_free:
291 free(handler);
292 handler = NULL;
59eb99ba
DL
293 goto out;
294}
295
1bc5cc8c 296void lxc_fini(const char *name, struct lxc_handler *handler)
59eb99ba
DL
297{
298 /* The STOPPING state is there for future cleanup code
299 * which can take awhile
300 */
66aeffc7
DL
301 set_state(name, handler, STOPPING);
302 set_state(name, handler, STOPPED);
59eb99ba
DL
303 lxc_unlink_nsgroup(name);
304
3a0f472d
DL
305 if (handler) {
306 remove_init_pid(name, handler->pid);
307 lxc_delete_tty(&handler->tty_info);
308 lxc_put_lock(handler->lock);
309 free(handler);
310 }
59eb99ba
DL
311
312 LXC_TTY_DEL_HANDLER(SIGQUIT);
313 LXC_TTY_DEL_HANDLER(SIGINT);
314}
315
1bc5cc8c 316void lxc_abort(const char *name, struct lxc_handler *handler)
59eb99ba 317{
66aeffc7 318 set_state(name, handler, ABORTING);
59eb99ba
DL
319 kill(handler->pid, SIGKILL);
320}
321
50e98013
DL
/* Arguments handed by lxc_spawn() to do_start() through lxc_clone() */
struct start_arg {
	const char *name;		/* container name */
	char *const *argv;		/* command line passed to execvp() */
	struct lxc_handler *handler;	/* handler created by lxc_init() */
	int *sv;			/* socketpair: do_start() keeps sv[0], lxc_spawn() sv[1] */
};
328
329static int do_start(void *arg)
330{
331 struct start_arg *start_arg = arg;
332 struct lxc_handler *handler = start_arg->handler;
333 const char *name = start_arg->name;
334 char *const *argv = start_arg->argv;
335 int *sv = start_arg->sv;
336 int err = -1, sync;
337
338 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
339 SYSERROR("failed to set sigprocmask");
340 goto out_child;
341 }
342
343 close(sv[1]);
344
345 /* Be sure we don't inherit this after the exec */
346 fcntl(sv[0], F_SETFD, FD_CLOEXEC);
347
348 /* Tell our father he can begin to configure the container */
349 if (write(sv[0], &sync, sizeof(sync)) < 0) {
350 SYSERROR("failed to write socket");
351 goto out_child;
352 }
353
354 /* Wait for the father to finish the configuration */
355 if (read(sv[0], &sync, sizeof(sync)) < 0) {
356 SYSERROR("failed to read socket");
357 goto out_child;
358 }
359
360 /* Setup the container, ip, names, utsname, ... */
361 if (lxc_setup(name, handler->tty, &handler->tty_info)) {
362 ERROR("failed to setup the container");
363 goto out_warn_father;
364 }
365
366 if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
367 SYSERROR("failed to remove CAP_SYS_BOOT capability");
368 goto out_child;
369 }
370
6a6ad7af
DL
371 if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) {
372 SYSERROR("failed to set pdeath signal");
373 goto out_child;
374 }
375
1ac470c0
DL
376 NOTICE("exec'ing '%s'", argv[0]);
377
50e98013
DL
378 execvp(argv[0], argv);
379 SYSERROR("failed to exec %s", argv[0]);
380
381out_warn_father:
382 /* If the exec fails, tell that to our father */
383 if (write(sv[0], &err, sizeof(err)) < 0)
384 SYSERROR("failed to write the socket");
385out_child:
386 return -1;
387}
388
9618063c 389int lxc_spawn(const char *name, struct lxc_handler *handler, char *const argv[])
59eb99ba
DL
390{
391 int sv[2];
392 int clone_flags;
393 int err = -1, sync;
394
50e98013
DL
395 struct start_arg start_arg = {
396 .name = name,
397 .argv = argv,
398 .handler = handler,
399 .sv = sv,
400 };
401
0ad19a3f 402 /* Synchro socketpair */
403 if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
36eb9bde 404 SYSERROR("failed to create communication socketpair");
f4d507d5 405 goto out;
0ad19a3f 406 }
407
1ea6db29 408 clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
0ad19a3f 409 if (conf_has_network(name))
410 clone_flags |= CLONE_NEWNET;
411
412 /* Create a process in a new set of namespaces */
50e98013 413 handler->pid = lxc_clone(do_start, &start_arg, clone_flags);
59eb99ba 414 if (handler->pid < 0) {
36eb9bde 415 SYSERROR("failed to fork into a new namespace");
59eb99ba 416 goto out_close;
0ad19a3f 417 }
418
0ad19a3f 419 close(sv[0]);
420
421 /* Wait for the child to be ready */
422 if (read(sv[1], &sync, sizeof(sync)) < 0) {
36eb9bde 423 SYSERROR("failed to read the socket");
59eb99ba 424 goto out_abort;
0ad19a3f 425 }
426
9f44c578 427 if (lxc_rename_nsgroup(name, handler))
2b31f553 428 goto out_abort;
218d4250 429
0ad19a3f 430 /* Create the network configuration */
6203de18
DL
431 if (clone_flags & CLONE_NEWNET &&
432 conf_create_network(name, handler->pid)) {
36eb9bde 433 ERROR("failed to create the configured network");
59eb99ba 434 goto out_abort;
0ad19a3f 435 }
436
437 /* Tell the child to continue its initialization */
438 if (write(sv[1], &sync, sizeof(sync)) < 0) {
36eb9bde 439 SYSERROR("failed to write the socket");
59eb99ba 440 goto out_abort;
0ad19a3f 441 }
442
443 /* Wait for the child to exec or returning an error */
e043236e 444 if (read(sv[1], &sync, sizeof(sync)) < 0) {
36eb9bde 445 ERROR("failed to read the socket");
59eb99ba 446 goto out_abort;
0ad19a3f 447 }
448
66aeffc7 449 if (set_state(name, handler, RUNNING)) {
59eb99ba
DL
450 ERROR("failed to set state to %s",
451 lxc_state2str(RUNNING));
452 goto out_abort;
3f21c114 453 }
22ebac19 454
59eb99ba 455 err = 0;
22ebac19 456
1ac470c0
DL
457 NOTICE("'%s' started with pid '%d'", argv[0], handler->pid);
458
59eb99ba
DL
459out_close:
460 close(sv[0]);
461 close(sv[1]);
462out:
463 return err;
0ad19a3f 464
59eb99ba
DL
465out_abort:
466 lxc_abort(name, handler);
467 goto out_close;
468}
0ad19a3f 469
9618063c 470int lxc_start(const char *name, char *const argv[])
59eb99ba 471{
3a0f472d 472 struct lxc_handler *handler;
e043236e 473 int err = -1;
59eb99ba
DL
474 int status;
475
3a0f472d
DL
476 handler = lxc_init(name);
477 if (!handler) {
59eb99ba 478 ERROR("failed to initialize the container");
66aeffc7 479 return -1;
0ad19a3f 480 }
481
3a0f472d 482 err = lxc_spawn(name, handler, argv);
59eb99ba
DL
483 if (err) {
484 ERROR("failed to spawn '%s'", argv[0]);
485 goto out;
0ad19a3f 486 }
487
af795875 488 err = lxc_close_all_inherited_fd();
d983b93c
MN
489 if (err) {
490 ERROR("unable to close inherited fds");
491 goto out_abort;
492 }
493
3a0f472d 494 err = lxc_poll(name, handler);
e043236e 495 if (err) {
59eb99ba
DL
496 ERROR("mainloop exited with an error");
497 goto out_abort;
498 }
0ad19a3f 499
3a0f472d 500 while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR)
1bc5cc8c 501 continue;
e043236e 502
3a0f472d 503 err = lxc_error_set_and_log(handler->pid, status);
0ad19a3f 504out:
3a0f472d 505 lxc_fini(name, handler);
0ad19a3f 506 return err;
507
59eb99ba 508out_abort:
3a0f472d 509 lxc_abort(name, handler);
0ad19a3f 510 goto out;
511}