]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/start.c
add console.h to dist file
[mirror_lxc.git] / src / lxc / start.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24 #include "../config.h"
25 #include <stdio.h>
26 #undef _GNU_SOURCE
27 #include <string.h>
28 #include <stdlib.h>
29 #include <dirent.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <signal.h>
33 #include <fcntl.h>
34 #include <termios.h>
35 #include <namespace.h>
36 #include <sys/param.h>
37 #include <sys/file.h>
38 #include <sys/mount.h>
39 #include <sys/stat.h>
40 #include <sys/types.h>
41 #include <sys/prctl.h>
42 #include <sys/types.h>
43 #include <sys/capability.h>
44 #include <sys/wait.h>
45 #include <sys/un.h>
46 #include <sys/poll.h>
47
48 #ifdef HAVE_SYS_SIGNALFD_H
49 # include <sys/signalfd.h>
50 #else
51 # ifndef __NR_signalfd4
52 /* assume kernel headers are too old */
53 # if __i386__
54 # define __NR_signalfd4 327
55 # elif __x86_64__
56 # define __NR_signalfd4 289
57 # elif __powerpc__
58 # define __NR_signalfd4 313
59 # elif __s390x__
60 # define __NR_signalfd4 322
61 # endif
62 #endif
63
64 # ifndef __NR_signalfd
65 /* assume kernel headers are too old */
66 # if __i386__
67 # define __NR_signalfd 321
68 # elif __x86_64__
69 # define __NR_signalfd 282
70 # elif __powerpc__
71 # define __NR_signalfd 305
72 # elif __s390x__
73 # define __NR_signalfd 316
74 # endif
75 #endif
76
77 int signalfd(int fd, const sigset_t *mask, int flags)
78 {
79 int retval;
80
81 retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags);
82 if (errno == ENOSYS && flags == 0)
83 retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8);
84 return retval;
85 }
86 #endif
87
88 #if !HAVE_DECL_PR_CAPBSET_DROP
89 #define PR_CAPBSET_DROP 24
90 #endif
91
92 #include "start.h"
93 #include "conf.h"
94 #include "cgroup.h"
95 #include "log.h"
96 #include "error.h"
97 #include "af_unix.h"
98 #include "mainloop.h"
99 #include "utils.h"
100 #include "monitor.h"
101 #include "commands.h"
102 #include "console.h"
103
104 lxc_log_define(lxc_start, lxc);
105
106 LXC_TTY_HANDLER(SIGINT);
107 LXC_TTY_HANDLER(SIGQUIT);
108
109 static int setup_sigchld_fd(sigset_t *oldmask)
110 {
111 sigset_t mask;
112 int fd;
113
114 if (sigprocmask(SIG_BLOCK, NULL, &mask)) {
115 SYSERROR("failed to get mask signal");
116 return -1;
117 }
118
119 if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) {
120 SYSERROR("failed to set mask signal");
121 return -1;
122 }
123
124 fd = signalfd(-1, &mask, 0);
125 if (fd < 0) {
126 SYSERROR("failed to create the signal fd");
127 return -1;
128 }
129
130 if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
131 SYSERROR("failed to set sigfd to close-on-exec");
132 close(fd);
133 return -1;
134 }
135
136 DEBUG("sigchild handler set");
137
138 return fd;
139 }
140
/*
 * Mainloop callback registered on the SIGCHLD signal descriptor.
 *
 * None of the arguments is examined: the event is logged and 1 is
 * returned.  (Presumably a positive return value ends the mainloop;
 * confirm against mainloop.c.)
 */
static int sigchld_handler(int fd, void *data,
			   struct lxc_epoll_descr *descr)
{
	int ret = 1;

	DEBUG("child exited");

	return ret;
}
148
149 int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state)
150 {
151 handler->state = state;
152 lxc_monitor_send_state(name, state);
153 return 0;
154 }
155
156 int lxc_poll(const char *name, struct lxc_handler *handler)
157 {
158 int sigfd = handler->sigfd;
159 int pid = handler->pid;
160 struct lxc_epoll_descr descr;
161
162 if (lxc_mainloop_open(&descr)) {
163 ERROR("failed to create mainloop");
164 goto out_sigfd;
165 }
166
167 if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) {
168 ERROR("failed to add handler for the signal");
169 goto out_mainloop_open;
170 }
171
172 if (lxc_console_mainloop_add(&descr, handler)) {
173 ERROR("failed to add console handler to mainloop");
174 goto out_mainloop_open;
175 }
176
177 if (lxc_command_mainloop_add(name, &descr, handler))
178 goto out_mainloop_open;
179
180 return lxc_mainloop(&descr);
181
182 out_mainloop_open:
183 lxc_mainloop_close(&descr);
184 out_sigfd:
185 close(sigfd);
186 return -1;
187 }
188
189 struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf)
190 {
191 struct lxc_handler *handler;
192
193 handler = malloc(sizeof(*handler));
194 if (!handler)
195 return NULL;
196
197 memset(handler, 0, sizeof(*handler));
198
199 handler->conf = conf;
200
201 /* Begin the set the state to STARTING*/
202 if (lxc_set_state(name, handler, STARTING)) {
203 ERROR("failed to set state '%s'", lxc_state2str(STARTING));
204 goto out_free;
205 }
206
207 if (lxc_create_tty(name, conf)) {
208 ERROR("failed to create the ttys");
209 goto out_aborting;
210 }
211
212 if (lxc_create_console(conf)) {
213 ERROR("failed to create console");
214 goto out_delete_tty;
215 }
216
217 /* the signal fd has to be created before forking otherwise
218 * if the child process exits before we setup the signal fd,
219 * the event will be lost and the command will be stuck */
220 handler->sigfd = setup_sigchld_fd(&handler->oldmask);
221 if (handler->sigfd < 0) {
222 ERROR("failed to set sigchild fd handler");
223 goto out_delete_console;
224 }
225
226 /* Avoid signals from terminal */
227 LXC_TTY_ADD_HANDLER(SIGINT);
228 LXC_TTY_ADD_HANDLER(SIGQUIT);
229
230 INFO("'%s' is initialized", name);
231 return handler;
232
233 out_delete_console:
234 lxc_delete_console(&conf->console);
235 out_delete_tty:
236 lxc_delete_tty(&conf->tty_info);
237 out_aborting:
238 lxc_set_state(name, handler, ABORTING);
239 out_free:
240 free(handler);
241 return NULL;
242 }
243
244 void lxc_fini(const char *name, struct lxc_handler *handler)
245 {
246 /* The STOPPING state is there for future cleanup code
247 * which can take awhile
248 */
249 lxc_set_state(name, handler, STOPPING);
250 lxc_set_state(name, handler, STOPPED);
251 lxc_unlink_nsgroup(name);
252
253 lxc_delete_console(&handler->conf->console);
254 lxc_delete_tty(&handler->conf->tty_info);
255 free(handler);
256
257 LXC_TTY_DEL_HANDLER(SIGQUIT);
258 LXC_TTY_DEL_HANDLER(SIGINT);
259 }
260
261 void lxc_abort(const char *name, struct lxc_handler *handler)
262 {
263 lxc_set_state(name, handler, ABORTING);
264 kill(handler->pid, SIGKILL);
265 }
266
/*
 * Arguments handed by lxc_spawn() to do_start() through lxc_clone().
 */
struct start_arg {
	const char *name;		/* container name */
	char *const *argv;		/* command to exec; argv[0] is the program */
	struct lxc_handler *handler;	/* handler of the container being started */
	int *sv;			/* synchronization socketpair: the child
					 * uses sv[0] and closes sv[1] */
};
273
274 static int do_start(void *arg)
275 {
276 struct start_arg *start_arg = arg;
277 struct lxc_handler *handler = start_arg->handler;
278 const char *name = start_arg->name;
279 char *const *argv = start_arg->argv;
280 int *sv = start_arg->sv;
281 int err = -1, sync;
282
283 if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
284 SYSERROR("failed to set sigprocmask");
285 return -1;
286 }
287
288 close(sv[1]);
289
290 /* Be sure we don't inherit this after the exec */
291 fcntl(sv[0], F_SETFD, FD_CLOEXEC);
292
293 /* Tell our father he can begin to configure the container */
294 if (write(sv[0], &sync, sizeof(sync)) < 0) {
295 SYSERROR("failed to write socket");
296 return -1;
297 }
298
299 /* Wait for the father to finish the configuration */
300 if (read(sv[0], &sync, sizeof(sync)) < 0) {
301 SYSERROR("failed to read socket");
302 return -1;
303 }
304
305 /* Setup the container, ip, names, utsname, ... */
306 if (lxc_setup(name, handler->conf)) {
307 ERROR("failed to setup the container");
308 goto out_warn_father;
309 }
310
311 if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
312 SYSERROR("failed to remove CAP_SYS_BOOT capability");
313 return -1;
314 }
315
316 if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) {
317 SYSERROR("failed to set pdeath signal");
318 return -1;
319 }
320
321 NOTICE("exec'ing '%s'", argv[0]);
322
323 execvp(argv[0], argv);
324 SYSERROR("failed to exec %s", argv[0]);
325
326 out_warn_father:
327 /* If the exec fails, tell that to our father */
328 if (write(sv[0], &err, sizeof(err)) < 0)
329 SYSERROR("failed to write the socket");
330 return -1;
331 }
332
333 int lxc_spawn(const char *name, struct lxc_handler *handler, char *const argv[])
334 {
335 int sv[2];
336 int clone_flags;
337 int err = -1, sync;
338
339 struct start_arg start_arg = {
340 .name = name,
341 .argv = argv,
342 .handler = handler,
343 .sv = sv,
344 };
345
346 /* Synchro socketpair */
347 if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
348 SYSERROR("failed to create communication socketpair");
349 return -1;
350 }
351
352 clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
353 if (!lxc_list_empty(&handler->conf->network)) {
354
355 clone_flags |= CLONE_NEWNET;
356
357 /* that should be done before the clone because we will
358 * fill the netdev index and use them in the child
359 */
360 if (lxc_create_network(&handler->conf->network)) {
361 ERROR("failed to create the network");
362 goto out_close;
363 }
364 }
365
366 /* Create a process in a new set of namespaces */
367 handler->pid = lxc_clone(do_start, &start_arg, clone_flags);
368 if (handler->pid < 0) {
369 SYSERROR("failed to fork into a new namespace");
370 goto out_delete_net;
371 }
372
373 close(sv[0]);
374
375 /* Wait for the child to be ready */
376 if (read(sv[1], &sync, sizeof(sync)) < 0) {
377 SYSERROR("failed to read the socket");
378 goto out_delete_net;
379 }
380
381 if (lxc_rename_nsgroup(name, handler))
382 goto out_delete_net;
383
384 /* Create the network configuration */
385 if (clone_flags & CLONE_NEWNET) {
386 if (lxc_assign_network(&handler->conf->network, handler->pid)) {
387 ERROR("failed to create the configured network");
388 goto out_delete_net;
389 }
390 }
391
392 /* Tell the child to continue its initialization */
393 if (write(sv[1], &sync, sizeof(sync)) < 0) {
394 SYSERROR("failed to write the socket");
395 goto out_abort;
396 }
397
398 /* Wait for the child to exec or returning an error */
399 if (read(sv[1], &sync, sizeof(sync)) < 0) {
400 ERROR("failed to read the socket");
401 goto out_abort;
402 }
403
404 if (lxc_set_state(name, handler, RUNNING)) {
405 ERROR("failed to set state to %s",
406 lxc_state2str(RUNNING));
407 goto out_abort;
408 }
409
410 err = 0;
411
412 NOTICE("'%s' started with pid '%d'", argv[0], handler->pid);
413
414 out_close:
415 close(sv[0]);
416 close(sv[1]);
417 return err;
418
419 out_delete_net:
420 if (clone_flags & CLONE_NEWNET)
421 lxc_delete_network(&handler->conf->network);
422 out_abort:
423 lxc_abort(name, handler);
424 close(sv[1]);
425 return -1;
426 }
427
428 int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf)
429 {
430 struct lxc_handler *handler;
431 int err = -1;
432 int status;
433
434 handler = lxc_init(name, conf);
435 if (!handler) {
436 ERROR("failed to initialize the container");
437 return -1;
438 }
439
440 err = lxc_spawn(name, handler, argv);
441 if (err) {
442 ERROR("failed to spawn '%s'", argv[0]);
443 goto out_fini;
444 }
445
446 err = lxc_close_all_inherited_fd();
447 if (err) {
448 ERROR("unable to close inherited fds");
449 goto out_abort;
450 }
451
452 err = lxc_poll(name, handler);
453 if (err) {
454 ERROR("mainloop exited with an error");
455 goto out_abort;
456 }
457
458 while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR)
459 continue;
460
461 err = lxc_error_set_and_log(handler->pid, status);
462 out_fini:
463 lxc_fini(name, handler);
464 return err;
465
466 out_abort:
467 lxc_abort(name, handler);
468 goto out_fini;
469 }