]>
Commit | Line | Data |
---|---|---|
0ad19a3f | 1 | /* |
2 | * lxc: linux Container library | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2007, 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Daniel Lezcano <dlezcano at fr.ibm.com> | |
8 | * | |
9 | * This library is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * This library is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with this library; if not, write to the Free Software | |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 | */ | |
23 | ||
ff218c25 | 24 | #include "../config.h" |
0ad19a3f | 25 | #include <stdio.h> |
26 | #undef _GNU_SOURCE | |
27 | #include <string.h> | |
28 | #include <stdlib.h> | |
29 | #include <dirent.h> | |
30 | #include <errno.h> | |
31 | #include <unistd.h> | |
32 | #include <signal.h> | |
b0a33c1e | 33 | #include <fcntl.h> |
34 | #include <termios.h> | |
50e98013 | 35 | #include <namespace.h> |
0ad19a3f | 36 | #include <sys/param.h> |
37 | #include <sys/file.h> | |
f4d507d5 | 38 | #include <sys/mount.h> |
b4f8660e | 39 | #include <sys/stat.h> |
0ad19a3f | 40 | #include <sys/types.h> |
0ad19a3f | 41 | #include <sys/prctl.h> |
ddceb1f9 | 42 | #include <sys/types.h> |
42ff343d | 43 | #include <sys/capability.h> |
0ad19a3f | 44 | #include <sys/wait.h> |
b0a33c1e | 45 | #include <sys/un.h> |
46 | #include <sys/poll.h> | |
ff218c25 | 47 | |
48 | #ifdef HAVE_SYS_SIGNALFD_H | |
8ca61733 | 49 | # include <sys/signalfd.h> |
ff218c25 | 50 | #else |
8ca61733 MJ |
51 | # ifndef __NR_signalfd4 |
52 | /* assume kernel headers are too old */ | |
53 | # if __i386__ | |
54 | # define __NR_signalfd4 327 | |
55 | # elif __x86_64__ | |
56 | # define __NR_signalfd4 289 | |
bfa38025 MH |
57 | # elif __powerpc__ |
58 | # define __NR_signalfd4 313 | |
47f38330 SH |
59 | # elif __s390x__ |
60 | # define __NR_signalfd4 322 | |
8ca61733 MJ |
61 | # endif |
62 | #endif | |
63 | ||
64 | # ifndef __NR_signalfd | |
65 | /* assume kernel headers are too old */ | |
66 | # if __i386__ | |
67 | # define __NR_signalfd 321 | |
68 | # elif __x86_64__ | |
69 | # define __NR_signalfd 282 | |
bfa38025 MH |
70 | # elif __powerpc__ |
71 | # define __NR_signalfd 305 | |
47f38330 SH |
72 | # elif __s390x__ |
73 | # define __NR_signalfd 316 | |
8ca61733 MJ |
74 | # endif |
75 | #endif | |
76 | ||
77 | int signalfd(int fd, const sigset_t *mask, int flags) | |
78 | { | |
79 | int retval; | |
80 | ||
81 | retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags); | |
82 | if (errno == ENOSYS && flags == 0) | |
83 | retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8); | |
84 | return retval; | |
85 | } | |
ff218c25 | 86 | #endif |
0ad19a3f | 87 | |
656994bb MH |
88 | #if !HAVE_DECL_PR_CAPBSET_DROP |
89 | #define PR_CAPBSET_DROP 24 | |
90 | #endif | |
91 | ||
63376d7d DL |
92 | #include "start.h" |
93 | #include "conf.h" | |
6a3111b8 | 94 | #include "cgroup.h" |
63376d7d | 95 | #include "log.h" |
e2bcd7db | 96 | #include "error.h" |
b0a33c1e | 97 | #include "af_unix.h" |
98 | #include "mainloop.h" | |
63376d7d DL |
99 | #include "utils.h" |
100 | #include "monitor.h" | |
96fa1ff0 | 101 | #include "commands.h" |
63376d7d | 102 | #include "console.h" |
36eb9bde CLG |
103 | |
104 | lxc_log_define(lxc_start, lxc); | |
105 | ||
0ad19a3f | 106 | LXC_TTY_HANDLER(SIGINT); |
107 | LXC_TTY_HANDLER(SIGQUIT); | |
108 | ||
b0a33c1e | 109 | static int setup_sigchld_fd(sigset_t *oldmask) |
110 | { | |
111 | sigset_t mask; | |
112 | int fd; | |
113 | ||
114 | if (sigprocmask(SIG_BLOCK, NULL, &mask)) { | |
36eb9bde | 115 | SYSERROR("failed to get mask signal"); |
b0a33c1e | 116 | return -1; |
117 | } | |
118 | ||
119 | if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) { | |
36eb9bde | 120 | SYSERROR("failed to set mask signal"); |
b0a33c1e | 121 | return -1; |
122 | } | |
123 | ||
124 | fd = signalfd(-1, &mask, 0); | |
125 | if (fd < 0) { | |
36eb9bde | 126 | SYSERROR("failed to create the signal fd"); |
b0a33c1e | 127 | return -1; |
128 | } | |
129 | ||
130 | if (fcntl(fd, F_SETFD, FD_CLOEXEC)) { | |
36eb9bde | 131 | SYSERROR("failed to set sigfd to close-on-exec"); |
b0a33c1e | 132 | close(fd); |
133 | return -1; | |
134 | } | |
135 | ||
1ac470c0 DL |
136 | DEBUG("sigchild handler set"); |
137 | ||
b0a33c1e | 138 | return fd; |
139 | } | |
140 | ||
/*
 * Mainloop callback registered on the SIGCHLD signal fd.
 *
 * None of the arguments are used: the callback only logs the event and
 * returns 1.
 * NOTE(review): presumably a positive return value tells the mainloop to
 * stop -- confirm against the mainloop implementation.
 */
static int sigchld_handler(int fd, void *data,
			   struct lxc_epoll_descr *descr)
{
	(void)fd;
	(void)data;
	(void)descr;

	DEBUG("child exited");
	return 1;
}
148 | ||
25c2aca5 | 149 | int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t state) |
66aeffc7 DL |
150 | { |
151 | handler->state = state; | |
152 | lxc_monitor_send_state(name, state); | |
153 | return 0; | |
154 | } | |
155 | ||
1bc5cc8c | 156 | int lxc_poll(const char *name, struct lxc_handler *handler) |
b0a33c1e | 157 | { |
ca5f7926 DL |
158 | int sigfd = handler->sigfd; |
159 | int pid = handler->pid; | |
b0a33c1e | 160 | struct lxc_epoll_descr descr; |
161 | ||
a9e61274 | 162 | if (lxc_mainloop_open(&descr)) { |
36eb9bde | 163 | ERROR("failed to create mainloop"); |
50c8bf05 | 164 | goto out_sigfd; |
b0a33c1e | 165 | } |
166 | ||
167 | if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) { | |
36eb9bde | 168 | ERROR("failed to add handler for the signal"); |
b0a33c1e | 169 | goto out_mainloop_open; |
170 | } | |
171 | ||
63376d7d DL |
172 | if (lxc_console_mainloop_add(&descr, handler)) { |
173 | ERROR("failed to add console handler to mainloop"); | |
174 | goto out_mainloop_open; | |
175 | } | |
176 | ||
724e753c | 177 | if (lxc_command_mainloop_add(name, &descr, handler)) |
96fa1ff0 | 178 | goto out_mainloop_open; |
b0a33c1e | 179 | |
c3e13372 | 180 | return lxc_mainloop(&descr); |
b0a33c1e | 181 | |
182 | out_mainloop_open: | |
183 | lxc_mainloop_close(&descr); | |
b0a33c1e | 184 | out_sigfd: |
185 | close(sigfd); | |
c3e13372 | 186 | return -1; |
b0a33c1e | 187 | } |
188 | ||
fae349da | 189 | struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf) |
59eb99ba | 190 | { |
3a0f472d DL |
191 | struct lxc_handler *handler; |
192 | ||
193 | handler = malloc(sizeof(*handler)); | |
194 | if (!handler) | |
195 | return NULL; | |
59eb99ba DL |
196 | |
197 | memset(handler, 0, sizeof(*handler)); | |
198 | ||
fae349da DL |
199 | handler->conf = conf; |
200 | ||
0ad19a3f | 201 | /* Begin the set the state to STARTING*/ |
25c2aca5 | 202 | if (lxc_set_state(name, handler, STARTING)) { |
59eb99ba | 203 | ERROR("failed to set state '%s'", lxc_state2str(STARTING)); |
884866b3 | 204 | goto out_free; |
0ad19a3f | 205 | } |
206 | ||
fae349da | 207 | if (lxc_create_tty(name, conf)) { |
36eb9bde | 208 | ERROR("failed to create the ttys"); |
59eb99ba | 209 | goto out_aborting; |
b0a33c1e | 210 | } |
211 | ||
1560f6c9 | 212 | if (lxc_create_console(conf)) { |
63376d7d DL |
213 | ERROR("failed to create console"); |
214 | goto out_delete_tty; | |
215 | } | |
216 | ||
b0a33c1e | 217 | /* the signal fd has to be created before forking otherwise |
218 | * if the child process exits before we setup the signal fd, | |
219 | * the event will be lost and the command will be stuck */ | |
59eb99ba DL |
220 | handler->sigfd = setup_sigchld_fd(&handler->oldmask); |
221 | if (handler->sigfd < 0) { | |
36eb9bde | 222 | ERROR("failed to set sigchild fd handler"); |
63376d7d | 223 | goto out_delete_console; |
b0a33c1e | 224 | } |
225 | ||
59eb99ba DL |
226 | /* Avoid signals from terminal */ |
227 | LXC_TTY_ADD_HANDLER(SIGINT); | |
228 | LXC_TTY_ADD_HANDLER(SIGQUIT); | |
229 | ||
c3e13372 | 230 | INFO("'%s' is initialized", name); |
3a0f472d | 231 | return handler; |
59eb99ba | 232 | |
63376d7d DL |
233 | out_delete_console: |
234 | lxc_delete_console(&conf->console); | |
59eb99ba | 235 | out_delete_tty: |
fae349da | 236 | lxc_delete_tty(&conf->tty_info); |
59eb99ba | 237 | out_aborting: |
25c2aca5 | 238 | lxc_set_state(name, handler, ABORTING); |
3a0f472d DL |
239 | out_free: |
240 | free(handler); | |
c3e13372 | 241 | return NULL; |
59eb99ba DL |
242 | } |
243 | ||
1bc5cc8c | 244 | void lxc_fini(const char *name, struct lxc_handler *handler) |
59eb99ba DL |
245 | { |
246 | /* The STOPPING state is there for future cleanup code | |
247 | * which can take awhile | |
248 | */ | |
25c2aca5 MN |
249 | lxc_set_state(name, handler, STOPPING); |
250 | lxc_set_state(name, handler, STOPPED); | |
59eb99ba DL |
251 | lxc_unlink_nsgroup(name); |
252 | ||
63376d7d | 253 | lxc_delete_console(&handler->conf->console); |
b2431939 GK |
254 | lxc_delete_tty(&handler->conf->tty_info); |
255 | free(handler); | |
59eb99ba DL |
256 | |
257 | LXC_TTY_DEL_HANDLER(SIGQUIT); | |
258 | LXC_TTY_DEL_HANDLER(SIGINT); | |
259 | } | |
260 | ||
1bc5cc8c | 261 | void lxc_abort(const char *name, struct lxc_handler *handler) |
59eb99ba | 262 | { |
25c2aca5 | 263 | lxc_set_state(name, handler, ABORTING); |
59eb99ba DL |
264 | kill(handler->pid, SIGKILL); |
265 | } | |
266 | ||
50e98013 DL |
/* Arguments handed to do_start() through the clone call */
struct start_arg {
	const char *name;		/* container name */
	char *const *argv;		/* command to exec, argv[0] is the program */
	struct lxc_handler *handler;	/* handler of the container being started */
	int *sv;			/* synchronization socketpair shared with the parent */
};
273 | ||
274 | static int do_start(void *arg) | |
275 | { | |
276 | struct start_arg *start_arg = arg; | |
277 | struct lxc_handler *handler = start_arg->handler; | |
278 | const char *name = start_arg->name; | |
279 | char *const *argv = start_arg->argv; | |
280 | int *sv = start_arg->sv; | |
281 | int err = -1, sync; | |
282 | ||
283 | if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) { | |
284 | SYSERROR("failed to set sigprocmask"); | |
9d7f9e52 | 285 | return -1; |
50e98013 DL |
286 | } |
287 | ||
288 | close(sv[1]); | |
289 | ||
290 | /* Be sure we don't inherit this after the exec */ | |
291 | fcntl(sv[0], F_SETFD, FD_CLOEXEC); | |
292 | ||
293 | /* Tell our father he can begin to configure the container */ | |
294 | if (write(sv[0], &sync, sizeof(sync)) < 0) { | |
295 | SYSERROR("failed to write socket"); | |
9d7f9e52 | 296 | return -1; |
50e98013 DL |
297 | } |
298 | ||
299 | /* Wait for the father to finish the configuration */ | |
300 | if (read(sv[0], &sync, sizeof(sync)) < 0) { | |
301 | SYSERROR("failed to read socket"); | |
9d7f9e52 | 302 | return -1; |
50e98013 DL |
303 | } |
304 | ||
305 | /* Setup the container, ip, names, utsname, ... */ | |
fae349da | 306 | if (lxc_setup(name, handler->conf)) { |
50e98013 DL |
307 | ERROR("failed to setup the container"); |
308 | goto out_warn_father; | |
309 | } | |
310 | ||
311 | if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) { | |
312 | SYSERROR("failed to remove CAP_SYS_BOOT capability"); | |
9d7f9e52 | 313 | return -1; |
50e98013 DL |
314 | } |
315 | ||
6a6ad7af DL |
316 | if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) { |
317 | SYSERROR("failed to set pdeath signal"); | |
9d7f9e52 | 318 | return -1; |
6a6ad7af DL |
319 | } |
320 | ||
1ac470c0 DL |
321 | NOTICE("exec'ing '%s'", argv[0]); |
322 | ||
50e98013 DL |
323 | execvp(argv[0], argv); |
324 | SYSERROR("failed to exec %s", argv[0]); | |
325 | ||
326 | out_warn_father: | |
327 | /* If the exec fails, tell that to our father */ | |
328 | if (write(sv[0], &err, sizeof(err)) < 0) | |
329 | SYSERROR("failed to write the socket"); | |
50e98013 DL |
330 | return -1; |
331 | } | |
332 | ||
9618063c | 333 | int lxc_spawn(const char *name, struct lxc_handler *handler, char *const argv[]) |
59eb99ba DL |
334 | { |
335 | int sv[2]; | |
336 | int clone_flags; | |
337 | int err = -1, sync; | |
338 | ||
50e98013 DL |
339 | struct start_arg start_arg = { |
340 | .name = name, | |
341 | .argv = argv, | |
342 | .handler = handler, | |
343 | .sv = sv, | |
344 | }; | |
345 | ||
0ad19a3f | 346 | /* Synchro socketpair */ |
347 | if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) { | |
36eb9bde | 348 | SYSERROR("failed to create communication socketpair"); |
9d7f9e52 | 349 | return -1; |
0ad19a3f | 350 | } |
351 | ||
1ea6db29 | 352 | clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS; |
fae349da | 353 | if (!lxc_list_empty(&handler->conf->network)) { |
82d5ae15 | 354 | |
0ad19a3f | 355 | clone_flags |= CLONE_NEWNET; |
356 | ||
82d5ae15 DL |
357 | /* that should be done before the clone because we will |
358 | * fill the netdev index and use them in the child | |
359 | */ | |
fae349da | 360 | if (lxc_create_network(&handler->conf->network)) { |
82d5ae15 DL |
361 | ERROR("failed to create the network"); |
362 | goto out_close; | |
363 | } | |
364 | } | |
365 | ||
0ad19a3f | 366 | /* Create a process in a new set of namespaces */ |
50e98013 | 367 | handler->pid = lxc_clone(do_start, &start_arg, clone_flags); |
59eb99ba | 368 | if (handler->pid < 0) { |
36eb9bde | 369 | SYSERROR("failed to fork into a new namespace"); |
7fef7a06 | 370 | goto out_delete_net; |
0ad19a3f | 371 | } |
372 | ||
0ad19a3f | 373 | close(sv[0]); |
374 | ||
375 | /* Wait for the child to be ready */ | |
376 | if (read(sv[1], &sync, sizeof(sync)) < 0) { | |
36eb9bde | 377 | SYSERROR("failed to read the socket"); |
7fef7a06 | 378 | goto out_delete_net; |
0ad19a3f | 379 | } |
380 | ||
9f44c578 | 381 | if (lxc_rename_nsgroup(name, handler)) |
7fef7a06 | 382 | goto out_delete_net; |
218d4250 | 383 | |
0ad19a3f | 384 | /* Create the network configuration */ |
82d5ae15 | 385 | if (clone_flags & CLONE_NEWNET) { |
fae349da | 386 | if (lxc_assign_network(&handler->conf->network, handler->pid)) { |
82d5ae15 | 387 | ERROR("failed to create the configured network"); |
7fef7a06 | 388 | goto out_delete_net; |
82d5ae15 | 389 | } |
0ad19a3f | 390 | } |
391 | ||
392 | /* Tell the child to continue its initialization */ | |
393 | if (write(sv[1], &sync, sizeof(sync)) < 0) { | |
36eb9bde | 394 | SYSERROR("failed to write the socket"); |
59eb99ba | 395 | goto out_abort; |
0ad19a3f | 396 | } |
397 | ||
398 | /* Wait for the child to exec or returning an error */ | |
e043236e | 399 | if (read(sv[1], &sync, sizeof(sync)) < 0) { |
36eb9bde | 400 | ERROR("failed to read the socket"); |
59eb99ba | 401 | goto out_abort; |
0ad19a3f | 402 | } |
403 | ||
25c2aca5 | 404 | if (lxc_set_state(name, handler, RUNNING)) { |
59eb99ba DL |
405 | ERROR("failed to set state to %s", |
406 | lxc_state2str(RUNNING)); | |
407 | goto out_abort; | |
3f21c114 | 408 | } |
22ebac19 | 409 | |
59eb99ba | 410 | err = 0; |
22ebac19 | 411 | |
1ac470c0 DL |
412 | NOTICE("'%s' started with pid '%d'", argv[0], handler->pid); |
413 | ||
59eb99ba DL |
414 | out_close: |
415 | close(sv[0]); | |
416 | close(sv[1]); | |
59eb99ba | 417 | return err; |
0ad19a3f | 418 | |
7fef7a06 DL |
419 | out_delete_net: |
420 | if (clone_flags & CLONE_NEWNET) | |
421 | lxc_delete_network(&handler->conf->network); | |
59eb99ba DL |
422 | out_abort: |
423 | lxc_abort(name, handler); | |
b79fcd86 GK |
424 | close(sv[1]); |
425 | return -1; | |
59eb99ba | 426 | } |
0ad19a3f | 427 | |
fae349da | 428 | int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf) |
59eb99ba | 429 | { |
3a0f472d | 430 | struct lxc_handler *handler; |
e043236e | 431 | int err = -1; |
59eb99ba DL |
432 | int status; |
433 | ||
fae349da | 434 | handler = lxc_init(name, conf); |
3a0f472d | 435 | if (!handler) { |
59eb99ba | 436 | ERROR("failed to initialize the container"); |
66aeffc7 | 437 | return -1; |
0ad19a3f | 438 | } |
439 | ||
3a0f472d | 440 | err = lxc_spawn(name, handler, argv); |
59eb99ba DL |
441 | if (err) { |
442 | ERROR("failed to spawn '%s'", argv[0]); | |
9d7f9e52 | 443 | goto out_fini; |
0ad19a3f | 444 | } |
445 | ||
af795875 | 446 | err = lxc_close_all_inherited_fd(); |
d983b93c MN |
447 | if (err) { |
448 | ERROR("unable to close inherited fds"); | |
449 | goto out_abort; | |
450 | } | |
451 | ||
3a0f472d | 452 | err = lxc_poll(name, handler); |
e043236e | 453 | if (err) { |
59eb99ba DL |
454 | ERROR("mainloop exited with an error"); |
455 | goto out_abort; | |
456 | } | |
0ad19a3f | 457 | |
3a0f472d | 458 | while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR) |
1bc5cc8c | 459 | continue; |
e043236e | 460 | |
3a0f472d | 461 | err = lxc_error_set_and_log(handler->pid, status); |
9d7f9e52 | 462 | out_fini: |
3a0f472d | 463 | lxc_fini(name, handler); |
0ad19a3f | 464 | return err; |
465 | ||
59eb99ba | 466 | out_abort: |
3a0f472d | 467 | lxc_abort(name, handler); |
9d7f9e52 | 468 | goto out_fini; |
0ad19a3f | 469 | } |