]>
Commit | Line | Data |
---|---|---|
0ad19a3f | 1 | /* |
2 | * lxc: linux Container library | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2007, 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Daniel Lezcano <dlezcano at fr.ibm.com> | |
8 | * | |
9 | * This library is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * This library is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with this library; if not, write to the Free Software | |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
22 | */ | |
23 | ||
ff218c25 | 24 | #include "../config.h" |
0ad19a3f | 25 | #include <stdio.h> |
26 | #undef _GNU_SOURCE | |
27 | #include <string.h> | |
28 | #include <stdlib.h> | |
29 | #include <dirent.h> | |
30 | #include <errno.h> | |
31 | #include <unistd.h> | |
32 | #include <signal.h> | |
b0a33c1e | 33 | #include <fcntl.h> |
34 | #include <termios.h> | |
50e98013 | 35 | #include <namespace.h> |
0ad19a3f | 36 | #include <sys/param.h> |
37 | #include <sys/file.h> | |
f4d507d5 | 38 | #include <sys/mount.h> |
0ad19a3f | 39 | #include <sys/types.h> |
0ad19a3f | 40 | #include <sys/prctl.h> |
ddceb1f9 | 41 | #include <sys/types.h> |
42ff343d | 42 | #include <sys/capability.h> |
0ad19a3f | 43 | #include <sys/wait.h> |
b0a33c1e | 44 | #include <sys/un.h> |
45 | #include <sys/poll.h> | |
ff218c25 | 46 | |
88d5514d DL |
47 | #include <lxc/lxc.h> |
48 | #include <lxc/confile.h> | |
49 | ||
ff218c25 | 50 | #ifdef HAVE_SYS_SIGNALFD_H |
8ca61733 | 51 | # include <sys/signalfd.h> |
ff218c25 | 52 | #else |
8ca61733 MJ |
53 | # ifndef __NR_signalfd4 |
54 | /* assume kernel headers are too old */ | |
55 | # if __i386__ | |
56 | # define __NR_signalfd4 327 | |
57 | # elif __x86_64__ | |
58 | # define __NR_signalfd4 289 | |
bfa38025 MH |
59 | # elif __powerpc__ |
60 | # define __NR_signalfd4 313 | |
47f38330 SH |
61 | # elif __s390x__ |
62 | # define __NR_signalfd4 322 | |
8ca61733 MJ |
63 | # endif |
64 | #endif | |
65 | ||
66 | # ifndef __NR_signalfd | |
67 | /* assume kernel headers are too old */ | |
68 | # if __i386__ | |
69 | # define __NR_signalfd 321 | |
70 | # elif __x86_64__ | |
71 | # define __NR_signalfd 282 | |
bfa38025 MH |
72 | # elif __powerpc__ |
73 | # define __NR_signalfd 305 | |
47f38330 SH |
74 | # elif __s390x__ |
75 | # define __NR_signalfd 316 | |
8ca61733 MJ |
76 | # endif |
77 | #endif | |
78 | ||
79 | int signalfd(int fd, const sigset_t *mask, int flags) | |
80 | { | |
81 | int retval; | |
82 | ||
83 | retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags); | |
84 | if (errno == ENOSYS && flags == 0) | |
85 | retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8); | |
86 | return retval; | |
87 | } | |
ff218c25 | 88 | #endif |
0ad19a3f | 89 | |
656994bb MH |
90 | #if !HAVE_DECL_PR_CAPBSET_DROP |
91 | #define PR_CAPBSET_DROP 24 | |
92 | #endif | |
93 | ||
e2bcd7db | 94 | #include "error.h" |
b0a33c1e | 95 | #include "af_unix.h" |
96 | #include "mainloop.h" | |
96fa1ff0 | 97 | #include "commands.h" |
e2bcd7db | 98 | |
b113348e | 99 | #include <lxc/lxc.h> |
36eb9bde CLG |
100 | #include <lxc/log.h> |
101 | ||
102 | lxc_log_define(lxc_start, lxc); | |
103 | ||
0ad19a3f | 104 | LXC_TTY_HANDLER(SIGINT); |
105 | LXC_TTY_HANDLER(SIGQUIT); | |
106 | ||
b0a33c1e | 107 | static int setup_sigchld_fd(sigset_t *oldmask) |
108 | { | |
109 | sigset_t mask; | |
110 | int fd; | |
111 | ||
112 | if (sigprocmask(SIG_BLOCK, NULL, &mask)) { | |
36eb9bde | 113 | SYSERROR("failed to get mask signal"); |
b0a33c1e | 114 | return -1; |
115 | } | |
116 | ||
117 | if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) { | |
36eb9bde | 118 | SYSERROR("failed to set mask signal"); |
b0a33c1e | 119 | return -1; |
120 | } | |
121 | ||
122 | fd = signalfd(-1, &mask, 0); | |
123 | if (fd < 0) { | |
36eb9bde | 124 | SYSERROR("failed to create the signal fd"); |
b0a33c1e | 125 | return -1; |
126 | } | |
127 | ||
128 | if (fcntl(fd, F_SETFD, FD_CLOEXEC)) { | |
36eb9bde | 129 | SYSERROR("failed to set sigfd to close-on-exec"); |
b0a33c1e | 130 | close(fd); |
131 | return -1; | |
132 | } | |
133 | ||
1ac470c0 DL |
134 | DEBUG("sigchild handler set"); |
135 | ||
b0a33c1e | 136 | return fd; |
137 | } | |
138 | ||
/*
 * sigchld_handler: mainloop callback registered on the signal fd
 *
 * @fd, @data, @descr : callback arguments, none of them is used
 *
 * Only logs the event and returns 1.
 * NOTE(review): presumably a positive return asks the mainloop to stop;
 * confirm against the lxc_mainloop implementation.
 */
static int sigchld_handler(int fd, void *data,
			   struct lxc_epoll_descr *descr)
{
	(void)fd;
	(void)data;
	(void)descr;

	DEBUG("child exited");

	return 1;
}
146 | ||
66aeffc7 DL |
147 | static int set_state(const char *name, struct lxc_handler *handler, lxc_state_t state) |
148 | { | |
149 | handler->state = state; | |
150 | lxc_monitor_send_state(name, state); | |
151 | return 0; | |
152 | } | |
153 | ||
1bc5cc8c | 154 | int lxc_poll(const char *name, struct lxc_handler *handler) |
b0a33c1e | 155 | { |
ca5f7926 DL |
156 | int sigfd = handler->sigfd; |
157 | int pid = handler->pid; | |
a9e61274 | 158 | int ret = -1; |
b0a33c1e | 159 | struct lxc_epoll_descr descr; |
160 | ||
a9e61274 | 161 | if (lxc_mainloop_open(&descr)) { |
36eb9bde | 162 | ERROR("failed to create mainloop"); |
50c8bf05 | 163 | goto out_sigfd; |
b0a33c1e | 164 | } |
165 | ||
166 | if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) { | |
36eb9bde | 167 | ERROR("failed to add handler for the signal"); |
b0a33c1e | 168 | goto out_mainloop_open; |
169 | } | |
170 | ||
724e753c | 171 | if (lxc_command_mainloop_add(name, &descr, handler)) |
96fa1ff0 | 172 | goto out_mainloop_open; |
b0a33c1e | 173 | |
174 | ret = lxc_mainloop(&descr); | |
175 | ||
176 | out: | |
177 | return ret; | |
178 | ||
179 | out_mainloop_open: | |
180 | lxc_mainloop_close(&descr); | |
b0a33c1e | 181 | out_sigfd: |
182 | close(sigfd); | |
183 | goto out; | |
184 | } | |
185 | ||
59eb99ba DL |
186 | static void remove_init_pid(const char *name, pid_t pid) |
187 | { | |
188 | char init[MAXPATHLEN]; | |
189 | ||
190 | snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name); | |
191 | unlink(init); | |
192 | } | |
193 | ||
/*
 * fdname: retrieve the name a file descriptor points to
 *
 * @fd   : the file descriptor to look up
 * @name : buffer receiving the NUL-terminated target of /proc/self/fd/<fd>
 * @size : size of the buffer in bytes, terminator included
 *
 * A target longer than size - 1 bytes is truncated to fit.
 *
 * Returns 0 on success, -1 if the buffer is unusable or if the link
 * could not be read.
 */
static int fdname(int fd, char *name, size_t size)
{
	char path[MAXPATHLEN];
	ssize_t len;

	/* at least the terminating NUL must fit */
	if (!name || size == 0)
		return -1;

	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);

	/* readlink() does not terminate the string: keep one byte for it */
	len = readlink(path, name, size - 1);
	if (len <= 0)
		return -1;

	/* terminate the caller's buffer, not the /proc path */
	name[len] = '\0';

	return 0;
}
207 | ||
/*
 * console_init: find out what to use as the container console
 *
 * @console : buffer receiving the console name
 * @size    : size of the buffer
 *
 * The first of the descriptors 0, 1 and 2 which is a tty gives its tty
 * name. If none is a tty and descriptor 0 is a regular file, a character
 * device, a fifo or a link, the name is resolved with fdname(). Otherwise
 * the console is set to an empty string.
 *
 * Returns 0 on success, -1 if the name could not be retrieved.
 */
static int console_init(char *console, size_t size)
{
	struct stat st;
	int fd;

	for (fd = 0; fd < 3; fd++) {
		if (isatty(fd)) {
			if (ttyname_r(fd, console, size)) {
				SYSERROR("failed to retrieve tty name");
				return -1;
			}

			return 0;
		}
	}

	if (fstat(0, &st) == 0 &&
	    (S_ISREG(st.st_mode) || S_ISCHR(st.st_mode) ||
	     S_ISFIFO(st.st_mode) || S_ISLNK(st.st_mode)))
		return fdname(0, console, size);

	/* nothing usable was found: no console */
	console[0] = '\0';

	DEBUG("console initialized");

	return 0;
}
237 | ||
3a0f472d | 238 | struct lxc_handler *lxc_init(const char *name) |
59eb99ba | 239 | { |
3a0f472d | 240 | struct lxc_handler *handler; |
88d5514d | 241 | char path[MAXPATHLEN]; |
3a0f472d DL |
242 | |
243 | handler = malloc(sizeof(*handler)); | |
244 | if (!handler) | |
245 | return NULL; | |
59eb99ba DL |
246 | |
247 | memset(handler, 0, sizeof(*handler)); | |
248 | ||
249 | handler->lock = lxc_get_lock(name); | |
250 | if (handler->lock < 0) | |
3a0f472d | 251 | goto out_free; |
0ad19a3f | 252 | |
0ad19a3f | 253 | /* Begin the set the state to STARTING*/ |
66aeffc7 | 254 | if (set_state(name, handler, STARTING)) { |
59eb99ba DL |
255 | ERROR("failed to set state '%s'", lxc_state2str(STARTING)); |
256 | goto out_put_lock; | |
0ad19a3f | 257 | } |
258 | ||
571e6ec8 | 259 | if (lxc_conf_init(&handler->conf)) { |
88d5514d DL |
260 | ERROR("failed to initialize the configuration"); |
261 | goto out_aborting; | |
262 | } | |
263 | ||
264 | snprintf(path, sizeof(path), LXCPATH "/%s/config", name); | |
265 | ||
266 | if (!access(path, F_OK)) { | |
267 | ||
571e6ec8 | 268 | if (lxc_config_read(path, &handler->conf)) { |
88d5514d DL |
269 | ERROR("failed to read the configuration file"); |
270 | goto out_aborting; | |
271 | } | |
272 | } | |
273 | ||
571e6ec8 | 274 | if (console_init(handler->conf.console, sizeof(handler->conf.console))) { |
b8f57738 DL |
275 | ERROR("failed to initialize the console"); |
276 | goto out_aborting; | |
277 | } | |
939229eb | 278 | |
571e6ec8 | 279 | if (lxc_create_tty(name, &handler->conf.tty_info)) { |
36eb9bde | 280 | ERROR("failed to create the ttys"); |
59eb99ba | 281 | goto out_aborting; |
b0a33c1e | 282 | } |
283 | ||
284 | /* the signal fd has to be created before forking otherwise | |
285 | * if the child process exits before we setup the signal fd, | |
286 | * the event will be lost and the command will be stuck */ | |
59eb99ba DL |
287 | handler->sigfd = setup_sigchld_fd(&handler->oldmask); |
288 | if (handler->sigfd < 0) { | |
36eb9bde | 289 | ERROR("failed to set sigchild fd handler"); |
59eb99ba | 290 | goto out_delete_tty; |
b0a33c1e | 291 | } |
292 | ||
59eb99ba DL |
293 | /* Avoid signals from terminal */ |
294 | LXC_TTY_ADD_HANDLER(SIGINT); | |
295 | LXC_TTY_ADD_HANDLER(SIGQUIT); | |
296 | ||
59eb99ba | 297 | out: |
1ac470c0 DL |
298 | if (handler) |
299 | INFO("'%s' is initialized", name); | |
300 | ||
3a0f472d | 301 | return handler; |
59eb99ba DL |
302 | |
303 | out_delete_tty: | |
571e6ec8 | 304 | lxc_delete_tty(&handler->conf.tty_info); |
59eb99ba | 305 | out_aborting: |
66aeffc7 | 306 | set_state(name, handler, ABORTING); |
59eb99ba DL |
307 | out_put_lock: |
308 | lxc_put_lock(handler->lock); | |
3a0f472d DL |
309 | out_free: |
310 | free(handler); | |
311 | handler = NULL; | |
59eb99ba DL |
312 | goto out; |
313 | } | |
314 | ||
1bc5cc8c | 315 | void lxc_fini(const char *name, struct lxc_handler *handler) |
59eb99ba DL |
316 | { |
317 | /* The STOPPING state is there for future cleanup code | |
318 | * which can take awhile | |
319 | */ | |
66aeffc7 DL |
320 | set_state(name, handler, STOPPING); |
321 | set_state(name, handler, STOPPED); | |
59eb99ba DL |
322 | lxc_unlink_nsgroup(name); |
323 | ||
3a0f472d DL |
324 | if (handler) { |
325 | remove_init_pid(name, handler->pid); | |
571e6ec8 | 326 | lxc_delete_tty(&handler->conf.tty_info); |
3a0f472d DL |
327 | lxc_put_lock(handler->lock); |
328 | free(handler); | |
329 | } | |
59eb99ba DL |
330 | |
331 | LXC_TTY_DEL_HANDLER(SIGQUIT); | |
332 | LXC_TTY_DEL_HANDLER(SIGINT); | |
333 | } | |
334 | ||
1bc5cc8c | 335 | void lxc_abort(const char *name, struct lxc_handler *handler) |
59eb99ba | 336 | { |
66aeffc7 | 337 | set_state(name, handler, ABORTING); |
59eb99ba DL |
338 | kill(handler->pid, SIGKILL); |
339 | } | |
340 | ||
/*
 * start_arg: arguments handed to do_start() through lxc_clone()
 */
struct start_arg {
	const char *name;		/* container name */
	char *const *argv;		/* command to exec, argv[0] is the program */
	struct lxc_handler *handler;	/* handler of the container being started */
	int *sv;			/* socketpair used to synchronize with the parent */
};
347 | ||
348 | static int do_start(void *arg) | |
349 | { | |
350 | struct start_arg *start_arg = arg; | |
351 | struct lxc_handler *handler = start_arg->handler; | |
352 | const char *name = start_arg->name; | |
353 | char *const *argv = start_arg->argv; | |
354 | int *sv = start_arg->sv; | |
355 | int err = -1, sync; | |
356 | ||
357 | if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) { | |
358 | SYSERROR("failed to set sigprocmask"); | |
359 | goto out_child; | |
360 | } | |
361 | ||
362 | close(sv[1]); | |
363 | ||
364 | /* Be sure we don't inherit this after the exec */ | |
365 | fcntl(sv[0], F_SETFD, FD_CLOEXEC); | |
366 | ||
367 | /* Tell our father he can begin to configure the container */ | |
368 | if (write(sv[0], &sync, sizeof(sync)) < 0) { | |
369 | SYSERROR("failed to write socket"); | |
370 | goto out_child; | |
371 | } | |
372 | ||
373 | /* Wait for the father to finish the configuration */ | |
374 | if (read(sv[0], &sync, sizeof(sync)) < 0) { | |
375 | SYSERROR("failed to read socket"); | |
376 | goto out_child; | |
377 | } | |
378 | ||
379 | /* Setup the container, ip, names, utsname, ... */ | |
571e6ec8 | 380 | if (lxc_setup(name, &handler->conf)) { |
50e98013 DL |
381 | ERROR("failed to setup the container"); |
382 | goto out_warn_father; | |
383 | } | |
384 | ||
385 | if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) { | |
386 | SYSERROR("failed to remove CAP_SYS_BOOT capability"); | |
387 | goto out_child; | |
388 | } | |
389 | ||
6a6ad7af DL |
390 | if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) { |
391 | SYSERROR("failed to set pdeath signal"); | |
392 | goto out_child; | |
393 | } | |
394 | ||
1ac470c0 DL |
395 | NOTICE("exec'ing '%s'", argv[0]); |
396 | ||
50e98013 DL |
397 | execvp(argv[0], argv); |
398 | SYSERROR("failed to exec %s", argv[0]); | |
399 | ||
400 | out_warn_father: | |
401 | /* If the exec fails, tell that to our father */ | |
402 | if (write(sv[0], &err, sizeof(err)) < 0) | |
403 | SYSERROR("failed to write the socket"); | |
404 | out_child: | |
405 | return -1; | |
406 | } | |
407 | ||
9618063c | 408 | int lxc_spawn(const char *name, struct lxc_handler *handler, char *const argv[]) |
59eb99ba DL |
409 | { |
410 | int sv[2]; | |
411 | int clone_flags; | |
412 | int err = -1, sync; | |
413 | ||
50e98013 DL |
414 | struct start_arg start_arg = { |
415 | .name = name, | |
416 | .argv = argv, | |
417 | .handler = handler, | |
418 | .sv = sv, | |
419 | }; | |
420 | ||
0ad19a3f | 421 | /* Synchro socketpair */ |
422 | if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) { | |
36eb9bde | 423 | SYSERROR("failed to create communication socketpair"); |
f4d507d5 | 424 | goto out; |
0ad19a3f | 425 | } |
426 | ||
1ea6db29 | 427 | clone_flags = CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS; |
5f4535a3 | 428 | if (!lxc_list_empty(&handler->conf.network)) { |
82d5ae15 | 429 | |
0ad19a3f | 430 | clone_flags |= CLONE_NEWNET; |
431 | ||
82d5ae15 DL |
432 | /* that should be done before the clone because we will |
433 | * fill the netdev index and use them in the child | |
434 | */ | |
5f4535a3 | 435 | if (lxc_create_network(&handler->conf.network)) { |
82d5ae15 DL |
436 | ERROR("failed to create the network"); |
437 | goto out_close; | |
438 | } | |
439 | } | |
440 | ||
0ad19a3f | 441 | /* Create a process in a new set of namespaces */ |
50e98013 | 442 | handler->pid = lxc_clone(do_start, &start_arg, clone_flags); |
59eb99ba | 443 | if (handler->pid < 0) { |
36eb9bde | 444 | SYSERROR("failed to fork into a new namespace"); |
59eb99ba | 445 | goto out_close; |
0ad19a3f | 446 | } |
447 | ||
0ad19a3f | 448 | close(sv[0]); |
449 | ||
450 | /* Wait for the child to be ready */ | |
451 | if (read(sv[1], &sync, sizeof(sync)) < 0) { | |
36eb9bde | 452 | SYSERROR("failed to read the socket"); |
59eb99ba | 453 | goto out_abort; |
0ad19a3f | 454 | } |
455 | ||
9f44c578 | 456 | if (lxc_rename_nsgroup(name, handler)) |
2b31f553 | 457 | goto out_abort; |
218d4250 | 458 | |
0ad19a3f | 459 | /* Create the network configuration */ |
82d5ae15 | 460 | if (clone_flags & CLONE_NEWNET) { |
5f4535a3 | 461 | if (lxc_assign_network(&handler->conf.network, handler->pid)) { |
82d5ae15 DL |
462 | ERROR("failed to create the configured network"); |
463 | goto out_abort; | |
464 | } | |
0ad19a3f | 465 | } |
466 | ||
467 | /* Tell the child to continue its initialization */ | |
468 | if (write(sv[1], &sync, sizeof(sync)) < 0) { | |
36eb9bde | 469 | SYSERROR("failed to write the socket"); |
59eb99ba | 470 | goto out_abort; |
0ad19a3f | 471 | } |
472 | ||
473 | /* Wait for the child to exec or returning an error */ | |
e043236e | 474 | if (read(sv[1], &sync, sizeof(sync)) < 0) { |
36eb9bde | 475 | ERROR("failed to read the socket"); |
59eb99ba | 476 | goto out_abort; |
0ad19a3f | 477 | } |
478 | ||
66aeffc7 | 479 | if (set_state(name, handler, RUNNING)) { |
59eb99ba DL |
480 | ERROR("failed to set state to %s", |
481 | lxc_state2str(RUNNING)); | |
482 | goto out_abort; | |
3f21c114 | 483 | } |
22ebac19 | 484 | |
59eb99ba | 485 | err = 0; |
22ebac19 | 486 | |
1ac470c0 DL |
487 | NOTICE("'%s' started with pid '%d'", argv[0], handler->pid); |
488 | ||
59eb99ba DL |
489 | out_close: |
490 | close(sv[0]); | |
491 | close(sv[1]); | |
492 | out: | |
493 | return err; | |
0ad19a3f | 494 | |
59eb99ba DL |
495 | out_abort: |
496 | lxc_abort(name, handler); | |
497 | goto out_close; | |
498 | } | |
0ad19a3f | 499 | |
9618063c | 500 | int lxc_start(const char *name, char *const argv[]) |
59eb99ba | 501 | { |
3a0f472d | 502 | struct lxc_handler *handler; |
e043236e | 503 | int err = -1; |
59eb99ba DL |
504 | int status; |
505 | ||
3a0f472d DL |
506 | handler = lxc_init(name); |
507 | if (!handler) { | |
59eb99ba | 508 | ERROR("failed to initialize the container"); |
66aeffc7 | 509 | return -1; |
0ad19a3f | 510 | } |
511 | ||
3a0f472d | 512 | err = lxc_spawn(name, handler, argv); |
59eb99ba DL |
513 | if (err) { |
514 | ERROR("failed to spawn '%s'", argv[0]); | |
515 | goto out; | |
0ad19a3f | 516 | } |
517 | ||
af795875 | 518 | err = lxc_close_all_inherited_fd(); |
d983b93c MN |
519 | if (err) { |
520 | ERROR("unable to close inherited fds"); | |
521 | goto out_abort; | |
522 | } | |
523 | ||
3a0f472d | 524 | err = lxc_poll(name, handler); |
e043236e | 525 | if (err) { |
59eb99ba DL |
526 | ERROR("mainloop exited with an error"); |
527 | goto out_abort; | |
528 | } | |
0ad19a3f | 529 | |
3a0f472d | 530 | while (waitpid(handler->pid, &status, 0) < 0 && errno == EINTR) |
1bc5cc8c | 531 | continue; |
e043236e | 532 | |
3a0f472d | 533 | err = lxc_error_set_and_log(handler->pid, status); |
0ad19a3f | 534 | out: |
3a0f472d | 535 | lxc_fini(name, handler); |
0ad19a3f | 536 | return err; |
537 | ||
59eb99ba | 538 | out_abort: |
3a0f472d | 539 | lxc_abort(name, handler); |
0ad19a3f | 540 | goto out; |
541 | } |