]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cmd/lxc_init.c
Merge pull request #3235 from xinhua9569/master
[mirror_lxc.git] / src / lxc / cmd / lxc_init.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #include <ctype.h>
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <getopt.h>
10 #include <libgen.h>
11 #include <limits.h>
12 #include <pthread.h>
13 #include <signal.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <sys/stat.h>
18 #include <sys/types.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21
22 #include <lxc/lxccontainer.h>
23 #include <lxc/version.h>
24
25 #include "compiler.h"
26 #include "config.h"
27 #include "error.h"
28 #include "initutils.h"
29 #include "memory_utils.h"
30 #include "parse.h"
31 #include "raw_syscalls.h"
32 #include "string_utils.h"
33
/* Keys returned by getopt_long() for options that only exist in long form.
 * They sit above the range of a single char so they cannot collide with a
 * short option letter.
 */
#define OPT_USAGE 0x1000
#define OPT_VERSION (OPT_USAGE - 1)

/* Two-step stringification: QUOTEVAL() expands its argument first and only
 * then hands it to QUOTE() to be turned into a string literal.
 */
#define QUOTE(macro) #macro
#define QUOTEVAL(macro) QUOTE(macro)
40
/* Number of the first signal caught since main() last reset it to 0; 0 means
 * no signal is pending. The variable is written from the signal handler and
 * read from the main loop, so it has to be volatile sig_atomic_t for those
 * accesses to be well defined.
 */
static volatile sig_atomic_t was_interrupted;

/* Signal handler: remember @sig unless an earlier signal is still waiting to
 * be processed, in which case the new one is dropped.
 */
static void interrupt_handler(int sig)
{
	if (!was_interrupted)
		was_interrupted = sig;
}
48
49 static struct option long_options[] = {
50 { "name", required_argument, 0, 'n' },
51 { "help", no_argument, 0, 'h' },
52 { "usage", no_argument, 0, OPT_USAGE },
53 { "version", no_argument, 0, OPT_VERSION },
54 { "quiet", no_argument, 0, 'q' },
55 { "lxcpath", required_argument, 0, 'P' },
56 { 0, 0, 0, 0 }
57 };
58 static const char short_options[] = "n:hqo:l:P:";
59
/* Command line state of lxc-init: the option tables used for parsing plus
 * everything the parser collected.
 */
struct arguments {
	/* tables handed to getopt_long() */
	const struct option *options;
	const char *shortopts;

	/* values taken from the recognised options */
	const char *name;    /* -n, --name */
	bool quiet;          /* -q, --quiet */
	const char *lxcpath; /* -P, --lxcpath */

	/* what is left of the command line after the options */
	char *const *argv;
	int argc;
};
72
73 static int arguments_parse(struct arguments *my_args, int argc,
74 char *const argv[]);
75
76 static struct arguments my_args = {
77 .options = long_options,
78 .shortopts = short_options
79 };
80
81 static void prevent_forking(void)
82 {
83 __do_free char *line = NULL;
84 __do_fclose FILE *f = NULL;
85 char path[PATH_MAX];
86 size_t len = 0;
87
88 f = fopen("/proc/self/cgroup", "r");
89 if (!f)
90 return;
91
92 while (getline(&line, &len, f) != -1) {
93 int fd, ret;
94 char *p, *p2;
95
96 p = strchr(line, ':');
97 if (!p)
98 continue;
99 p++;
100 p2 = strchr(p, ':');
101 if (!p2)
102 continue;
103 *p2 = '\0';
104
105 /* This is a cgroup v2 entry. Skip it. */
106 if ((p2 - p) == 0)
107 continue;
108
109 if (strcmp(p, "pids") != 0)
110 continue;
111 p2++;
112
113 p2 += lxc_char_left_gc(p2, strlen(p2));
114 p2[lxc_char_right_gc(p2, strlen(p2))] = '\0';
115
116 ret = snprintf(path, sizeof(path),
117 "/sys/fs/cgroup/pids/%s/pids.max", p2);
118 if (ret < 0 || (size_t)ret >= sizeof(path)) {
119 if (my_args.quiet)
120 fprintf(stderr, "Failed to create string\n");
121 return;
122 }
123
124 fd = open(path, O_WRONLY);
125 if (fd < 0) {
126 if (my_args.quiet)
127 fprintf(stderr, "Failed to open \"%s\"\n", path);
128 return;
129 }
130
131 ret = write(fd, "1", 1);
132 if (ret != 1 && !my_args.quiet)
133 fprintf(stderr, "Failed to write to \"%s\"\n", path);
134
135 close(fd);
136 return;
137 }
138 }
139
140 static void kill_children(pid_t pid)
141 {
142 __do_fclose FILE *f = NULL;
143 char path[PATH_MAX];
144 int ret;
145
146 ret = snprintf(path, sizeof(path), "/proc/%d/task/%d/children", pid, pid);
147 if (ret < 0 || (size_t)ret >= sizeof(path)) {
148 if (my_args.quiet)
149 fprintf(stderr, "Failed to create string\n");
150 return;
151 }
152
153 f = fopen(path, "r");
154 if (!f) {
155 if (my_args.quiet)
156 fprintf(stderr, "Failed to open %s\n", path);
157 return;
158 }
159
160 while (!feof(f)) {
161 pid_t find_pid;
162
163 if (fscanf(f, "%d ", &find_pid) != 1) {
164 if (my_args.quiet)
165 fprintf(stderr, "Failed to retrieve pid\n");
166 return;
167 }
168
169 (void)kill_children(find_pid);
170 (void)kill(find_pid, SIGKILL);
171 }
172 }
173
174 static void remove_self(void)
175 {
176 int ret;
177 ssize_t n;
178 char path[PATH_MAX] = {0};
179
180 n = readlink("/proc/self/exe", path, sizeof(path));
181 if (n < 0 || n >= PATH_MAX)
182 return;
183 path[n] = '\0';
184
185 ret = umount2(path, MNT_DETACH);
186 if (ret < 0)
187 return;
188
189 ret = unlink(path);
190 if (ret < 0)
191 return;
192 }
193
194 int main(int argc, char *argv[])
195 {
196 int i, logfd, ret;
197 pid_t pid;
198 struct sigaction act;
199 sigset_t mask, omask;
200 int have_status = 0, exit_with = 1, shutdown = 0;
201
202 if (arguments_parse(&my_args, argc, argv))
203 exit(EXIT_FAILURE);
204
205 if (!my_args.argc) {
206 if (my_args.quiet)
207 fprintf(stderr, "Please specify a command to execute\n");
208 exit(EXIT_FAILURE);
209 }
210
211 /* Mask all the signals so we are safe to install a signal handler and
212 * to fork.
213 */
214 ret = sigfillset(&mask);
215 if (ret < 0)
216 exit(EXIT_FAILURE);
217
218 ret = sigdelset(&mask, SIGILL);
219 if (ret < 0)
220 exit(EXIT_FAILURE);
221
222 ret = sigdelset(&mask, SIGSEGV);
223 if (ret < 0)
224 exit(EXIT_FAILURE);
225
226 ret = sigdelset(&mask, SIGBUS);
227 if (ret < 0)
228 exit(EXIT_FAILURE);
229
230 ret = pthread_sigmask(SIG_SETMASK, &mask, &omask);
231 if (ret < 0)
232 exit(EXIT_FAILURE);
233
234 ret = sigfillset(&act.sa_mask);
235 if (ret < 0)
236 exit(EXIT_FAILURE);
237
238 ret = sigdelset(&act.sa_mask, SIGILL);
239 if (ret < 0)
240 exit(EXIT_FAILURE);
241
242 ret = sigdelset(&act.sa_mask, SIGSEGV);
243 if (ret < 0)
244 exit(EXIT_FAILURE);
245
246 ret = sigdelset(&act.sa_mask, SIGBUS);
247 if (ret < 0)
248 exit(EXIT_FAILURE);
249
250 ret = sigdelset(&act.sa_mask, SIGSTOP);
251 if (ret < 0)
252 exit(EXIT_FAILURE);
253
254 ret = sigdelset(&act.sa_mask, SIGKILL);
255 if (ret < 0)
256 exit(EXIT_FAILURE);
257
258 act.sa_flags = 0;
259 act.sa_handler = interrupt_handler;
260
261 for (i = 1; i < NSIG; i++) {
262 /* Exclude some signals: ILL, SEGV and BUS are likely to reveal
263 * a bug and we want a core. STOP and KILL cannot be handled
264 * anyway: they're here for documentation. 32 and 33 are not
265 * defined.
266 */
267 if (i == SIGILL || i == SIGSEGV || i == SIGBUS ||
268 i == SIGSTOP || i == SIGKILL || i == 32 || i == 33)
269 continue;
270
271 ret = sigaction(i, &act, NULL);
272 if (ret < 0) {
273 if (errno == EINVAL)
274 continue;
275
276 if (my_args.quiet)
277 fprintf(stderr, "Failed to change signal action\n");
278 exit(EXIT_FAILURE);
279 }
280 }
281
282 remove_self();
283
284 pid = fork();
285 if (pid < 0)
286 exit(EXIT_FAILURE);
287
288 if (!pid) {
289 /* restore default signal handlers */
290 for (i = 1; i < NSIG; i++) {
291 sighandler_t sigerr;
292
293 if (i == SIGILL || i == SIGSEGV || i == SIGBUS ||
294 i == SIGSTOP || i == SIGKILL || i == 32 || i == 33)
295 continue;
296
297 sigerr = signal(i, SIG_DFL);
298 if (sigerr == SIG_ERR && !my_args.quiet)
299 fprintf(stderr, "Failed to reset to default action for signal \"%d\": %d\n", i, pid);
300 }
301
302 ret = pthread_sigmask(SIG_SETMASK, &omask, NULL);
303 if (ret < 0) {
304 if (my_args.quiet)
305 fprintf(stderr, "Failed to set signal mask\n");
306 exit(EXIT_FAILURE);
307 }
308
309 (void)setsid();
310
311 (void)ioctl(STDIN_FILENO, TIOCSCTTY, 0);
312
313 ret = execvp(my_args.argv[0], my_args.argv);
314 if (my_args.quiet)
315 fprintf(stderr, "Failed to exec \"%s\"\n", my_args.argv[0]);
316 exit(ret);
317 }
318 logfd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC);
319 if (logfd >= 0) {
320 ret = dup3(logfd, STDERR_FILENO, O_CLOEXEC);
321 if (ret < 0)
322 exit(EXIT_FAILURE);
323 }
324
325 (void)setproctitle("init");
326
327 /* Let's process the signals now. */
328 ret = sigdelset(&omask, SIGALRM);
329 if (ret < 0)
330 exit(EXIT_FAILURE);
331
332 ret = pthread_sigmask(SIG_SETMASK, &omask, NULL);
333 if (ret < 0) {
334 if (my_args.quiet)
335 fprintf(stderr, "Failed to set signal mask\n");
336 exit(EXIT_FAILURE);
337 }
338
339 /* No need of other inherited fds but stderr. */
340 close(STDIN_FILENO);
341 close(STDOUT_FILENO);
342
343 for (;;) {
344 int status;
345 pid_t waited_pid;
346
347 switch (was_interrupted) {
348 case 0:
349 /* Some applications send SIGHUP in order to get init to reload
350 * its configuration. We don't want to forward this onto the
351 * application itself, because it probably isn't expecting this
352 * signal since it was expecting init to do something with it.
353 *
354 * Instead, let's explicitly ignore it here. The actual
355 * terminal case is handled in the monitor's handler, which
356 * sends this task a SIGTERM in the case of a SIGHUP, which is
357 * what we want.
358 */
359 case SIGHUP:
360 break;
361 case SIGPWR:
362 case SIGTERM:
363 if (!shutdown) {
364 pid_t mypid = lxc_raw_getpid();
365
366 shutdown = 1;
367 prevent_forking();
368 if (mypid != 1) {
369 kill_children(mypid);
370 } else {
371 ret = kill(-1, SIGTERM);
372 if (ret < 0 && !my_args.quiet)
373 fprintf(stderr, "Failed to send SIGTERM to all children\n");
374 }
375 alarm(1);
376 }
377 break;
378 case SIGALRM: {
379 pid_t mypid = lxc_raw_getpid();
380
381 prevent_forking();
382 if (mypid != 1) {
383 kill_children(mypid);
384 } else {
385 ret = kill(-1, SIGKILL);
386 if (ret < 0 && !my_args.quiet)
387 fprintf(stderr, "Failed to send SIGTERM to all children\n");
388 }
389 break;
390 }
391 default:
392 ret = kill(pid, was_interrupted);
393 break;
394 }
395 ret = EXIT_SUCCESS;
396
397 was_interrupted = 0;
398 waited_pid = wait(&status);
399 if (waited_pid < 0) {
400 if (errno == ECHILD)
401 goto out;
402
403 if (errno == EINTR)
404 continue;
405
406 if (my_args.quiet)
407 fprintf(stderr, "Failed to wait on child %d\n", pid);
408 ret = -1;
409 goto out;
410 }
411
412 /* Reset timer each time a process exited. */
413 if (shutdown)
414 alarm(1);
415
416 /* Keep the exit code of the started application (not wrapped
417 * pid) and continue to wait for the end of the orphan group.
418 */
419 if (waited_pid == pid && !have_status) {
420 exit_with = lxc_error_set_and_log(waited_pid, status);
421 have_status = 1;
422 }
423 }
424 out:
425 if (ret < 0)
426 exit(EXIT_FAILURE);
427 exit(exit_with);
428 }
429
430 __noreturn static void print_usage_exit(const struct option longopts[])
431
432 {
433 fprintf(stderr, "Usage: lxc-init [-n|--name=NAME] [-h|--help] [--usage] [--version]\n\
434 [-q|--quiet] [-P|--lxcpath=LXCPATH]\n");
435 exit(EXIT_SUCCESS);
436 }
437
438 __noreturn static void print_version_exit(void)
439 {
440 printf("%s\n", LXC_VERSION);
441 exit(EXIT_SUCCESS);
442 }
443
/* Write the full help text to stderr. The caller decides the exit status.
 *
 * The help option is listed as -h because that is the letter the parser
 * accepts; "-?" is what getopt reports for an unknown option and leads to a
 * failure exit.
 */
static void print_help(void)
{
	fputs("Usage: lxc-init --name=NAME -- COMMAND\n"
	      "\n"
	      "lxc-init start a COMMAND as PID 2 inside a container\n"
	      "\n"
	      "Options :\n"
	      "-n, --name=NAME NAME of the container\n"
	      "-q, --quiet Don't produce any output\n"
	      "-P, --lxcpath=PATH Use specified container path\n"
	      "-h, --help Give this help list\n"
	      "--usage Give a short usage message\n"
	      "--version Print the version number\n"
	      "\n"
	      "Mandatory or optional arguments to long options are also mandatory or optional\n"
	      "for any corresponding short options.\n"
	      "\n"
	      "See the lxc-init man page for further information.\n\n",
	      stderr);
}
464
465 static int arguments_parse(struct arguments *args, int argc,
466 char *const argv[])
467 {
468 for (;;) {
469 int c;
470 int index = 0;
471
472 c = getopt_long(argc, argv, args->shortopts, args->options, &index);
473 if (c == -1)
474 break;
475 switch (c) {
476 case 'n':
477 args->name = optarg;
478 break;
479 case 'o':
480 break;
481 case 'l':
482 break;
483 case 'q':
484 args->quiet = true;
485 break;
486 case 'P':
487 remove_trailing_slashes(optarg);
488 args->lxcpath = optarg;
489 break;
490 case OPT_USAGE:
491 print_usage_exit(args->options);
492 case OPT_VERSION:
493 print_version_exit();
494 case '?':
495 print_help();
496 exit(EXIT_FAILURE);
497 case 'h':
498 print_help();
499 exit(EXIT_SUCCESS);
500 }
501 }
502
503 /*
504 * Reclaim the remaining command arguments
505 */
506 args->argv = &argv[optind];
507 args->argc = argc - optind;
508
509 /* If no lxcpath was given, use default */
510 if (!args->lxcpath)
511 args->lxcpath = lxc_global_config_value("lxc.lxcpath");
512
513 /* Check the command options */
514 if (!args->name) {
515 if (!args->quiet)
516 fprintf(stderr, "lxc-init: missing container name, use --name option\n");
517 return -1;
518 }
519
520 return 0;
521 }