]>
git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/initutils.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #include <sys/syscall.h>
16 #include "file_utils.h"
17 #include "initutils.h"
19 #include "memory_utils.h"
20 #include "process_utils.h"
26 static char *copy_global_config_value(char *p
)
34 if (p
[len
-1] == '\n') {
39 retbuf
= malloc(len
+ 1);
43 (void)strlcpy(retbuf
, p
, len
+ 1);
47 const char *lxc_global_config_value(const char *option_name
)
49 static const char * const options
[][2] = {
50 { "lxc.bdev.lvm.vg", DEFAULT_VG
},
51 { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL
},
52 { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT
},
53 { "lxc.bdev.rbd.rbdpool", DEFAULT_RBDPOOL
},
54 { "lxc.lxcpath", NULL
},
55 { "lxc.default_config", NULL
},
56 { "lxc.cgroup.pattern", NULL
},
57 { "lxc.cgroup.use", NULL
},
61 /* placed in the thread local storage pool for non-bionic targets */
62 static thread_local
const char *values
[sizeof(options
) / sizeof(options
[0])] = {0};
64 /* user_config_path is freed as soon as it is used */
65 char *user_config_path
= NULL
;
68 * The following variables are freed at bottom unconditionally.
69 * So NULL the value if it is to be returned to the caller
71 char *user_default_config_path
= NULL
;
72 char *user_lxc_path
= NULL
;
73 char *user_cgroup_pattern
= NULL
;
76 const char *user_home
= getenv("HOME");
80 user_config_path
= malloc(sizeof(char) * (22 + strlen(user_home
)));
81 user_default_config_path
= malloc(sizeof(char) * (26 + strlen(user_home
)));
82 user_lxc_path
= malloc(sizeof(char) * (19 + strlen(user_home
)));
84 sprintf(user_config_path
, "%s/.config/lxc/lxc.conf", user_home
);
85 sprintf(user_default_config_path
, "%s/.config/lxc/default.conf", user_home
);
86 sprintf(user_lxc_path
, "%s/.local/share/lxc/", user_home
);
89 user_config_path
= strdup(LXC_GLOBAL_CONF
);
90 user_default_config_path
= strdup(LXC_DEFAULT_CONFIG
);
91 user_lxc_path
= strdup(LXCPATH
);
92 if (!strequal(DEFAULT_CGROUP_PATTERN
, ""))
93 user_cgroup_pattern
= strdup(DEFAULT_CGROUP_PATTERN
);
96 const char * const (*ptr
)[2];
100 for (i
= 0, ptr
= options
; (*ptr
)[0]; ptr
++, i
++) {
101 if (strequal(option_name
, (*ptr
)[0]))
105 free(user_config_path
);
106 free(user_default_config_path
);
108 free(user_cgroup_pattern
);
114 free(user_config_path
);
115 free(user_default_config_path
);
117 free(user_cgroup_pattern
);
121 fin
= fopen_cloexec(user_config_path
, "r");
122 free(user_config_path
);
124 __do_free
char *line
= NULL
;
126 char *slider1
, *slider2
;
128 while (getline(&line
, &len
, fin
) > 0) {
132 slider1
= strstr(line
, option_name
);
136 /* see if there was just white space in front
139 for (slider2
= line
; slider2
< slider1
; slider2
++)
140 if (*slider2
!= ' ' && *slider2
!= '\t')
143 if (slider2
< slider1
)
146 slider1
= strchr(slider1
, '=');
150 /* see if there was just white space after
153 for (slider2
+= strlen(option_name
); slider2
< slider1
;
155 if (*slider2
!= ' ' && *slider2
!= '\t')
158 if (slider2
< slider1
)
162 while (*slider1
&& (*slider1
== ' ' || *slider1
== '\t'))
168 if (strequal(option_name
, "lxc.lxcpath")) {
170 user_lxc_path
= copy_global_config_value(slider1
);
171 remove_trailing_slashes(user_lxc_path
);
172 values
[i
] = move_ptr(user_lxc_path
);
176 values
[i
] = copy_global_config_value(slider1
);
181 /* could not find value, use default */
182 if (strequal(option_name
, "lxc.lxcpath")) {
183 remove_trailing_slashes(user_lxc_path
);
184 values
[i
] = move_ptr(user_lxc_path
);
185 } else if (strequal(option_name
, "lxc.default_config")) {
186 values
[i
] = move_ptr(user_default_config_path
);
187 } else if (strequal(option_name
, "lxc.cgroup.pattern")) {
188 values
[i
] = move_ptr(user_cgroup_pattern
);
190 values
[i
] = (*ptr
)[1];
193 /* special case: if default value is NULL,
194 * and there is no config, don't view that
203 free(user_cgroup_pattern
);
204 free(user_default_config_path
);
211 * Sets the process title to the specified title. Note that this may fail if
212 * the kernel doesn't support PR_SET_MM_MAP (kernels <3.18).
214 int setproctitle(char *title
)
216 __do_fclose
FILE *f
= NULL
;
218 char *buf_ptr
, *tmp_proctitle
;
219 char buf
[LXC_LINELEN
];
221 ssize_t bytes_read
= 0;
222 static char *proctitle
= NULL
;
225 * We don't really need to know all of this stuff, but unfortunately
226 * PR_SET_MM_MAP requires us to set it all at once, so we have to
227 * figure it out anyway.
229 unsigned long start_data
, end_data
, start_brk
, start_code
, end_code
,
230 start_stack
, arg_start
, arg_end
, env_start
, env_end
, brk_val
;
231 struct prctl_mm_map prctl_map
;
233 f
= fopen_cloexec("/proc/self/stat", "r");
241 bytes_read
= lxc_read_nointr(fd
, buf
, sizeof(buf
) - 1);
245 buf
[bytes_read
] = '\0';
247 /* Skip the first 25 fields, column 26-28 are start_code, end_code,
249 buf_ptr
= strchr(buf
, ' ');
250 for (i
= 0; i
< 24; i
++) {
253 buf_ptr
= strchr(buf_ptr
+ 1, ' ');
258 i
= sscanf(buf_ptr
, "%lu %lu %lu", &start_code
, &end_code
, &start_stack
);
262 /* Skip the next 19 fields, column 45-51 are start_data to arg_end */
263 for (i
= 0; i
< 19; i
++) {
266 buf_ptr
= strchr(buf_ptr
+ 1, ' ');
272 i
= sscanf(buf_ptr
, "%lu %lu %lu %*u %*u %lu %lu", &start_data
,
273 &end_data
, &start_brk
, &env_start
, &env_end
);
277 /* Include the null byte here, because in the calculations below we
278 * want to have room for it. */
279 len
= strlen(title
) + 1;
281 tmp_proctitle
= realloc(proctitle
, len
);
285 proctitle
= tmp_proctitle
;
287 arg_start
= (unsigned long)proctitle
;
288 arg_end
= arg_start
+ len
;
290 brk_val
= syscall(__NR_brk
, 0);
292 prctl_map
= (struct prctl_mm_map
){
293 .start_code
= start_code
,
294 .end_code
= end_code
,
295 .start_stack
= start_stack
,
296 .start_data
= start_data
,
297 .end_data
= end_data
,
298 .start_brk
= start_brk
,
300 .arg_start
= arg_start
,
302 .env_start
= env_start
,
309 ret
= prctl(PR_SET_MM
, prctl_arg(PR_SET_MM_MAP
), prctl_arg(&prctl_map
),
310 prctl_arg(sizeof(prctl_map
)), prctl_arg(0));
312 (void)strlcpy((char *)arg_start
, title
, len
);
317 static void prevent_forking(void)
319 __do_free
char *line
= NULL
;
320 __do_fclose
FILE *f
= NULL
;
324 f
= fopen("/proc/self/cgroup", "re");
328 while (getline(&line
, &len
, f
) != -1) {
329 __do_close
int fd
= -EBADF
;
333 p
= strchr(line
, ':');
342 /* This is a cgroup v2 entry. Skip it. */
346 if (strcmp(p
, "pids") != 0)
350 p2
+= lxc_char_left_gc(p2
, strlen(p2
));
351 p2
[lxc_char_right_gc(p2
, strlen(p2
))] = '\0';
353 ret
= snprintf(path
, sizeof(path
),
354 "/sys/fs/cgroup/pids/%s/pids.max", p2
);
355 if (ret
< 0 || (size_t)ret
>= sizeof(path
)) {
356 fprintf(stderr
, "Failed to create string\n");
360 fd
= open(path
, O_WRONLY
| O_CLOEXEC
);
362 fprintf(stderr
, "Failed to open \"%s\"\n", path
);
366 ret
= write(fd
, "1", 1);
368 fprintf(stderr
, "Failed to write to \"%s\"\n", path
);
374 static void kill_children(pid_t pid
)
376 __do_fclose
FILE *f
= NULL
;
380 ret
= snprintf(path
, sizeof(path
), "/proc/%d/task/%d/children", pid
, pid
);
381 if (ret
< 0 || (size_t)ret
>= sizeof(path
)) {
382 fprintf(stderr
, "Failed to create string\n");
386 f
= fopen(path
, "re");
388 fprintf(stderr
, "Failed to open %s\n", path
);
395 if (fscanf(f
, "%d ", &find_pid
) != 1) {
396 fprintf(stderr
, "Failed to retrieve pid\n");
400 (void)kill_children(find_pid
);
401 (void)kill(find_pid
, SIGKILL
);
405 static void remove_self(void)
409 char path
[PATH_MAX
] = {0};
411 n
= readlink("/proc/self/exe", path
, sizeof(path
));
412 if (n
< 0 || n
>= PATH_MAX
)
416 ret
= umount2(path
, MNT_DETACH
);
425 static sig_atomic_t was_interrupted
;
427 static void interrupt_handler(int sig
)
429 if (!was_interrupted
)
430 was_interrupted
= sig
;
433 static int close_inherited(void)
437 struct dirent
*direntp
;
440 dir
= opendir("/proc/self/fd");
446 while ((direntp
= readdir(dir
))) {
449 if (strcmp(direntp
->d_name
, ".") == 0)
452 if (strcmp(direntp
->d_name
, "..") == 0)
455 ret
= lxc_safe_int(direntp
->d_name
, &fd
);
459 if (fd
== STDERR_FILENO
|| fd
== fddir
)
475 __noreturn
int lxc_container_init(int argc
, char *const *argv
, bool quiet
)
479 struct sigaction act
;
480 sigset_t mask
, omask
;
481 int have_status
= 0, exit_with
= 1, shutdown
= 0;
483 /* Mask all the signals so we are safe to install a signal handler and
486 ret
= sigfillset(&mask
);
490 ret
= sigdelset(&mask
, SIGILL
);
494 ret
= sigdelset(&mask
, SIGSEGV
);
498 ret
= sigdelset(&mask
, SIGBUS
);
502 ret
= pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
506 ret
= sigfillset(&act
.sa_mask
);
510 ret
= sigdelset(&act
.sa_mask
, SIGILL
);
514 ret
= sigdelset(&act
.sa_mask
, SIGSEGV
);
518 ret
= sigdelset(&act
.sa_mask
, SIGBUS
);
522 ret
= sigdelset(&act
.sa_mask
, SIGSTOP
);
526 ret
= sigdelset(&act
.sa_mask
, SIGKILL
);
531 act
.sa_handler
= interrupt_handler
;
533 for (i
= 1; i
< NSIG
; i
++) {
534 /* Exclude some signals: ILL, SEGV and BUS are likely to reveal
535 * a bug and we want a core. STOP and KILL cannot be handled
536 * anyway: they're here for documentation. 32 and 33 are not
539 if (i
== SIGILL
|| i
== SIGSEGV
|| i
== SIGBUS
||
540 i
== SIGSTOP
|| i
== SIGKILL
|| i
== 32 || i
== 33)
543 ret
= sigaction(i
, &act
, NULL
);
549 fprintf(stderr
, "Failed to change signal action\n");
561 /* restore default signal handlers */
562 for (i
= 1; i
< NSIG
; i
++) {
565 if (i
== SIGILL
|| i
== SIGSEGV
|| i
== SIGBUS
||
566 i
== SIGSTOP
|| i
== SIGKILL
|| i
== 32 || i
== 33)
569 sigerr
= signal(i
, SIG_DFL
);
570 if (sigerr
== SIG_ERR
&& !quiet
)
571 fprintf(stderr
, "Failed to reset to default action for signal \"%d\": %d\n", i
, pid
);
574 ret
= pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
577 fprintf(stderr
, "Failed to set signal mask\n");
583 (void)ioctl(STDIN_FILENO
, TIOCSCTTY
, 0);
585 ret
= execvp(argv
[0], argv
);
587 fprintf(stderr
, "Failed to exec \"%s\"\n", argv
[0]);
590 logfd
= open("/dev/console", O_WRONLY
| O_NOCTTY
| O_CLOEXEC
);
592 ret
= dup3(logfd
, STDERR_FILENO
, O_CLOEXEC
);
597 (void)setproctitle("init");
599 /* Let's process the signals now. */
600 ret
= sigdelset(&omask
, SIGALRM
);
604 ret
= pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
607 fprintf(stderr
, "Failed to set signal mask\n");
611 ret
= close_range(STDERR_FILENO
+ 1, UINT_MAX
, CLOSE_RANGE_UNSHARE
);
614 * Fallback to close_inherited() when the syscall is not
615 * available or when CLOSE_RANGE_UNSHARE isn't supported.
616 * On a regular kernel CLOSE_RANGE_UNSHARE should always be
617 * available but openSUSE Leap 15.3 seems to have a partial
618 * backport without CLOSE_RANGE_UNSHARE support.
620 if (errno
== ENOSYS
|| errno
== EINVAL
)
621 ret
= close_inherited();
624 fprintf(stderr
, "Aborting attach to prevent leaking file descriptors into container\n");
632 switch (was_interrupted
) {
634 /* Some applications send SIGHUP in order to get init to reload
635 * its configuration. We don't want to forward this onto the
636 * application itself, because it probably isn't expecting this
637 * signal since it was expecting init to do something with it.
639 * Instead, let's explicitly ignore it here. The actual
640 * terminal case is handled in the monitor's handler, which
641 * sends this task a SIGTERM in the case of a SIGHUP, which is
649 pid_t mypid
= lxc_raw_getpid();
654 kill_children(mypid
);
656 ret
= kill(-1, SIGTERM
);
657 if (ret
< 0 && !quiet
)
658 fprintf(stderr
, "Failed to send SIGTERM to all children\n");
664 pid_t mypid
= lxc_raw_getpid();
668 kill_children(mypid
);
670 ret
= kill(-1, SIGKILL
);
671 if (ret
< 0 && !quiet
)
672 fprintf(stderr
, "Failed to send SIGTERM to all children\n");
677 kill(pid
, was_interrupted
);
683 waited_pid
= wait(&status
);
684 if (waited_pid
< 0) {
692 fprintf(stderr
, "Failed to wait on child %d\n", pid
);
697 /* Reset timer each time a process exited. */
701 /* Keep the exit code of the started application (not wrapped
702 * pid) and continue to wait for the end of the orphan group.
704 if (waited_pid
== pid
&& !have_status
) {
705 exit_with
= lxc_error_set_and_log(waited_pid
, status
);