]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/process_utils.c
utils: use SYSTRACE() when logging stdio permission fixup failures
[mirror_lxc.git] / src / lxc / process_utils.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
6b9e666f 2
13be2733
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <errno.h>
38e5c2db
CB
7#include <sched.h>
8#include <signal.h>
13be2733
CB
9#include <stdio.h>
10#include <stdlib.h>
11#include <sys/syscall.h>
12#include <unistd.h>
13
633cb8be 14#include "compiler.h"
13be2733 15#include "config.h"
f40988c7 16#include "log.h"
38e5c2db 17#include "macro.h"
f40988c7 18#include "process_utils.h"
bed09c9c 19#include "syscall_numbers.h"
13be2733 20
f40988c7 21lxc_log_define(process_utils, lxc);
38e5c2db
CB
22
23/*
24 * This is based on raw_clone in systemd but adapted to our needs. This uses
25 * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
26 * doesn't really matter to us so disallow it.
27 *
28 * The nice thing about this is that we get fork() behavior. That is
29 * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
30 */
f7176c3e 31__returns_twice pid_t lxc_raw_legacy_clone(unsigned long flags, int *pidfd)
38e5c2db 32{
38e5c2db
CB
33
34#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
35 /* On s390/s390x and cris the order of the first and second arguments
36 * of the system call is reversed.
37 */
a59440be 38 return syscall(__NR_clone, NULL, flags | SIGCHLD, pidfd);
38e5c2db
CB
39#elif defined(__sparc__) && defined(__arch64__)
40 {
41 /*
42 * sparc64 always returns the other process id in %o0, and a
43 * boolean flag whether this is the child or the parent in %o1.
44 * Inline assembly is needed to get the flag returned in %o1.
45 */
b52e8e68
CB
46 register long g1 asm("g1") = __NR_clone;
47 register long o0 asm("o0") = flags | SIGCHLD;
48 register long o1 asm("o1") = 0; /* is parent/child indicator */
a59440be 49 register long o2 asm("o2") = (unsigned long)pidfd;
b52e8e68
CB
50 long is_error, retval, in_child;
51 pid_t child_pid;
6dfb1463 52
b52e8e68 53 asm volatile(
6dfb1463 54#if defined(__arch64__)
b52e8e68 55 "t 0x6d\n\t" /* 64-bit trap */
6dfb1463 56#else
b52e8e68 57 "t 0x10\n\t" /* 32-bit trap */
6dfb1463 58#endif
b52e8e68
CB
59 /*
60 * catch errors: On sparc, the carry bit (csr) in the
61 * processor status register (psr) is used instead of a
62 * full register.
63 */
5f7dd076 64 "addx %%g0, 0, %%g1"
a59440be
CB
65 : "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */
66 : "r"(g1), "r"(o0), "r"(o1), "r"(o2) /* inputs */
67 : "%cc"); /* clobbers */
6dfb1463 68
b52e8e68
CB
69 is_error = g1;
70 retval = o0;
71 in_child = o1;
72
73 if (is_error) {
74 errno = retval;
6dfb1463
CB
75 return -1;
76 }
38e5c2db
CB
77
78 if (in_child)
79 return 0;
6dfb1463 80
b52e8e68 81 child_pid = retval;
6dfb1463 82 return child_pid;
38e5c2db
CB
83 }
84#elif defined(__ia64__)
85 /* On ia64 the stack and stack size are passed as separate arguments. */
a59440be 86 return syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0), pidfd);
38e5c2db 87#else
a59440be 88 return syscall(__NR_clone, flags | SIGCHLD, NULL, pidfd);
38e5c2db
CB
89#endif
90}
91
2f46fe6e
CB
92__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
93{
94 pid_t pid;
95 struct lxc_clone_args args = {
96 .flags = flags,
97 .pidfd = ptr_to_u64(pidfd),
98 };
99
100 if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
101 CLONE_CHILD_CLEARTID | CLONE_SETTLS))
102 return ret_errno(EINVAL);
103
104 /* On CLONE_PARENT we inherit the parent's exit signal. */
105 if (!(flags & CLONE_PARENT))
106 args.exit_signal = SIGCHLD;
107
108 pid = lxc_clone3(&args, CLONE_ARGS_SIZE_VER0);
109 if (pid < 0 && errno == ENOSYS) {
110 SYSTRACE("Falling back to legacy clone");
f7176c3e 111 return lxc_raw_legacy_clone(flags, pidfd);
2f46fe6e
CB
112 }
113
114 return pid;
115}
116
a59440be
CB
/*
 * Clone via lxc_raw_clone() and run a callback in the child.
 *
 * @fn:    function executed in the child; its return value becomes the
 *         child's exit status.
 * @args:  opaque pointer handed to @fn.
 * @flags: passed through to lxc_raw_clone().
 * @pidfd: passed through to lxc_raw_clone().
 *
 * In the parent this returns the child's pid, or -1 if lxc_raw_clone()
 * returned a negative value. It never returns in the child.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
		       int *pidfd)
{
	pid_t child = lxc_raw_clone(flags, pidfd);

	if (child == 0) {
		/*
		 * Leave through _exit(): exit() is not thread-safe and might
		 * mess with the parent's signal handlers and other stuff when
		 * exec() fails.
		 */
		int status = fn(args);

		_exit(status);
	}

	return child < 0 ? -1 : child;
}
d9bb2fba
CB
135
136int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
137 unsigned int flags)
138{
cf38fe06 139 return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
d9bb2fba 140}
f40988c7
CB
141
142/*
143 * Let's use the "standard stack limit" (i.e. glibc thread size default) for
144 * stack sizes: 8MB.
145 */
146#define __LXC_STACK_SIZE (8 * 1024 * 1024)
147pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd)
148{
149 pid_t ret;
150 void *stack;
151
152 stack = malloc(__LXC_STACK_SIZE);
153 if (!stack) {
154 SYSERROR("Failed to allocate clone stack");
155 return -ENOMEM;
156 }
157
158#ifdef __ia64__
159 ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd);
160#else
161 ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd);
162#endif
163 if (ret < 0)
164 SYSERROR("Failed to clone (%#x)", flags);
165
166 return ret;
167}