]>
Commit | Line | Data |
---|---|---|
f40988c7 CB |
1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
2 | ||
3 | #ifndef __LXC_PROCESS_UTILS_H | |
4 | #define __LXC_PROCESS_UTILS_H | |
5 | ||
6 | #ifndef _GNU_SOURCE | |
7 | #define _GNU_SOURCE 1 | |
8 | #endif | |
96086a6b | 9 | #include <linux/sched.h> |
f40988c7 CB |
10 | #include <sched.h> |
11 | #include <signal.h> | |
12 | #include <stdbool.h> | |
13 | #include <stdio.h> | |
14 | #include <stdlib.h> | |
15 | #include <sys/syscall.h> | |
16 | #include <unistd.h> | |
17 | ||
2f46fe6e | 18 | #include "compiler.h" |
96086a6b CB |
19 | #include "config.h" |
20 | #include "syscall_numbers.h" | |
21 | ||
f40988c7 CB |
/*
 * Fallback definitions for the clone() flag bits. Every constant is guarded by
 * #ifndef, so a value that is already defined at this point is left alone.
 */
#ifndef CSIGNAL
#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
#endif

#ifndef CLONE_VM
#define CLONE_VM 0x00000100 /* set if VM shared between processes */
#endif

#ifndef CLONE_FS
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#endif

#ifndef CLONE_FILES
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#endif

#ifndef CLONE_SIGHAND
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
#endif

#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
#endif

#ifndef CLONE_PTRACE
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#endif

#ifndef CLONE_VFORK
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#endif

#ifndef CLONE_PARENT
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
#endif

#ifndef CLONE_THREAD
#define CLONE_THREAD 0x00010000 /* Same thread group? */
#endif

#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
#endif

#ifndef CLONE_SYSVSEM
#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
#endif

#ifndef CLONE_SETTLS
#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
#endif

#ifndef CLONE_PARENT_SETTID
#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
#endif

#ifndef CLONE_CHILD_CLEARTID
#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
#endif

#ifndef CLONE_DETACHED
#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
#endif

#ifndef CLONE_UNTRACED
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
#endif

#ifndef CLONE_CHILD_SETTID
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
#endif

#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
#endif

#ifndef CLONE_NEWUTS
#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
#endif

#ifndef CLONE_NEWIPC
#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#endif

#ifndef CLONE_NEWPID
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#endif

#ifndef CLONE_NEWNET
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#endif

#ifndef CLONE_IO
#define CLONE_IO 0x80000000 /* Clone io context */
#endif
121 | ||
/* Flags for the clone3() syscall. Both values need more than 32 bits. */
#ifndef CLONE_CLEAR_SIGHAND
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
#endif

#ifndef CLONE_INTO_CGROUP
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#endif

/*
 * The following flag overlaps the CSIGNAL bits (the low byte that carries the
 * exit signal), so it can only be used with the unshare and clone3 syscalls:
 */
#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080 /* New time namespace */
#endif
138 | ||
/* id type for waitid() */
#ifndef P_PIDFD
#define P_PIDFD 3
#endif

/*
 * Byte sizes of the successive published layouts of the clone3() argument
 * struct.
 */
#ifndef CLONE_ARGS_SIZE_VER0
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
#endif

#ifndef CLONE_ARGS_SIZE_VER1
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
#endif

#ifndef CLONE_ARGS_SIZE_VER2
#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
#endif
155 | ||
2f46fe6e CB |
/*
 * Helpers for moving pointers through 64-bit integer fields. Both go through
 * uintptr_t so the conversion is well defined on 32-bit and 64-bit targets.
 * The macro argument is fully parenthesized so that an expression such as
 * u64_to_ptr(a | b) converts the whole expression, not just its first operand.
 */
#ifndef ptr_to_u64
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
#endif
#ifndef u64_to_ptr
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
#endif
162 | ||
/*
 * Argument block passed to the clone3() syscall by lxc_clone3().
 *
 * Every member is an 8-byte __aligned_u64, so the layout lines up with the
 * CLONE_ARGS_SIZE_VER* constants: the members up to and including "tls" make
 * up the first 64 bytes, "set_tid" and "set_tid_size" extend that to 80 bytes,
 * and "cgroup" brings the full struct to 88 bytes.
 */
struct lxc_clone_args {
	__aligned_u64 flags;
	__aligned_u64 pidfd;
	__aligned_u64 child_tid;
	__aligned_u64 parent_tid;
	__aligned_u64 exit_signal;
	__aligned_u64 stack;
	__aligned_u64 stack_size;
	__aligned_u64 tls;
	__aligned_u64 set_tid;
	__aligned_u64 set_tid_size;
	__aligned_u64 cgroup;
};
96086a6b | 176 | |
2f46fe6e | 177 | __returns_twice static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size) |
96086a6b | 178 | { |
2f46fe6e | 179 | return syscall(__NR_clone3, args, size); |
96086a6b | 180 | } |
f40988c7 CB |
181 | |
/*
 * Explicit prototypes for the libc clone wrappers: __clone2() on ia64, clone()
 * on every other architecture. The optional trailing arguments of clone() are
 * listed in the comment inside its parameter list.
 */
#if defined(__ia64__)
int __clone2(int (*__fn)(void *__arg), void *__child_stack_base,
	     size_t __child_stack_size, int __flags, void *__arg, ...);
#else
int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ...
	  /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */);
#endif
189 | ||
190 | /** | |
191 | * lxc_clone() - create a new process | |
192 | * | |
193 | * - allocate stack: | |
194 | * This function allocates a new stack the size of page and passes it to the | |
195 | * kernel. | |
196 | * | |
197 | * - support all CLONE_*flags: | |
198 | * This function supports all CLONE_* flags. If in doubt or not sufficiently | |
199 | * familiar with process creation in the kernel and interactions with libcs | |
200 | * this function should be used. | |
201 | * | |
202 | * - pthread_atfork() handlers depending on libc: | |
203 | * Whether this function runs pthread_atfork() handlers depends on the | |
204 | * corresponding libc wrapper. glibc currently does not run pthread_atfork() | |
205 | * handlers but does not guarantee that they are not. Other libcs might or | |
206 | * might not run pthread_atfork() handlers. If you require guarantees please | |
207 | * refer to the lxc_raw_clone*() functions in process_utils.{c,h}. | |
208 | * | |
209 | * - should call lxc_raw_getpid(): | |
210 | * The child should use lxc_raw_getpid() to retrieve its pid. | |
211 | */ | |
92a10958 | 212 | __hidden extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd); |
f40988c7 CB |
213 | |
214 | ||
215 | /* | |
216 | * lxc_raw_clone() - create a new process | |
217 | * | |
218 | * - fork() behavior: | |
219 | * This function returns 0 in the child and > 0 in the parent. | |
220 | * | |
221 | * - copy-on-write: | |
222 | * This function does not allocate a new stack and relies on copy-on-write | |
223 | * semantics. | |
224 | * | |
225 | * - supports subset of ClONE_* flags: | |
226 | * lxc_raw_clone() intentionally only supports a subset of the flags available | |
227 | * to the actual system call. Please refer to the implementation what flags | |
228 | * cannot be used. Also, please don't assume that just because a flag isn't | |
229 | * explicitly checked for as being unsupported that it is supported. If in | |
230 | * doubt or not sufficiently familiar with process creation in the kernel and | |
231 | * interactions with libcs this function should be used. | |
232 | * | |
233 | * - no pthread_atfork() handlers: | |
234 | * This function circumvents - as much as this this is possible - any libc | |
235 | * wrappers and thus does not run any pthread_atfork() handlers. Make sure | |
236 | * that this is safe to do in the context you are trying to call this | |
237 | * function. | |
238 | * | |
239 | * - must call lxc_raw_getpid(): | |
240 | * The child must use lxc_raw_getpid() to retrieve its pid. | |
241 | */ | |
92a10958 CB |
242 | __hidden extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd); |
243 | __hidden extern pid_t lxc_raw_legacy_clone(unsigned long flags, int *pidfd); | |
f40988c7 CB |
244 | |
245 | /* | |
246 | * lxc_raw_clone_cb() - create a new process | |
247 | * | |
248 | * - non-fork() behavior: | |
249 | * Function does return pid of the child or -1 on error. Pass in a callback | |
250 | * function via the "fn" argument that gets executed in the child process. | |
251 | * The "args" argument is passed to "fn". | |
252 | * | |
253 | * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb() | |
254 | * as well. | |
255 | */ | |
92a10958 CB |
256 | __hidden extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags, |
257 | int *pidfd); | |
f40988c7 CB |
258 | |
/*
 * execveat() - execute the program named by "pathname", resolved against "dirfd"
 *
 * When HAVE_EXECVEAT is not defined, a local wrapper is provided that forwards
 * all five arguments unchanged to syscall(__NR_execveat, ...) and returns its
 * result. When HAVE_EXECVEAT is defined, only the prototype is declared and
 * the implementation is expected to come from elsewhere (presumably libc).
 */
#ifndef HAVE_EXECVEAT
static inline int execveat(int dirfd, const char *pathname, char *const argv[],
			   char *const envp[], int flags)
{
	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
}
#else
extern int execveat(int dirfd, const char *pathname, char *const argv[],
		    char *const envp[], int flags);
#endif
269 | ||
/*
 * Because of older glibc's pid cache (up to 2.25), whenever clone() is called
 * the child must retrieve its own pid via lxc_raw_getpid().
 *
 * Issues the getpid syscall directly through syscall() and returns the result
 * as a pid_t.
 */
static inline pid_t lxc_raw_getpid(void)
{
	return (pid_t)syscall(SYS_getpid);
}
278 | ||
/*
 * lxc_raw_gettid() - return the caller's thread id
 *
 * Uses the gettid syscall directly when __NR_gettid is a positive number;
 * otherwise falls back to lxc_raw_getpid().
 */
static inline pid_t lxc_raw_gettid(void)
{
#if __NR_gettid > 0
	return (pid_t)syscall(__NR_gettid);
#else
	return lxc_raw_getpid();
#endif
}
287 | ||
92a10958 CB |
288 | __hidden extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, |
289 | unsigned int flags); | |
f40988c7 CB |
290 | |
291 | #endif /* __LXC_PROCESS_UTILS_H */ |