]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/process_utils.h
build: improve meson build
[mirror_lxc.git] / src / lxc / process_utils.h
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef __LXC_PROCESS_UTILS_H
4 #define __LXC_PROCESS_UTILS_H
5
6 #include "config.h"
7
8 #include <linux/sched.h>
9 #include <sched.h>
10 #include <signal.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <sys/syscall.h>
15 #include <unistd.h>
16
17 #include "compiler.h"
18 #include "syscall_numbers.h"
19
/*
 * Fallback definitions for the classic clone flags.
 *
 * Each macro is wrapped in #ifndef, so a value already provided by the system
 * headers is never overridden; the definitions below only fill in gaps.
 */

/* Mask selecting the signal that is sent when the child exits. */
#ifndef CSIGNAL
#define CSIGNAL 0x000000ff
#endif

/* Parent and child share the same VM. */
#ifndef CLONE_VM
#define CLONE_VM 0x00000100
#endif

/* Parent and child share filesystem information. */
#ifndef CLONE_FS
#define CLONE_FS 0x00000200
#endif

/* Parent and child share the open files. */
#ifndef CLONE_FILES
#define CLONE_FILES 0x00000400
#endif

/* Parent and child share signal handlers and blocked signals. */
#ifndef CLONE_SIGHAND
#define CLONE_SIGHAND 0x00000800
#endif

/* A pidfd for the child is placed in the parent. */
#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif

/* Tracing continues on the child as well. */
#ifndef CLONE_PTRACE
#define CLONE_PTRACE 0x00002000
#endif

/* The parent wants the child to wake it up on mm_release. */
#ifndef CLONE_VFORK
#define CLONE_VFORK 0x00004000
#endif

/* The child gets the same parent as the cloner. */
#ifndef CLONE_PARENT
#define CLONE_PARENT 0x00008000
#endif

/* The child is placed in the same thread group. */
#ifndef CLONE_THREAD
#define CLONE_THREAD 0x00010000
#endif
59
/*
 * Fallback definitions for the namespace, TLS/TID and remaining clone flags.
 *
 * Each macro is wrapped in #ifndef, so a value already provided by the system
 * headers is never overridden.
 */

/* New mount namespace group. */
#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000
#endif

/* Share System V SEM_UNDO semantics. */
#ifndef CLONE_SYSVSEM
#define CLONE_SYSVSEM 0x00040000
#endif

/* Create a new TLS for the child. */
#ifndef CLONE_SETTLS
#define CLONE_SETTLS 0x00080000
#endif

/* Set the TID in the parent. */
#ifndef CLONE_PARENT_SETTID
#define CLONE_PARENT_SETTID 0x00100000
#endif

/* Clear the TID in the child. */
#ifndef CLONE_CHILD_CLEARTID
#define CLONE_CHILD_CLEARTID 0x00200000
#endif

/* Unused and ignored. */
#ifndef CLONE_DETACHED
#define CLONE_DETACHED 0x00400000
#endif

/* The tracing process cannot force CLONE_PTRACE on this clone. */
#ifndef CLONE_UNTRACED
#define CLONE_UNTRACED 0x00800000
#endif

/* Set the TID in the child. */
#ifndef CLONE_CHILD_SETTID
#define CLONE_CHILD_SETTID 0x01000000
#endif

/* New cgroup namespace. */
#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000
#endif

/* New utsname namespace. */
#ifndef CLONE_NEWUTS
#define CLONE_NEWUTS 0x04000000
#endif

/* New ipc namespace. */
#ifndef CLONE_NEWIPC
#define CLONE_NEWIPC 0x08000000
#endif

/* New user namespace. */
#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000
#endif

/* New pid namespace. */
#ifndef CLONE_NEWPID
#define CLONE_NEWPID 0x20000000
#endif

/* New network namespace. */
#ifndef CLONE_NEWNET
#define CLONE_NEWNET 0x40000000
#endif

/* Clone the io context. */
#ifndef CLONE_IO
#define CLONE_IO 0x80000000
#endif
119
/*
 * Flags that only the clone3() syscall understands. Both values need more
 * than 32 bits, hence the ULL suffix.
 */

/* Clear any signal handler and reset it to SIG_DFL. */
#ifndef CLONE_CLEAR_SIGHAND
#define CLONE_CLEAR_SIGHAND 0x100000000ULL
#endif

/* Clone into a specific cgroup, given the right permissions. */
#ifndef CLONE_INTO_CGROUP
#define CLONE_INTO_CGROUP 0x200000000ULL
#endif

/*
 * New time namespace.
 *
 * The value 0x80 falls inside the CSIGNAL mask (0xff), so this flag can only
 * be used with the unshare and clone3 syscalls.
 */
#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif
136
/*
 * Used with waitid(); only defined here when the system headers do not
 * already provide it.
 */
#ifndef P_PIDFD
#define P_PIDFD 3
#endif
141
/*
 * Sizes of the successive published revisions of the clone3() argument
 * struct. Each revision is 8-byte granular and extends the previous one;
 * the values are only defined when the system headers lack them.
 */

/* sizeof the first published struct */
#ifndef CLONE_ARGS_SIZE_VER0
#define CLONE_ARGS_SIZE_VER0 64
#endif

/* sizeof the second published struct */
#ifndef CLONE_ARGS_SIZE_VER1
#define CLONE_ARGS_SIZE_VER1 80
#endif

/* sizeof the third published struct */
#ifndef CLONE_ARGS_SIZE_VER2
#define CLONE_ARGS_SIZE_VER2 88
#endif
153
/*
 * Helpers for moving pointers through 64-bit integer fields.
 *
 * Both macros go through uintptr_t so the pointer <-> integer conversion is
 * well defined regardless of the pointer width; uintptr_t (<stdint.h>) and
 * __u64 must be visible wherever the macros are expanded.
 *
 * The argument is fully parenthesized in both macros so that an expression
 * such as u64_to_ptr(base | offset) converts the whole expression rather than
 * only its first operand.
 */
#ifndef ptr_to_u64
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
#endif
#ifndef u64_to_ptr
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
#endif
160
/*
 * struct lxc_clone_args - argument block handed to lxc_clone3()
 *
 * Every member is an __aligned_u64, so the layout is a flat sequence of
 * 8-byte slots. Member order must not change: the slots line up with the
 * published struct sizes (64, 80 and 88 bytes). See clone3(2) for the meaning
 * of the individual members.
 */
struct lxc_clone_args {
	/* Slots covered by the 64-byte revision. */
	__aligned_u64 flags;		/* offset  0 */
	__aligned_u64 pidfd;		/* offset  8 */
	__aligned_u64 child_tid;	/* offset 16 */
	__aligned_u64 parent_tid;	/* offset 24 */
	__aligned_u64 exit_signal;	/* offset 32 */
	__aligned_u64 stack;		/* offset 40 */
	__aligned_u64 stack_size;	/* offset 48 */
	__aligned_u64 tls;		/* offset 56 */

	/* Added by the 80-byte revision. */
	__aligned_u64 set_tid;		/* offset 64 */
	__aligned_u64 set_tid_size;	/* offset 72 */

	/* Added by the 88-byte revision. */
	__aligned_u64 cgroup;		/* offset 80 */
};
174
175 __returns_twice static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size)
176 {
177 return syscall(__NR_clone3, args, size);
178 }
179
/*
 * Explicit prototypes for the libc clone entry points.
 *
 * ia64 builds declare __clone2(), which takes the child stack as a base
 * address plus a size; every other architecture declares clone(), which takes
 * a single child stack pointer. The optional trailing arguments of clone()
 * are listed in the comment inside its parameter list.
 */
#ifdef __ia64__
int __clone2(int (*__fn)(void *__arg), void *__child_stack_base,
	     size_t __child_stack_size, int __flags, void *__arg, ...);
#else
int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ...
	  /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */);
#endif
187
188 /**
189 * lxc_clone() - create a new process
190 *
191 * - allocate stack:
192 * This function allocates a new stack the size of page and passes it to the
193 * kernel.
194 *
195 * - support all CLONE_*flags:
196 * This function supports all CLONE_* flags. If in doubt or not sufficiently
197 * familiar with process creation in the kernel and interactions with libcs
198 * this function should be used.
199 *
200 * - pthread_atfork() handlers depending on libc:
201 * Whether this function runs pthread_atfork() handlers depends on the
202 * corresponding libc wrapper. glibc currently does not run pthread_atfork()
203 * handlers but does not guarantee that they are not. Other libcs might or
204 * might not run pthread_atfork() handlers. If you require guarantees please
205 * refer to the lxc_raw_clone*() functions in process_utils.{c,h}.
206 *
207 * - should call lxc_raw_getpid():
208 * The child should use lxc_raw_getpid() to retrieve its pid.
209 */
210 __hidden extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd);
211
212
213 /*
214 * lxc_raw_clone() - create a new process
215 *
216 * - fork() behavior:
217 * This function returns 0 in the child and > 0 in the parent.
218 *
219 * - copy-on-write:
220 * This function does not allocate a new stack and relies on copy-on-write
221 * semantics.
222 *
223 * - supports subset of ClONE_* flags:
224 * lxc_raw_clone() intentionally only supports a subset of the flags available
225 * to the actual system call. Please refer to the implementation what flags
226 * cannot be used. Also, please don't assume that just because a flag isn't
227 * explicitly checked for as being unsupported that it is supported. If in
228 * doubt or not sufficiently familiar with process creation in the kernel and
229 * interactions with libcs this function should be used.
230 *
231 * - no pthread_atfork() handlers:
232 * This function circumvents - as much as this this is possible - any libc
233 * wrappers and thus does not run any pthread_atfork() handlers. Make sure
234 * that this is safe to do in the context you are trying to call this
235 * function.
236 *
237 * - must call lxc_raw_getpid():
238 * The child must use lxc_raw_getpid() to retrieve its pid.
239 */
240 __hidden extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd);
241 __hidden extern pid_t lxc_raw_legacy_clone(unsigned long flags, int *pidfd);
242
243 /*
244 * lxc_raw_clone_cb() - create a new process
245 *
246 * - non-fork() behavior:
247 * Function does return pid of the child or -1 on error. Pass in a callback
248 * function via the "fn" argument that gets executed in the child process.
249 * The "args" argument is passed to "fn".
250 *
251 * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
252 * as well.
253 */
254 __hidden extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
255 int *pidfd);
256
/*
 * execveat() - always have an execveat() available
 *
 * If HAVE_EXECVEAT evaluates to non-zero only the prototype is declared and
 * the definition is expected to come from elsewhere. Otherwise (0 or
 * undefined) a local inline fallback forwards all arguments to the system
 * call via syscall().
 */
#if HAVE_EXECVEAT
extern int execveat(int dirfd, const char *pathname, char *const argv[],
		    char *const envp[], int flags);
#else
static inline int execveat(int dirfd, const char *pathname, char *const argv[],
			   char *const envp[], int flags)
{
	return (int)syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
}
#endif
267
/*
 * lxc_raw_getpid() - retrieve the pid straight from the kernel
 *
 * Older glibc versions (up to 2.25) keep a pid cache. Because of that, the
 * child must retrieve its own pid through this function whenever clone() has
 * been called.
 */
static inline pid_t lxc_raw_getpid(void)
{
	const long pid = syscall(SYS_getpid);

	return (pid_t)pid;
}
276
/*
 * lxc_raw_gettid() - retrieve the thread id straight from the kernel
 *
 * Uses the gettid system call when __NR_gettid is a positive number and falls
 * back to lxc_raw_getpid() otherwise.
 */
static inline pid_t lxc_raw_gettid(void)
{
#if __NR_gettid > 0
	const long tid = syscall(__NR_gettid);

	return (pid_t)tid;
#else
	/* No usable gettid syscall number: report the pid instead. */
	return lxc_raw_getpid();
#endif
}
285
286 __hidden extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
287 unsigned int flags);
288
289 #endif /* __LXC_PROCESS_UTILS_H */