]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/process_utils.h
Merge pull request #3956 from brauner/2021-08-27.list
[mirror_lxc.git] / src / lxc / process_utils.h
CommitLineData
f40988c7
CB
1/* SPDX-License-Identifier: LGPL-2.1+ */
2
3#ifndef __LXC_PROCESS_UTILS_H
4#define __LXC_PROCESS_UTILS_H
5
6#ifndef _GNU_SOURCE
7#define _GNU_SOURCE 1
8#endif
96086a6b 9#include <linux/sched.h>
f40988c7
CB
10#include <sched.h>
11#include <signal.h>
12#include <stdbool.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <sys/syscall.h>
16#include <unistd.h>
17
2f46fe6e 18#include "compiler.h"
96086a6b
CB
19#include "config.h"
20#include "syscall_numbers.h"
21
f40988c7
CB
/*
 * Fallback definitions for the clone() flag bits. Every macro is guarded by
 * #ifndef, so a value that was already defined before this point is kept.
 */
#ifndef CSIGNAL
#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
#endif

#ifndef CLONE_VM
#define CLONE_VM 0x00000100 /* set if VM shared between processes */
#endif

#ifndef CLONE_FS
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#endif

#ifndef CLONE_FILES
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#endif

#ifndef CLONE_SIGHAND
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
#endif

#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
#endif

#ifndef CLONE_PTRACE
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#endif

#ifndef CLONE_VFORK
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#endif

#ifndef CLONE_PARENT
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
#endif

#ifndef CLONE_THREAD
#define CLONE_THREAD 0x00010000 /* Same thread group? */
#endif

#ifndef CLONE_NEWNS
#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
#endif

#ifndef CLONE_SYSVSEM
#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
#endif

#ifndef CLONE_SETTLS
#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
#endif

#ifndef CLONE_PARENT_SETTID
#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
#endif

#ifndef CLONE_CHILD_CLEARTID
#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
#endif

#ifndef CLONE_DETACHED
#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
#endif

#ifndef CLONE_UNTRACED
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
#endif

#ifndef CLONE_CHILD_SETTID
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
#endif

#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
#endif

#ifndef CLONE_NEWUTS
#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
#endif

#ifndef CLONE_NEWIPC
#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
#endif

#ifndef CLONE_NEWUSER
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#endif

#ifndef CLONE_NEWPID
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#endif

#ifndef CLONE_NEWNET
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#endif

#ifndef CLONE_IO
#define CLONE_IO 0x80000000 /* Clone io context */
#endif
121
/* Flags for the clone3() syscall. */
#ifndef CLONE_CLEAR_SIGHAND
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
#endif

#ifndef CLONE_INTO_CGROUP
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#endif

/*
 * Cloning flags that intersect with CSIGNAL and can therefore be used with
 * the unshare and clone3 syscalls only:
 */
#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080 /* New time namespace */
#endif

/* waitid */
#ifndef P_PIDFD
#define P_PIDFD 3
#endif
143
96086a6b
CB
/*
 * Sizes of the successive published layouts of the clone3() argument
 * structure. With 8-byte members these correspond to the first 8, 10 and 11
 * members of struct lxc_clone_args respectively.
 */
#ifndef CLONE_ARGS_SIZE_VER0
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
#endif

#ifndef CLONE_ARGS_SIZE_VER1
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
#endif

#ifndef CLONE_ARGS_SIZE_VER2
#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
#endif
155
2f46fe6e
CB
/* Convert between pointers and 64-bit integers by way of uintptr_t. */
#ifndef ptr_to_u64
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
#endif

#ifndef u64_to_ptr
/*
 * The argument is parenthesized so that an expression argument such as
 * "a & b" is converted as a whole; without the parentheses the cast would
 * bind to "a" alone.
 */
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
#endif
162
/*
 * Argument block handed to lxc_clone3(). The layout notes below follow from
 * the CLONE_ARGS_SIZE_VER* constants, assuming every __aligned_u64 member
 * occupies 8 bytes. See clone3(2) for the meaning of the individual members.
 */
struct lxc_clone_args {
	__aligned_u64 flags;
	__aligned_u64 pidfd;
	__aligned_u64 child_tid;
	__aligned_u64 parent_tid;
	__aligned_u64 exit_signal;
	__aligned_u64 stack;
	__aligned_u64 stack_size;
	__aligned_u64 tls;          /* last member covered by CLONE_ARGS_SIZE_VER0 */
	__aligned_u64 set_tid;
	__aligned_u64 set_tid_size; /* last member covered by CLONE_ARGS_SIZE_VER1 */
	__aligned_u64 cgroup;       /* last member covered by CLONE_ARGS_SIZE_VER2 */
};
96086a6b 176
2f46fe6e 177__returns_twice static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size)
96086a6b 178{
2f46fe6e 179 return syscall(__NR_clone3, args, size);
96086a6b 180}
f40988c7
CB
181
/*
 * Prototypes for the clone entry points: __clone2(), which takes the child
 * stack base together with its size, when building for ia64, and clone() on
 * every other architecture.
 */
#if defined(__ia64__)
int __clone2(int (*__fn)(void *__arg), void *__child_stack_base,
	     size_t __child_stack_size, int __flags, void *__arg, ...);
#else
int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ...
	  /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */);
#endif
189
190/**
191 * lxc_clone() - create a new process
192 *
193 * - allocate stack:
194 * This function allocates a new stack the size of page and passes it to the
195 * kernel.
196 *
197 * - support all CLONE_*flags:
198 * This function supports all CLONE_* flags. If in doubt or not sufficiently
199 * familiar with process creation in the kernel and interactions with libcs
200 * this function should be used.
201 *
202 * - pthread_atfork() handlers depending on libc:
203 * Whether this function runs pthread_atfork() handlers depends on the
204 * corresponding libc wrapper. glibc currently does not run pthread_atfork()
205 * handlers but does not guarantee that they are not. Other libcs might or
206 * might not run pthread_atfork() handlers. If you require guarantees please
207 * refer to the lxc_raw_clone*() functions in process_utils.{c,h}.
208 *
209 * - should call lxc_raw_getpid():
210 * The child should use lxc_raw_getpid() to retrieve its pid.
211 */
92a10958 212__hidden extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd);
f40988c7
CB
213
214
215/*
216 * lxc_raw_clone() - create a new process
217 *
218 * - fork() behavior:
219 * This function returns 0 in the child and > 0 in the parent.
220 *
221 * - copy-on-write:
222 * This function does not allocate a new stack and relies on copy-on-write
223 * semantics.
224 *
225 * - supports subset of ClONE_* flags:
226 * lxc_raw_clone() intentionally only supports a subset of the flags available
227 * to the actual system call. Please refer to the implementation what flags
228 * cannot be used. Also, please don't assume that just because a flag isn't
229 * explicitly checked for as being unsupported that it is supported. If in
230 * doubt or not sufficiently familiar with process creation in the kernel and
231 * interactions with libcs this function should be used.
232 *
233 * - no pthread_atfork() handlers:
234 * This function circumvents - as much as this this is possible - any libc
235 * wrappers and thus does not run any pthread_atfork() handlers. Make sure
236 * that this is safe to do in the context you are trying to call this
237 * function.
238 *
239 * - must call lxc_raw_getpid():
240 * The child must use lxc_raw_getpid() to retrieve its pid.
241 */
92a10958
CB
242__hidden extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd);
243__hidden extern pid_t lxc_raw_legacy_clone(unsigned long flags, int *pidfd);
f40988c7
CB
244
245/*
246 * lxc_raw_clone_cb() - create a new process
247 *
248 * - non-fork() behavior:
249 * Function does return pid of the child or -1 on error. Pass in a callback
250 * function via the "fn" argument that gets executed in the child process.
251 * The "args" argument is passed to "fn".
252 *
253 * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
254 * as well.
255 */
92a10958
CB
256__hidden extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
257 int *pidfd);
f40988c7
CB
258
259#ifndef HAVE_EXECVEAT
260static inline int execveat(int dirfd, const char *pathname, char *const argv[],
261 char *const envp[], int flags)
262{
263 return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
264}
265#else
266extern int execveat(int dirfd, const char *pathname, char *const argv[],
267 char *const envp[], int flags);
268#endif
269
/*
 * lxc_raw_getpid() - retrieve the caller's pid via the raw system call
 *
 * Because of older glibc's pid cache (up to 2.25), whenever clone() is called
 * the child must retrieve its own pid via lxc_raw_getpid().
 */
static inline pid_t lxc_raw_getpid(void)
{
	return (pid_t)syscall(SYS_getpid);
}
278
/*
 * lxc_raw_gettid() - retrieve the caller's thread id via the raw system call
 *
 * Uses the gettid system call when __NR_gettid is defined to a positive
 * number; otherwise falls back to lxc_raw_getpid().
 */
static inline pid_t lxc_raw_gettid(void)
{
#if __NR_gettid > 0
	/* Explicit narrowing, matching lxc_raw_getpid(). */
	return (pid_t)syscall(__NR_gettid);
#else
	return lxc_raw_getpid();
#endif
}
287
92a10958
CB
288__hidden extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
289 unsigned int flags);
f40988c7
CB
290
291#endif /* __LXC_PROCESS_UTILS_H */