]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/syscall_wrappers.h
Merge pull request #4010 from brauner/2021-10-23.fixes
[mirror_lxc.git] / src / lxc / syscall_wrappers.h
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef __LXC_SYSCALL_WRAPPER_H
4 #define __LXC_SYSCALL_WRAPPER_H
5
6 #include "config.h"
7
8 #include <asm/unistd.h>
9 #include <errno.h>
10 #include <linux/keyctl.h>
11 #include <sched.h>
12 #include <stdint.h>
13 #include <sys/prctl.h>
14 #include <sys/syscall.h>
15 #include <sys/types.h>
16 #include <unistd.h>
17
18 #include "macro.h"
19 #include "syscall_numbers.h"
20
21 #ifdef HAVE_LINUX_MEMFD_H
22 #include <linux/memfd.h>
23 #endif
24
25 #ifdef HAVE_SYS_SIGNALFD_H
26 #include <sys/signalfd.h>
27 #endif
28
29 #if HAVE_SYS_PERSONALITY_H
30 #include <sys/personality.h>
31 #endif
32
33 typedef int32_t key_serial_t;
34
35 #if !HAVE_KEYCTL
/*
 * Fallback for keyctl(2), used when the C library provides no wrapper.
 *
 * Hands @cmd and the four generic arguments to the raw system call unchanged
 * and returns whatever syscall() reports.
 */
static inline long __keyctl(int cmd, unsigned long arg2, unsigned long arg3,
			    unsigned long arg4, unsigned long arg5)
{
	long ret;

	ret = syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);

	return ret;
}
41 #define keyctl __keyctl
42 #endif
43
44 #ifndef F_LINUX_SPECIFIC_BASE
45 #define F_LINUX_SPECIFIC_BASE 1024
46 #endif
47 #ifndef F_ADD_SEALS
48 #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
49 #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
50 #endif
51 #ifndef F_SEAL_SEAL
52 #define F_SEAL_SEAL 0x0001
53 #define F_SEAL_SHRINK 0x0002
54 #define F_SEAL_GROW 0x0004
55 #define F_SEAL_WRITE 0x0008
56 #endif
57
58 #if !HAVE_MEMFD_CREATE
/*
 * Fallback for memfd_create(2): issues the raw system call with @name and
 * @flags and returns its result narrowed to int.
 */
static inline int memfd_create_lxc(const char *name, unsigned int flags)
{
	long ret = syscall(__NR_memfd_create, name, flags);

	return (int)ret;
}
63 #define memfd_create memfd_create_lxc
64 #else
65 extern int memfd_create(const char *name, unsigned int flags);
66 #endif
67
68 #if !HAVE_PIVOT_ROOT
/*
 * Fallback for pivot_root(2): passes @new_root and @put_old straight to the
 * raw system call and returns its result narrowed to int.
 */
static inline int pivot_root(const char *new_root, const char *put_old)
{
	long ret = syscall(__NR_pivot_root, new_root, put_old);

	return (int)ret;
}
73 #else
74 extern int pivot_root(const char *new_root, const char *put_old);
75 #endif
76
77 /* Define sethostname() if missing from the C library */
78 #if !HAVE_SETHOSTNAME
/* Raw-syscall implementation: forwards @name and @len and returns the result as int. */
static inline int sethostname(const char *name, size_t len)
{
	long ret = syscall(__NR_sethostname, name, len);

	return (int)ret;
}
83 #endif
84
85 /* Define setns() if missing from the C library */
86 #if !HAVE_SETNS
/* Raw-syscall implementation: forwards @fd and @nstype and returns the result as int. */
static inline int setns(int fd, int nstype)
{
	long ret = syscall(__NR_setns, fd, nstype);

	return (int)ret;
}
91 #endif
92
93 #if !HAVE_SYS_SIGNALFD_H
/*
 * Local definition of struct signalfd_siginfo for builds without
 * <sys/signalfd.h>.
 *
 * Layout: twelve 32-bit members (48 bytes), four 64-bit members (32 bytes)
 * and 48 bytes of trailing padding, for a total of 128 bytes.
 */
struct signalfd_siginfo {
	/* 32-bit members */
	uint32_t ssi_signo;
	int32_t ssi_errno;
	int32_t ssi_code;
	uint32_t ssi_pid;
	uint32_t ssi_uid;
	int32_t ssi_fd;
	uint32_t ssi_tid;
	uint32_t ssi_band;
	uint32_t ssi_overrun;
	uint32_t ssi_trapno;
	int32_t ssi_status;
	int32_t ssi_int;

	/* 64-bit members */
	uint64_t ssi_ptr;
	uint64_t ssi_utime;
	uint64_t ssi_stime;
	uint64_t ssi_addr;

	/* Pads the structure out to 128 bytes. */
	uint8_t __pad[48];
};
113
114 static inline int signalfd(int fd, const sigset_t *mask, int flags)
115 {
116 int retval;
117
118 retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);
119 #ifdef __NR_signalfd
120 if (errno == ENOSYS && flags == 0)
121 retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);
122 #endif
123
124 return retval;
125 }
126 #endif
127
128 /* Define unshare() if missing from the C library */
129 #if !HAVE_UNSHARE
/* Raw-syscall implementation: forwards @flags and returns the result as int. */
static inline int unshare(int flags)
{
	long ret = syscall(__NR_unshare, flags);

	return (int)ret;
}
134 #else
135 extern int unshare(int);
136 #endif
137
138 /* Define faccessat() if missing from the C library */
139 #if !HAVE_FACCESSAT
/*
 * Raw-syscall implementation: forwards all four arguments to the faccessat
 * system call and returns its result.
 *
 * Declared "static inline" like every other wrapper in this header so that
 * translation units which include the header without calling faccessat() do
 * not get an unused-function warning.
 *
 * NOTE(review): @__flag is passed through as-is; whether the raw system call
 * honours it depends on the kernel - confirm against faccessat(2).
 */
static inline int faccessat(int __fd, const char *__file, int __type, int __flag)
{
	return syscall(__NR_faccessat, __fd, __file, __type, __flag);
}
144 #endif
145
146 #if !HAVE_MOVE_MOUNT
/*
 * Fallback for move_mount(2): forwards the source (@from_dfd, @from_pathname),
 * the destination (@to_dfd, @to_pathname) and @flags to the raw system call
 * and returns its result narrowed to int.
 */
static inline int move_mount_lxc(int from_dfd, const char *from_pathname,
				 int to_dfd, const char *to_pathname,
				 unsigned int flags)
{
	long ret;

	ret = syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
		      to_pathname, flags);

	return (int)ret;
}
154 #define move_mount move_mount_lxc
155 #else
156 extern int move_mount(int from_dfd, const char *from_pathname, int to_dfd,
157 const char *to_pathname, unsigned int flags);
158 #endif
159
160 #if !HAVE_OPEN_TREE
/*
 * Fallback for open_tree(2): forwards @dfd, @filename and @flags to the raw
 * system call and returns its result narrowed to int.
 */
static inline int open_tree_lxc(int dfd, const char *filename, unsigned int flags)
{
	long ret = syscall(__NR_open_tree, dfd, filename, flags);

	return (int)ret;
}
165 #define open_tree open_tree_lxc
166 #else
167 extern int open_tree(int dfd, const char *filename, unsigned int flags);
168 #endif
169
170 #if !HAVE_FSOPEN
/*
 * Fallback for fsopen(2): forwards @fs_name and @flags to the raw system call
 * and returns its result narrowed to int.
 */
static inline int fsopen_lxc(const char *fs_name, unsigned int flags)
{
	long ret = syscall(__NR_fsopen, fs_name, flags);

	return (int)ret;
}
175 #define fsopen fsopen_lxc
176 #else
177 extern int fsopen(const char *fs_name, unsigned int flags);
178 #endif
179
180 #if !HAVE_FSPICK
/*
 * Fallback for fspick(2): forwards @dfd, @path and @flags to the raw system
 * call and returns its result narrowed to int.
 */
static inline int fspick_lxc(int dfd, const char *path, unsigned int flags)
{
	long ret = syscall(__NR_fspick, dfd, path, flags);

	return (int)ret;
}
185 #define fspick fspick_lxc
186 #else
187 extern int fspick(int dfd, const char *path, unsigned int flags);
188 #endif
189
190 #if !HAVE_FSCONFIG
/*
 * Fallback for fsconfig(2): forwards @fd, @cmd, @key, @value and @aux to the
 * raw system call and returns its result narrowed to int.
 */
static inline int fsconfig_lxc(int fd, unsigned int cmd, const char *key,
			       const void *value, int aux)
{
	long ret = syscall(__NR_fsconfig, fd, cmd, key, value, aux);

	return (int)ret;
}
195 #define fsconfig fsconfig_lxc
196 #else
197 extern int fsconfig(int fd, unsigned int cmd, const char *key, const void *value, int aux);
198 #endif
199
200 #if !HAVE_FSMOUNT
/*
 * Fallback for fsmount(2): forwards @fs_fd, @flags and @attr_flags to the raw
 * system call and returns its result narrowed to int.
 */
static inline int fsmount_lxc(int fs_fd, unsigned int flags,
			      unsigned int attr_flags)
{
	long ret = syscall(__NR_fsmount, fs_fd, flags, attr_flags);

	return (int)ret;
}
205 #define fsmount fsmount_lxc
206 #else
207 extern int fsmount(int fs_fd, unsigned int flags, unsigned int attr_flags);
208 #endif
209
210 /*
211 * mount_setattr()
212 */
/*
 * Argument block handed to the mount_setattr() wrapper in this header.
 * Four 64-bit members, 32 bytes in total.
 */
struct lxc_mount_attr {
	__u64 attr_set;		/* offset 0 */
	__u64 attr_clr;		/* offset 8 */
	__u64 propagation;	/* offset 16 */
	__u64 userns_fd;	/* offset 24 */
};
219
220 #if !HAVE_MOUNT_SETATTR
/*
 * Fallback for mount_setattr(2): forwards @dfd, @path, @flags, the attribute
 * block @attr and its @size to the raw system call and returns the result
 * narrowed to int.
 */
static inline int mount_setattr(int dfd, const char *path, unsigned int flags,
				struct lxc_mount_attr *attr, size_t size)
{
	long ret = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)ret;
}
226 #endif
227
228 /*
229 * Arguments for how openat2(2) should open the target path. If only @flags and
230 * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
231 *
232 * However, unlike openat(2), unknown or invalid bits in @flags result in
233 * -EINVAL rather than being silently ignored. @mode must be zero unless one of
234 * {O_CREAT, O_TMPFILE} are set.
235 *
236 * @flags: O_* flags.
237 * @mode: O_CREAT/O_TMPFILE file mode.
238 * @resolve: RESOLVE_* flags.
239 */
struct lxc_open_how {
	__u64 flags;	/* O_* flags */
	__u64 mode;	/* O_CREAT/O_TMPFILE file mode */
	__u64 resolve;	/* RESOLVE_* flags */
};
245
246 /* how->resolve flags for openat2(2). */
247 #ifndef RESOLVE_NO_XDEV
248 #define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
249 (includes bind-mounts). */
250 #endif
251
252 #ifndef RESOLVE_NO_MAGICLINKS
253 #define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
254 "magic-links". */
255 #endif
256
257 #ifndef RESOLVE_NO_SYMLINKS
258 #define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
259 (implies RESOLVE_NO_MAGICLINKS) */
260 #endif
261
262 #ifndef RESOLVE_BENEATH
263 #define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
264 "..", symlinks, and absolute
265 paths which escape the dirfd. */
266 #endif
267
268 #ifndef RESOLVE_IN_ROOT
269 #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
270 be scoped inside the dirfd
271 (similar to chroot(2)). */
272 #endif
273
274 #define PROTECT_LOOKUP_BENEATH (RESOLVE_BENEATH | RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS)
275 #define PROTECT_LOOKUP_BENEATH_WITH_SYMLINKS (PROTECT_LOOKUP_BENEATH & ~RESOLVE_NO_SYMLINKS)
276 #define PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS (PROTECT_LOOKUP_BENEATH & ~(RESOLVE_NO_SYMLINKS | RESOLVE_NO_MAGICLINKS))
277 #define PROTECT_LOOKUP_BENEATH_XDEV (PROTECT_LOOKUP_BENEATH & ~RESOLVE_NO_XDEV)
278
279 #define PROTECT_LOOKUP_ABSOLUTE (PROTECT_LOOKUP_BENEATH & ~RESOLVE_BENEATH)
280 #define PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS (PROTECT_LOOKUP_ABSOLUTE & ~RESOLVE_NO_SYMLINKS)
281 #define PROTECT_LOOKUP_ABSOLUTE_WITH_MAGICLINKS (PROTECT_LOOKUP_ABSOLUTE & ~(RESOLVE_NO_SYMLINKS | RESOLVE_NO_MAGICLINKS))
282 #define PROTECT_LOOKUP_ABSOLUTE_XDEV (PROTECT_LOOKUP_ABSOLUTE & ~RESOLVE_NO_XDEV)
283 #define PROTECT_LOOKUP_ABSOLUTE_XDEV_SYMLINKS (PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS & ~RESOLVE_NO_XDEV)
284
285 #define PROTECT_OPATH_FILE (O_NOFOLLOW | O_PATH | O_CLOEXEC)
286 #define PROTECT_OPATH_DIRECTORY (PROTECT_OPATH_FILE | O_DIRECTORY)
287
288 #define PROTECT_OPEN_WITH_TRAILING_SYMLINKS (O_CLOEXEC | O_NOCTTY | O_RDONLY)
289 #define PROTECT_OPEN (PROTECT_OPEN_WITH_TRAILING_SYMLINKS | O_NOFOLLOW)
290
291 #define PROTECT_OPEN_W_WITH_TRAILING_SYMLINKS (O_CLOEXEC | O_NOCTTY | O_WRONLY)
292 #define PROTECT_OPEN_W (PROTECT_OPEN_W_WITH_TRAILING_SYMLINKS | O_NOFOLLOW)
293 #define PROTECT_OPEN_RW (O_CLOEXEC | O_NOCTTY | O_RDWR | O_NOFOLLOW)
294
295 #if !HAVE_OPENAT2
/*
 * Fallback for openat2(2): forwards @dfd, @filename, the open_how block @how
 * and its @size to the raw system call and returns the result narrowed to int.
 */
static inline int openat2(int dfd, const char *filename,
			  struct lxc_open_how *how, size_t size)
{
	long ret = syscall(__NR_openat2, dfd, filename, how, size);

	return (int)ret;
}
300 #endif /* HAVE_OPENAT2 */
301
302 #ifndef CLOSE_RANGE_UNSHARE
303 #define CLOSE_RANGE_UNSHARE (1U << 1)
304 #endif
305
306 #ifndef CLOSE_RANGE_CLOEXEC
307 #define CLOSE_RANGE_CLOEXEC (1U << 2)
308 #endif
309
310 #if !HAVE_CLOSE_RANGE
/*
 * Fallback for close_range(2): forwards @fd, @max_fd and @flags to the raw
 * system call and returns its result narrowed to int.
 */
static inline int close_range(unsigned int fd, unsigned int max_fd,
			      unsigned int flags)
{
	long ret = syscall(__NR_close_range, fd, max_fd, flags);

	return (int)ret;
}
315 #endif
316
317 #if !HAVE_SYS_PERSONALITY_H
/*
 * Fallback for personality(2) for builds without <sys/personality.h>:
 * forwards @persona to the raw system call and returns its result narrowed
 * to int.
 */
static inline int personality(unsigned long persona)
{
	long ret = syscall(__NR_personality, persona);

	return (int)ret;
}
322 #endif
323
324 /* arg1 of prctl() */
325 #ifndef PR_SCHED_CORE
326 #define PR_SCHED_CORE 62
327 #endif
328
329 /* arg2 of prctl() */
330 #ifndef PR_SCHED_CORE_GET
331 #define PR_SCHED_CORE_GET 0
332 #endif
333
334 #ifndef PR_SCHED_CORE_CREATE
335 #define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
336 #endif
337
338 #ifndef PR_SCHED_CORE_SHARE_TO
339 #define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
340 #endif
341
342 #ifndef PR_SCHED_CORE_SHARE_FROM
343 #define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */
344 #endif
345
346 #ifndef PR_SCHED_CORE_MAX
347 #define PR_SCHED_CORE_MAX 4
348 #endif
349
350 /* arg3 of prctl() */
351 #ifndef PR_SCHED_CORE_SCOPE_THREAD
352 #define PR_SCHED_CORE_SCOPE_THREAD 0
353 #endif
354
355 #ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
356 #define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
357 #endif
358
359 #ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
360 #define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
361 #endif
362
/* Sentinel cookie value (all bits set) that marks a cookie as invalid. */
#define INVALID_SCHED_CORE_COOKIE ((__u64)-1)

/*
 * Report whether @cookie is usable: it must be neither zero nor
 * INVALID_SCHED_CORE_COOKIE.
 */
static inline bool core_scheduling_cookie_valid(__u64 cookie)
{
	if (cookie == 0)
		return false;

	return cookie != INVALID_SCHED_CORE_COOKIE;
}
369
370 static inline int core_scheduling_cookie_get(pid_t pid, __u64 *cookie)
371 {
372 int ret;
373
374 if (!cookie)
375 return ret_errno(EINVAL);
376
377 ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid,
378 PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)cookie);
379 if (ret) {
380 *cookie = INVALID_SCHED_CORE_COOKIE;
381 return -errno;
382 }
383
384 return 0;
385 }
386
387 static inline int core_scheduling_cookie_create_threadgroup(pid_t pid)
388 {
389 int ret;
390
391 ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
392 PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0);
393 if (ret)
394 return -errno;
395
396 return 0;
397 }
398
399 static inline int core_scheduling_cookie_share_with(pid_t pid)
400 {
401 return prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, pid,
402 PR_SCHED_CORE_SCOPE_THREAD, 0);
403 }
404
405 #endif /* __LXC_SYSCALL_WRAPPER_H */