]> git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/syscall_wrappers.h
tree-wide: wipe direct or indirect linux/mount.h inclusion
[mirror_lxc.git] / src / lxc / syscall_wrappers.h
index 8661ceb4f89e008953575adbc90b2e6850ccfd1d..22ce536b44d5e929c7e5d370388138e9305a7e14 100644 (file)
-/* liblxcapi
- *
- * Copyright © 2018 Christian Brauner <christian.brauner@ubuntu.com>.
- * Copyright © 2018 Canonical Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
+/* SPDX-License-Identifier: LGPL-2.1+ */
 
 #ifndef __LXC_SYSCALL_WRAPPER_H
 #define __LXC_SYSCALL_WRAPPER_H
 
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE 1
-#endif
+#include "config.h"
+
 #include <asm/unistd.h>
+#include <errno.h>
 #include <linux/keyctl.h>
 #include <sched.h>
 #include <stdint.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <unistd.h>
 
-#include "config.h"
+#include "macro.h"
+#include "syscall_numbers.h"
 
 #ifdef HAVE_LINUX_MEMFD_H
 #include <linux/memfd.h>
 #endif
 
+#ifdef HAVE_SYS_SIGNALFD_H
+#include <sys/signalfd.h>
+#endif
+
+#if HAVE_SYS_PERSONALITY_H
+#include <sys/personality.h>
+#endif
+
 typedef int32_t key_serial_t;
 
 #if !HAVE_KEYCTL
+/*
+ * Thin keyctl wrapper, compiled only when HAVE_KEYCTL is unset: forwards @cmd
+ * and the four unsigned long arguments unchanged to syscall(__NR_keyctl, ...)
+ * and returns whatever syscall() returns.
+ */
 static inline long __keyctl(int cmd, unsigned long arg2, unsigned long arg3,
                            unsigned long arg4, unsigned long arg5)
 {
-#ifdef __NR_keyctl
        return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);
-#else
-       errno = ENOSYS;
-       return -1;
-#endif
 }
 #define keyctl __keyctl
 #endif
 
-#ifndef HAVE_MEMFD_CREATE
-static inline int memfd_create(const char *name, unsigned int flags) {
-       #ifndef __NR_memfd_create
-               #if defined __i386__
-                       #define __NR_memfd_create 356
-               #elif defined __x86_64__
-                       #define __NR_memfd_create 319
-               #elif defined __arm__
-                       #define __NR_memfd_create 385
-               #elif defined __aarch64__
-                       #define __NR_memfd_create 279
-               #elif defined __s390__
-                       #define __NR_memfd_create 350
-               #elif defined __powerpc__
-                       #define __NR_memfd_create 360
-               #elif defined __sparc__
-                       #define __NR_memfd_create 348
-               #elif defined __blackfin__
-                       #define __NR_memfd_create 390
-               #elif defined __ia64__
-                       #define __NR_memfd_create 1340
-               #elif defined _MIPS_SIM
-                       #if _MIPS_SIM == _MIPS_SIM_ABI32
-                               #define __NR_memfd_create 4354
-                       #endif
-                       #if _MIPS_SIM == _MIPS_SIM_NABI32
-                               #define __NR_memfd_create 6318
-                       #endif
-                       #if _MIPS_SIM == _MIPS_SIM_ABI64
-                               #define __NR_memfd_create 5314
-                       #endif
-               #endif
-       #endif
-       #ifdef __NR_memfd_create
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+#endif
+#ifndef F_SEAL_SEAL
+#define F_SEAL_SEAL 0x0001
+#define F_SEAL_SHRINK 0x0002
+#define F_SEAL_GROW 0x0004
+#define F_SEAL_WRITE 0x0008
+#endif
+
+#if !HAVE_MEMFD_CREATE
+/*
+ * Local memfd_create() replacement, compiled only when HAVE_MEMFD_CREATE is
+ * unset: passes @name and @flags straight to syscall(__NR_memfd_create, ...).
+ * NOTE(review): __NR_memfd_create is presumably supplied by
+ * "syscall_numbers.h" now that the per-architecture table is gone - confirm.
+ */
+static inline int memfd_create_lxc(const char *name, unsigned int flags)
+{
        return syscall(__NR_memfd_create, name, flags);
-       #else
-       errno = ENOSYS;
-       return -1;
-       #endif
 }
+#define memfd_create memfd_create_lxc
 #else
 extern int memfd_create(const char *name, unsigned int flags);
 #endif
 
 #if !HAVE_PIVOT_ROOT
+/*
+ * pivot_root() replacement, compiled only when HAVE_PIVOT_ROOT is unset:
+ * passes @new_root and @put_old to syscall(__NR_pivot_root, ...) and returns
+ * its result.
+ */
-static int pivot_root(const char *new_root, const char *put_old)
+static inline int pivot_root(const char *new_root, const char *put_old)
 {
-#ifdef __NR_pivot_root
        return syscall(__NR_pivot_root, new_root, put_old);
-#else
-       errno = ENOSYS;
-       return -1;
-#endif
 }
 #else
 extern int pivot_root(const char *new_root, const char *put_old);
 #endif
 
-#if !defined(__NR_setns) && !defined(__NR_set_ns)
-       #if defined(__x86_64__)
-               #define __NR_setns 308
-       #elif defined(__i386__)
-               #define __NR_setns 346
-       #elif defined(__arm__)
-               #define __NR_setns 375
-       #elif defined(__aarch64__)
-               #define __NR_setns 375
-       #elif defined(__powerpc__)
-               #define __NR_setns 350
-       #elif defined(__s390__)
-               #define __NR_setns 339
-       #endif
-#endif
-
 /* Define sethostname() if missing from the C library */
-#ifndef HAVE_SETHOSTNAME
+#if !HAVE_SETHOSTNAME
+/* Passes @name and @len unchanged to syscall(__NR_sethostname, ...). */
 static inline int sethostname(const char *name, size_t len)
 {
-#ifdef __NR_sethostname
        return syscall(__NR_sethostname, name, len);
-#else
-       errno = ENOSYS;
-       return -1;
-#endif
 }
 #endif
 
 /* Define setns() if missing from the C library */
-#ifndef HAVE_SETNS
+#if !HAVE_SETNS
+/*
+ * Passes @fd and @nstype unchanged to syscall(__NR_setns, ...). The old
+ * __NR_set_ns alternative and the ENOSYS stub are removed by this change.
+ */
 static inline int setns(int fd, int nstype)
 {
-#ifdef __NR_setns
        return syscall(__NR_setns, fd, nstype);
-#elif defined(__NR_set_ns)
-       return syscall(__NR_set_ns, fd, nstype);
-#else
-       errno = ENOSYS;
-       return -1;
+}
 #endif
+
+#if !HAVE_SYS_SIGNALFD_H
+/*
+ * Local definition of struct signalfd_siginfo, compiled only when
+ * HAVE_SYS_SIGNALFD_H is unset, i.e. when <sys/signalfd.h> was not included
+ * above. The fixed-width members occupy 80 bytes and __pad extends the
+ * structure to 128 bytes.
+ * NOTE(review): layout is presumably meant to mirror the kernel/libc
+ * definition - confirm against <sys/signalfd.h>.
+ */
+struct signalfd_siginfo {
+       uint32_t ssi_signo;
+       int32_t ssi_errno;
+       int32_t ssi_code;
+       uint32_t ssi_pid;
+       uint32_t ssi_uid;
+       int32_t ssi_fd;
+       uint32_t ssi_tid;
+       uint32_t ssi_band;
+       uint32_t ssi_overrun;
+       uint32_t ssi_trapno;
+       int32_t ssi_status;
+       int32_t ssi_int;
+       uint64_t ssi_ptr;
+       uint64_t ssi_utime;
+       uint64_t ssi_stime;
+       uint64_t ssi_addr;
+       uint8_t __pad[48]; /* explicit tail padding */
+};
+
+/*
+ * signalfd() replacement, compiled only when HAVE_SYS_SIGNALFD_H is unset.
+ *
+ * @fd:    passed through to the system call unchanged.
+ * @mask:  signal set passed through to the system call; its size is given to
+ *         the kernel as _NSIG / 8 bytes.
+ * @flags: passed to signalfd4; the legacy signalfd call takes no flags.
+ *
+ * Tries signalfd4 first. If that call fails with ENOSYS, no flags were
+ * requested and __NR_signalfd is defined, the legacy signalfd system call is
+ * tried instead. Returns the result of the last system call issued.
+ */
+static inline int signalfd(int fd, const sigset_t *mask, int flags)
+{
+       int retval;
+
+       retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);
+#ifdef __NR_signalfd
+       /*
+        * Only consult errno when signalfd4 actually failed. errno is not
+        * reset on success, so a stale ENOSYS left behind by an earlier call
+        * would otherwise trigger a second call and leak the descriptor
+        * signalfd4 just returned.
+        */
+       if (retval < 0 && errno == ENOSYS && flags == 0)
+               retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);
+#endif
+
+       return retval;
 }
 #endif
 
 /* Define unshare() if missing from the C library */
-#ifndef HAVE_UNSHARE
+#if !HAVE_UNSHARE
+/* Passes @flags unchanged to syscall(__NR_unshare, ...). */
 static inline int unshare(int flags)
 {
-#ifdef __NR_unshare
        return syscall(__NR_unshare, flags);
+}
 #else
-       errno = ENOSYS;
-       return -1;
+extern int unshare(int);
 #endif
+
+/* Define faccessat() if missing from the C library */
+#if !HAVE_FACCESSAT
+/*
+ * faccessat() replacement, compiled only when HAVE_FACCESSAT is unset.
+ * Passes all four arguments unchanged to syscall(__NR_faccessat, ...) and
+ * returns its result.
+ *
+ * Declared static inline like the other wrappers in this header: a plain
+ * static definition in a header can draw an unused-function warning in
+ * including files that never call it.
+ */
+static inline int faccessat(int __fd, const char *__file, int __type, int __flag)
+{
+       return syscall(__NR_faccessat, __fd, __file, __type, __flag);
+}
+#endif
+
+#if !HAVE_MOVE_MOUNT
+/*
+ * move_mount() replacement, compiled only when HAVE_MOVE_MOUNT is unset.
+ * Passes all five arguments, in order, to syscall(__NR_move_mount, ...).
+ */
+static inline int move_mount_lxc(int from_dfd, const char *from_pathname,
+                                int to_dfd, const char *to_pathname,
+                                unsigned int flags)
+{
+       long rc;
+
+       rc = syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
+                    to_pathname, flags);
+       return (int)rc;
 }
+#define move_mount move_mount_lxc
 #else
-extern int unshare(int);
+extern int move_mount(int from_dfd, const char *from_pathname, int to_dfd,
+                     const char *to_pathname, unsigned int flags);
+#endif
+
+#if !HAVE_OPEN_TREE
+/*
+ * open_tree() replacement, compiled only when HAVE_OPEN_TREE is unset.
+ * Passes @dfd, @filename and @flags to syscall(__NR_open_tree, ...).
+ */
+static inline int open_tree_lxc(int dfd, const char *filename,
+                               unsigned int flags)
+{
+       long rc = syscall(__NR_open_tree, dfd, filename, flags);
+
+       return (int)rc;
+}
+#define open_tree open_tree_lxc
+#else
+extern int open_tree(int dfd, const char *filename, unsigned int flags);
+#endif
+
+#if !HAVE_FSOPEN
+/*
+ * fsopen() replacement, compiled only when HAVE_FSOPEN is unset.
+ * Passes @fs_name and @flags to syscall(__NR_fsopen, ...).
+ */
+static inline int fsopen_lxc(const char *fs_name, unsigned int flags)
+{
+       long rc = syscall(__NR_fsopen, fs_name, flags);
+
+       return (int)rc;
+}
+#define fsopen fsopen_lxc
+#else
+extern int fsopen(const char *fs_name, unsigned int flags);
+#endif
+
+#if !HAVE_FSPICK
+/*
+ * fspick() replacement, compiled only when HAVE_FSPICK is unset.
+ * Passes @dfd, @path and @flags to syscall(__NR_fspick, ...).
+ */
+static inline int fspick_lxc(int dfd, const char *path, unsigned int flags)
+{
+       long rc = syscall(__NR_fspick, dfd, path, flags);
+
+       return (int)rc;
+}
+#define fspick fspick_lxc
+#else
+extern int fspick(int dfd, const char *path, unsigned int flags);
+#endif
+
+#if !HAVE_FSCONFIG
+/*
+ * fsconfig() replacement, compiled only when HAVE_FSCONFIG is unset.
+ * Passes @fd, @cmd, @key, @value and @aux to syscall(__NR_fsconfig, ...).
+ */
+static inline int fsconfig_lxc(int fd, unsigned int cmd, const char *key,
+                              const void *value, int aux)
+{
+       long rc = syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+
+       return (int)rc;
+}
+#define fsconfig fsconfig_lxc
+#else
+extern int fsconfig(int fd, unsigned int cmd, const char *key, const void *value, int aux);
+#endif
+
+#if !HAVE_FSMOUNT
+/*
+ * fsmount() replacement, compiled only when HAVE_FSMOUNT is unset.
+ * Passes @fs_fd, @flags and @attr_flags to syscall(__NR_fsmount, ...).
+ */
+static inline int fsmount_lxc(int fs_fd, unsigned int flags,
+                             unsigned int attr_flags)
+{
+       long rc = syscall(__NR_fsmount, fs_fd, flags, attr_flags);
+
+       return (int)rc;
+}
+#define fsmount fsmount_lxc
+#else
+extern int fsmount(int fs_fd, unsigned int flags, unsigned int attr_flags);
 #endif
 
+/*
+ * mount_setattr()
+ */
+#if !HAVE_STRUCT_MOUNT_ATTR
+/*
+ * Local definition of struct mount_attr, compiled only when
+ * HAVE_STRUCT_MOUNT_ATTR is unset. Four 64-bit members, 32 bytes in total.
+ * NOTE(review): member meanings below are inferred from their names -
+ * confirm against the mount_setattr(2) documentation.
+ */
+struct mount_attr {
+       __u64 attr_set;    /* presumably attributes to set */
+       __u64 attr_clr;    /* presumably attributes to clear */
+       __u64 propagation; /* presumably the propagation type */
+       __u64 userns_fd;   /* presumably a user namespace file descriptor */
+};
+#endif
+
+#if !HAVE_MOUNT_SETATTR
+/*
+ * mount_setattr() replacement, compiled only when HAVE_MOUNT_SETATTR is
+ * unset. Passes @dfd, @path, @flags, @attr and @size to
+ * syscall(__NR_mount_setattr, ...).
+ */
+static inline int mount_setattr(int dfd, const char *path, unsigned int flags,
+                               struct mount_attr *attr, size_t size)
+{
+       long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
+
+       return (int)rc;
+}
+#endif
+
+/*
+ * Arguments for how openat2(2) should open the target path. If only @flags and
+ * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
+ *
+ * However, unlike openat(2), unknown or invalid bits in @flags result in
+ * -EINVAL rather than being silently ignored. @mode must be zero unless one of
+ * {O_CREAT, O_TMPFILE} are set.
+ *
+ * @flags: O_* flags.
+ * @mode: O_CREAT/O_TMPFILE file mode.
+ * @resolve: RESOLVE_* flags.
+ */
+struct lxc_open_how {
+       __u64 flags;   /* O_* flags */
+       __u64 mode;    /* O_CREAT/O_TMPFILE file mode */
+       __u64 resolve; /* RESOLVE_* flags */
+};
+
+/* how->resolve flags for openat2(2). */
+#ifndef RESOLVE_NO_XDEV
+#define RESOLVE_NO_XDEV                0x01 /* Block mount-point crossings
+                                       (includes bind-mounts). */
+#endif
+
+#ifndef RESOLVE_NO_MAGICLINKS
+#define RESOLVE_NO_MAGICLINKS  0x02 /* Block traversal through procfs-style
+                                       "magic-links". */
+#endif
+
+#ifndef RESOLVE_NO_SYMLINKS
+#define RESOLVE_NO_SYMLINKS    0x04 /* Block traversal through all symlinks
+                                       (implies RESOLVE_NO_MAGICLINKS) */
+#endif
+
+#ifndef RESOLVE_BENEATH
+#define RESOLVE_BENEATH                0x08 /* Block "lexical" trickery like
+                                       "..", symlinks, and absolute
+                                       paths which escape the dirfd. */
+#endif
+
+#ifndef RESOLVE_IN_ROOT
+#define RESOLVE_IN_ROOT                0x10 /* Make all jumps to "/" and ".."
+                                       be scoped inside the dirfd
+                                       (similar to chroot(2)). */
+#endif
+
+#define PROTECT_LOOKUP_BENEATH  (RESOLVE_BENEATH | RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS)
+#define PROTECT_LOOKUP_BENEATH_WITH_SYMLINKS (PROTECT_LOOKUP_BENEATH & ~RESOLVE_NO_SYMLINKS)
+#define PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS (PROTECT_LOOKUP_BENEATH & ~(RESOLVE_NO_SYMLINKS | RESOLVE_NO_MAGICLINKS))
+#define PROTECT_LOOKUP_BENEATH_XDEV (PROTECT_LOOKUP_BENEATH & ~RESOLVE_NO_XDEV)
+
+#define PROTECT_LOOKUP_ABSOLUTE (PROTECT_LOOKUP_BENEATH & ~RESOLVE_BENEATH)
+#define PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS (PROTECT_LOOKUP_ABSOLUTE & ~RESOLVE_NO_SYMLINKS)
+#define PROTECT_LOOKUP_ABSOLUTE_WITH_MAGICLINKS (PROTECT_LOOKUP_ABSOLUTE & ~(RESOLVE_NO_SYMLINKS | RESOLVE_NO_MAGICLINKS))
+#define PROTECT_LOOKUP_ABSOLUTE_XDEV (PROTECT_LOOKUP_ABSOLUTE & ~RESOLVE_NO_XDEV)
+#define PROTECT_LOOKUP_ABSOLUTE_XDEV_SYMLINKS (PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS & ~RESOLVE_NO_XDEV)
+
+#define PROTECT_OPATH_FILE (O_NOFOLLOW | O_PATH | O_CLOEXEC)
+#define PROTECT_OPATH_DIRECTORY (PROTECT_OPATH_FILE | O_DIRECTORY)
+
+#define PROTECT_OPEN_WITH_TRAILING_SYMLINKS (O_CLOEXEC | O_NOCTTY | O_RDONLY)
+#define PROTECT_OPEN (PROTECT_OPEN_WITH_TRAILING_SYMLINKS | O_NOFOLLOW)
+
+#define PROTECT_OPEN_W_WITH_TRAILING_SYMLINKS (O_CLOEXEC | O_NOCTTY | O_WRONLY)
+#define PROTECT_OPEN_W (PROTECT_OPEN_W_WITH_TRAILING_SYMLINKS | O_NOFOLLOW)
+#define PROTECT_OPEN_RW (O_CLOEXEC | O_NOCTTY | O_RDWR | O_NOFOLLOW)
+
+#if !HAVE_OPENAT2
+/*
+ * openat2() replacement, compiled only when HAVE_OPENAT2 is unset.
+ * Passes @dfd, @filename, @how and @size to syscall(__NR_openat2, ...).
+ */
+static inline int openat2(int dfd, const char *filename,
+                         struct lxc_open_how *how, size_t size)
+{
+       long rc = syscall(__NR_openat2, dfd, filename, how, size);
+
+       return (int)rc;
+}
+#endif /* HAVE_OPENAT2 */
+
+#ifndef CLOSE_RANGE_UNSHARE
+#define CLOSE_RANGE_UNSHARE    (1U << 1)
+#endif
+
+#ifndef CLOSE_RANGE_CLOEXEC
+#define CLOSE_RANGE_CLOEXEC    (1U << 2)
+#endif
+
+#if !HAVE_CLOSE_RANGE
+/*
+ * close_range() replacement, compiled only when HAVE_CLOSE_RANGE is unset.
+ * Passes @fd, @max_fd and @flags to syscall(__NR_close_range, ...).
+ */
+static inline int close_range(unsigned int fd, unsigned int max_fd,
+                             unsigned int flags)
+{
+       long rc = syscall(__NR_close_range, fd, max_fd, flags);
+
+       return (int)rc;
+}
+#endif
+
+#if !HAVE_SYS_PERSONALITY_H
+/*
+ * personality() replacement, compiled only when HAVE_SYS_PERSONALITY_H is
+ * unset. Passes @persona to syscall(__NR_personality, ...).
+ */
+static inline int personality(unsigned long persona)
+{
+       long rc = syscall(__NR_personality, persona);
+
+       return (int)rc;
+}
+#endif
+
+/* arg1 of prctl() */
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+#endif
+
+/* arg2 of prctl() */
+#ifndef PR_SCHED_CORE_GET
+#define PR_SCHED_CORE_GET 0
+#endif
+
+#ifndef PR_SCHED_CORE_CREATE
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_TO
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_FROM
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */
+#endif
+
+#ifndef PR_SCHED_CORE_MAX
+#define PR_SCHED_CORE_MAX 4
+#endif
+
+/* arg3 of prctl() */
+#ifndef PR_SCHED_CORE_SCOPE_THREAD
+#define PR_SCHED_CORE_SCOPE_THREAD 0
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
+#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
+#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#endif
+
+#define INVALID_SCHED_CORE_COOKIE ((__u64)-1)
+
+/*
+ * Report whether @cookie is usable: it must be neither zero nor
+ * INVALID_SCHED_CORE_COOKIE.
+ */
+static inline bool core_scheduling_cookie_valid(__u64 cookie)
+{
+       if (cookie == 0)
+               return false;
+
+       return cookie != INVALID_SCHED_CORE_COOKIE;
+}
+
+/*
+ * Query the core scheduling cookie of @pid with
+ * prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, ...) at thread scope and have it
+ * stored in *@cookie.
+ *
+ * Returns 0 on success. A NULL @cookie is rejected via ret_errno(EINVAL).
+ * If prctl() fails, *@cookie is set to INVALID_SCHED_CORE_COOKIE and -errno
+ * is returned.
+ */
+static inline int core_scheduling_cookie_get(pid_t pid, __u64 *cookie)
+{
+       if (!cookie)
+               return ret_errno(EINVAL);
+
+       if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid,
+                 PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)cookie) == 0)
+               return 0;
+
+       *cookie = INVALID_SCHED_CORE_COOKIE;
+       return -errno;
+}
+
+/*
+ * Issue prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, ...) for @pid at
+ * thread-group scope. Returns 0 on success and -errno if prctl() fails.
+ */
+static inline int core_scheduling_cookie_create_threadgroup(pid_t pid)
+{
+       if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
+                 PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0) != 0)
+               return -errno;
+
+       return 0;
+}
+
+/*
+ * Issue prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, ...) for @pid at
+ * thread scope and return the prctl() result unmodified (not -errno).
+ */
+static inline int core_scheduling_cookie_share_with(pid_t pid)
+{
+       int ret;
+
+       ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, pid,
+                   PR_SCHED_CORE_SCOPE_THREAD, 0);
+       return ret;
+}
+
 #endif /* __LXC_SYSCALL_WRAPPER_H */