git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/raw_syscalls.c
seccomp: s/seccomp_notif_alloc/seccomp_notify_alloc/g
[mirror_lxc.git] / src / lxc / raw_syscalls.c
CommitLineData
13be2733
CB
1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE 1
3#endif
4#include <errno.h>
38e5c2db
CB
5#include <sched.h>
6#include <signal.h>
13be2733
CB
7#include <stdio.h>
8#include <stdlib.h>
9#include <sys/syscall.h>
10#include <unistd.h>
11
633cb8be 12#include "compiler.h"
13be2733 13#include "config.h"
38e5c2db
CB
14#include "macro.h"
15#include "raw_syscalls.h"
13be2733
CB
16
/*
 * lxc_raw_execveat - invoke the execveat system call directly.
 *
 * @dirfd, @pathname, @argv, @envp and @flags are handed to the kernel
 * unchanged.
 *
 * The result of syscall() is deliberately not inspected: this function always
 * returns -1. A successful exec replaces the calling program, so reaching the
 * return statement means the exec did not happen. errno is then whatever
 * syscall() left behind, or ENOSYS when the build headers do not define
 * __NR_execveat.
 */
int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
		     char *const envp[], int flags)
{
#ifndef __NR_execveat
	/* No syscall number available at build time. */
	errno = ENOSYS;
#else
	(void)syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
#endif

	return -1;
}
38e5c2db
CB
27
/*
 * This is based on raw_clone in systemd but adapted to our needs. This uses
 * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
 * doesn't really matter to us so disallow it.
 *
 * The nice thing about this is that we get fork() behavior. That is
 * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
 *
 * @flags: clone flags. SIGCHLD is always or'ed in before the system call.
 *         CLONE_VM, CLONE_PARENT_SETTID, CLONE_CHILD_SETTID,
 *         CLONE_CHILD_CLEARTID and CLONE_SETTLS are rejected.
 * @pidfd: forwarded to the kernel unchanged as the argument following the
 *         (NULL) stack. Presumably only meaningful together with CLONE_PIDFD;
 *         confirm against callers.
 *
 * Returns 0 in the child and the child's pid in the parent. If one of the
 * rejected flags is set, errno is set to EINVAL and -EINVAL is returned.
 * Otherwise a failed system call yields -1 with errno set. Callers should
 * treat any negative value as failure.
 */
__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
{
	/*
	 * These flags don't interest us at all so we don't jump through any
	 * hoops of retrieving them and passing them to the kernel.
	 *
	 * errno is only touched when the request is actually rejected so that
	 * a successful clone does not hand a stale EINVAL to the parent and
	 * the child.
	 */
	if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
		      CLONE_CHILD_CLEARTID | CLONE_SETTLS))) {
		errno = EINVAL;
		return -EINVAL;
	}

#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
	/* On s390/s390x and cris the order of the first and second arguments
	 * of the system call is reversed.
	 */
	return syscall(__NR_clone, NULL, flags | SIGCHLD, pidfd);
#elif defined(__sparc__) && defined(__arch64__)
	{
		/*
		 * sparc64 always returns the other process id in %o0, and a
		 * boolean flag whether this is the child or the parent in %o1.
		 * Inline assembly is needed to get the flag returned in %o1.
		 */
		register long g1 asm("g1") = __NR_clone;
		register long o0 asm("o0") = flags | SIGCHLD;
		register long o1 asm("o1") = 0; /* is parent/child indicator */
		register long o2 asm("o2") = (unsigned long)pidfd;
		long is_error, retval, in_child;
		pid_t child_pid;

		/*
		 * NOTE(review): the enclosing #elif already requires
		 * __arch64__, so the 32-bit trap in the #else below is never
		 * compiled in this branch.
		 *
		 * NOTE(review): the template spells one register as %%g0 and
		 * the other as %g1; confirm the latter assembles as intended.
		 */
		asm volatile(
#if defined(__arch64__)
		    "t 0x6d\n\t" /* 64-bit trap */
#else
		    "t 0x10\n\t" /* 32-bit trap */
#endif
		    /*
		     * catch errors: On sparc, the carry bit (csr) in the
		     * processor status register (psr) is used instead of a
		     * full register.
		     */
		    "addx %%g0, 0, %g1"
		    : "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */
		    : "r"(g1), "r"(o0), "r"(o1), "r"(o2)     /* inputs */
		    : "%cc");				     /* clobbers */

		is_error = g1;
		retval = o0;
		in_child = o1;

		/* On error the value left in %o0 is used as the errno value. */
		if (is_error) {
			errno = retval;
			return -1;
		}

		if (in_child)
			return 0;

		child_pid = retval;
		return child_pid;
	}
#elif defined(__ia64__)
	/* On ia64 the stack and stack size are passed as separate arguments. */
	return syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0), pidfd);
#else
	return syscall(__NR_clone, flags | SIGCHLD, NULL, pidfd);
#endif
}
104
a59440be
CB
/*
 * lxc_raw_clone_cb - clone via lxc_raw_clone() and run a callback in the child.
 *
 * @fn:    function executed in the child; its return value becomes the
 *         child's exit status
 * @args:  opaque pointer handed to @fn
 * @flags: passed through to lxc_raw_clone()
 * @pidfd: passed through to lxc_raw_clone()
 *
 * Returns the child's pid in the parent, or -1 if lxc_raw_clone() reported
 * any negative value. Never returns in the child.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
		       int *pidfd)
{
	pid_t child = lxc_raw_clone(flags, pidfd);

	if (child == 0) {
		/*
		 * Leave through _exit() rather than exit(): exit() is not
		 * thread-safe and might mess with the parent's signal
		 * handlers and other stuff when exec() fails.
		 */
		_exit(fn(args));
	}

	/* Collapse every negative error value into a plain -1. */
	return child < 0 ? -1 : child;
}
d9bb2fba
CB
123
/*
 * lxc_raw_pidfd_send_signal - invoke the pidfd_send_signal system call
 * directly.
 *
 * @pidfd: file descriptor referring to the target process
 * @sig:   signal number to deliver
 * @info:  siginfo_t to send along with the signal, forwarded unchanged
 * @flags: forwarded unchanged
 *
 * Returns the result of the system call: 0 on success, -1 with errno set on
 * failure. If the build headers do not define __NR_pidfd_send_signal, returns
 * -1 with errno set to ENOSYS.
 */
int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
			      unsigned int flags)
{
#ifdef __NR_pidfd_send_signal
	/*
	 * This call returns to the caller on success, so its result has to be
	 * propagated; reporting -1 unconditionally would make every delivered
	 * signal look like a failure.
	 */
	return (int)syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
#else
	errno = ENOSYS;
	return -1;
#endif
}