]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/raw_syscalls.c
f58b8d89876211d457ee1829096f6064e5bf9ab5
[mirror_lxc.git] / src / lxc / raw_syscalls.c
1 #ifndef _GNU_SOURCE
2 #define _GNU_SOURCE 1
3 #endif
4 #include <errno.h>
5 #include <sched.h>
6 #include <signal.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <sys/syscall.h>
10 #include <unistd.h>
11
12 #include "compiler.h"
13 #include "config.h"
14 #include "macro.h"
15 #include "raw_syscalls.h"
16
/*
 * Thin wrapper around the raw execveat() system call.
 *
 * The arguments are handed to the kernel unchanged. A successful execveat()
 * replaces the calling program and never comes back, so reaching the return
 * statement always means failure: -1 is returned with errno set by the system
 * call, or set to ENOSYS when the system call number is not known at build
 * time.
 */
int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[],
		     char *const envp[], int flags)
{
#ifndef __NR_execveat
	/* No system call number available in the headers we were built with. */
	errno = ENOSYS;
#else
	syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
#endif

	return -1;
}
27
28 /*
29 * This is based on raw_clone in systemd but adapted to our needs. This uses
30 * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
31 * doesn't really matter to us so disallow it.
32 *
33 * The nice thing about this is that we get fork() behavior. That is
34 * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
35 */
36 __returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd)
37 {
38 /*
39 * These flags don't interest at all so we don't jump through any hoops
40 * of retrieving them and passing them to the kernel.
41 */
42 errno = EINVAL;
43 if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
44 CLONE_CHILD_CLEARTID | CLONE_SETTLS)))
45 return -EINVAL;
46
47 #if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
48 /* On s390/s390x and cris the order of the first and second arguments
49 * of the system call is reversed.
50 */
51 return syscall(__NR_clone, NULL, flags | SIGCHLD, pidfd);
52 #elif defined(__sparc__) && defined(__arch64__)
53 {
54 /*
55 * sparc64 always returns the other process id in %o0, and a
56 * boolean flag whether this is the child or the parent in %o1.
57 * Inline assembly is needed to get the flag returned in %o1.
58 */
59 register long g1 asm("g1") = __NR_clone;
60 register long o0 asm("o0") = flags | SIGCHLD;
61 register long o1 asm("o1") = 0; /* is parent/child indicator */
62 register long o2 asm("o2") = (unsigned long)pidfd;
63 long is_error, retval, in_child;
64 pid_t child_pid;
65
66 asm volatile(
67 #if defined(__arch64__)
68 "t 0x6d\n\t" /* 64-bit trap */
69 #else
70 "t 0x10\n\t" /* 32-bit trap */
71 #endif
72 /*
73 * catch errors: On sparc, the carry bit (csr) in the
74 * processor status register (psr) is used instead of a
75 * full register.
76 */
77 "addx %%g0, 0, %g1"
78 : "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */
79 : "r"(g1), "r"(o0), "r"(o1), "r"(o2) /* inputs */
80 : "%cc"); /* clobbers */
81
82 is_error = g1;
83 retval = o0;
84 in_child = o1;
85
86 if (is_error) {
87 errno = retval;
88 return -1;
89 }
90
91 if (in_child)
92 return 0;
93
94 child_pid = retval;
95 return child_pid;
96 }
97 #elif defined(__ia64__)
98 /* On ia64 the stack and stack size are passed as separate arguments. */
99 return syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0), pidfd);
100 #else
101 return syscall(__NR_clone, flags | SIGCHLD, NULL, pidfd);
102 #endif
103 }
104
/*
 * Clone the calling process via lxc_raw_clone() and run @fn(@args) in the
 * child.
 *
 * @fn:    callback executed in the child; its return value becomes the
 *         child's exit status.
 * @args:  opaque pointer handed to @fn.
 * @flags: forwarded unchanged to lxc_raw_clone().
 * @pidfd: forwarded unchanged to lxc_raw_clone().
 *
 * Returns the child's pid in the parent, or -1 if lxc_raw_clone() reported
 * any negative value. The child never returns from this function.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
		       int *pidfd)
{
	pid_t child = lxc_raw_clone(flags, pidfd);

	/* Parent: hand the new pid back to the caller. */
	if (child > 0)
		return child;

	/* Any negative result is collapsed into a plain -1. */
	if (child < 0)
		return -1;

	/*
	 * Child: leave through _exit() rather than exit(). exit() is not
	 * thread-safe and might mess with the parent's signal handlers and
	 * other stuff when exec() fails.
	 */
	_exit(fn(args));
}
123
/*
 * Thin wrapper around the raw pidfd_send_signal() system call.
 *
 * @pidfd: file descriptor referring to the target process.
 * @sig:   signal to send.
 * @info:  optional siginfo to deliver with the signal, may be NULL.
 * @flags: flags argument of the system call, passed through unchanged.
 *
 * Returns whatever the system call returns: 0 on success, -1 with errno set
 * on failure. When the system call number is not known at build time, -1 is
 * returned with errno set to ENOSYS.
 */
int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
			      unsigned int flags)
{
#ifdef __NR_pidfd_send_signal
	/*
	 * Propagate the result instead of unconditionally reporting -1,
	 * otherwise a successfully delivered signal looks like a failure to
	 * the caller.
	 */
	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
#else
	errno = ENOSYS;
	return -1;
#endif
}