]>
Commit | Line | Data |
---|---|---|
cc73685d | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
6b9e666f | 2 | |
13be2733 CB |
3 | #ifndef _GNU_SOURCE |
4 | #define _GNU_SOURCE 1 | |
5 | #endif | |
6 | #include <errno.h> | |
38e5c2db CB |
7 | #include <sched.h> |
8 | #include <signal.h> | |
13be2733 CB |
9 | #include <stdio.h> |
10 | #include <stdlib.h> | |
11 | #include <sys/syscall.h> | |
12 | #include <unistd.h> | |
13 | ||
633cb8be | 14 | #include "compiler.h" |
13be2733 | 15 | #include "config.h" |
f40988c7 | 16 | #include "log.h" |
38e5c2db | 17 | #include "macro.h" |
f40988c7 | 18 | #include "process_utils.h" |
bed09c9c | 19 | #include "syscall_numbers.h" |
13be2733 | 20 | |
f40988c7 | 21 | lxc_log_define(process_utils, lxc); |
38e5c2db CB |
22 | |
23 | /* | |
24 | * This is based on raw_clone in systemd but adapted to our needs. This uses | |
25 | * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and | |
26 | * doesn't really matter to us so disallow it. | |
27 | * | |
28 | * The nice thing about this is that we get fork() behavior. That is | |
29 | * lxc_raw_clone() returns 0 in the child and the child pid in the parent. | |
30 | */ | |
f7176c3e | 31 | __returns_twice pid_t lxc_raw_legacy_clone(unsigned long flags, int *pidfd) |
38e5c2db | 32 | { |
38e5c2db CB |
33 | |
34 | #if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) | |
35 | /* On s390/s390x and cris the order of the first and second arguments | |
36 | * of the system call is reversed. | |
37 | */ | |
a59440be | 38 | return syscall(__NR_clone, NULL, flags | SIGCHLD, pidfd); |
38e5c2db CB |
39 | #elif defined(__sparc__) && defined(__arch64__) |
40 | { | |
41 | /* | |
42 | * sparc64 always returns the other process id in %o0, and a | |
43 | * boolean flag whether this is the child or the parent in %o1. | |
44 | * Inline assembly is needed to get the flag returned in %o1. | |
45 | */ | |
b52e8e68 CB |
46 | register long g1 asm("g1") = __NR_clone; |
47 | register long o0 asm("o0") = flags | SIGCHLD; | |
48 | register long o1 asm("o1") = 0; /* is parent/child indicator */ | |
a59440be | 49 | register long o2 asm("o2") = (unsigned long)pidfd; |
b52e8e68 CB |
50 | long is_error, retval, in_child; |
51 | pid_t child_pid; | |
6dfb1463 | 52 | |
b52e8e68 | 53 | asm volatile( |
6dfb1463 | 54 | #if defined(__arch64__) |
b52e8e68 | 55 | "t 0x6d\n\t" /* 64-bit trap */ |
6dfb1463 | 56 | #else |
b52e8e68 | 57 | "t 0x10\n\t" /* 32-bit trap */ |
6dfb1463 | 58 | #endif |
b52e8e68 CB |
59 | /* |
60 | * catch errors: On sparc, the carry bit (csr) in the | |
61 | * processor status register (psr) is used instead of a | |
62 | * full register. | |
63 | */ | |
5f7dd076 | 64 | "addx %%g0, 0, %%g1" |
a59440be CB |
65 | : "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */ |
66 | : "r"(g1), "r"(o0), "r"(o1), "r"(o2) /* inputs */ | |
67 | : "%cc"); /* clobbers */ | |
6dfb1463 | 68 | |
b52e8e68 CB |
69 | is_error = g1; |
70 | retval = o0; | |
71 | in_child = o1; | |
72 | ||
73 | if (is_error) { | |
74 | errno = retval; | |
6dfb1463 CB |
75 | return -1; |
76 | } | |
38e5c2db CB |
77 | |
78 | if (in_child) | |
79 | return 0; | |
6dfb1463 | 80 | |
b52e8e68 | 81 | child_pid = retval; |
6dfb1463 | 82 | return child_pid; |
38e5c2db CB |
83 | } |
84 | #elif defined(__ia64__) | |
85 | /* On ia64 the stack and stack size are passed as separate arguments. */ | |
a59440be | 86 | return syscall(__NR_clone, flags | SIGCHLD, NULL, prctl_arg(0), pidfd); |
38e5c2db | 87 | #else |
a59440be | 88 | return syscall(__NR_clone, flags | SIGCHLD, NULL, pidfd); |
38e5c2db CB |
89 | #endif |
90 | } | |
91 | ||
2f46fe6e CB |
92 | __returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd) |
93 | { | |
94 | pid_t pid; | |
95 | struct lxc_clone_args args = { | |
96 | .flags = flags, | |
97 | .pidfd = ptr_to_u64(pidfd), | |
98 | }; | |
99 | ||
100 | if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | | |
101 | CLONE_CHILD_CLEARTID | CLONE_SETTLS)) | |
102 | return ret_errno(EINVAL); | |
103 | ||
104 | /* On CLONE_PARENT we inherit the parent's exit signal. */ | |
105 | if (!(flags & CLONE_PARENT)) | |
106 | args.exit_signal = SIGCHLD; | |
107 | ||
108 | pid = lxc_clone3(&args, CLONE_ARGS_SIZE_VER0); | |
109 | if (pid < 0 && errno == ENOSYS) { | |
110 | SYSTRACE("Falling back to legacy clone"); | |
f7176c3e | 111 | return lxc_raw_legacy_clone(flags, pidfd); |
2f46fe6e CB |
112 | } |
113 | ||
114 | return pid; | |
115 | } | |
116 | ||
a59440be CB |
/*
 * Clone via lxc_raw_clone() and run a callback in the child.
 *
 * @fn:    function executed in the child; its return value becomes the
 *         child's exit status.
 * @args:  opaque pointer passed to @fn.
 * @flags: clone flags forwarded to lxc_raw_clone().
 * @pidfd: forwarded to lxc_raw_clone().
 *
 * Returns the child's pid in the parent, or -1 if lxc_raw_clone() failed.
 * The child never returns from this function.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags,
		       int *pidfd)
{
	pid_t child = lxc_raw_clone(flags, pidfd);

	/*
	 * exit() is not thread-safe and might mess with the parent's signal
	 * handlers and other stuff when exec() fails, so the child leaves
	 * through _exit().
	 */
	if (child == 0)
		_exit(fn(args));

	return (child < 0) ? -1 : child;
}
d9bb2fba CB |
135 | |
136 | int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, | |
137 | unsigned int flags) | |
138 | { | |
cf38fe06 | 139 | return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); |
d9bb2fba | 140 | } |
f40988c7 CB |
141 | |
142 | /* | |
143 | * Let's use the "standard stack limit" (i.e. glibc thread size default) for | |
144 | * stack sizes: 8MB. | |
145 | */ | |
146 | #define __LXC_STACK_SIZE (8 * 1024 * 1024) | |
147 | pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd) | |
148 | { | |
149 | pid_t ret; | |
150 | void *stack; | |
151 | ||
152 | stack = malloc(__LXC_STACK_SIZE); | |
153 | if (!stack) { | |
154 | SYSERROR("Failed to allocate clone stack"); | |
155 | return -ENOMEM; | |
156 | } | |
157 | ||
158 | #ifdef __ia64__ | |
159 | ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); | |
160 | #else | |
161 | ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); | |
162 | #endif | |
163 | if (ret < 0) | |
164 | SYSERROR("Failed to clone (%#x)", flags); | |
165 | ||
166 | return ret; | |
167 | } |