]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
87b2d440 DH |
2 | /* |
3 | * memfd GUP test-case | |
4 | * This tests memfd interactions with get_user_pages(). We require the | |
5 | * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This | |
6 | * file-system delays _all_ reads by 1s and forces direct-IO. This means, any | |
7 | * read() on files in that file-system will pin the receive-buffer pages for at | |
8 | * least 1s via get_user_pages(). | |
9 | * | |
10 | * We use this trick to race ADD_SEALS against a write on a memfd object. The | |
11 | * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use | |
12 | * the read() syscall with our memory-mapped memfd object as receive buffer to | |
13 | * force the kernel to write into our memfd object. | |
14 | */ | |
15 | ||
16 | #define _GNU_SOURCE | |
17 | #define __EXPORTED_HEADERS__ | |
18 | ||
19 | #include <errno.h> | |
20 | #include <inttypes.h> | |
21 | #include <limits.h> | |
22 | #include <linux/falloc.h> | |
2c11aa94 | 23 | #include <fcntl.h> |
87b2d440 DH |
24 | #include <linux/memfd.h> |
25 | #include <sched.h> | |
26 | #include <stdio.h> | |
27 | #include <stdlib.h> | |
28 | #include <signal.h> | |
29 | #include <string.h> | |
30 | #include <sys/mman.h> | |
31 | #include <sys/stat.h> | |
32 | #include <sys/syscall.h> | |
33 | #include <sys/wait.h> | |
34 | #include <unistd.h> | |
b501ac3c | 35 | #include <asm/types.h> |
87b2d440 | 36 | |
29f34d1d MAL |
37 | #include "common.h" |
38 | ||
/* Default size, in bytes, used to initialise mfd_def_size below. */
87b2d440 | 39 | #define MFD_DEF_SIZE 8192 |
/* Size, in bytes, of the buffer malloc()ed as the stack of the clone()d sealing thread. */
0e64f1d7 | 40 | #define STACK_SIZE 65536 |
87b2d440 | 41 | |
c5c63835 MAL |
/*
 * Size, in bytes, used for the memfd object, its mappings and the read()
 * into it. main() replaces it with twice the default huge page size when
 * the "hugetlbfs" option is passed.
 */
42 | static size_t mfd_def_size = MFD_DEF_SIZE; |
43 | ||
87b2d440 DH |
44 | static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) |
45 | { | |
46 | int r, fd; | |
47 | ||
48 | fd = sys_memfd_create(name, flags); | |
49 | if (fd < 0) { | |
50 | printf("memfd_create(\"%s\", %u) failed: %m\n", | |
51 | name, flags); | |
52 | abort(); | |
53 | } | |
54 | ||
55 | r = ftruncate(fd, sz); | |
56 | if (r < 0) { | |
57 | printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz); | |
58 | abort(); | |
59 | } | |
60 | ||
61 | return fd; | |
62 | } | |
63 | ||
64 | static __u64 mfd_assert_get_seals(int fd) | |
65 | { | |
66 | long r; | |
67 | ||
68 | r = fcntl(fd, F_GET_SEALS); | |
69 | if (r < 0) { | |
70 | printf("GET_SEALS(%d) failed: %m\n", fd); | |
71 | abort(); | |
72 | } | |
73 | ||
74 | return r; | |
75 | } | |
76 | ||
77 | static void mfd_assert_has_seals(int fd, __u64 seals) | |
78 | { | |
79 | __u64 s; | |
80 | ||
81 | s = mfd_assert_get_seals(fd); | |
82 | if (s != seals) { | |
83 | printf("%llu != %llu = GET_SEALS(%d)\n", | |
84 | (unsigned long long)seals, (unsigned long long)s, fd); | |
85 | abort(); | |
86 | } | |
87 | } | |
88 | ||
89 | static void mfd_assert_add_seals(int fd, __u64 seals) | |
90 | { | |
91 | long r; | |
92 | __u64 s; | |
93 | ||
94 | s = mfd_assert_get_seals(fd); | |
95 | r = fcntl(fd, F_ADD_SEALS, seals); | |
96 | if (r < 0) { | |
97 | printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n", | |
98 | fd, (unsigned long long)s, (unsigned long long)seals); | |
99 | abort(); | |
100 | } | |
101 | } | |
102 | ||
103 | static int mfd_busy_add_seals(int fd, __u64 seals) | |
104 | { | |
105 | long r; | |
106 | __u64 s; | |
107 | ||
108 | r = fcntl(fd, F_GET_SEALS); | |
109 | if (r < 0) | |
110 | s = 0; | |
111 | else | |
112 | s = r; | |
113 | ||
114 | r = fcntl(fd, F_ADD_SEALS, seals); | |
115 | if (r < 0 && errno != EBUSY) { | |
116 | printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n", | |
117 | fd, (unsigned long long)s, (unsigned long long)seals); | |
118 | abort(); | |
119 | } | |
120 | ||
121 | return r; | |
122 | } | |
123 | ||
124 | static void *mfd_assert_mmap_shared(int fd) | |
125 | { | |
126 | void *p; | |
127 | ||
128 | p = mmap(NULL, | |
c5c63835 | 129 | mfd_def_size, |
87b2d440 DH |
130 | PROT_READ | PROT_WRITE, |
131 | MAP_SHARED, | |
132 | fd, | |
133 | 0); | |
134 | if (p == MAP_FAILED) { | |
135 | printf("mmap() failed: %m\n"); | |
136 | abort(); | |
137 | } | |
138 | ||
139 | return p; | |
140 | } | |
141 | ||
142 | static void *mfd_assert_mmap_private(int fd) | |
143 | { | |
144 | void *p; | |
145 | ||
146 | p = mmap(NULL, | |
c5c63835 | 147 | mfd_def_size, |
87b2d440 DH |
148 | PROT_READ | PROT_WRITE, |
149 | MAP_PRIVATE, | |
150 | fd, | |
151 | 0); | |
152 | if (p == MAP_FAILED) { | |
153 | printf("mmap() failed: %m\n"); | |
154 | abort(); | |
155 | } | |
156 | ||
157 | return p; | |
158 | } | |
159 | ||
/*
 * State handed from main() to sealing_thread_fn(): main() stores the memfd
 * descriptor and its shared mapping here before spawning the sealing thread,
 * which unmaps global_p and then seals global_mfd.
 */
160 | static int global_mfd = -1; | |
161 | static void *global_p = NULL; | |
162 | ||
163 | static int sealing_thread_fn(void *arg) | |
164 | { | |
165 | int sig, r; | |
166 | ||
167 | /* | |
168 | * This thread first waits 200ms so any pending operation in the parent | |
169 | * is correctly started. After that, it tries to seal @global_mfd as | |
170 | * SEAL_WRITE. This _must_ fail as the parent thread has a read() into | |
171 | * that memory mapped object still ongoing. | |
172 | * We then wait one more second and try sealing again. This time it | |
173 | * must succeed as there shouldn't be anyone else pinning the pages. | |
174 | */ | |
175 | ||
176 | /* wait 200ms for FUSE-request to be active */ | |
177 | usleep(200000); | |
178 | ||
179 | /* unmount mapping before sealing to avoid i_mmap_writable failures */ | |
c5c63835 | 180 | munmap(global_p, mfd_def_size); |
87b2d440 DH |
181 | |
182 | /* Try sealing the global file; expect EBUSY or success. Current | |
183 | * kernels will never succeed, but in the future, kernels might | |
184 | * implement page-replacements or other fancy ways to avoid racing | |
185 | * writes. */ | |
186 | r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE); | |
187 | if (r >= 0) { | |
188 | printf("HURRAY! This kernel fixed GUP races!\n"); | |
189 | } else { | |
190 | /* wait 1s more so the FUSE-request is done */ | |
191 | sleep(1); | |
192 | ||
193 | /* try sealing the global file again */ | |
194 | mfd_assert_add_seals(global_mfd, F_SEAL_WRITE); | |
195 | } | |
196 | ||
197 | return 0; | |
198 | } | |
199 | ||
200 | static pid_t spawn_sealing_thread(void) | |
201 | { | |
202 | uint8_t *stack; | |
203 | pid_t pid; | |
204 | ||
205 | stack = malloc(STACK_SIZE); | |
206 | if (!stack) { | |
207 | printf("malloc(STACK_SIZE) failed: %m\n"); | |
208 | abort(); | |
209 | } | |
210 | ||
211 | pid = clone(sealing_thread_fn, | |
212 | stack + STACK_SIZE, | |
213 | SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, | |
214 | NULL); | |
215 | if (pid < 0) { | |
216 | printf("clone() failed: %m\n"); | |
217 | abort(); | |
218 | } | |
219 | ||
220 | return pid; | |
221 | } | |
222 | ||
/*
 * Wait until the sealing thread @pid has terminated.
 *
 * The wait is restarted if it is interrupted by a signal (EINTR). Any other
 * waitpid() failure prints a diagnostic and aborts, so the caller never
 * continues while the sealing thread may still be running. The exit status
 * of the thread is not inspected.
 */
static void join_sealing_thread(pid_t pid)
{
	pid_t r;

	do {
		r = waitpid(pid, NULL, 0);
	} while (r < 0 && errno == EINTR);

	if (r < 0) {
		printf("waitpid(%d) failed: %m\n", (int)pid);
		abort();
	}
}
227 | ||
228 | int main(int argc, char **argv) | |
229 | { | |
c5c63835 | 230 | char *zero; |
87b2d440 DH |
231 | int fd, mfd, r; |
232 | void *p; | |
233 | int was_sealed; | |
234 | pid_t pid; | |
235 | ||
236 | if (argc < 2) { | |
237 | printf("error: please pass path to file in fuse_mnt mount-point\n"); | |
238 | abort(); | |
239 | } | |
240 | ||
c5c63835 MAL |
241 | if (argc >= 3) { |
242 | if (!strcmp(argv[2], "hugetlbfs")) { | |
243 | unsigned long hpage_size = default_huge_page_size(); | |
244 | ||
245 | if (!hpage_size) { | |
246 | printf("Unable to determine huge page size\n"); | |
247 | abort(); | |
248 | } | |
249 | ||
250 | hugetlbfs_test = 1; | |
251 | mfd_def_size = hpage_size * 2; | |
252 | } else { | |
253 | printf("Unknown option: %s\n", argv[2]); | |
254 | abort(); | |
255 | } | |
256 | } | |
257 | ||
258 | zero = calloc(sizeof(*zero), mfd_def_size); | |
259 | ||
87b2d440 DH |
260 | /* open FUSE memfd file for GUP testing */ |
261 | printf("opening: %s\n", argv[1]); | |
262 | fd = open(argv[1], O_RDONLY | O_CLOEXEC); | |
263 | if (fd < 0) { | |
264 | printf("cannot open(\"%s\"): %m\n", argv[1]); | |
265 | abort(); | |
266 | } | |
267 | ||
268 | /* create new memfd-object */ | |
269 | mfd = mfd_assert_new("kern_memfd_fuse", | |
c5c63835 | 270 | mfd_def_size, |
87b2d440 DH |
271 | MFD_CLOEXEC | MFD_ALLOW_SEALING); |
272 | ||
273 | /* mmap memfd-object for writing */ | |
274 | p = mfd_assert_mmap_shared(mfd); | |
275 | ||
276 | /* pass mfd+mapping to a separate sealing-thread which tries to seal | |
277 | * the memfd objects with SEAL_WRITE while we write into it */ | |
278 | global_mfd = mfd; | |
279 | global_p = p; | |
280 | pid = spawn_sealing_thread(); | |
281 | ||
282 | /* Use read() on the FUSE file to read into our memory-mapped memfd | |
283 | * object. This races the other thread which tries to seal the | |
284 | * memfd-object. | |
285 | * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s. | |
286 | * This guarantees that the receive-buffer is pinned for 1s until the | |
287 | * data is written into it. The racing ADD_SEALS should thus fail as | |
288 | * the pages are still pinned. */ | |
c5c63835 | 289 | r = read(fd, p, mfd_def_size); |
87b2d440 DH |
290 | if (r < 0) { |
291 | printf("read() failed: %m\n"); | |
292 | abort(); | |
293 | } else if (!r) { | |
294 | printf("unexpected EOF on read()\n"); | |
295 | abort(); | |
296 | } | |
297 | ||
298 | was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE; | |
299 | ||
300 | /* Wait for sealing-thread to finish and verify that it | |
301 | * successfully sealed the file after the second try. */ | |
302 | join_sealing_thread(pid); | |
303 | mfd_assert_has_seals(mfd, F_SEAL_WRITE); | |
304 | ||
305 | /* *IF* the memfd-object was sealed at the time our read() returned, | |
306 | * then the kernel did a page-replacement or canceled the read() (or | |
307 | * whatever magic it did..). In that case, the memfd object is still | |
308 | * all zero. | |
309 | * In case the memfd-object was *not* sealed, the read() was successfull | |
310 | * and the memfd object must *not* be all zero. | |
311 | * Note that in real scenarios, there might be a mixture of both, but | |
312 | * in this test-cases, we have explicit 200ms delays which should be | |
313 | * enough to avoid any in-flight writes. */ | |
314 | ||
315 | p = mfd_assert_mmap_private(mfd); | |
c5c63835 | 316 | if (was_sealed && memcmp(p, zero, mfd_def_size)) { |
87b2d440 DH |
317 | printf("memfd sealed during read() but data not discarded\n"); |
318 | abort(); | |
c5c63835 | 319 | } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) { |
87b2d440 DH |
320 | printf("memfd sealed after read() but data discarded\n"); |
321 | abort(); | |
322 | } | |
323 | ||
324 | close(mfd); | |
325 | close(fd); | |
326 | ||
327 | printf("fuse: DONE\n"); | |
c5c63835 | 328 | free(zero); |
87b2d440 DH |
329 | |
330 | return 0; | |
331 | } |