]> git.proxmox.com Git - mirror_qemu.git/blob - util/userfaultfd.c
util/userfaultfd: Add uffd_open()
[mirror_qemu.git] / util / userfaultfd.c
1 /*
2 * Linux UFFD-WP support
3 *
4 * Copyright Virtuozzo GmbH, 2020
5 *
6 * Authors:
7 * Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu/bitops.h"
15 #include "qemu/error-report.h"
16 #include "qemu/userfaultfd.h"
17 #include "trace.h"
18 #include <poll.h>
19 #include <sys/syscall.h>
20 #include <sys/ioctl.h>
21
/**
 * uffd_open: open a userfaultfd file descriptor
 *
 * Thin wrapper around the raw userfaultfd syscall for hosts whose libc
 * does not provide one.
 *
 * Returns non-negative file descriptor on success; on failure returns -1
 * with errno set (ENOSYS when the host has no userfaultfd syscall number).
 *
 * @flags: flags passed unchanged to the userfaultfd syscall
 */
int uffd_open(int flags)
{
#if defined(__NR_userfaultfd)
    return syscall(__NR_userfaultfd, flags);
#else
    /*
     * Fail the same way syscall() does (-1 plus errno) so that callers
     * which inspect errno after a negative return see a meaningful value
     * instead of whatever an earlier call left behind.
     */
    errno = ENOSYS;
    return -1;
#endif
}
30
31 /**
32 * uffd_query_features: query UFFD features
33 *
34 * Returns: 0 on success, negative value in case of an error
35 *
36 * @features: parameter to receive 'uffdio_api.features'
37 */
38 int uffd_query_features(uint64_t *features)
39 {
40 int uffd_fd;
41 struct uffdio_api api_struct = { 0 };
42 int ret = -1;
43
44 uffd_fd = uffd_open(O_CLOEXEC);
45 if (uffd_fd < 0) {
46 trace_uffd_query_features_nosys(errno);
47 return -1;
48 }
49
50 api_struct.api = UFFD_API;
51 api_struct.features = 0;
52
53 if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
54 trace_uffd_query_features_api_failed(errno);
55 goto out;
56 }
57 *features = api_struct.features;
58 ret = 0;
59
60 out:
61 close(uffd_fd);
62 return ret;
63 }
64
65 /**
66 * uffd_create_fd: create UFFD file descriptor
67 *
68 * Returns non-negative file descriptor or negative value in case of an error
69 *
70 * @features: UFFD features to request
71 * @non_blocking: create UFFD file descriptor for non-blocking operation
72 */
73 int uffd_create_fd(uint64_t features, bool non_blocking)
74 {
75 int uffd_fd;
76 int flags;
77 struct uffdio_api api_struct = { 0 };
78 uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
79
80 flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
81 uffd_fd = uffd_open(flags);
82 if (uffd_fd < 0) {
83 trace_uffd_create_fd_nosys(errno);
84 return -1;
85 }
86
87 api_struct.api = UFFD_API;
88 api_struct.features = features;
89 if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
90 trace_uffd_create_fd_api_failed(errno);
91 goto fail;
92 }
93 if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
94 trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
95 goto fail;
96 }
97
98 return uffd_fd;
99
100 fail:
101 close(uffd_fd);
102 return -1;
103 }
104
/**
 * uffd_close_fd: close UFFD file descriptor
 *
 * Asserts that the descriptor is non-negative, then closes it. The result
 * of close() is not reported to the caller.
 *
 * @uffd_fd: UFFD file descriptor
 */
void uffd_close_fd(int uffd_fd)
{
    /* A negative value here means the caller never had a valid descriptor */
    assert(uffd_fd >= 0);

    (void) close(uffd_fd);
}
115
/**
 * uffd_register_memory: register memory range via UFFD-IO
 *
 * Issues UFFDIO_REGISTER for the range [@addr, @addr + @length) with the
 * given @mode. A failure is recorded through a trace event.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
 * @ioctls: optional pointer to receive supported IOCTL mask; may be NULL,
 *          and is only written on success
 */
int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
                         uint64_t mode, uint64_t *ioctls)
{
    struct uffdio_register reg = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = mode,
    };

    if (ioctl(uffd_fd, UFFDIO_REGISTER, &reg) != 0) {
        trace_uffd_register_memory_failed(addr, length, mode, errno);
        return -1;
    }

    if (ioctls != NULL) {
        *ioctls = reg.ioctls;
    }
    return 0;
}
146
/**
 * uffd_unregister_memory: un-register memory range with UFFD-IO
 *
 * Issues UFFDIO_UNREGISTER for the range [@addr, @addr + @length).
 * A failure is recorded through a trace event.
 *
 * Returns 0 in case of success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 */
int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };
    int rc = ioctl(uffd_fd, UFFDIO_UNREGISTER, &range);

    if (rc != 0) {
        trace_uffd_unregister_memory_failed(addr, length, errno);
        return -1;
    }
    return 0;
}
170
/**
 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
 *
 * Issues UFFDIO_WRITEPROTECT for the range [@addr, @addr + @length).
 * The request mode is WP when @wp is set, DONTWAKE when @wp is clear and
 * @dont_wake is set, and 0 otherwise. Failures are reported with
 * error_report().
 *
 * Returns 0 on success, negative value in case of error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address of memory range
 * @length: length of memory range
 * @wp: write-protect/unprotect
 * @dont_wake: do not wake threads waiting on wr-protected page
 */
int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
                           bool wp, bool dont_wake)
{
    struct uffdio_writeprotect req = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = 0,
    };

    if (wp) {
        req.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    } else if (dont_wake) {
        /* DONTWAKE is meaningful only on protection release */
        req.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
    }

    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &req) != 0) {
        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     (uint64_t) req.mode, errno);
        return -1;
    }
    return 0;
}
205
/**
 * uffd_copy_page: copy range of pages to destination via UFFD-IO
 *
 * Issues UFFDIO_COPY to copy @length bytes from @src_addr to @dst_addr,
 * which is used to resolve a missing page fault somewhere in the
 * destination range. Failures are reported with error_report().
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @dst_addr: destination base address
 * @src_addr: source base address
 * @length: length of the range to copy
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
                   uint64_t length, bool dont_wake)
{
    struct uffdio_copy req = {
        .dst = (uintptr_t) dst_addr,
        .src = (uintptr_t) src_addr,
        .len = length,
        .mode = 0,
    };

    if (dont_wake) {
        req.mode = UFFDIO_COPY_MODE_DONTWAKE;
    }

    if (ioctl(uffd_fd, UFFDIO_COPY, &req) != 0) {
        error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
                     length, (uint64_t) req.mode, errno);
        return -1;
    }
    return 0;
}
239
/**
 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
 *
 * Issues UFFDIO_ZEROPAGE for the range [@addr, @addr + @length), which is
 * used to resolve a missing page fault within that range. Failures are
 * reported with error_report().
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range to fill with zeroes
 * @dont_wake: do not wake threads waiting on missing page
 */
int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
{
    struct uffdio_zeropage req = {
        .range = { .start = (uintptr_t) addr, .len = length },
        .mode = 0,
    };

    if (dont_wake) {
        req.mode = UFFDIO_ZEROPAGE_MODE_DONTWAKE;
    }

    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &req) != 0) {
        error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
                     " mode=%" PRIx64 " errno=%i", addr, length,
                     (uint64_t) req.mode, errno);
        return -1;
    }
    return 0;
}
269
/**
 * uffd_wakeup: wake up threads waiting on UFFD-managed page fault resolution
 *
 * Wake up threads waiting on any page/pages from the designated range.
 * The main use case is when, for some period, page faults are resolved
 * via UFFD-IO IOCTLs with the MODE_DONTWAKE flag set; afterwards all waits
 * for the whole memory range are satisfied by a single call to
 * uffd_wakeup(). Failures are reported with error_report().
 *
 * Returns 0 on success, negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @addr: base address
 * @length: length of the range
 */
int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
{
    struct uffdio_range range = {
        .start = (uintptr_t) addr,
        .len = length,
    };
    int rc = ioctl(uffd_fd, UFFDIO_WAKE, &range);

    if (rc != 0) {
        error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
                     addr, length, errno);
        return -1;
    }
    return 0;
}
299
/**
 * uffd_read_events: read pending UFFD events
 *
 * Reads up to @count messages from @uffd_fd into @msgs, retrying the
 * read() when it is interrupted (EINTR). EAGAIN is treated as "nothing
 * pending" rather than as an error; any other failure is reported with
 * error_report().
 *
 * Returns number of fetched messages, 0 if none is available or
 * negative value in case of an error
 *
 * @uffd_fd: UFFD file descriptor
 * @msgs: pointer to message buffer
 * @count: number of messages that can fit in the buffer
 */
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
{
    const size_t msg_size = sizeof(struct uffd_msg);
    ssize_t nbytes;

    for (;;) {
        nbytes = read(uffd_fd, msgs, count * msg_size);
        if (nbytes >= 0) {
            /* Convert the byte count into a number of whole messages */
            return (int) (nbytes / msg_size);
        }
        if (errno != EINTR) {
            break;
        }
    }

    if (errno == EAGAIN) {
        return 0;
    }

    error_report("uffd_read_events() failed: errno=%i", errno);
    return -1;
}
327
/**
 * uffd_poll_events: poll UFFD file descriptor for read
 *
 * Waits with poll() for POLLIN on @uffd_fd. An interrupted poll() (EINTR)
 * is retried with the same @tmo value. Any other poll() failure is
 * reported with error_report() and treated as "no events".
 *
 * Returns true if events are available for read, false otherwise
 *
 * @uffd_fd: UFFD file descriptor
 * @tmo: timeout value, passed unchanged to poll()
 */
bool uffd_poll_events(int uffd_fd, int tmo)
{
    struct pollfd pfd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
    int nready;

    while ((nready = poll(&pfd, 1, tmo)) < 0 && errno == EINTR) {
        /* interrupted by a signal: try again */
    }

    if (nready < 0) {
        error_report("uffd_poll_events() failed: errno=%i", errno);
        return false;
    }

    /* nready == 0 means the timeout expired with nothing to read */
    return nready > 0 && (pfd.revents & POLLIN) != 0;
}