/*
 * Linux UFFD-WP support
 *
 * Copyright Virtuozzo GmbH, 2020
 *
 * Authors:
 *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "qemu/userfaultfd.h"
#include <poll.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
/**
 * uffd_open: open a userfaultfd file descriptor
 *
 * Returns non-negative file descriptor or negative value in case of an error
 *
 * @flags: flags passed unchanged to the userfaultfd system call
 */
int uffd_open(int flags)
{
#if defined(__NR_userfaultfd)
    return syscall(__NR_userfaultfd, flags);
#else
    /*
     * No syscall number available at build time. Callers in this file
     * report errno after a failed open, so give them a meaningful value.
     */
    errno = ENOSYS;
    return -1;
#endif
}
32 * uffd_query_features: query UFFD features
34 * Returns: 0 on success, negative value in case of an error
36 * @features: parameter to receive 'uffdio_api.features'
38 int uffd_query_features(uint64_t *features
)
41 struct uffdio_api api_struct
= { 0 };
44 uffd_fd
= uffd_open(O_CLOEXEC
);
46 trace_uffd_query_features_nosys(errno
);
50 api_struct
.api
= UFFD_API
;
51 api_struct
.features
= 0;
53 if (ioctl(uffd_fd
, UFFDIO_API
, &api_struct
)) {
54 trace_uffd_query_features_api_failed(errno
);
57 *features
= api_struct
.features
;
66 * uffd_create_fd: create UFFD file descriptor
68 * Returns non-negative file descriptor or negative value in case of an error
70 * @features: UFFD features to request
71 * @non_blocking: create UFFD file descriptor for non-blocking operation
73 int uffd_create_fd(uint64_t features
, bool non_blocking
)
77 struct uffdio_api api_struct
= { 0 };
78 uint64_t ioctl_mask
= BIT(_UFFDIO_REGISTER
) | BIT(_UFFDIO_UNREGISTER
);
80 flags
= O_CLOEXEC
| (non_blocking
? O_NONBLOCK
: 0);
81 uffd_fd
= uffd_open(flags
);
83 trace_uffd_create_fd_nosys(errno
);
87 api_struct
.api
= UFFD_API
;
88 api_struct
.features
= features
;
89 if (ioctl(uffd_fd
, UFFDIO_API
, &api_struct
)) {
90 trace_uffd_create_fd_api_failed(errno
);
93 if ((api_struct
.ioctls
& ioctl_mask
) != ioctl_mask
) {
94 trace_uffd_create_fd_api_noioctl(ioctl_mask
, api_struct
.ioctls
);
/**
 * uffd_close_fd: close UFFD file descriptor
 *
 * @uffd_fd: UFFD file descriptor (must be non-negative)
 */
void uffd_close_fd(int uffd_fd)
{
    assert(uffd_fd >= 0);
    close(uffd_fd);
}
117 * uffd_register_memory: register memory range via UFFD-IO
119 * Returns 0 in case of success, negative value in case of an error
121 * @uffd_fd: UFFD file descriptor
122 * @addr: base address of memory range
123 * @length: length of memory range
124 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
125 * @ioctls: optional pointer to receive supported IOCTL mask
127 int uffd_register_memory(int uffd_fd
, void *addr
, uint64_t length
,
128 uint64_t mode
, uint64_t *ioctls
)
130 struct uffdio_register uffd_register
;
132 uffd_register
.range
.start
= (uintptr_t) addr
;
133 uffd_register
.range
.len
= length
;
134 uffd_register
.mode
= mode
;
136 if (ioctl(uffd_fd
, UFFDIO_REGISTER
, &uffd_register
)) {
137 trace_uffd_register_memory_failed(addr
, length
, mode
, errno
);
141 *ioctls
= uffd_register
.ioctls
;
148 * uffd_unregister_memory: un-register memory range with UFFD-IO
150 * Returns 0 in case of success, negative value in case of an error
152 * @uffd_fd: UFFD file descriptor
153 * @addr: base address of memory range
154 * @length: length of memory range
156 int uffd_unregister_memory(int uffd_fd
, void *addr
, uint64_t length
)
158 struct uffdio_range uffd_range
;
160 uffd_range
.start
= (uintptr_t) addr
;
161 uffd_range
.len
= length
;
163 if (ioctl(uffd_fd
, UFFDIO_UNREGISTER
, &uffd_range
)) {
164 trace_uffd_unregister_memory_failed(addr
, length
, errno
);
172 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
174 * Returns 0 on success, negative value in case of error
176 * @uffd_fd: UFFD file descriptor
177 * @addr: base address of memory range
178 * @length: length of memory range
179 * @wp: write-protect/unprotect
180 * @dont_wake: do not wake threads waiting on wr-protected page
182 int uffd_change_protection(int uffd_fd
, void *addr
, uint64_t length
,
183 bool wp
, bool dont_wake
)
185 struct uffdio_writeprotect uffd_writeprotect
;
187 uffd_writeprotect
.range
.start
= (uintptr_t) addr
;
188 uffd_writeprotect
.range
.len
= length
;
189 if (!wp
&& dont_wake
) {
190 /* DONTWAKE is meaningful only on protection release */
191 uffd_writeprotect
.mode
= UFFDIO_WRITEPROTECT_MODE_DONTWAKE
;
193 uffd_writeprotect
.mode
= (wp
? UFFDIO_WRITEPROTECT_MODE_WP
: 0);
196 if (ioctl(uffd_fd
, UFFDIO_WRITEPROTECT
, &uffd_writeprotect
)) {
197 error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
198 " mode=%" PRIx64
" errno=%i", addr
, length
,
199 (uint64_t) uffd_writeprotect
.mode
, errno
);
207 * uffd_copy_page: copy range of pages to destination via UFFD-IO
209 * Copy range of source pages to the destination to resolve
210 * missing page fault somewhere in the destination range.
212 * Returns 0 on success, negative value in case of an error
214 * @uffd_fd: UFFD file descriptor
215 * @dst_addr: destination base address
216 * @src_addr: source base address
217 * @length: length of the range to copy
218 * @dont_wake: do not wake threads waiting on missing page
220 int uffd_copy_page(int uffd_fd
, void *dst_addr
, void *src_addr
,
221 uint64_t length
, bool dont_wake
)
223 struct uffdio_copy uffd_copy
;
225 uffd_copy
.dst
= (uintptr_t) dst_addr
;
226 uffd_copy
.src
= (uintptr_t) src_addr
;
227 uffd_copy
.len
= length
;
228 uffd_copy
.mode
= dont_wake
? UFFDIO_COPY_MODE_DONTWAKE
: 0;
230 if (ioctl(uffd_fd
, UFFDIO_COPY
, &uffd_copy
)) {
231 error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
232 " mode=%" PRIx64
" errno=%i", dst_addr
, src_addr
,
233 length
, (uint64_t) uffd_copy
.mode
, errno
);
241 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
243 * Fill range pages with zeroes to resolve missing page fault within the range.
245 * Returns 0 on success, negative value in case of an error
247 * @uffd_fd: UFFD file descriptor
248 * @addr: base address
249 * @length: length of the range to fill with zeroes
250 * @dont_wake: do not wake threads waiting on missing page
252 int uffd_zero_page(int uffd_fd
, void *addr
, uint64_t length
, bool dont_wake
)
254 struct uffdio_zeropage uffd_zeropage
;
256 uffd_zeropage
.range
.start
= (uintptr_t) addr
;
257 uffd_zeropage
.range
.len
= length
;
258 uffd_zeropage
.mode
= dont_wake
? UFFDIO_ZEROPAGE_MODE_DONTWAKE
: 0;
260 if (ioctl(uffd_fd
, UFFDIO_ZEROPAGE
, &uffd_zeropage
)) {
261 error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
262 " mode=%" PRIx64
" errno=%i", addr
, length
,
263 (uint64_t) uffd_zeropage
.mode
, errno
);
271 * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
273 * Wake up threads waiting on any page/pages from the designated range.
274 * The main use case is when during some period, page faults are resolved
275 * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
276 * for the whole memory range are satisfied in a single call to uffd_wakeup().
278 * Returns 0 on success, negative value in case of an error
280 * @uffd_fd: UFFD file descriptor
281 * @addr: base address
282 * @length: length of the range
284 int uffd_wakeup(int uffd_fd
, void *addr
, uint64_t length
)
286 struct uffdio_range uffd_range
;
288 uffd_range
.start
= (uintptr_t) addr
;
289 uffd_range
.len
= length
;
291 if (ioctl(uffd_fd
, UFFDIO_WAKE
, &uffd_range
)) {
292 error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64
" errno=%i",
293 addr
, length
, errno
);
301 * uffd_read_events: read pending UFFD events
303 * Returns number of fetched messages, 0 if non is available or
304 * negative value in case of an error
306 * @uffd_fd: UFFD file descriptor
307 * @msgs: pointer to message buffer
308 * @count: number of messages that can fit in the buffer
310 int uffd_read_events(int uffd_fd
, struct uffd_msg
*msgs
, int count
)
314 res
= read(uffd_fd
, msgs
, count
* sizeof(struct uffd_msg
));
315 } while (res
< 0 && errno
== EINTR
);
317 if ((res
< 0 && errno
== EAGAIN
)) {
321 error_report("uffd_read_events() failed: errno=%i", errno
);
325 return (int) (res
/ sizeof(struct uffd_msg
));
/**
 * uffd_poll_events: poll UFFD file descriptor for read
 *
 * Returns true if events are available for read, false otherwise
 * (including on timeout and on poll() failure)
 *
 * @uffd_fd: UFFD file descriptor
 * @tmo: timeout value, passed unchanged to poll()
 */
bool uffd_poll_events(int uffd_fd, int tmo)
{
    int res;
    struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };

    /* Restart the wait if it was interrupted by a signal */
    do {
        res = poll(&poll_fd, 1, tmo);
    } while (res < 0 && errno == EINTR);

    if (res == 0) {
        /* Timed out with nothing to read */
        return false;
    }
    if (res < 0) {
        error_report("uffd_poll_events() failed: errno=%i", errno);
        return false;
    }

    return (poll_fd.revents & POLLIN) != 0;
}