]>
Commit | Line | Data |
---|---|---|
dd873966 | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
c1fcf220 DDAG |
2 | /* |
3 | * include/linux/userfaultfd.h | |
4 | * | |
5 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | |
6 | * Copyright (C) 2015 Red Hat, Inc. | |
7 | * | |
8 | */ | |
9 | ||
10 | #ifndef _LINUX_USERFAULTFD_H | |
11 | #define _LINUX_USERFAULTFD_H | |
12 | ||
13 | #include <linux/types.h> | |
14 | ||
93e0932b PX |
15 | /* ioctls for /dev/userfaultfd */ |
16 | #define USERFAULTFD_IOC 0xAA | |
17 | #define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) | |
18 | ||
c1fcf220 | 19 | /* |
3a5eb5b4 PB |
20 | * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and |
21 | * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In | |
22 | * userfaultfd.h we assumed _IOC_READ meant the kernel was reading, | |
23 | * whereas it actually means that userland is reading. | |
c1fcf220 | 24 | */ |
3a5eb5b4 | 25 | #define UFFD_API ((__u64)0xAA) |
278f064e EH |
26 | #define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ |
27 | UFFDIO_REGISTER_MODE_WP | \ | |
28 | UFFDIO_REGISTER_MODE_MINOR) | |
dc6f8d45 CH |
29 | #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ |
30 | UFFD_FEATURE_EVENT_FORK | \ | |
3a5eb5b4 | 31 | UFFD_FEATURE_EVENT_REMAP | \ |
278f064e | 32 | UFFD_FEATURE_EVENT_REMOVE | \ |
74c98e20 | 33 | UFFD_FEATURE_EVENT_UNMAP | \ |
3a5eb5b4 | 34 | UFFD_FEATURE_MISSING_HUGETLBFS | \ |
d4083f50 AP |
35 | UFFD_FEATURE_MISSING_SHMEM | \ |
36 | UFFD_FEATURE_SIGBUS | \ | |
278f064e | 37 | UFFD_FEATURE_THREAD_ID | \ |
327d4b7f | 38 | UFFD_FEATURE_MINOR_HUGETLBFS | \ |
e4082063 | 39 | UFFD_FEATURE_MINOR_SHMEM | \ |
d525f73f | 40 | UFFD_FEATURE_EXACT_ADDRESS | \ |
d0bf492f | 41 | UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ |
da3c22c7 | 42 | UFFD_FEATURE_WP_UNPOPULATED | \ |
efb91426 DHB |
43 | UFFD_FEATURE_POISON | \ |
44 | UFFD_FEATURE_WP_ASYNC) | |
c1fcf220 DDAG |
45 | #define UFFD_API_IOCTLS \ |
46 | ((__u64)1 << _UFFDIO_REGISTER | \ | |
47 | (__u64)1 << _UFFDIO_UNREGISTER | \ | |
48 | (__u64)1 << _UFFDIO_API) | |
49 | #define UFFD_API_RANGE_IOCTLS \ | |
50 | ((__u64)1 << _UFFDIO_WAKE | \ | |
51 | (__u64)1 << _UFFDIO_COPY | \ | |
dc6f8d45 | 52 | (__u64)1 << _UFFDIO_ZEROPAGE | \ |
278f064e | 53 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
da3c22c7 TH |
54 | (__u64)1 << _UFFDIO_CONTINUE | \ |
55 | (__u64)1 << _UFFDIO_POISON) | |
3a5eb5b4 PB |
56 | #define UFFD_API_RANGE_IOCTLS_BASIC \ |
57 | ((__u64)1 << _UFFDIO_WAKE | \ | |
278f064e | 58 | (__u64)1 << _UFFDIO_COPY | \ |
da3c22c7 | 59 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
d525f73f | 60 | (__u64)1 << _UFFDIO_CONTINUE | \ |
da3c22c7 | 61 | (__u64)1 << _UFFDIO_POISON) |
c1fcf220 DDAG |
62 | |
63 | /* | |
64 | * Valid ioctl command number range with this API is from 0x00 to | |
65 | * 0x3F. UFFDIO_API is the fixed number, everything else can be | |
66 | * changed by implementing a different UFFD_API. When sticking to the | |
67 | * same UFFD_API, more ioctls can be added and userland will learn | |
68 | * which ioctls the running kernel implements through the ioctl command | |
69 | * bitmask written by UFFDIO_API. | |
70 | */ | |
71 | #define _UFFDIO_REGISTER (0x00) | |
72 | #define _UFFDIO_UNREGISTER (0x01) | |
73 | #define _UFFDIO_WAKE (0x02) | |
74 | #define _UFFDIO_COPY (0x03) | |
75 | #define _UFFDIO_ZEROPAGE (0x04) | |
dc6f8d45 | 76 | #define _UFFDIO_WRITEPROTECT (0x06) |
278f064e | 77 | #define _UFFDIO_CONTINUE (0x07) |
da3c22c7 | 78 | #define _UFFDIO_POISON (0x08) |
c1fcf220 DDAG |
79 | #define _UFFDIO_API (0x3F) |
80 | ||
81 | /* userfaultfd ioctl ids */ | |
82 | #define UFFDIO 0xAA | |
83 | #define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ | |
84 | struct uffdio_api) | |
85 | #define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ | |
86 | struct uffdio_register) | |
87 | #define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ | |
88 | struct uffdio_range) | |
89 | #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ | |
90 | struct uffdio_range) | |
91 | #define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ | |
92 | struct uffdio_copy) | |
93 | #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ | |
94 | struct uffdio_zeropage) | |
dc6f8d45 CH |
95 | #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ |
96 | struct uffdio_writeprotect) | |
327d4b7f BR |
97 | #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ |
98 | struct uffdio_continue) | |
da3c22c7 TH |
99 | #define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \ |
100 | struct uffdio_poison) | |
c1fcf220 DDAG |
101 | |
102 | /* read() structure: a fixed header followed by a union of event-specific payloads */ | |
103 | struct uffd_msg { | |
104 | __u8 event; /* event code; presumably one of the UFFD_EVENT_* values - confirm */ | |
105 | ||
106 | __u8 reserved1; /* reserved1..reserved3 fill the header out to 8 bytes */ | |
107 | __u16 reserved2; | |
108 | __u32 reserved3; | |
109 | ||
110 | union { | |
111 | struct { | |
112 | __u64 flags; /* presumably UFFD_PAGEFAULT_FLAG_* bits - confirm */ | |
113 | __u64 address; /* presumably the faulting address - confirm */ | |
d4083f50 AP |
114 | union { |
115 | __u32 ptid; /* presumably the id reported with UFFD_FEATURE_THREAD_ID - confirm */ | |
116 | } feat; | |
c1fcf220 DDAG |
117 | } pagefault; |
118 | ||
3a5eb5b4 PB |
119 | struct { |
120 | __u32 ufd; /* NOTE(review): looks like a userfaultfd descriptor - confirm */ | |
121 | } fork; | |
122 | ||
123 | struct { | |
124 | __u64 from; | |
125 | __u64 to; | |
126 | __u64 len; | |
127 | } remap; | |
128 | ||
129 | struct { | |
130 | __u64 start; | |
131 | __u64 end; | |
74c98e20 | 132 | } remove; |
3a5eb5b4 | 133 | |
c1fcf220 DDAG |
134 | struct { |
135 | /* unused reserved fields; three __u64 give the union a 24-byte member */ | |
136 | __u64 reserved1; | |
137 | __u64 reserved2; | |
138 | __u64 reserved3; | |
139 | } reserved; | |
140 | } arg; | |
b89485a5 | 141 | } __attribute__((packed)); /* packed: the compiler inserts no padding between members */ |
c1fcf220 DDAG |
142 | |
143 | /* | |
144 | * Start at 0x12 and not at 0 to be more strict against bugs. | |
145 | */ | |
146 | #define UFFD_EVENT_PAGEFAULT 0x12 | |
c1fcf220 | 147 | #define UFFD_EVENT_FORK 0x13 |
3a5eb5b4 | 148 | #define UFFD_EVENT_REMAP 0x14 |
74c98e20 CH |
149 | #define UFFD_EVENT_REMOVE 0x15 |
150 | #define UFFD_EVENT_UNMAP 0x16 | |
c1fcf220 DDAG |
151 | |
152 | /* flags for UFFD_EVENT_PAGEFAULT */ | |
153 | #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ | |
154 | #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ | |
278f064e | 155 | #define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ |
c1fcf220 DDAG |
156 | |
157 | struct uffdio_api { /* argument of the UFFDIO_API ioctl */ | |
158 | /* userland asks for an API number and the features to enable */ | |
159 | __u64 api; | |
160 | /* | |
161 | * Kernel answers below with all the available features for | |
162 | * the API, this notifies userland of which events and/or | |
163 | * which flags for each event are enabled in the current | |
164 | * kernel. | |
165 | * | |
166 | * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE | |
167 | * are to be considered implicitly always enabled in all kernels as | |
168 | * long as the uffdio_api.api requested matches UFFD_API. | |
3a5eb5b4 PB |
169 | * |
170 | * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER | |
171 | * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on | |
172 | * hugetlbfs virtual memory ranges. Adding or not adding | |
173 | * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has | |
174 | * no real functional effect after UFFDIO_API returns, but | |
175 | * it's only useful for an initial feature set probe at | |
176 | * UFFDIO_API time. There are two ways to use it: | |
177 | * | |
178 | * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the | |
179 | * uffdio_api.features before calling UFFDIO_API, an error | |
180 | * will be returned by UFFDIO_API on a kernel without | |
181 | * hugetlbfs missing support | |
182 | * | |
183 | * 2) UFFD_FEATURE_MISSING_HUGETLBFS can be left out of | |
184 | * uffdio_api.features; the kernel will then set it in | |
185 | * uffdio_api.features if the kernel supports | |
186 | * it, so userland can later check if the feature flag is | |
187 | * present in uffdio_api.features after UFFDIO_API | |
188 | * succeeded. | |
189 | * | |
190 | * UFFD_FEATURE_MISSING_SHMEM works the same as | |
191 | * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem | |
192 | * (i.e. tmpfs and other shmem based APIs). | |
d4083f50 AP |
193 | * |
194 | * UFFD_FEATURE_SIGBUS feature means no page-fault | |
195 | * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead | |
196 | * a SIGBUS signal will be sent to the faulting process. | |
197 | * | |
198 | * UFFD_FEATURE_THREAD_ID means the pid of the page-faulting task_struct | |
199 | * will be returned; if the feature is not requested, 0 will be returned. | |
278f064e EH |
200 | * |
201 | * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults | |
202 | * can be intercepted (via REGISTER_MODE_MINOR) for | |
203 | * hugetlbfs-backed pages. | |
327d4b7f BR |
204 | * |
205 | * UFFD_FEATURE_MINOR_SHMEM indicates the same support as | |
206 | * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. | |
e4082063 AW |
207 | * |
208 | * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page | |
209 | * faults would be provided and the offset within the page would not be | |
210 | * masked. | |
d525f73f CQ |
211 | * |
212 | * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd | |
213 | * write-protection mode is supported on both shmem and hugetlbfs. | |
d0bf492f CLG |
214 | * |
215 | * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd | |
216 | * write-protection mode will always apply to unpopulated pages | |
217 | * (i.e. empty ptes). This will be the default behavior for shmem | |
218 | * & hugetlbfs, so this flag only affects anonymous memory behavior | |
219 | * when userfault write-protection mode is registered. | |
efb91426 DHB |
220 | * |
221 | * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection | |
222 | * asynchronous mode is supported in which the write fault is | |
223 | * automatically resolved and write-protection is un-set. | |
224 | * It implies UFFD_FEATURE_WP_UNPOPULATED. | |
c1fcf220 | 225 | */ |
c1fcf220 DDAG |
226 | #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) |
227 | #define UFFD_FEATURE_EVENT_FORK (1<<1) | |
3a5eb5b4 | 228 | #define UFFD_FEATURE_EVENT_REMAP (1<<2) |
74c98e20 | 229 | #define UFFD_FEATURE_EVENT_REMOVE (1<<3) |
3a5eb5b4 PB |
230 | #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) |
231 | #define UFFD_FEATURE_MISSING_SHMEM (1<<5) | |
74c98e20 | 232 | #define UFFD_FEATURE_EVENT_UNMAP (1<<6) |
d4083f50 AP |
233 | #define UFFD_FEATURE_SIGBUS (1<<7) |
234 | #define UFFD_FEATURE_THREAD_ID (1<<8) | |
278f064e | 235 | #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) |
327d4b7f | 236 | #define UFFD_FEATURE_MINOR_SHMEM (1<<10) |
e4082063 | 237 | #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) |
d525f73f | 238 | #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) |
d0bf492f | 239 | #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) |
da3c22c7 | 240 | #define UFFD_FEATURE_POISON (1<<14) /* NOTE(review): not described in the comment above */ |
efb91426 | 241 | #define UFFD_FEATURE_WP_ASYNC (1<<15) |
c1fcf220 DDAG |
242 | __u64 features; /* UFFD_FEATURE_* bits: requested by userland, answered by the kernel */ |
243 | ||
244 | __u64 ioctls; /* ioctl command bitmask; presumably laid out like UFFD_API_IOCTLS - confirm */ | |
245 | }; | |
246 | ||
247 | struct uffdio_range { /* start/len pair; the argument type of UFFDIO_UNREGISTER and UFFDIO_WAKE */ | |
248 | __u64 start; /* presumably the first address of the range - confirm */ | |
249 | __u64 len; /* presumably the length of the range in bytes - confirm */ | |
250 | }; | |
251 | ||
252 | struct uffdio_register { /* argument of the UFFDIO_REGISTER ioctl */ | |
253 | struct uffdio_range range; | |
254 | #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) | |
255 | #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) | |
278f064e | 256 | #define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) |
c1fcf220 DDAG |
257 | __u64 mode; /* presumably an OR of the UFFDIO_REGISTER_MODE_* bits above - confirm */ |
258 | ||
259 | /* | |
260 | * The kernel answers with the ioctl commands that are available for | |
261 | * the range. Keep this at the end, as the last 8 bytes aren't read. | |
262 | */ | |
263 | __u64 ioctls; | |
264 | }; | |
265 | ||
266 | struct uffdio_copy { /* argument of the UFFDIO_COPY ioctl */ | |
267 | __u64 dst; /* dst/src/len: presumably destination address, source address and byte count - confirm */ | |
268 | __u64 src; | |
269 | __u64 len; | |
dc6f8d45 | 270 | #define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) |
c1fcf220 | 271 | /* |
dc6f8d45 CH |
272 | * UFFDIO_COPY_MODE_WP will map the page write protected on |
273 | * the fly. UFFDIO_COPY_MODE_WP is available only if the | |
274 | * write protected ioctl is implemented for the range | |
275 | * according to the uffdio_register.ioctls. | |
c1fcf220 | 276 | */ |
dc6f8d45 | 277 | #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) |
c1fcf220 DDAG |
278 | __u64 mode; /* presumably an OR of the UFFDIO_COPY_MODE_* bits above - confirm */ |
279 | ||
280 | /* | |
281 | * "copy" is written by the ioctl and must be at the end: the | |
282 | * copy_from_user will not read the last 8 bytes. | |
283 | */ | |
284 | __s64 copy; | |
285 | }; | |
286 | ||
287 | struct uffdio_zeropage { /* argument of the UFFDIO_ZEROPAGE ioctl */ | |
288 | struct uffdio_range range; | |
289 | #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) | |
290 | __u64 mode; /* presumably takes UFFDIO_ZEROPAGE_MODE_DONTWAKE - confirm */ | |
291 | ||
292 | /* | |
293 | * "zeropage" is written by the ioctl and must be at the end: | |
294 | * the copy_from_user will not read the last 8 bytes. | |
295 | */ | |
296 | __s64 zeropage; | |
297 | }; | |
298 | ||
dc6f8d45 CH |
299 | struct uffdio_writeprotect { /* argument of the UFFDIO_WRITEPROTECT ioctl */ |
300 | struct uffdio_range range; | |
301 | /* | |
302 | * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, | |
303 | * unset the flag to undo protection of a range which was previously | |
304 | * write protected. | |
305 | * | |
306 | * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up | |
307 | * any waiting thread after the operation succeeds. | |
308 | * | |
309 | * NOTE: Write protecting a region (WP=1) is unrelated to page faults, | |
310 | * therefore DONTWAKE flag is meaningless with WP=1. Removing write | |
311 | * protection (WP=0) in response to a page fault wakes the faulting | |
312 | * task unless DONTWAKE is set. | |
313 | */ | |
314 | #define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) | |
315 | #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) | |
316 | __u64 mode; /* the UFFDIO_WRITEPROTECT_MODE_* flags described above */ | |
317 | }; | |
318 | ||
278f064e EH |
319 | struct uffdio_continue { /* argument of the UFFDIO_CONTINUE ioctl */ |
320 | struct uffdio_range range; | |
321 | #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) | |
d0bf492f CLG |
322 | /* |
323 | * UFFDIO_CONTINUE_MODE_WP will map the page write protected on | |
324 | * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the | |
325 | * write protected ioctl is implemented for the range | |
326 | * according to the uffdio_register.ioctls. | |
327 | */ | |
328 | #define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1) | |
278f064e EH |
329 | __u64 mode; /* presumably an OR of the UFFDIO_CONTINUE_MODE_* bits above - confirm */ |
330 | ||
331 | /* | |
332 | * Fields below here are written by the ioctl and must be at the end: | |
333 | * the copy_from_user will not read past here. | |
334 | */ | |
335 | __s64 mapped; | |
336 | }; | |
337 | ||
da3c22c7 TH |
338 | struct uffdio_poison { /* argument of the UFFDIO_POISON ioctl */ |
339 | struct uffdio_range range; | |
340 | #define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0) | |
341 | __u64 mode; /* presumably takes UFFDIO_POISON_MODE_DONTWAKE - confirm */ | |
342 | ||
343 | /* | |
344 | * Fields below here are written by the ioctl and must be at the end: | |
345 | * the copy_from_user will not read past here. | |
346 | */ | |
347 | __s64 updated; | |
348 | }; | |
349 | ||
b3c818a4 EF |
350 | /* |
351 | * Flags for the userfaultfd(2) system call itself. | |
352 | */ | |
353 | ||
354 | /* | |
355 | * Create a userfaultfd that can handle page faults only in user mode. | |
356 | */ | |
357 | #define UFFD_USER_MODE_ONLY 1 | |
358 | ||
c1fcf220 | 359 | #endif /* _LINUX_USERFAULTFD_H */ |