]>
Commit | Line | Data |
---|---|---|
dd873966 | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
c1fcf220 DDAG |
2 | /* |
3 | * include/linux/userfaultfd.h | |
4 | * | |
5 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | |
6 | * Copyright (C) 2015 Red Hat, Inc. | |
7 | * | |
8 | */ | |
9 | ||
10 | #ifndef _LINUX_USERFAULTFD_H | |
11 | #define _LINUX_USERFAULTFD_H | |
12 | ||
13 | #include <linux/types.h> | |
14 | ||
c1fcf220 | 15 | /* |
3a5eb5b4 PB |
16 | * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and |
17 | * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In | |
18 | * userfaultfd.h we assumed the kernel was reading (whereas _IOC_READ | |
19 | * actually means that userland is reading). | |
c1fcf220 | 20 | */ |
3a5eb5b4 | 21 | #define UFFD_API ((__u64)0xAA) |
278f064e EH |
22 | #define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ |
23 | UFFDIO_REGISTER_MODE_WP | \ | |
24 | UFFDIO_REGISTER_MODE_MINOR) | |
dc6f8d45 CH |
25 | #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ |
26 | UFFD_FEATURE_EVENT_FORK | \ | |
3a5eb5b4 | 27 | UFFD_FEATURE_EVENT_REMAP | \ |
278f064e | 28 | UFFD_FEATURE_EVENT_REMOVE | \ |
74c98e20 | 29 | UFFD_FEATURE_EVENT_UNMAP | \ |
3a5eb5b4 | 30 | UFFD_FEATURE_MISSING_HUGETLBFS | \ |
d4083f50 AP |
31 | UFFD_FEATURE_MISSING_SHMEM | \ |
32 | UFFD_FEATURE_SIGBUS | \ | |
278f064e | 33 | UFFD_FEATURE_THREAD_ID | \ |
327d4b7f | 34 | UFFD_FEATURE_MINOR_HUGETLBFS | \ |
e4082063 AW |
35 | UFFD_FEATURE_MINOR_SHMEM | \ |
36 | UFFD_FEATURE_EXACT_ADDRESS) | |
c1fcf220 DDAG |
37 | #define UFFD_API_IOCTLS \ |
38 | ((__u64)1 << _UFFDIO_REGISTER | \ | |
39 | (__u64)1 << _UFFDIO_UNREGISTER | \ | |
40 | (__u64)1 << _UFFDIO_API) | |
41 | #define UFFD_API_RANGE_IOCTLS \ | |
42 | ((__u64)1 << _UFFDIO_WAKE | \ | |
43 | (__u64)1 << _UFFDIO_COPY | \ | |
dc6f8d45 | 44 | (__u64)1 << _UFFDIO_ZEROPAGE | \ |
278f064e EH |
45 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
46 | (__u64)1 << _UFFDIO_CONTINUE) | |
3a5eb5b4 PB |
47 | #define UFFD_API_RANGE_IOCTLS_BASIC \ |
48 | ((__u64)1 << _UFFDIO_WAKE | \ | |
278f064e EH |
49 | (__u64)1 << _UFFDIO_COPY | \ |
50 | (__u64)1 << _UFFDIO_CONTINUE) | |
c1fcf220 DDAG |
51 | |
52 | /* | |
53 | * Valid ioctl command number range with this API is from 0x00 to | |
54 | * 0x3F. UFFDIO_API is the fixed number, everything else can be | |
55 | * changed by implementing a different UFFD_API. If sticking to the | |
56 | * same UFFD_API more ioctl can be added and userland will be aware of | |
57 | * which ioctl the running kernel implements through the ioctl command | |
58 | * bitmask written by the UFFDIO_API. | |
59 | */ | |
60 | #define _UFFDIO_REGISTER (0x00) | |
61 | #define _UFFDIO_UNREGISTER (0x01) | |
62 | #define _UFFDIO_WAKE (0x02) | |
63 | #define _UFFDIO_COPY (0x03) | |
64 | #define _UFFDIO_ZEROPAGE (0x04) | |
dc6f8d45 | 65 | #define _UFFDIO_WRITEPROTECT (0x06) |
278f064e | 66 | #define _UFFDIO_CONTINUE (0x07) |
c1fcf220 DDAG |
67 | #define _UFFDIO_API (0x3F) |
68 | ||
69 | /* userfaultfd ioctl ids */ | |
70 | #define UFFDIO 0xAA | |
71 | #define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ | |
72 | struct uffdio_api) | |
73 | #define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ | |
74 | struct uffdio_register) | |
75 | #define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ | |
76 | struct uffdio_range) | |
77 | #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ | |
78 | struct uffdio_range) | |
79 | #define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ | |
80 | struct uffdio_copy) | |
81 | #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ | |
82 | struct uffdio_zeropage) | |
dc6f8d45 CH |
83 | #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ |
84 | struct uffdio_writeprotect) | |
327d4b7f BR |
85 | #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ |
86 | struct uffdio_continue) | |
c1fcf220 DDAG |
87 | |
88 | /* read() structure */ | |
89 | struct uffd_msg { | |
90 | __u8 event; | |
91 | ||
92 | __u8 reserved1; | |
93 | __u16 reserved2; | |
94 | __u32 reserved3; | |
95 | ||
96 | union { | |
97 | struct { | |
98 | __u64 flags; | |
99 | __u64 address; | |
d4083f50 AP |
100 | union { |
101 | __u32 ptid; | |
102 | } feat; | |
c1fcf220 DDAG |
103 | } pagefault; |
104 | ||
3a5eb5b4 PB |
105 | struct { |
106 | __u32 ufd; | |
107 | } fork; | |
108 | ||
109 | struct { | |
110 | __u64 from; | |
111 | __u64 to; | |
112 | __u64 len; | |
113 | } remap; | |
114 | ||
115 | struct { | |
116 | __u64 start; | |
117 | __u64 end; | |
74c98e20 | 118 | } remove; |
3a5eb5b4 | 119 | |
c1fcf220 DDAG |
120 | struct { |
121 | /* unused reserved fields */ | |
122 | __u64 reserved1; | |
123 | __u64 reserved2; | |
124 | __u64 reserved3; | |
125 | } reserved; | |
126 | } arg; | |
b89485a5 | 127 | } __attribute__((packed)); |
c1fcf220 DDAG |
128 | |
129 | /* | |
130 | * Start at 0x12 and not at 0 to be more strict against bugs. | |
131 | */ | |
132 | #define UFFD_EVENT_PAGEFAULT 0x12 | |
c1fcf220 | 133 | #define UFFD_EVENT_FORK 0x13 |
3a5eb5b4 | 134 | #define UFFD_EVENT_REMAP 0x14 |
74c98e20 CH |
135 | #define UFFD_EVENT_REMOVE 0x15 |
136 | #define UFFD_EVENT_UNMAP 0x16 | |
c1fcf220 DDAG |
137 | |
138 | /* flags for UFFD_EVENT_PAGEFAULT */ | |
139 | #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ | |
140 | #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ | |
278f064e | 141 | #define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ |
c1fcf220 DDAG |
142 | |
143 | struct uffdio_api { | |
144 | /* userland asks for an API number and the features to enable */ | |
145 | __u64 api; | |
146 | /* | |
147 | * Kernel answers below with all the available features for | |
148 | * the API, this notifies userland of which events and/or | |
149 | * which flags for each event are enabled in the current | |
150 | * kernel. | |
151 | * | |
152 | * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE | |
153 | * are to be considered implicitly always enabled in all kernels as | |
154 | * long as the uffdio_api.api requested matches UFFD_API. | |
3a5eb5b4 PB |
155 | * |
156 | * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER | |
157 | * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on | |
158 | * hugetlbfs virtual memory ranges. Adding or not adding | |
159 | * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has | |
160 | * no real functional effect after UFFDIO_API returns, but | |
161 | * it's only useful for an initial feature set probe at | |
162 | * UFFDIO_API time. There are two ways to use it: | |
163 | * | |
164 | * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the | |
165 | * uffdio_api.features before calling UFFDIO_API, an error | |
166 | * will be returned by UFFDIO_API on a kernel without | |
167 | * hugetlbfs missing support | |
168 | * | |
169 | * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can be left out of | |
170 | * uffdio_api.features and instead it will be set by the | |
171 | * kernel in the uffdio_api.features if the kernel supports | |
172 | * it, so userland can later check if the feature flag is | |
173 | * present in uffdio_api.features after UFFDIO_API | |
174 | * succeeded. | |
175 | * | |
176 | * UFFD_FEATURE_MISSING_SHMEM works the same as | |
177 | * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem | |
178 | * (i.e. tmpfs and other shmem based APIs). | |
d4083f50 AP |
179 | * |
180 | * UFFD_FEATURE_SIGBUS feature means no page-fault | |
181 | * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead | |
182 | * a SIGBUS signal will be sent to the faulting process. | |
183 | * | |
184 | * UFFD_FEATURE_THREAD_ID means the pid of the page faulting task_struct | |
185 | * will be returned; if the feature is not requested, 0 will be returned. | |
278f064e EH |
186 | * |
187 | * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults | |
188 | * can be intercepted (via REGISTER_MODE_MINOR) for | |
189 | * hugetlbfs-backed pages. | |
327d4b7f BR |
190 | * |
191 | * UFFD_FEATURE_MINOR_SHMEM indicates the same support as | |
192 | * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. | |
e4082063 AW |
193 | * |
194 | * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page | |
195 | * faults would be provided and the offset within the page would not be | |
196 | * masked. | |
c1fcf220 | 197 | */ |
c1fcf220 DDAG |
198 | #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) |
199 | #define UFFD_FEATURE_EVENT_FORK (1<<1) | |
3a5eb5b4 | 200 | #define UFFD_FEATURE_EVENT_REMAP (1<<2) |
74c98e20 | 201 | #define UFFD_FEATURE_EVENT_REMOVE (1<<3) |
3a5eb5b4 PB |
202 | #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) |
203 | #define UFFD_FEATURE_MISSING_SHMEM (1<<5) | |
74c98e20 | 204 | #define UFFD_FEATURE_EVENT_UNMAP (1<<6) |
d4083f50 AP |
205 | #define UFFD_FEATURE_SIGBUS (1<<7) |
206 | #define UFFD_FEATURE_THREAD_ID (1<<8) | |
278f064e | 207 | #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) |
327d4b7f | 208 | #define UFFD_FEATURE_MINOR_SHMEM (1<<10) |
e4082063 | 209 | #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) |
c1fcf220 DDAG |
210 | __u64 features; |
211 | ||
212 | __u64 ioctls; | |
213 | }; | |
214 | ||
215 | struct uffdio_range { | |
216 | __u64 start; | |
217 | __u64 len; | |
218 | }; | |
219 | ||
220 | struct uffdio_register { | |
221 | struct uffdio_range range; | |
222 | #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) | |
223 | #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) | |
278f064e | 224 | #define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) |
c1fcf220 DDAG |
225 | __u64 mode; |
226 | ||
227 | /* | |
228 | * kernel answers which ioctl commands are available for the | |
229 | * range, keep at the end as the last 8 bytes aren't read. | |
230 | */ | |
231 | __u64 ioctls; | |
232 | }; | |
233 | ||
234 | struct uffdio_copy { | |
235 | __u64 dst; | |
236 | __u64 src; | |
237 | __u64 len; | |
dc6f8d45 | 238 | #define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) |
c1fcf220 | 239 | /* |
dc6f8d45 CH |
240 | * UFFDIO_COPY_MODE_WP will map the page write protected on |
241 | * the fly. UFFDIO_COPY_MODE_WP is available only if the | |
242 | * write protected ioctl is implemented for the range | |
243 | * according to the uffdio_register.ioctls. | |
c1fcf220 | 244 | */ |
dc6f8d45 | 245 | #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) |
c1fcf220 DDAG |
246 | __u64 mode; |
247 | ||
248 | /* | |
249 | * "copy" is written by the ioctl and must be at the end: the | |
250 | * copy_from_user will not read the last 8 bytes. | |
251 | */ | |
252 | __s64 copy; | |
253 | }; | |
254 | ||
255 | struct uffdio_zeropage { | |
256 | struct uffdio_range range; | |
257 | #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) | |
258 | __u64 mode; | |
259 | ||
260 | /* | |
261 | * "zeropage" is written by the ioctl and must be at the end: | |
262 | * the copy_from_user will not read the last 8 bytes. | |
263 | */ | |
264 | __s64 zeropage; | |
265 | }; | |
266 | ||
dc6f8d45 CH |
267 | struct uffdio_writeprotect { |
268 | struct uffdio_range range; | |
269 | /* | |
270 | * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, | |
271 | * unset the flag to undo protection of a range which was previously | |
272 | * write protected. | |
273 | * | |
274 | * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up | |
275 | * any waiting thread after the operation succeeds. | |
276 | * | |
277 | * NOTE: Write protecting a region (WP=1) is unrelated to page faults, | |
278 | * therefore DONTWAKE flag is meaningless with WP=1. Removing write | |
279 | * protection (WP=0) in response to a page fault wakes the faulting | |
280 | * task unless DONTWAKE is set. | |
281 | */ | |
282 | #define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) | |
283 | #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) | |
284 | __u64 mode; | |
285 | }; | |
286 | ||
278f064e EH |
287 | struct uffdio_continue { |
288 | struct uffdio_range range; | |
289 | #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) | |
290 | __u64 mode; | |
291 | ||
292 | /* | |
293 | * Fields below here are written by the ioctl and must be at the end: | |
294 | * the copy_from_user will not read past here. | |
295 | */ | |
296 | __s64 mapped; | |
297 | }; | |
298 | ||
b3c818a4 EF |
299 | /* |
300 | * Flags for the userfaultfd(2) system call itself. | |
301 | */ | |
302 | ||
303 | /* | |
304 | * Create a userfaultfd that can handle page faults only in user mode. | |
305 | */ | |
306 | #define UFFD_USER_MODE_ONLY 1 | |
307 | ||
c1fcf220 | 308 | #endif /* _LINUX_USERFAULTFD_H */ |