]>
Commit | Line | Data |
---|---|---|
dd873966 | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
c1fcf220 DDAG |
2 | /* |
3 | * include/linux/userfaultfd.h | |
4 | * | |
5 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> | |
6 | * Copyright (C) 2015 Red Hat, Inc. | |
7 | * | |
8 | */ | |
9 | ||
10 | #ifndef _LINUX_USERFAULTFD_H | |
11 | #define _LINUX_USERFAULTFD_H | |
12 | ||
13 | #include <linux/types.h> | |
14 | ||
c1fcf220 | 15 | /* |
3a5eb5b4 PB |
16 | * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and |
17 | * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In | |
18 | * userfaultfd.h we assumed the kernel was reading (whereas _IOC_READ | |
19 | * actually means that userland is reading). | |
c1fcf220 | 20 | */ |
3a5eb5b4 | 21 | #define UFFD_API ((__u64)0xAA) |
278f064e EH |
22 | #define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ |
23 | UFFDIO_REGISTER_MODE_WP | \ | |
24 | UFFDIO_REGISTER_MODE_MINOR) | |
dc6f8d45 CH |
25 | #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ |
26 | UFFD_FEATURE_EVENT_FORK | \ | |
3a5eb5b4 | 27 | UFFD_FEATURE_EVENT_REMAP | \ |
278f064e | 28 | UFFD_FEATURE_EVENT_REMOVE | \ |
74c98e20 | 29 | UFFD_FEATURE_EVENT_UNMAP | \ |
3a5eb5b4 | 30 | UFFD_FEATURE_MISSING_HUGETLBFS | \ |
d4083f50 AP |
31 | UFFD_FEATURE_MISSING_SHMEM | \ |
32 | UFFD_FEATURE_SIGBUS | \ | |
278f064e EH |
33 | UFFD_FEATURE_THREAD_ID | \ |
34 | UFFD_FEATURE_MINOR_HUGETLBFS) | |
c1fcf220 DDAG |
35 | #define UFFD_API_IOCTLS \ |
36 | ((__u64)1 << _UFFDIO_REGISTER | \ | |
37 | (__u64)1 << _UFFDIO_UNREGISTER | \ | |
38 | (__u64)1 << _UFFDIO_API) | |
39 | #define UFFD_API_RANGE_IOCTLS \ | |
40 | ((__u64)1 << _UFFDIO_WAKE | \ | |
41 | (__u64)1 << _UFFDIO_COPY | \ | |
dc6f8d45 | 42 | (__u64)1 << _UFFDIO_ZEROPAGE | \ |
278f064e EH |
43 | (__u64)1 << _UFFDIO_WRITEPROTECT | \ |
44 | (__u64)1 << _UFFDIO_CONTINUE) | |
3a5eb5b4 PB |
45 | #define UFFD_API_RANGE_IOCTLS_BASIC \ |
46 | ((__u64)1 << _UFFDIO_WAKE | \ | |
278f064e EH |
47 | (__u64)1 << _UFFDIO_COPY | \ |
48 | (__u64)1 << _UFFDIO_CONTINUE) | |
c1fcf220 DDAG |
49 | |
50 | /* | |
51 | * Valid ioctl command number range with this API is from 0x00 to | |
52 | * 0x3F. UFFDIO_API is the fixed number, everything else can be | |
53 | * changed by implementing a different UFFD_API. If sticking to the | |
54 | * same UFFD_API, more ioctls can be added and userland will be aware of | |
55 | * which ioctl the running kernel implements through the ioctl command | |
56 | * bitmask written by the UFFDIO_API. | |
57 | */ | |
58 | #define _UFFDIO_REGISTER (0x00) | |
59 | #define _UFFDIO_UNREGISTER (0x01) | |
60 | #define _UFFDIO_WAKE (0x02) | |
61 | #define _UFFDIO_COPY (0x03) | |
62 | #define _UFFDIO_ZEROPAGE (0x04) | |
dc6f8d45 | 63 | #define _UFFDIO_WRITEPROTECT (0x06) |
278f064e | 64 | #define _UFFDIO_CONTINUE (0x07) |
c1fcf220 DDAG |
65 | #define _UFFDIO_API (0x3F) |
66 | ||
67 | /* userfaultfd ioctl ids */ | |
68 | #define UFFDIO 0xAA | |
69 | #define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ | |
70 | struct uffdio_api) | |
71 | #define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ | |
72 | struct uffdio_register) | |
73 | #define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ | |
74 | struct uffdio_range) | |
75 | #define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ | |
76 | struct uffdio_range) | |
77 | #define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ | |
78 | struct uffdio_copy) | |
79 | #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ | |
80 | struct uffdio_zeropage) | |
dc6f8d45 CH |
81 | #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ |
82 | struct uffdio_writeprotect) | |
278f064e EH |
83 | #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ |
84 | struct uffdio_continue) /* _IOWR: userland writes range/mode, kernel writes back "mapped" */ | |
c1fcf220 DDAG |
85 | |
86 | /* read() structure */ | |
87 | struct uffd_msg { | |
88 | __u8 event; | |
89 | ||
90 | __u8 reserved1; | |
91 | __u16 reserved2; | |
92 | __u32 reserved3; | |
93 | ||
94 | union { | |
95 | struct { | |
96 | __u64 flags; | |
97 | __u64 address; | |
d4083f50 AP |
98 | union { |
99 | __u32 ptid; | |
100 | } feat; | |
c1fcf220 DDAG |
101 | } pagefault; |
102 | ||
3a5eb5b4 PB |
103 | struct { |
104 | __u32 ufd; | |
105 | } fork; | |
106 | ||
107 | struct { | |
108 | __u64 from; | |
109 | __u64 to; | |
110 | __u64 len; | |
111 | } remap; | |
112 | ||
113 | struct { | |
114 | __u64 start; | |
115 | __u64 end; | |
74c98e20 | 116 | } remove; |
3a5eb5b4 | 117 | |
c1fcf220 DDAG |
118 | struct { |
119 | /* unused reserved fields */ | |
120 | __u64 reserved1; | |
121 | __u64 reserved2; | |
122 | __u64 reserved3; | |
123 | } reserved; | |
124 | } arg; | |
b89485a5 | 125 | } __attribute__((packed)); |
c1fcf220 DDAG |
126 | |
127 | /* | |
128 | * Start at 0x12 and not at 0 to be more strict against bugs. | |
129 | */ | |
130 | #define UFFD_EVENT_PAGEFAULT 0x12 | |
c1fcf220 | 131 | #define UFFD_EVENT_FORK 0x13 |
3a5eb5b4 | 132 | #define UFFD_EVENT_REMAP 0x14 |
74c98e20 CH |
133 | #define UFFD_EVENT_REMOVE 0x15 |
134 | #define UFFD_EVENT_UNMAP 0x16 | |
c1fcf220 DDAG |
135 | |
136 | /* flags for UFFD_EVENT_PAGEFAULT */ | |
137 | #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ | |
138 | #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ | |
278f064e | 139 | #define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ |
c1fcf220 DDAG |
140 | |
141 | struct uffdio_api { | |
142 | /* userland asks for an API number and the features to enable */ | |
143 | __u64 api; | |
144 | /* | |
145 | * Kernel answers below with all the available features for | |
146 | * the API, this notifies userland of which events and/or | |
147 | * which flags for each event are enabled in the current | |
148 | * kernel. | |
149 | * | |
150 | * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE | |
151 | * are to be considered implicitly always enabled in all kernels as | |
152 | * long as the uffdio_api.api requested matches UFFD_API. | |
3a5eb5b4 PB |
153 | * |
154 | * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER | |
155 | * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on | |
156 | * hugetlbfs virtual memory ranges. Adding or not adding | |
157 | * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has | |
158 | * no real functional effect after UFFDIO_API returns, but | |
159 | * it's only useful for an initial feature set probe at | |
160 | * UFFDIO_API time. There are two ways to use it: | |
161 | * | |
162 | * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the | |
163 | * uffdio_api.features before calling UFFDIO_API, an error | |
164 | * will be returned by UFFDIO_API on a kernel without | |
165 | * hugetlbfs missing support | |
166 | * | |
167 | * 2) UFFD_FEATURE_MISSING_HUGETLBFS can instead be left out of | |
168 | * uffdio_api.features; the kernel will then set it in | |
169 | * uffdio_api.features itself if it supports the feature, | |
170 | * so userland can later check whether the feature flag is | |
171 | * present in uffdio_api.features after UFFDIO_API | |
172 | * succeeded. | |
173 | * | |
174 | * UFFD_FEATURE_MISSING_SHMEM works the same as | |
175 | * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem | |
176 | * (i.e. tmpfs and other shmem based APIs). | |
d4083f50 AP |
177 | * |
178 | * UFFD_FEATURE_SIGBUS feature means no page-fault | |
179 | * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead | |
180 | * a SIGBUS signal will be sent to the faulting process. | |
181 | * | |
182 | * UFFD_FEATURE_THREAD_ID means the pid of the page-faulting task | |
183 | * will be returned; if the feature is not requested, 0 is returned. | |
278f064e EH |
184 | * |
185 | * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults | |
186 | * can be intercepted (via REGISTER_MODE_MINOR) for | |
187 | * hugetlbfs-backed pages. | |
c1fcf220 | 188 | */ |
c1fcf220 DDAG |
189 | #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) |
190 | #define UFFD_FEATURE_EVENT_FORK (1<<1) | |
3a5eb5b4 | 191 | #define UFFD_FEATURE_EVENT_REMAP (1<<2) |
74c98e20 | 192 | #define UFFD_FEATURE_EVENT_REMOVE (1<<3) |
3a5eb5b4 PB |
193 | #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) |
194 | #define UFFD_FEATURE_MISSING_SHMEM (1<<5) | |
74c98e20 | 195 | #define UFFD_FEATURE_EVENT_UNMAP (1<<6) |
d4083f50 AP |
196 | #define UFFD_FEATURE_SIGBUS (1<<7) |
197 | #define UFFD_FEATURE_THREAD_ID (1<<8) | |
278f064e | 198 | #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) |
c1fcf220 DDAG |
199 | __u64 features; |
200 | ||
201 | __u64 ioctls; | |
202 | }; | |
203 | ||
204 | struct uffdio_range { | |
205 | __u64 start; | |
206 | __u64 len; | |
207 | }; | |
208 | ||
209 | struct uffdio_register { | |
210 | struct uffdio_range range; | |
211 | #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) | |
212 | #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) | |
278f064e | 213 | #define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) |
c1fcf220 DDAG |
214 | __u64 mode; |
215 | ||
216 | /* | |
217 | * kernel answers which ioctl commands are available for the | |
218 | * range, keep at the end as the last 8 bytes aren't read. | |
219 | */ | |
220 | __u64 ioctls; | |
221 | }; | |
222 | ||
223 | struct uffdio_copy { | |
224 | __u64 dst; | |
225 | __u64 src; | |
226 | __u64 len; | |
dc6f8d45 | 227 | #define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) |
c1fcf220 | 228 | /* |
dc6f8d45 CH |
229 | * UFFDIO_COPY_MODE_WP will map the page write protected on |
230 | * the fly. UFFDIO_COPY_MODE_WP is available only if the | |
231 | * write protected ioctl is implemented for the range | |
232 | * according to the uffdio_register.ioctls. | |
c1fcf220 | 233 | */ |
dc6f8d45 | 234 | #define UFFDIO_COPY_MODE_WP ((__u64)1<<1) |
c1fcf220 DDAG |
235 | __u64 mode; |
236 | ||
237 | /* | |
238 | * "copy" is written by the ioctl and must be at the end: the | |
239 | * copy_from_user will not read the last 8 bytes. | |
240 | */ | |
241 | __s64 copy; | |
242 | }; | |
243 | ||
244 | struct uffdio_zeropage { | |
245 | struct uffdio_range range; | |
246 | #define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) | |
247 | __u64 mode; | |
248 | ||
249 | /* | |
250 | * "zeropage" is written by the ioctl and must be at the end: | |
251 | * the copy_from_user will not read the last 8 bytes. | |
252 | */ | |
253 | __s64 zeropage; | |
254 | }; | |
255 | ||
dc6f8d45 CH |
256 | struct uffdio_writeprotect { |
257 | struct uffdio_range range; | |
258 | /* | |
259 | * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, | |
260 | * unset the flag to undo protection of a range which was previously | |
261 | * write protected. | |
262 | * | |
263 | * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up | |
264 | * any waiting thread after the operation succeeds. | |
265 | * | |
266 | * NOTE: Write protecting a region (WP=1) is unrelated to page faults, | |
267 | * therefore DONTWAKE flag is meaningless with WP=1. Removing write | |
268 | * protection (WP=0) in response to a page fault wakes the faulting | |
269 | * task unless DONTWAKE is set. | |
270 | */ | |
271 | #define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) | |
272 | #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) | |
273 | __u64 mode; | |
274 | }; | |
275 | ||
278f064e EH |
276 | struct uffdio_continue { |
277 | struct uffdio_range range; | |
278 | #define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) | |
279 | __u64 mode; | |
280 | ||
281 | /* | |
282 | * Fields below here are written by the ioctl and must be at the end: | |
283 | * the copy_from_user will not read past here. | |
284 | */ | |
285 | __s64 mapped; | |
286 | }; | |
287 | ||
b3c818a4 EF |
288 | /* |
289 | * Flags for the userfaultfd(2) system call itself. | |
290 | */ | |
291 | ||
292 | /* | |
293 | * Create a userfaultfd that can handle page faults only in user mode. | |
294 | */ | |
295 | #define UFFD_USER_MODE_ONLY 1 | |
296 | ||
c1fcf220 | 297 | #endif /* _LINUX_USERFAULTFD_H */ |