/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/srcu.h>
#include <linux/interval_tree.h>

struct mmu_notifier_subscriptions;
struct mmu_notifier;
struct mmu_notifier_range;
struct mmu_interval_notifier;

/**
 * enum mmu_notifier_event - reason for the mmu notifier callback
 * @MMU_NOTIFY_UNMAP: either a munmap() that unmaps the range or a mremap()
 * that moves the range
 *
 * @MMU_NOTIFY_CLEAR: clear page table entry (many reasons for this like
 * madvise() or replacing a page by another one, ...).
 *
 * @MMU_NOTIFY_PROTECTION_VMA: update is due to a protection change for the
 * range, i.e. using the vma access permission (vm_page_prot) to update the
 * whole range is enough; there is no need to inspect changes to the CPU page
 * table (mprotect() syscall).
 *
 * @MMU_NOTIFY_PROTECTION_PAGE: update is due to a change in the read/write
 * flag for pages in the range, so to mirror those changes the user must
 * inspect the CPU page table (from the end callback).
 *
 * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still the same page and the
 * same access flags). The user should soft dirty the page in the end callback
 * to make sure that anyone relying on soft dirtiness catches pages that might
 * be written through non-CPU mappings.
 *
 * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal
 * that the mm refcount is zero and the range is no longer accessible.
 *
 * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal
 * a device driver to possibly ignore the invalidation if the
 * migrate_pgmap_owner field matches the driver's device private pgmap owner.
 */
enum mmu_notifier_event {
	MMU_NOTIFY_UNMAP = 0,
	MMU_NOTIFY_CLEAR,
	MMU_NOTIFY_PROTECTION_VMA,
	MMU_NOTIFY_PROTECTION_PAGE,
	MMU_NOTIFY_SOFT_DIRTY,
	MMU_NOTIFY_RELEASE,
	MMU_NOTIFY_MIGRATE,
};
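
/*
 * Example (illustrative sketch, not part of this header): a device driver
 * that migrated the pages itself can use the event type and the pgmap owner
 * to skip invalidations it triggered. All "my_*" names are hypothetical.
 *
 *	static bool my_svm_invalidate(struct mmu_interval_notifier *interval_sub,
 *				      const struct mmu_notifier_range *range,
 *				      unsigned long cur_seq)
 *	{
 *		struct my_svm *svm = container_of(interval_sub,
 *						  struct my_svm, notifier);
 *
 *		if (range->event == MMU_NOTIFY_MIGRATE &&
 *		    range->migrate_pgmap_owner == svm->pgmap_owner)
 *			return true;
 *		return my_svm_do_invalidate(svm, range, cur_seq);
 *	}
 */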

#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model. So if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, which would lead to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *subscription,
			struct mm_struct *mm);

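	/*
	 * Example (illustrative sketch with hypothetical "my_*" names): a
	 * minimal ->release() stops the device and drops its mappings so
	 * nothing can write through the secondary mmu afterwards:
	 *
	 *	static void my_mn_release(struct mmu_notifier *subscription,
	 *				  struct mm_struct *mm)
	 *	{
	 *		struct my_ctx *ctx = container_of(subscription,
	 *						  struct my_ctx, mn);
	 *
	 *		my_device_stop_dma(ctx);
	 *		my_device_unmap_all(ctx);
	 *	}
	 */
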
	/*
	 * clear_flush_young is called after the VM test-and-clears the
	 * young/accessed bitflag in the pte. This way the VM will
	 * provide proper aging to the accesses to the page through the
	 * secondary MMUs and not only to the ones through the Linux pte.
	 * Start-end is necessary in case the secondary MMU is mapping the page
	 * at a smaller granularity than the primary MMU.
	 */
	int (*clear_flush_young)(struct mmu_notifier *subscription,
				 struct mm_struct *mm,
				 unsigned long start,
				 unsigned long end);

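	/*
	 * Example (illustrative sketch, hypothetical "my_*" helpers): a
	 * secondary MMU ages its mappings and flushes its TLB so that stale
	 * accessed bits are not reported again:
	 *
	 *	static int my_mn_clear_flush_young(struct mmu_notifier *subscription,
	 *					   struct mm_struct *mm,
	 *					   unsigned long start,
	 *					   unsigned long end)
	 *	{
	 *		struct my_ctx *ctx = container_of(subscription,
	 *						  struct my_ctx, mn);
	 *		int young;
	 *
	 *		young = my_device_test_and_clear_accessed(ctx, start, end);
	 *		my_device_flush_tlb_range(ctx, start, end);
	 *		return young;
	 *	}
	 */
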
	/*
	 * clear_young is a lightweight version of clear_flush_young. Like the
	 * latter, it is supposed to test-and-clear the young/accessed bitflag
	 * in the secondary pte, but it may omit flushing the secondary tlb.
	 */
	int (*clear_young)(struct mmu_notifier *subscription,
			   struct mm_struct *mm,
			   unsigned long start,
			   unsigned long end);

	/*
	 * test_young is called to check the young/accessed bitflag in
	 * the secondary pte. This is used to know if the page is
	 * frequently used without actually clearing the flag or tearing
	 * down the secondary mapping on the page.
	 */
	int (*test_young)(struct mmu_notifier *subscription,
			  struct mm_struct *mm,
			  unsigned long address);

	/*
	 * change_pte is called in cases where the pte mapping to a page is
	 * changed: for example, when ksm remaps the pte to point to a new
	 * shared page.
	 */
	void (*change_pte)(struct mmu_notifier *subscription,
			   struct mm_struct *mm,
			   unsigned long address,
			   pte_t pte);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_lock and/or the
	 * locks protecting the reverse maps are held. If the subsystem
	 * can't guarantee that no additional references are taken to
	 * the pages in the range, it has to implement the
	 * invalidate_range() notifier to remove any references taken
	 * after invalidate_range_start().
	 *
	 * Invalidation of multiple concurrent ranges may be
	 * optionally permitted by the driver. Either way the
	 * establishment of sptes is forbidden in the range passed to
	 * invalidate_range_begin/end for the whole duration of the
	 * invalidate_range_begin/end critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is only dropped on
	 * invalidate_range_end() then the driver itself will drop the
	 * last refcount, but it must take care to flush any secondary
	 * tlb before doing the final free on the page. If the refcount
	 * is decreased on invalidate_range_start() then the VM can
	 * free pages as page table entries are removed. Pages will no
	 * longer be referenced by the linux address space but may
	 * still be referenced by sptes until the last refcount is
	 * dropped.
	 *
	 * If mmu_notifier_range_blockable(range) is false then the callback
	 * cannot sleep and has to return -EAGAIN if sleeping would be
	 * required. 0 should be returned otherwise. Please note that
	 * notifiers that can fail invalidate_range_start are not allowed to
	 * implement invalidate_range_end, as there is no mechanism for
	 * informing the notifier that its start failed.
	 */
	int (*invalidate_range_start)(struct mmu_notifier *subscription,
				      const struct mmu_notifier_range *range);
	void (*invalidate_range_end)(struct mmu_notifier *subscription,
				     const struct mmu_notifier_range *range);
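
	/*
	 * Example (illustrative sketch, hypothetical "my_*" names): a driver
	 * that must sleep to tear down mappings honours non-blockable mode by
	 * failing with -EAGAIN; because it can fail, it must not implement
	 * invalidate_range_end():
	 *
	 *	static int my_mn_invalidate_range_start(struct mmu_notifier *subscription,
	 *						const struct mmu_notifier_range *range)
	 *	{
	 *		struct my_ctx *ctx = container_of(subscription,
	 *						  struct my_ctx, mn);
	 *
	 *		if (!mmu_notifier_range_blockable(range))
	 *			return -EAGAIN;
	 *		mutex_lock(&ctx->lock);
	 *		my_device_unmap_range(ctx, range->start, range->end);
	 *		mutex_unlock(&ctx->lock);
	 *		return 0;
	 *	}
	 */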

	/*
	 * invalidate_range() is either called between
	 * invalidate_range_start() and invalidate_range_end() when the
	 * VM has to free pages that were unmapped, but before the
	 * pages are actually freed, or outside of _start()/_end() when
	 * a (remote) TLB flush is necessary.
	 *
	 * If invalidate_range() is used to manage a non-CPU TLB with
	 * shared page-tables, it is not necessary to implement the
	 * invalidate_range_start()/end() notifiers, as
	 * invalidate_range() already catches the points in time when an
	 * external TLB range needs to be flushed. For a more in-depth
	 * discussion on this see Documentation/vm/mmu_notifier.rst
	 *
	 * Note that this function might be called with just a sub-range
	 * of what was passed to invalidate_range_start()/end(), if
	 * called between those functions.
	 */
	void (*invalidate_range)(struct mmu_notifier *subscription,
				 struct mm_struct *mm,
				 unsigned long start,
				 unsigned long end);

	/*
	 * These callbacks are used with the get/put interface to manage the
	 * lifetime of the mmu_notifier memory. alloc_notifier() returns a new
	 * notifier for use with the mm.
	 *
	 * free_notifier() is only called after the mmu_notifier has been
	 * fully put, calls to any ops callback are prevented and no ops
	 * callbacks are currently running. It is called from a SRCU callback
	 * and cannot sleep.
	 */
	struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm);
	void (*free_notifier)(struct mmu_notifier *subscription);
};
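
/*
 * Example (illustrative sketch, hypothetical "my_*" names): with the get/put
 * interface a driver embeds the mmu_notifier in its own context and lets the
 * core deduplicate registrations per mm:
 *
 *	static struct mmu_notifier *my_mn_alloc(struct mm_struct *mm)
 *	{
 *		struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 *
 *		if (!ctx)
 *			return ERR_PTR(-ENOMEM);
 *		return &ctx->mn;
 *	}
 *
 *	static void my_mn_free(struct mmu_notifier *subscription)
 *	{
 *		kfree(container_of(subscription, struct my_ctx, mn));
 *	}
 *
 *	static const struct mmu_notifier_ops my_mn_ops = {
 *		.release	= my_mn_release,
 *		.alloc_notifier	= my_mn_alloc,
 *		.free_notifier	= my_mn_free,
 *	};
 *
 * A user then calls mmu_notifier_get(&my_mn_ops, current->mm) and pairs it
 * with mmu_notifier_put() when done.
 */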

/*
 * The notifier chains are protected by mmap_lock and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_lock locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_lock is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
	struct mm_struct *mm;
	struct rcu_head rcu;
	unsigned int users;
};

/**
 * struct mmu_interval_notifier_ops
 * @invalidate: Upon return the caller must stop using any SPTEs within this
 *              range. This function can sleep. Return false only if sleeping
 *              was required but mmu_notifier_range_blockable(range) is false.
 */
struct mmu_interval_notifier_ops {
	bool (*invalidate)(struct mmu_interval_notifier *interval_sub,
			   const struct mmu_notifier_range *range,
			   unsigned long cur_seq);
};

struct mmu_interval_notifier {
	struct interval_tree_node interval_tree;
	const struct mmu_interval_notifier_ops *ops;
	struct mm_struct *mm;
	struct hlist_node deferred_item;
	unsigned long invalidate_seq;
};

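/*
 * Example (illustrative sketch, hypothetical "my_*" names): users embed the
 * interval notifier in their own per-range structure and recover it with
 * container_of() from the invalidate callback:
 *
 *	struct my_range {
 *		struct mmu_interval_notifier notifier;
 *		struct mutex lock;
 *	};
 *
 *	err = mmu_interval_notifier_insert(&mr->notifier, current->mm,
 *					   start, length, &my_interval_ops);
 */
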
#ifdef CONFIG_MMU_NOTIFIER

#ifdef CONFIG_LOCKDEP
extern struct lockdep_map __mmu_notifier_invalidate_range_start_map;
#endif

struct mmu_notifier_range {
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long start;
	unsigned long end;
	unsigned flags;
	enum mmu_notifier_event event;
	void *migrate_pgmap_owner;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->notifier_subscriptions);
}

struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
					     struct mm_struct *mm);
static inline struct mmu_notifier *
mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
{
	struct mmu_notifier *ret;

	mmap_write_lock(mm);
	ret = mmu_notifier_get_locked(ops, mm);
	mmap_write_unlock(mm);
	return ret;
}
void mmu_notifier_put(struct mmu_notifier *subscription);
void mmu_notifier_synchronize(void);

extern int mmu_notifier_register(struct mmu_notifier *subscription,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *subscription,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *subscription,
				    struct mm_struct *mm);

unsigned long
mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub);
int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
				 struct mm_struct *mm, unsigned long start,
				 unsigned long length,
				 const struct mmu_interval_notifier_ops *ops);
int mmu_interval_notifier_insert_locked(
	struct mmu_interval_notifier *interval_sub, struct mm_struct *mm,
	unsigned long start, unsigned long length,
	const struct mmu_interval_notifier_ops *ops);
void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub);

/**
 * mmu_interval_set_seq - Save the invalidation sequence
 * @interval_sub: The subscription passed to invalidate
 * @cur_seq: The cur_seq passed to the invalidate() callback
 *
 * This must be called unconditionally from the invalidate callback of a
 * struct mmu_interval_notifier_ops under the same lock that is used to call
 * mmu_interval_read_retry(). It updates the sequence number for later use by
 * mmu_interval_read_retry(). The provided cur_seq will always be odd.
 *
 * If the caller does not call mmu_interval_read_begin() or
 * mmu_interval_read_retry() then this call is not required.
 */
static inline void
mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub,
		     unsigned long cur_seq)
{
	WRITE_ONCE(interval_sub->invalidate_seq, cur_seq);
}
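
/*
 * Example (illustrative sketch, hypothetical "my_*" names, following the
 * pattern described in Documentation/vm/hmm.rst): the invalidate callback
 * takes the driver lock (only trying it in non-blockable mode), records the
 * sequence and then removes the device mappings:
 *
 *	static bool my_invalidate(struct mmu_interval_notifier *interval_sub,
 *				  const struct mmu_notifier_range *range,
 *				  unsigned long cur_seq)
 *	{
 *		struct my_range *mr = container_of(interval_sub,
 *						   struct my_range, notifier);
 *
 *		if (mmu_notifier_range_blockable(range))
 *			mutex_lock(&mr->lock);
 *		else if (!mutex_trylock(&mr->lock))
 *			return false;
 *		mmu_interval_set_seq(interval_sub, cur_seq);
 *		my_unmap_device_range(mr, range->start, range->end);
 *		mutex_unlock(&mr->lock);
 *		return true;
 *	}
 */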

/**
 * mmu_interval_read_retry - End a read side critical section against a VA range
 * @interval_sub: The subscription
 * @seq: The return of the paired mmu_interval_read_begin()
 *
 * This MUST be called under a user provided lock that is also held
 * unconditionally by op->invalidate() when it calls mmu_interval_set_seq().
 *
 * Each call should be paired with a single mmu_interval_read_begin() and
 * should be used to conclude the read side.
 *
 * Returns true if an invalidation collided with this critical section, and
 * the caller should retry.
 */
static inline bool
mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub,
			unsigned long seq)
{
	return interval_sub->invalidate_seq != seq;
}
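
/*
 * Example (illustrative sketch, hypothetical "my_*" names): the read side
 * faults pages under mmap_lock, then takes the same driver lock used by the
 * invalidate callback above and retries if an invalidation raced with it:
 *
 *	again:
 *		seq = mmu_interval_read_begin(&mr->notifier);
 *		mmap_read_lock(mm);
 *		ret = my_fault_range(mr);
 *		mmap_read_unlock(mm);
 *		if (ret)
 *			return ret;
 *
 *		mutex_lock(&mr->lock);
 *		if (mmu_interval_read_retry(&mr->notifier, seq)) {
 *			mutex_unlock(&mr->lock);
 *			goto again;
 *		}
 *		my_program_device_ptes(mr);
 *		mutex_unlock(&mr->lock);
 */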

/**
 * mmu_interval_check_retry - Test if a collision has occurred
 * @interval_sub: The subscription
 * @seq: The return of the matching mmu_interval_read_begin()
 *
 * This can be used in the critical section between mmu_interval_read_begin()
 * and mmu_interval_read_retry(). A return of true indicates an invalidation
 * has collided with this critical region and a future
 * mmu_interval_read_retry() will return true.
 *
 * False is not reliable and only suggests a collision may not have
 * occurred. It can be called many times and does not have to hold the user
 * provided lock.
 *
 * This call can be used as part of loops and other expensive operations to
 * expedite a retry.
 */
static inline bool
mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub,
			 unsigned long seq)
{
	/* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */
	return READ_ONCE(interval_sub->invalidate_seq) != seq;
}

extern void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end);
extern int __mmu_notifier_clear_young(struct mm_struct *mm,
				      unsigned long start,
				      unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
				     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
				      unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r,
						bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
					    unsigned long start,
					    unsigned long end);
extern bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range);

static inline bool
mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
{
	return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE);
}

static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_clear_young(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_young(mm, start, end);
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_test_young(mm, address);
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_change_pte(mm, address, pte);
}

static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
	might_sleep();

	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	if (mm_has_notifiers(range->mm)) {
		range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE;
		__mmu_notifier_invalidate_range_start(range);
	}
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}

static inline int
mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
{
	int ret = 0;

	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	if (mm_has_notifiers(range->mm)) {
		range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE;
		ret = __mmu_notifier_invalidate_range_start(range);
	}
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
	return ret;
}

static inline void
mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
	if (mmu_notifier_range_blockable(range))
		might_sleep();

	if (mm_has_notifiers(range->mm))
		__mmu_notifier_invalidate_range_end(range, false);
}

static inline void
mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range)
{
	if (mm_has_notifiers(range->mm))
		__mmu_notifier_invalidate_range_end(range, true);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range(mm, start, end);
}

static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
{
	mm->notifier_subscriptions = NULL;
}

static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_subscriptions_destroy(mm);
}

static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
					   enum mmu_notifier_event event,
					   unsigned flags,
					   struct vm_area_struct *vma,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	range->vma = vma;
	range->event = event;
	range->mm = mm;
	range->start = start;
	range->end = end;
	range->flags = flags;
}

static inline void mmu_notifier_range_init_migrate(
			struct mmu_notifier_range *range, unsigned int flags,
			struct vm_area_struct *vma, struct mm_struct *mm,
			unsigned long start, unsigned long end, void *pgmap)
{
	mmu_notifier_range_init(range, MMU_NOTIFY_MIGRATE, flags, vma, mm,
				start, end);
	range->migrate_pgmap_owner = pgmap;
}

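/*
 * Example (illustrative sketch): core mm code brackets page table changes
 * with an initialized range, e.g. when clearing PTEs:
 *
 *	struct mmu_notifier_range range;
 *
 *	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
 *				start, end);
 *	mmu_notifier_invalidate_range_start(&range);
 *	... remove the CPU page table entries for [start, end) ...
 *	mmu_notifier_invalidate_range_end(&range);
 */
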
#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address,		\
						  ___address +		\
							PMD_SIZE);	\
	__young;							\
})

#define ptep_clear_young_notify(__vma, __address, __ptep)		\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
					    ___address + PAGE_SIZE);	\
	__young;							\
})

#define pmdp_clear_young_notify(__vma, __address, __pmdp)		\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
	__young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,	\
					    ___address + PMD_SIZE);	\
	__young;							\
})

#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
({									\
	unsigned long ___addr = __address & PAGE_MASK;			\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pte_t ___pte;							\
									\
	___pte = ptep_clear_flush(__vma, __address, __ptep);		\
	mmu_notifier_invalidate_range(___mm, ___addr,			\
				      ___addr + PAGE_SIZE);		\
									\
	___pte;								\
})

#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)		\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pmd_t ___pmd;							\
									\
	___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);		\
	mmu_notifier_invalidate_range(___mm, ___haddr,			\
				      ___haddr + HPAGE_PMD_SIZE);	\
									\
	___pmd;								\
})

#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)		\
({									\
	unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;		\
	struct mm_struct *___mm = (__vma)->vm_mm;			\
	pud_t ___pud;							\
									\
	___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);		\
	mmu_notifier_invalidate_range(___mm, ___haddr,			\
				      ___haddr + HPAGE_PUD_SIZE);	\
									\
	___pud;								\
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to start by updating the secondary MMUs, because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush() before
 * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
 * required when we change both the protection of the mapping from read-only
 * to read-write and the pfn (as during copy-on-write page faults). Otherwise
 * the old page would remain mapped read-only in the secondary MMUs after the
 * new page is already writable by some CPU through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
({									\
	struct mm_struct *___mm = __mm;					\
	unsigned long ___address = __address;				\
	pte_t ___pte = __pte;						\
									\
	mmu_notifier_change_pte(___mm, ___address, ___pte);		\
	set_pte_at(___mm, ___address, __ptep, ___pte);			\
})

#else /* CONFIG_MMU_NOTIFIER */

struct mmu_notifier_range {
	unsigned long start;
	unsigned long end;
};

static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range,
					    unsigned long start,
					    unsigned long end)
{
	range->start = start;
	range->end = end;
}

#define mmu_notifier_range_init(range, event, flags, vma, mm, start, end) \
	_mmu_notifier_range_init(range, start, end)
#define mmu_notifier_range_init_migrate(range, flags, vma, mm, start, end, \
					pgmap) \
	_mmu_notifier_range_init(range, start, end)

static inline bool
mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
{
	return true;
}

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return 0;
}

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
	return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
					  unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
					   unsigned long address, pte_t pte)
{
}

static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
}

static inline int
mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
{
	return 0;
}

static inline void
mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
}

static inline void
mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
						 unsigned long start,
						 unsigned long end)
{
}

static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
{
}

#define mmu_notifier_range_update_to_read_only(r) false

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define set_pte_at_notify set_pte_at

static inline void mmu_notifier_synchronize(void)
{
}

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */