/*
 * Copyright © 2012-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

struct i915_mm_struct {
	struct mm_struct *mm;
	struct drm_i915_private *i915;
	struct i915_mmu_notifier *mn;
	struct hlist_node node;
	struct kref kref;
	struct work_struct work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
	spinlock_t lock;
	struct hlist_node node;
	struct mmu_notifier mn;
	struct rb_root_cached objects;
	struct i915_mm_struct *mm;
};

struct i915_mmu_object {
	struct i915_mmu_notifier *mn;
	struct drm_i915_gem_object *obj;
	struct interval_tree_node it;
};

static void add_object(struct i915_mmu_object *mo)
{
	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
	interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
	if (RB_EMPTY_NODE(&mo->it.rb))
		return;

	interval_tree_remove(&mo->it, &mo->mn->objects);
	RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
	struct i915_mmu_object *mo = obj->userptr.mmu_object;

	/*
	 * During mm_invalidate_range we need to cancel any userptr that
	 * overlaps the range being invalidated. Doing so requires the
	 * struct_mutex, and that risks recursion. In order to cause
	 * recursion, the user must alias the userptr address space with
	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
	 * to invalidate that mmapping, mm_invalidate_range is called with
	 * the userptr address *and* the struct_mutex held. To prevent that
	 * we set a flag under the i915_mmu_notifier spinlock to indicate
	 * whether this object is valid.
	 */
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	if (value)
		add_object(mo);
	else
		del_object(mo);
	spin_unlock(&mo->mn->lock);
}

static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
				  const struct mmu_notifier_range *range)
{
	struct i915_mmu_notifier *mn =
		container_of(_mn, struct i915_mmu_notifier, mn);
	struct interval_tree_node *it;
	struct mutex *unlock = NULL;
	unsigned long end;
	int ret = 0;

	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
		return 0;

	/* interval ranges are inclusive, but invalidate range is exclusive */
	end = range->end - 1;

	spin_lock(&mn->lock);
	it = interval_tree_iter_first(&mn->objects, range->start, end);
	while (it) {
		struct drm_i915_gem_object *obj;

		if (!mmu_notifier_range_blockable(range)) {
			ret = -EAGAIN;
			break;
		}

		/*
		 * The mmu_object is released late when destroying the
		 * GEM object so it is entirely possible to gain a
		 * reference on an object in the process of being freed
		 * since our serialisation is via the spinlock and not
		 * the struct_mutex - and consequently use it after it
		 * is freed and then double free it. To prevent that
		 * use-after-free we only acquire a reference on the
		 * object if it is not in the process of being destroyed.
		 */
		obj = container_of(it, struct i915_mmu_object, it)->obj;
		if (!kref_get_unless_zero(&obj->base.refcount)) {
			it = interval_tree_iter_next(it, range->start, end);
			continue;
		}
		spin_unlock(&mn->lock);

		if (!unlock) {
			unlock = &mn->mm->i915->drm.struct_mutex;

			switch (mutex_trylock_recursive(unlock)) {
			default:
			case MUTEX_TRYLOCK_FAILED:
				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
					i915_gem_object_put(obj);
					return -EINTR;
				}
				/* fall through */
			case MUTEX_TRYLOCK_SUCCESS:
				break;

			case MUTEX_TRYLOCK_RECURSIVE:
				unlock = ERR_PTR(-EEXIST);
				break;
			}
		}

		ret = i915_gem_object_unbind(obj);
		if (ret == 0)
			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
		i915_gem_object_put(obj);
		if (ret)
			goto unlock;

		spin_lock(&mn->lock);

		/*
		 * As we do not (yet) protect the mmu from concurrent insertion
		 * over this range, there is no guarantee that this search will
		 * terminate given a pathologic workload.
		 */
		it = interval_tree_iter_first(&mn->objects, range->start, end);
	}
	spin_unlock(&mn->lock);

unlock:
	if (!IS_ERR_OR_NULL(unlock))
		mutex_unlock(unlock);

	return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
	.invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;

	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
	if (mn == NULL)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&mn->lock);
	mn->mn.ops = &i915_gem_userptr_notifier;
	mn->objects = RB_ROOT_CACHED;
	mn->mm = mm;

	return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
	struct i915_mmu_object *mo;

	mo = fetch_and_zero(&obj->userptr.mmu_object);
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	del_object(mo);
	spin_unlock(&mo->mn->lock);
	kfree(mo);
}

static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;
	int err = 0;

	mn = mm->mn;
	if (mn)
		return mn;

	mn = i915_mmu_notifier_create(mm);
	if (IS_ERR(mn))
		err = PTR_ERR(mn);

	down_write(&mm->mm->mmap_sem);
	mutex_lock(&mm->i915->mm_lock);
	if (mm->mn == NULL && !err) {
		/* Protected by mmap_sem (write-lock) */
		err = __mmu_notifier_register(&mn->mn, mm->mm);
		if (!err) {
			/* Protected by mm_lock */
			mm->mn = fetch_and_zero(&mn);
		}
	} else if (mm->mn) {
		/*
		 * Someone else raced and successfully installed the mmu
		 * notifier, we can cancel our own errors.
		 */
		err = 0;
	}
	mutex_unlock(&mm->i915->mm_lock);
	up_write(&mm->mm->mmap_sem);

	if (mn && !IS_ERR(mn))
		kfree(mn);

	return err ? ERR_PTR(err) : mm->mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	struct i915_mmu_notifier *mn;
	struct i915_mmu_object *mo;

	if (flags & I915_USERPTR_UNSYNCHRONIZED)
		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

	if (WARN_ON(obj->userptr.mm == NULL))
		return -EINVAL;

	mn = i915_mmu_notifier_find(obj->userptr.mm);
	if (IS_ERR(mn))
		return PTR_ERR(mn);

	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
	if (!mo)
		return -ENOMEM;

	mo->mn = mn;
	mo->obj = obj;
	mo->it.start = obj->userptr.ptr;
	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
	RB_CLEAR_NODE(&mo->it.rb);

	obj->userptr.mmu_object = mo;
	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
	if (mn == NULL)
		return;

	mmu_notifier_unregister(&mn->mn, mm);
	kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
		return -ENODEV;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
}

#endif

static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
{
	struct i915_mm_struct *mm;

	/* Protected by dev_priv->mm_lock */
	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
		if (mm->mm == real)
			return mm;

	return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_mm_struct *mm;
	int ret = 0;

	/* During release of the GEM object we hold the struct_mutex. This
	 * precludes us from calling mmput() at that time as that may be
	 * the last reference and so call exit_mmap(). exit_mmap() will
	 * attempt to reap the vma, and if we were holding a GTT mmap
	 * would then call drm_gem_vm_close() and attempt to reacquire
	 * the struct_mutex. So in order to avoid that recursion, we have
	 * to defer releasing the mm reference until after we drop the
	 * struct_mutex, i.e. we need to schedule a worker to do the clean
	 * up.
	 */
	mutex_lock(&dev_priv->mm_lock);
	mm = __i915_mm_struct_find(dev_priv, current->mm);
	if (mm == NULL) {
		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
		if (mm == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		kref_init(&mm->kref);
		mm->i915 = to_i915(obj->base.dev);

		mm->mm = current->mm;
		mmgrab(current->mm);

		mm->mn = NULL;

		/* Protected by dev_priv->mm_lock */
		hash_add(dev_priv->mm_structs,
			 &mm->node, (unsigned long)mm->mm);
	} else
		kref_get(&mm->kref);

	obj->userptr.mm = mm;
out:
	mutex_unlock(&dev_priv->mm_lock);
	return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
	i915_mmu_notifier_free(mm->mn, mm->mm);
	mmdrop(mm->mm);
	kfree(mm);
}

static void
__i915_mm_struct_free(struct kref *kref)
{
	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

	/* Protected by dev_priv->mm_lock */
	hash_del(&mm->node);
	mutex_unlock(&mm->i915->mm_lock);

	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
	queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mm == NULL)
		return;

	kref_put_mutex(&obj->userptr.mm->kref,
		       __i915_mm_struct_free,
		       &to_i915(obj->base.dev)->mm_lock);
	obj->userptr.mm = NULL;
}

struct get_pages_work {
	struct work_struct work;
	struct drm_i915_gem_object *obj;
	struct task_struct *task;
};

static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
			       struct page **pvec, int num_pages)
{
	unsigned int max_segment = i915_sg_segment_size();
	struct sg_table *st;
	unsigned int sg_page_sizes;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

alloc_table:
	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
					  0, num_pages << PAGE_SHIFT,
					  max_segment,
					  GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		sg_free_table(st);

		if (max_segment > PAGE_SIZE) {
			max_segment = PAGE_SIZE;
			goto alloc_table;
		}

		kfree(st);
		return ERR_PTR(ret);
	}

	sg_page_sizes = i915_sg_page_sizes(st->sgl);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return st;
}

static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
	struct get_pages_work *work = container_of(_work, typeof(*work), work);
	struct drm_i915_gem_object *obj = work->obj;
	const int npages = obj->base.size >> PAGE_SHIFT;
	struct page **pvec;
	int pinned, ret;

	ret = -ENOMEM;
	pinned = 0;

	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	if (pvec != NULL) {
		struct mm_struct *mm = obj->userptr.mm->mm;
		unsigned int flags = 0;

		if (!i915_gem_object_is_readonly(obj))
			flags |= FOLL_WRITE;

		ret = -EFAULT;
		if (mmget_not_zero(mm)) {
			down_read(&mm->mmap_sem);
			while (pinned < npages) {
				ret = get_user_pages_remote
					(work->task, mm,
					 obj->userptr.ptr + pinned * PAGE_SIZE,
					 npages - pinned,
					 flags,
					 pvec + pinned, NULL, NULL);
				if (ret < 0)
					break;

				pinned += ret;
			}
			up_read(&mm->mmap_sem);
			mmput(mm);
		}
	}

	mutex_lock(&obj->mm.lock);
	if (obj->userptr.work == &work->work) {
		struct sg_table *pages = ERR_PTR(ret);

		if (pinned == npages) {
			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
							       npages);
			if (!IS_ERR(pages)) {
				pinned = 0;
				pages = NULL;
			}
		}

		obj->userptr.work = ERR_CAST(pages);
		if (IS_ERR(pages))
			__i915_gem_userptr_set_active(obj, false);
	}
	mutex_unlock(&obj->mm.lock);

	release_pages(pvec, pinned);
	kvfree(pvec);

	i915_gem_object_put(obj);
	put_task_struct(work->task);
	kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
	struct get_pages_work *work;

	/* Spawn a worker so that we can acquire the
	 * user pages without holding our mutex. Access
	 * to the user pages requires mmap_sem, and we have
	 * a strict lock ordering of mmap_sem, struct_mutex -
	 * we already hold struct_mutex here and so cannot
	 * call gup without encountering a lock inversion.
	 *
	 * Userspace will keep on repeating the operation
	 * (thanks to EAGAIN) until either we hit the fast
	 * path or the worker completes. If the worker is
	 * cancelled or superseded, the task is still run
	 * but the results ignored. (This leads to
	 * complications that we may have a stray object
	 * refcount that we need to be wary of when
	 * checking for existing objects during creation.)
	 * If the worker encounters an error, it reports
	 * that error back to this function through
	 * obj->userptr.work = ERR_PTR.
	 */
	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (work == NULL)
		return ERR_PTR(-ENOMEM);

	obj->userptr.work = &work->work;

	work->obj = i915_gem_object_get(obj);

	work->task = current;
	get_task_struct(work->task);

	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

	return ERR_PTR(-EAGAIN);
}

static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
	const int num_pages = obj->base.size >> PAGE_SHIFT;
	struct mm_struct *mm = obj->userptr.mm->mm;
	struct page **pvec;
	struct sg_table *pages;
	bool active;
	int pinned;

	/* If userspace should engineer that these pages are replaced in
	 * the vma between us binding this page into the GTT and completion
	 * of rendering... Their loss. If they change the mapping of their
	 * pages they need to create a new bo to point to the new vma.
	 *
	 * However, that still leaves open the possibility of the vma
	 * being copied upon fork. Which falls under the same userspace
	 * synchronisation issue as a regular bo, except that this time
	 * the process may not be expecting that a particular piece of
	 * memory is tied to the GPU.
	 *
	 * Fortunately, we can hook into the mmu_notifier in order to
	 * discard the page references prior to anything nasty happening
	 * to the vma (discard or cloning) which should prevent the more
	 * egregious cases from causing harm.
	 */

	if (obj->userptr.work) {
		/* active flag should still be held for the pending work */
		if (IS_ERR(obj->userptr.work))
			return PTR_ERR(obj->userptr.work);
		else
			return -EAGAIN;
	}

	pvec = NULL;
	pinned = 0;

	if (mm == current->mm) {
		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
				      GFP_KERNEL |
				      __GFP_NORETRY |
				      __GFP_NOWARN);
		if (pvec) /* defer to worker if malloc fails */
			pinned = __get_user_pages_fast(obj->userptr.ptr,
						       num_pages,
						       !i915_gem_object_is_readonly(obj),
						       pvec);
	}

	active = false;
	if (pinned < 0) {
		pages = ERR_PTR(pinned);
		pinned = 0;
	} else if (pinned < num_pages) {
		pages = __i915_gem_userptr_get_pages_schedule(obj);
		active = pages == ERR_PTR(-EAGAIN);
	} else {
		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
		active = !IS_ERR(pages);
	}
	if (active)
		__i915_gem_userptr_set_active(obj, true);

	if (IS_ERR(pages))
		release_pages(pvec, pinned);
	kvfree(pvec);

	return PTR_ERR_OR_ZERO(pages);
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	/* Cancel any inflight work and force them to restart their gup */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);
	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		mark_page_accessed(page);
		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	i915_gem_userptr_release__mmu_notifier(obj);
	i915_gem_userptr_release__mm_struct(obj);
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mmu_object)
		return 0;

	return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE |
		 I915_GEM_OBJECT_ASYNC_CANCEL,
	.get_pages = i915_gem_userptr_get_pages,
	.put_pages = i915_gem_userptr_put_pages,
	.dmabuf_export = i915_gem_userptr_dmabuf_export,
	.release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
		       void *data,
		       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_userptr *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
		/* We cannot support coherent userptr objects on hw without
		 * LLC and with broken snooping.
		 */
		return -ENODEV;
	}

	if (args->flags & ~(I915_USERPTR_READ_ONLY |
			    I915_USERPTR_UNSYNCHRONIZED))
		return -EINVAL;

	if (!args->user_size)
		return -EINVAL;

	if (offset_in_page(args->user_ptr | args->user_size))
		return -EINVAL;

	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
		return -EFAULT;

	if (args->flags & I915_USERPTR_READ_ONLY) {
		struct i915_hw_ppgtt *ppgtt;

		/*
		 * On almost all of the older hw, we cannot tell the GPU that
		 * a page is readonly.
		 */
		ppgtt = dev_priv->kernel_context->ppgtt;
		if (!ppgtt || !ppgtt->vm.has_read_only)
			return -ENODEV;
	}

	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return -ENOMEM;

	drm_gem_private_object_init(dev, &obj->base, args->user_size);
	i915_gem_object_init(obj, &i915_gem_userptr_ops);
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	obj->userptr.ptr = args->user_ptr;
	if (args->flags & I915_USERPTR_READ_ONLY)
		i915_gem_object_set_readonly(obj);

	/* And keep a pointer to the current->mm for resolving the user pages
	 * at binding. This means that we need to hook into the mmu_notifier
	 * in order to detect if the mmu is destroyed.
	 */
	ret = i915_gem_userptr_init__mm_struct(obj);
	if (ret == 0)
		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
	if (ret == 0)
		ret = drm_gem_handle_create(file, &obj->base, &handle);

	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
}

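/*
 * Illustrative userspace sketch of the interface described above. This is
 * only an example kept here as a comment, not part of the driver: the
 * variables (drm_fd, ptr, size) are assumed, and error handling is omitted.
 * It shows restriction 1 in practice - both the pointer and the size passed
 * to DRM_IOCTL_I915_GEM_USERPTR must be page aligned - and that the returned
 * handle is then usable like any other GEM handle (e.g. in execbuffer), but
 * not with the CPU access ioctls listed above.
 *
 *	struct drm_i915_gem_userptr arg;
 *	void *ptr;
 *
 *	size = ALIGN(size, 4096);			page-aligned size
 *	posix_memalign(&ptr, 4096, size);		page-aligned pointer
 *
 *	memset(&arg, 0, sizeof(arg));
 *	arg.user_ptr = (uintptr_t)ptr;
 *	arg.user_size = size;
 *	arg.flags = 0;					or I915_USERPTR_READ_ONLY
 *	ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg);
 *
 *	use arg.handle as a regular GEM handle; the CPU keeps writing
 *	through ptr, subject to the synchronisation caveats above.
 */
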
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
	mutex_init(&dev_priv->mm_lock);
	hash_init(dev_priv->mm_structs);

	dev_priv->mm.userptr_wq =
		alloc_workqueue("i915-userptr-acquire",
				WQ_HIGHPRI | WQ_UNBOUND,
				0);
	if (!dev_priv->mm.userptr_wq)
		return -ENOMEM;

	return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
	destroy_workqueue(dev_priv->mm.userptr_wq);
}