/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_dmabuf.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

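/*
 * Reserve a page-aligned node in the mappable range of the global GTT.
 * The pread/pwrite slow paths below use this to temporarily map single
 * pages of an object when the object itself cannot be pinned into the
 * mappable aperture.
 */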
static int
insert_mappable_node(struct drm_i915_private *i915,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
						   size, 0, 0, 0,
						   i915->ggtt.mappable_end,
						   DRM_MM_SEARCH_DEFAULT,
						   DRM_MM_CREATE_DEFAULT);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

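/*
 * Wait (bounded to 10 seconds) for any pending GPU reset to complete, so
 * that callers about to take struct_mutex do not race with the reset
 * handler.
 */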
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

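/*
 * Backing-store handlers for "phys" objects: on get_pages the object's
 * shmem pages are copied into a single contiguous DMA buffer, and on
 * put_pages the (dirty) contents are copied back out to the shmem pages.
 */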
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

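/*
 * Unbind every VMA belonging to the object, stopping at the first VMA
 * that cannot be unbound. VMAs are moved onto a temporary list as they
 * are processed and spliced back afterwards.
 */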
int
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
	int ret = 0;

	/* The vma will only be freed if it is marked as closed, and if we wait
	 * upon rendering to the vma, we may unbind anything in the list.
	 */
	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

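/*
 * Convert an object to a contiguous "phys" object: drop any GTT bindings
 * and shmem pages, allocate a physically contiguous buffer with the
 * requested alignment and repopulate it via the phys ops above.
 */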
int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = i915_gem_object_unbind(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_put_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

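/*
 * pwrite into a phys object: copy the user data straight into the
 * contiguous backing storage, falling back to a non-atomic copy (with
 * struct_mutex dropped) if the atomic copy faults, then clflush the range.
 */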
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(dev));

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put_unlocked(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

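/*
 * Copy helpers that account for bit-17 swizzling while moving data between
 * the user buffer and the kmapped object pages, working in cacheline sized
 * chunks and XORing the object offset with 64 to swap channels.
 */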
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
		return -EINVAL;

	ret = i915_gem_object_wait_rendering(obj, true);
	if (ret)
		return ret;

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

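/*
 * Copy to/from a page of the GTT aperture through a non-atomic WC mapping.
 * This may fault and sleep, so callers drop struct_mutex first.
 */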
static inline unsigned long
slow_user_access(struct io_mapping *mapping,
		 uint64_t page_base, int page_offset,
		 char __user *user_data,
		 unsigned long length, bool pwrite)
{
	void __iomem *ioaddr;
	void *vaddr;
	uint64_t unwritten;

	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force *)ioaddr + page_offset;
	if (pwrite)
		unwritten = __copy_from_user(vaddr, user_data, length);
	else
		unwritten = __copy_to_user(user_data, vaddr, length);

	io_mapping_unmap(ioaddr);
	return unwritten;
}

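/*
 * pread through the GTT: used as a fallback when the object has no struct
 * pages or the shmem path faulted. The object is mapped into the mappable
 * aperture, either in its entirety or page by page via a temporary GTT
 * node, and copied out with struct_mutex dropped.
 */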
static int
i915_gem_gtt_pread(struct drm_device *dev,
		   struct drm_i915_gem_object *obj, uint64_t size,
		   uint64_t data_offset, uint64_t data_ptr)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_mm_node node;
	char __user *user_data;
	uint64_t remain;
	uint64_t offset;
	int ret;

	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (ret) {
		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = i915_gem_obj_ggtt_offset(obj);
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(data_ptr);
	remain = size;
	offset = data_offset;

	mutex_unlock(&dev->struct_mutex);
	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_writeable(user_data, remain);
		if (ret) {
			mutex_lock(&dev->struct_mutex);
			goto out_unpin;
		}
	}

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start,
					       I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* This is a slow read/write as it tries to read from
		 * and write to user memory, which may result in page
		 * faults, and so we cannot perform this under struct_mutex.
		 */
		if (slow_user_access(ggtt->mappable, page_base,
				     page_offset, user_data,
				     page_length, false)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&dev->struct_mutex);
	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
		/* The user has modified the object whilst we tried
		 * reading from it, and we now have no idea what domain
		 * the pages should be in. As we have just been touching
		 * them directly, flush everything back to the GTT
		 * domain.
		 */
		ret = i915_gem_object_set_to_gtt_domain(obj, false);
	}

out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size,
				       true);
		i915_gem_object_unpin_pages(obj);
		remove_mappable_node(&node);
	} else {
		i915_gem_object_ggtt_unpin(obj);
	}
out:
	return ret;
}

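/*
 * pread via the object's shmem pages: try the atomic kmap fastpath per
 * page and fall back to the slow path (dropping struct_mutex and
 * prefaulting the user buffer) when it fails or bit-17 swizzling is
 * required.
 */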
static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int prefaulted = 0;
	int needs_clflush = 0;
	struct sg_page_iter sg_iter;

	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);

		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

	/* pread for non shmem backed objects */
	if (ret == -EFAULT || ret == -ENODEV) {
		intel_runtime_pm_get(to_i915(dev));
		ret = i915_gem_gtt_pread(dev, obj, args->size,
					 args->offset, args->data_ptr);
		intel_runtime_pm_put(to_i915(dev));
	}

out:
	i915_gem_object_put(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @i915: i915 device private data
 * @obj: i915 gem object
 * @args: pwrite arguments structure
 * @file: drm file pointer
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_device *dev = obj->base.dev;
	struct drm_mm_node node;
	uint64_t remain, offset;
	char __user *user_data;
	int ret;
	bool hit_slow_path = false;

	if (obj->tiling_mode != I915_TILING_NONE)
		return -EFAULT;

	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret) {
		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = i915_gem_obj_ggtt_offset(obj);
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
	obj->dirty = true;

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (fast_user_write(ggtt->mappable, page_base,
				    page_offset, user_data, page_length)) {
			hit_slow_path = true;
			mutex_unlock(&dev->struct_mutex);
			if (slow_user_access(ggtt->mappable,
					     page_base,
					     page_offset, user_data,
					     page_length, true)) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto out_flush;
			}

			mutex_lock(&dev->struct_mutex);
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	if (hit_slow_path) {
		if (ret == 0 &&
		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
			/* The user has modified the object whilst we tried
			 * reading from it, and we now have no idea what domain
			 * the pages should be in. As we have just been touching
			 * them directly, flush everything back to the GTT
			 * domain.
			 */
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
		}
	}

	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->base.clear_range(&ggtt->base,
				       node.start, node.size,
				       true);
		i915_gem_object_unpin_pages(obj);
		remove_mappable_node(&node);
	} else {
		i915_gem_object_ggtt_unpin(obj);
	}
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

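/*
 * pwrite via the object's shmem pages, mirroring the pread path above:
 * per-page atomic fastpath with a slow path fallback, plus the extra
 * clflushes needed around partially written cachelines when the object is
 * not coherent with the CPU cache.
 */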
static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	struct sg_page_iter sg_iter;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT) {
		struct page *page = sg_page_iter_page(&sg_iter);
		int partial_cacheline_write;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(to_i915(dev));
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
		else
			ret = -ENODEV;
	}

out:
	i915_gem_object_put(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 * @obj: i915 gem object
 * @readonly: waiting for read access or write
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct reservation_object *resv;
	struct i915_gem_active *active;
	unsigned long active_mask;
	int idx, ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!readonly) {
		active = obj->last_read;
		active_mask = i915_gem_object_get_active(obj);
	} else {
		active_mask = 1;
		active = &obj->last_write;
	}

	for_each_active(active_mask, idx) {
		ret = i915_gem_active_wait(&active[idx],
					   &obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	resv = i915_gem_object_get_dmabuf_resv(obj);
	if (resv) {
		long err;

		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
							  MAX_SCHEDULE_TIMEOUT);
		if (err < 0)
			return err;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
	struct i915_gem_active *active;
	unsigned long active_mask;
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	active_mask = i915_gem_object_get_active(obj);
	if (!active_mask)
		return 0;

	if (!readonly) {
		active = obj->last_read;
	} else {
		active_mask = 1;
		active = &obj->last_write;
	}

	for_each_active(active_mask, i) {
		struct drm_i915_gem_request *req;

		req = i915_gem_active_get(&active[i],
					  &obj->base.dev->struct_mutex);
		if (req)
			requests[n++] = req;
	}

	mutex_unlock(&dev->struct_mutex);
	ret = 0;
	for (i = 0; ret == 0 && i < n; i++)
		ret = i915_wait_request(requests[i], true, NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++)
		i915_gem_request_put(requests[i]);

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

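/*
 * Pick the frontbuffer invalidation origin for a CPU-initiated write:
 * writes through a WC mmap are treated as ORIGIN_CPU even when the target
 * domain is the GTT.
 */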
aeecc969
CW
1443static enum fb_op_origin
1444write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1445{
1446 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
1447 ORIGIN_GTT : ORIGIN_CPU;
1448}
1449
673a394b 1450/**
2ef7eeaa
EA
1451 * Called when user space prepares to use an object with the CPU, either
1452 * through the mmap ioctl's mapping or a GTT mapping.
14bb2c11
TU
1453 * @dev: drm device
1454 * @data: ioctl data blob
1455 * @file: drm file
673a394b
EA
1456 */
1457int
1458i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
05394f39 1459 struct drm_file *file)
673a394b
EA
1460{
1461 struct drm_i915_gem_set_domain *args = data;
05394f39 1462 struct drm_i915_gem_object *obj;
2ef7eeaa
EA
1463 uint32_t read_domains = args->read_domains;
1464 uint32_t write_domain = args->write_domain;
673a394b
EA
1465 int ret;
1466
2ef7eeaa 1467 /* Only handle setting domains to types used by the CPU. */
21d509e3 1468 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1469 return -EINVAL;
1470
21d509e3 1471 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1472 return -EINVAL;
1473
1474 /* Having something in the write domain implies it's in the read
1475 * domain, and only that read domain. Enforce that in the request.
1476 */
1477 if (write_domain != 0 && read_domains != write_domain)
1478 return -EINVAL;
1479
76c1dec1 1480 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1481 if (ret)
76c1dec1 1482 return ret;
1d7cfea1 1483
03ac0642
CW
1484 obj = i915_gem_object_lookup(file, args->handle);
1485 if (!obj) {
1d7cfea1
CW
1486 ret = -ENOENT;
1487 goto unlock;
76c1dec1 1488 }
673a394b 1489
3236f57a
CW
1490 /* Try to flush the object off the GPU without holding the lock.
1491 * We will repeat the flush holding the lock in the normal manner
1492 * to catch cases where we are gazumped.
1493 */
6e4930f6 1494 ret = i915_gem_object_wait_rendering__nonblocking(obj,
2e1b8730 1495 to_rps_client(file),
6e4930f6 1496 !write_domain);
3236f57a
CW
1497 if (ret)
1498 goto unref;
1499
43566ded 1500 if (read_domains & I915_GEM_DOMAIN_GTT)
2ef7eeaa 1501 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
43566ded 1502 else
e47c68e9 1503 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa 1504
031b698a 1505 if (write_domain != 0)
aeecc969 1506 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
031b698a 1507
3236f57a 1508unref:
f8c417cd 1509 i915_gem_object_put(obj);
1d7cfea1 1510unlock:
673a394b
EA
1511 mutex_unlock(&dev->struct_mutex);
1512 return ret;
1513}
1514
1515/**
1516 * Called when user space has done writes to this buffer
14bb2c11
TU
1517 * @dev: drm device
1518 * @data: ioctl data blob
1519 * @file: drm file
673a394b
EA
1520 */
1521int
1522i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
05394f39 1523 struct drm_file *file)
673a394b
EA
1524{
1525 struct drm_i915_gem_sw_finish *args = data;
05394f39 1526 struct drm_i915_gem_object *obj;
673a394b
EA
1527 int ret = 0;
1528
76c1dec1 1529 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1530 if (ret)
76c1dec1 1531 return ret;
1d7cfea1 1532
03ac0642
CW
1533 obj = i915_gem_object_lookup(file, args->handle);
1534 if (!obj) {
1d7cfea1
CW
1535 ret = -ENOENT;
1536 goto unlock;
673a394b
EA
1537 }
1538
673a394b 1539 /* Pinned buffers may be scanout, so flush the cache */
2c22569b 1540 if (obj->pin_display)
e62b59e4 1541 i915_gem_object_flush_cpu_write_domain(obj);
e47c68e9 1542
f8c417cd 1543 i915_gem_object_put(obj);
1d7cfea1 1544unlock:
673a394b
EA
1545 mutex_unlock(&dev->struct_mutex);
1546 return ret;
1547}
1548
1549/**
14bb2c11
TU
1550 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1551 * it is mapped to.
1552 * @dev: drm device
1553 * @data: ioctl data blob
1554 * @file: drm file
673a394b
EA
1555 *
1556 * While the mapping holds a reference on the contents of the object, it doesn't
1557 * imply a ref on the object itself.
34367381
DV
1558 *
1559 * IMPORTANT:
1560 *
1561 * DRM driver writers who look a this function as an example for how to do GEM
1562 * mmap support, please don't implement mmap support like here. The modern way
1563 * to implement DRM mmap support is with an mmap offset ioctl (like
1564 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1565 * That way debug tooling like valgrind will understand what's going on, hiding
1566 * the mmap call in a driver private ioctl will break that. The i915 driver only
1567 * does cpu mmaps this way because we didn't know better.
673a394b
EA
1568 */
1569int
1570i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
05394f39 1571 struct drm_file *file)
673a394b
EA
1572{
1573 struct drm_i915_gem_mmap *args = data;
03ac0642 1574 struct drm_i915_gem_object *obj;
673a394b
EA
1575 unsigned long addr;
1576
1816f923
AG
1577 if (args->flags & ~(I915_MMAP_WC))
1578 return -EINVAL;
1579
568a58e5 1580 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1816f923
AG
1581 return -ENODEV;
1582
03ac0642
CW
1583 obj = i915_gem_object_lookup(file, args->handle);
1584 if (!obj)
bf79cb91 1585 return -ENOENT;
673a394b 1586
1286ff73
DV
1587 /* prime objects have no backing filp to GEM mmap
1588 * pages from.
1589 */
03ac0642 1590 if (!obj->base.filp) {
34911fd3 1591 i915_gem_object_put_unlocked(obj);
1286ff73
DV
1592 return -EINVAL;
1593 }
1594
03ac0642 1595 addr = vm_mmap(obj->base.filp, 0, args->size,
673a394b
EA
1596 PROT_READ | PROT_WRITE, MAP_SHARED,
1597 args->offset);
1816f923
AG
1598 if (args->flags & I915_MMAP_WC) {
1599 struct mm_struct *mm = current->mm;
1600 struct vm_area_struct *vma;
1601
80a89a5e 1602 if (down_write_killable(&mm->mmap_sem)) {
34911fd3 1603 i915_gem_object_put_unlocked(obj);
80a89a5e
MH
1604 return -EINTR;
1605 }
1816f923
AG
1606 vma = find_vma(mm, addr);
1607 if (vma)
1608 vma->vm_page_prot =
1609 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1610 else
1611 addr = -ENOMEM;
1612 up_write(&mm->mmap_sem);
aeecc969
CW
1613
1614 /* This may race, but that's ok, it only gets set */
03ac0642 1615 WRITE_ONCE(obj->has_wc_mmap, true);
1816f923 1616 }
34911fd3 1617 i915_gem_object_put_unlocked(obj);
673a394b
EA
1618 if (IS_ERR((void *)addr))
1619 return addr;
1620
1621 args->addr_ptr = (uint64_t) addr;
1622
1623 return 0;
1624}
1625
de151cf6
JB
1626/**
1627 * i915_gem_fault - fault a page into the GTT
d9072a3e
GT
1628 * @vma: VMA in question
1629 * @vmf: fault info
de151cf6
JB
1630 *
1631 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1632 * from userspace. The fault handler takes care of binding the object to
1633 * the GTT (if needed), allocating and programming a fence register (again,
1634 * only if needed based on whether the old reg is still valid or the object
1635 * is tiled) and inserting a new PTE into the faulting process.
1636 *
1637 * Note that the faulting process may involve evicting existing objects
1638 * from the GTT and/or fence registers to make room. So performance may
1639 * suffer if the GTT working set is large or there are few fence registers
1640 * left.
1641 */
1642int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1643{
05394f39
CW
1644 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1645 struct drm_device *dev = obj->base.dev;
72e96d64
JL
1646 struct drm_i915_private *dev_priv = to_i915(dev);
1647 struct i915_ggtt *ggtt = &dev_priv->ggtt;
c5ad54cf 1648 struct i915_ggtt_view view = i915_ggtt_view_normal;
de151cf6
JB
1649 pgoff_t page_offset;
1650 unsigned long pfn;
1651 int ret = 0;
0f973f27 1652 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6 1653
f65c9168
PZ
1654 intel_runtime_pm_get(dev_priv);
1655
de151cf6
JB
1656 /* We don't use vmf->pgoff since that has the fake offset */
1657 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1658 PAGE_SHIFT;
1659
d9bc7e9f
CW
1660 ret = i915_mutex_lock_interruptible(dev);
1661 if (ret)
1662 goto out;
a00b10c3 1663
db53a302
CW
1664 trace_i915_gem_object_fault(obj, page_offset, true, write);
1665
6e4930f6
CW
1666 /* Try to flush the object off the GPU first without holding the lock.
1667 * Upon reacquiring the lock, we will perform our sanity checks and then
1668 * repeat the flush holding the lock in the normal manner to catch cases
1669 * where we are gazumped.
1670 */
1671 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1672 if (ret)
1673 goto unlock;
1674
eb119bd6
CW
1675 /* Access to snoopable pages through the GTT is incoherent. */
1676 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
ddeff6ee 1677 ret = -EFAULT;
eb119bd6
CW
1678 goto unlock;
1679 }
1680
c5ad54cf 1681 /* Use a partial view if the object is bigger than the aperture. */
72e96d64 1682 if (obj->base.size >= ggtt->mappable_end &&
e7ded2d7 1683 obj->tiling_mode == I915_TILING_NONE) {
c5ad54cf 1684 static const unsigned int chunk_size = 256; // 1 MiB
e7ded2d7 1685
c5ad54cf
JL
1686 memset(&view, 0, sizeof(view));
1687 view.type = I915_GGTT_VIEW_PARTIAL;
1688 view.params.partial.offset = rounddown(page_offset, chunk_size);
1689 view.params.partial.size =
1690 min_t(unsigned int,
1691 chunk_size,
1692 (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1693 view.params.partial.offset);
1694 }
1695
1696 /* Now pin it into the GTT if needed */
91b2db6f 1697 ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
c9839303
CW
1698 if (ret)
1699 goto unlock;
4a684a41 1700
c9839303
CW
1701 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1702 if (ret)
1703 goto unpin;
74898d7e 1704
06d98131 1705 ret = i915_gem_object_get_fence(obj);
d9e86c0e 1706 if (ret)
c9839303 1707 goto unpin;
7d1c4804 1708
b90b91d8 1709 /* Finally, remap it using the new GTT offset */
72e96d64 1710 pfn = ggtt->mappable_base +
c5ad54cf 1711 i915_gem_obj_ggtt_offset_view(obj, &view);
f343c5f6 1712 pfn >>= PAGE_SHIFT;
de151cf6 1713
c5ad54cf
JL
1714 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1715 /* Overriding existing pages in partial view does not cause
1716 * us any trouble as TLBs are still valid because the fault
1717 * is due to userspace losing part of the mapping or never
1718 * having accessed it before (at this partials' range).
1719 */
1720 unsigned long base = vma->vm_start +
1721 (view.params.partial.offset << PAGE_SHIFT);
1722 unsigned int i;
b90b91d8 1723
c5ad54cf
JL
1724 for (i = 0; i < view.params.partial.size; i++) {
1725 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
b90b91d8
CW
1726 if (ret)
1727 break;
1728 }
1729
1730 obj->fault_mappable = true;
c5ad54cf
JL
1731 } else {
1732 if (!obj->fault_mappable) {
1733 unsigned long size = min_t(unsigned long,
1734 vma->vm_end - vma->vm_start,
1735 obj->base.size);
1736 int i;
1737
1738 for (i = 0; i < size >> PAGE_SHIFT; i++) {
1739 ret = vm_insert_pfn(vma,
1740 (unsigned long)vma->vm_start + i * PAGE_SIZE,
1741 pfn + i);
1742 if (ret)
1743 break;
1744 }
1745
1746 obj->fault_mappable = true;
1747 } else
1748 ret = vm_insert_pfn(vma,
1749 (unsigned long)vmf->virtual_address,
1750 pfn + page_offset);
1751 }
c9839303 1752unpin:
c5ad54cf 1753 i915_gem_object_ggtt_unpin_view(obj, &view);
c715089f 1754unlock:
de151cf6 1755 mutex_unlock(&dev->struct_mutex);
d9bc7e9f 1756out:
de151cf6 1757 switch (ret) {
d9bc7e9f 1758 case -EIO:
2232f031
DV
1759 /*
1760 * We eat errors when the gpu is terminally wedged to avoid
1761 * userspace unduly crashing (gl has no provisions for mmaps to
1762 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1763 * and so needs to be reported.
1764 */
1765 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
f65c9168
PZ
1766 ret = VM_FAULT_SIGBUS;
1767 break;
1768 }
045e769a 1769 case -EAGAIN:
571c608d
DV
1770 /*
1771 * EAGAIN means the gpu is hung and we'll wait for the error
1772 * handler to reset everything when re-faulting in
1773 * i915_mutex_lock_interruptible.
d9bc7e9f 1774 */
c715089f
CW
1775 case 0:
1776 case -ERESTARTSYS:
bed636ab 1777 case -EINTR:
e79e0fe3
DR
1778 case -EBUSY:
1779 /*
1780 * EBUSY is ok: this just means that another thread
1781 * already did the job.
1782 */
f65c9168
PZ
1783 ret = VM_FAULT_NOPAGE;
1784 break;
de151cf6 1785 case -ENOMEM:
f65c9168
PZ
1786 ret = VM_FAULT_OOM;
1787 break;
a7c2e1aa 1788 case -ENOSPC:
45d67817 1789 case -EFAULT:
f65c9168
PZ
1790 ret = VM_FAULT_SIGBUS;
1791 break;
de151cf6 1792 default:
a7c2e1aa 1793 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
f65c9168
PZ
1794 ret = VM_FAULT_SIGBUS;
1795 break;
de151cf6 1796 }
f65c9168
PZ
1797
1798 intel_runtime_pm_put(dev_priv);
1799 return ret;
de151cf6
JB
1800}
1801
901782b2
CW
1802/**
1803 * i915_gem_release_mmap - remove physical page mappings
1804 * @obj: obj in question
1805 *
af901ca1 1806 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1807 * relinquish ownership of the pages back to the system.
1808 *
1809 * It is vital that we remove the page mapping if we have mapped a tiled
1810 * object through the GTT and then lose the fence register due to
1811 * resource pressure. Similarly if the object has been moved out of the
1812 * aperture, then pages mapped into userspace must be revoked. Removing the
1813 * mapping will then trigger a page fault on the next user access, allowing
1814 * fixup by i915_gem_fault().
1815 */
d05ca301 1816void
05394f39 1817i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1818{
349f2ccf
CW
1819 /* Serialisation between user GTT access and our code depends upon
1820 * revoking the CPU's PTE whilst the mutex is held. The next user
1821 * pagefault then has to wait until we release the mutex.
1822 */
1823 lockdep_assert_held(&obj->base.dev->struct_mutex);
1824
6299f992
CW
1825 if (!obj->fault_mappable)
1826 return;
901782b2 1827
6796cb16
DH
1828 drm_vma_node_unmap(&obj->base.vma_node,
1829 obj->base.dev->anon_inode->i_mapping);
349f2ccf
CW
1830
1831 /* Ensure that the CPU's PTE are revoked and there are not outstanding
1832 * memory transactions from userspace before we return. The TLB
1833 * flushing implied above by changing the PTE above *should* be
1834 * sufficient, an extra barrier here just provides us with a bit
1835 * of paranoid documentation about our requirement to serialise
1836 * memory writes before touching registers / GSM.
1837 */
1838 wmb();
1839
6299f992 1840 obj->fault_mappable = false;
901782b2
CW
1841}
1842
eedd10f4
CW
1843void
1844i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1845{
1846 struct drm_i915_gem_object *obj;
1847
1848 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1849 i915_gem_release_mmap(obj);
1850}
1851
ad1a7d20
CW
1852/**
1853 * i915_gem_get_ggtt_size - return required global GTT size for an object
a9f1481f 1854 * @dev_priv: i915 device
ad1a7d20
CW
1855 * @size: object size
1856 * @tiling_mode: tiling mode
1857 *
1858 * Return the required global GTT size for an object, taking into account
1859 * potential fence register mapping.
1860 */
a9f1481f
CW
1861u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
1862 u64 size, int tiling_mode)
92b88aeb 1863{
ad1a7d20 1864 u64 ggtt_size;
92b88aeb 1865
ad1a7d20
CW
1866 GEM_BUG_ON(size == 0);
1867
a9f1481f 1868 if (INTEL_GEN(dev_priv) >= 4 ||
e28f8711
CW
1869 tiling_mode == I915_TILING_NONE)
1870 return size;
92b88aeb
CW
1871
1872 /* Previous chips need a power-of-two fence region when tiling */
a9f1481f 1873 if (IS_GEN3(dev_priv))
ad1a7d20 1874 ggtt_size = 1024*1024;
92b88aeb 1875 else
ad1a7d20 1876 ggtt_size = 512*1024;
92b88aeb 1877
ad1a7d20
CW
1878 while (ggtt_size < size)
1879 ggtt_size <<= 1;
92b88aeb 1880
ad1a7d20 1881 return ggtt_size;
92b88aeb
CW
1882}
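
/* Illustrative numbers for the rounding above (assumed sizes, not from a
 * specific machine): on gen3 a 700 KiB tiled object needs the minimum 1 MiB
 * fence region, a 1.5 MiB tiled object rounds up to 2 MiB, while on gen4+
 * or for untiled objects the object size is used as-is.
 */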
1883
de151cf6 1884/**
ad1a7d20 1885 * i915_gem_get_ggtt_alignment - return required global GTT alignment
a9f1481f 1886 * @dev_priv: i915 device
14bb2c11
TU
1887 * @size: object size
1888 * @tiling_mode: tiling mode
ad1a7d20 1889 * @fenced: is fenced alignment required or not
de151cf6 1890 *
ad1a7d20 1891 * Return the required global GTT alignment for an object, taking into account
5e783301 1892 * potential fence register mapping.
de151cf6 1893 */
a9f1481f 1894u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
ad1a7d20 1895 int tiling_mode, bool fenced)
de151cf6 1896{
ad1a7d20
CW
1897 GEM_BUG_ON(size == 0);
1898
de151cf6
JB
1899 /*
1900 * Minimum alignment is 4k (GTT page size), but might be greater
1901 * if a fence register is needed for the object.
1902 */
a9f1481f 1903 if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
e28f8711 1904 tiling_mode == I915_TILING_NONE)
de151cf6
JB
1905 return 4096;
1906
a00b10c3
CW
1907 /*
1908 * Previous chips need to be aligned to the size of the smallest
1909 * fence register that can contain the object.
1910 */
a9f1481f 1911 return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
a00b10c3
CW
1912}
1913
d8cb5086
CW
1914static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1915{
fac5e23e 1916 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
d8cb5086
CW
1917 int ret;
1918
da494d7c
DV
1919 dev_priv->mm.shrinker_no_lock_stealing = true;
1920
d8cb5086
CW
1921 ret = drm_gem_create_mmap_offset(&obj->base);
1922 if (ret != -ENOSPC)
da494d7c 1923 goto out;
d8cb5086
CW
1924
1925 /* Badly fragmented mmap space? The only way we can recover
1926 * space is by destroying unwanted objects. We can't randomly release
1927 * mmap_offsets as userspace expects them to be persistent for the
1928 * lifetime of the objects. The closest we can do is to release the
1929 * offsets on purgeable objects by truncating them and marking them purged,
1930 * which prevents userspace from ever using those objects again.
1931 */
21ab4e74
CW
1932 i915_gem_shrink(dev_priv,
1933 obj->base.size >> PAGE_SHIFT,
1934 I915_SHRINK_BOUND |
1935 I915_SHRINK_UNBOUND |
1936 I915_SHRINK_PURGEABLE);
d8cb5086
CW
1937 ret = drm_gem_create_mmap_offset(&obj->base);
1938 if (ret != -ENOSPC)
da494d7c 1939 goto out;
d8cb5086
CW
1940
1941 i915_gem_shrink_all(dev_priv);
da494d7c
DV
1942 ret = drm_gem_create_mmap_offset(&obj->base);
1943out:
1944 dev_priv->mm.shrinker_no_lock_stealing = false;
1945
1946 return ret;
d8cb5086
CW
1947}
1948
1949static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1950{
d8cb5086
CW
1951 drm_gem_free_mmap_offset(&obj->base);
1952}
1953
da6b51d0 1954int
ff72145b
DA
1955i915_gem_mmap_gtt(struct drm_file *file,
1956 struct drm_device *dev,
da6b51d0 1957 uint32_t handle,
ff72145b 1958 uint64_t *offset)
de151cf6 1959{
05394f39 1960 struct drm_i915_gem_object *obj;
de151cf6
JB
1961 int ret;
1962
76c1dec1 1963 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1964 if (ret)
76c1dec1 1965 return ret;
de151cf6 1966
03ac0642
CW
1967 obj = i915_gem_object_lookup(file, handle);
1968 if (!obj) {
1d7cfea1
CW
1969 ret = -ENOENT;
1970 goto unlock;
1971 }
de151cf6 1972
05394f39 1973 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 1974 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
8c99e57d 1975 ret = -EFAULT;
1d7cfea1 1976 goto out;
ab18282d
CW
1977 }
1978
d8cb5086
CW
1979 ret = i915_gem_object_create_mmap_offset(obj);
1980 if (ret)
1981 goto out;
de151cf6 1982
0de23977 1983 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
de151cf6 1984
1d7cfea1 1985out:
f8c417cd 1986 i915_gem_object_put(obj);
1d7cfea1 1987unlock:
de151cf6 1988 mutex_unlock(&dev->struct_mutex);
1d7cfea1 1989 return ret;
de151cf6
JB
1990}
1991
ff72145b
DA
1992/**
1993 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1994 * @dev: DRM device
1995 * @data: GTT mapping ioctl data
1996 * @file: GEM object info
1997 *
1998 * Simply returns the fake offset to userspace so it can mmap it.
1999 * The mmap call will end up in drm_gem_mmap(), which will set things
2000 * up so we can get faults in the handler above.
2001 *
2002 * The fault handler will take care of binding the object into the GTT
2003 * (since it may have been evicted to make room for something), allocating
2004 * a fence register, and mapping the appropriate aperture address into
2005 * userspace.
2006 */
2007int
2008i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2009 struct drm_file *file)
2010{
2011 struct drm_i915_gem_mmap_gtt *args = data;
2012
da6b51d0 2013 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
ff72145b
DA
2014}
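
/* For illustration, a sketch of how userspace typically consumes the fake
 * offset (libdrm-style; error handling omitted; fd, handle and size are
 * assumed to come from earlier setup):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 *
 * CPU accesses through ptr are what arrive in i915_gem_fault() above.
 */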
2015
225067ee
DV
2016/* Immediately discard the backing storage */
2017static void
2018i915_gem_object_truncate(struct drm_i915_gem_object *obj)
e5281ccd 2019{
4d6294bf 2020 i915_gem_object_free_mmap_offset(obj);
1286ff73 2021
4d6294bf
CW
2022 if (obj->base.filp == NULL)
2023 return;
e5281ccd 2024
225067ee
DV
2025 /* Our goal here is to return as much of the memory as
2026 * is possible back to the system as we are called from OOM.
2027 * To do this we must instruct the shmfs to drop all of its
2028 * backing pages, *now*.
2029 */
5537252b 2030 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
225067ee
DV
2031 obj->madv = __I915_MADV_PURGED;
2032}
e5281ccd 2033
5537252b
CW
2034/* Try to discard unwanted pages */
2035static void
2036i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
225067ee 2037{
5537252b
CW
2038 struct address_space *mapping;
2039
2040 switch (obj->madv) {
2041 case I915_MADV_DONTNEED:
2042 i915_gem_object_truncate(obj);
2043 case __I915_MADV_PURGED:
2044 return;
2045 }
2046
2047 if (obj->base.filp == NULL)
2048 return;
2049
2050 mapping = file_inode(obj->base.filp)->i_mapping;
2051 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
e5281ccd
CW
2052}
2053
5cdf5881 2054static void
05394f39 2055i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 2056{
85d1225e
DG
2057 struct sgt_iter sgt_iter;
2058 struct page *page;
90797e6d 2059 int ret;
1286ff73 2060
05394f39 2061 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 2062
6c085a72 2063 ret = i915_gem_object_set_to_cpu_domain(obj, true);
f4457ae7 2064 if (WARN_ON(ret)) {
6c085a72
CW
2065 /* In the event of a disaster, abandon all caches and
2066 * hope for the best.
2067 */
2c22569b 2068 i915_gem_clflush_object(obj, true);
6c085a72
CW
2069 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2070 }
2071
e2273302
ID
2072 i915_gem_gtt_finish_object(obj);
2073
6dacfd2f 2074 if (i915_gem_object_needs_bit17_swizzle(obj))
280b713b
EA
2075 i915_gem_object_save_bit_17_swizzle(obj);
2076
05394f39
CW
2077 if (obj->madv == I915_MADV_DONTNEED)
2078 obj->dirty = 0;
3ef94daa 2079
85d1225e 2080 for_each_sgt_page(page, sgt_iter, obj->pages) {
05394f39 2081 if (obj->dirty)
9da3da66 2082 set_page_dirty(page);
3ef94daa 2083
05394f39 2084 if (obj->madv == I915_MADV_WILLNEED)
9da3da66 2085 mark_page_accessed(page);
3ef94daa 2086
09cbfeaf 2087 put_page(page);
3ef94daa 2088 }
05394f39 2089 obj->dirty = 0;
673a394b 2090
9da3da66
CW
2091 sg_free_table(obj->pages);
2092 kfree(obj->pages);
37e680a1 2093}
6c085a72 2094
dd624afd 2095int
37e680a1
CW
2096i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2097{
2098 const struct drm_i915_gem_object_ops *ops = obj->ops;
2099
2f745ad3 2100 if (obj->pages == NULL)
37e680a1
CW
2101 return 0;
2102
a5570178
CW
2103 if (obj->pages_pin_count)
2104 return -EBUSY;
2105
15717de2 2106 GEM_BUG_ON(obj->bind_count);
3e123027 2107
a2165e31
CW
2108 /* ->put_pages might need to allocate memory for the bit17 swizzle
2109 * array, hence protect them from being reaped by removing them from gtt
2110 * lists early. */
35c20a60 2111 list_del(&obj->global_list);
a2165e31 2112
0a798eb9 2113 if (obj->mapping) {
fb8621d3
CW
2114 if (is_vmalloc_addr(obj->mapping))
2115 vunmap(obj->mapping);
2116 else
2117 kunmap(kmap_to_page(obj->mapping));
0a798eb9
CW
2118 obj->mapping = NULL;
2119 }
2120
37e680a1 2121 ops->put_pages(obj);
05394f39 2122 obj->pages = NULL;
37e680a1 2123
5537252b 2124 i915_gem_object_invalidate(obj);
6c085a72
CW
2125
2126 return 0;
2127}
2128
37e680a1 2129static int
6c085a72 2130i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
e5281ccd 2131{
fac5e23e 2132 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
e5281ccd
CW
2133 int page_count, i;
2134 struct address_space *mapping;
9da3da66
CW
2135 struct sg_table *st;
2136 struct scatterlist *sg;
85d1225e 2137 struct sgt_iter sgt_iter;
e5281ccd 2138 struct page *page;
90797e6d 2139 unsigned long last_pfn = 0; /* suppress gcc warning */
e2273302 2140 int ret;
6c085a72 2141 gfp_t gfp;
e5281ccd 2142
6c085a72
CW
2143 /* Assert that the object is not currently in any GPU domain. As it
2144 * wasn't in the GTT, there shouldn't be any way it could have been in
2145 * a GPU cache
2146 */
2147 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2148 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2149
9da3da66
CW
2150 st = kmalloc(sizeof(*st), GFP_KERNEL);
2151 if (st == NULL)
2152 return -ENOMEM;
2153
05394f39 2154 page_count = obj->base.size / PAGE_SIZE;
9da3da66 2155 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
9da3da66 2156 kfree(st);
e5281ccd 2157 return -ENOMEM;
9da3da66 2158 }
e5281ccd 2159
9da3da66
CW
2160 /* Get the list of pages out of our struct file. They'll be pinned
2161 * at this point until we release them.
2162 *
2163 * Fail silently without starting the shrinker
2164 */
496ad9aa 2165 mapping = file_inode(obj->base.filp)->i_mapping;
c62d2555 2166 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
d0164adc 2167 gfp |= __GFP_NORETRY | __GFP_NOWARN;
90797e6d
ID
2168 sg = st->sgl;
2169 st->nents = 0;
2170 for (i = 0; i < page_count; i++) {
6c085a72
CW
2171 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2172 if (IS_ERR(page)) {
21ab4e74
CW
2173 i915_gem_shrink(dev_priv,
2174 page_count,
2175 I915_SHRINK_BOUND |
2176 I915_SHRINK_UNBOUND |
2177 I915_SHRINK_PURGEABLE);
6c085a72
CW
2178 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2179 }
2180 if (IS_ERR(page)) {
2181 /* We've tried hard to allocate the memory by reaping
2182 * our own buffer, now let the real VM do its job and
2183 * go down in flames if truly OOM.
2184 */
6c085a72 2185 i915_gem_shrink_all(dev_priv);
f461d1be 2186 page = shmem_read_mapping_page(mapping, i);
e2273302
ID
2187 if (IS_ERR(page)) {
2188 ret = PTR_ERR(page);
6c085a72 2189 goto err_pages;
e2273302 2190 }
6c085a72 2191 }
426729dc
KRW
2192#ifdef CONFIG_SWIOTLB
2193 if (swiotlb_nr_tbl()) {
2194 st->nents++;
2195 sg_set_page(sg, page, PAGE_SIZE, 0);
2196 sg = sg_next(sg);
2197 continue;
2198 }
2199#endif
90797e6d
ID
2200 if (!i || page_to_pfn(page) != last_pfn + 1) {
2201 if (i)
2202 sg = sg_next(sg);
2203 st->nents++;
2204 sg_set_page(sg, page, PAGE_SIZE, 0);
2205 } else {
2206 sg->length += PAGE_SIZE;
2207 }
2208 last_pfn = page_to_pfn(page);
3bbbe706
DV
2209
2210 /* Check that the i965g/gm workaround works. */
2211 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
e5281ccd 2212 }
426729dc
KRW
2213#ifdef CONFIG_SWIOTLB
2214 if (!swiotlb_nr_tbl())
2215#endif
2216 sg_mark_end(sg);
74ce6b6c
CW
2217 obj->pages = st;
2218
e2273302
ID
2219 ret = i915_gem_gtt_prepare_object(obj);
2220 if (ret)
2221 goto err_pages;
2222
6dacfd2f 2223 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
2224 i915_gem_object_do_bit_17_swizzle(obj);
2225
656bfa3a
DV
2226 if (obj->tiling_mode != I915_TILING_NONE &&
2227 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2228 i915_gem_object_pin_pages(obj);
2229
e5281ccd
CW
2230 return 0;
2231
2232err_pages:
90797e6d 2233 sg_mark_end(sg);
85d1225e
DG
2234 for_each_sgt_page(page, sgt_iter, st)
2235 put_page(page);
9da3da66
CW
2236 sg_free_table(st);
2237 kfree(st);
0820baf3
CW
2238
2239 /* shmemfs first checks if there is enough memory to allocate the page
2240 * and reports ENOSPC should there be insufficient, along with the usual
2241 * ENOMEM for a genuine allocation failure.
2242 *
2243 * We use ENOSPC in our driver to mean that we have run out of aperture
2244 * space and so want to translate the error from shmemfs back to our
2245 * usual understanding of ENOMEM.
2246 */
e2273302
ID
2247 if (ret == -ENOSPC)
2248 ret = -ENOMEM;
2249
2250 return ret;
673a394b
EA
2251}
2252
37e680a1
CW
2253/* Ensure that the associated pages are gathered from the backing storage
2254 * and pinned into our object. i915_gem_object_get_pages() may be called
2255 * multiple times before they are released by a single call to
2256 * i915_gem_object_put_pages() - once the pages are no longer referenced
2257 * either as a result of memory pressure (reaping pages under the shrinker)
2258 * or as the object is itself released.
2259 */
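/* A minimal usage sketch of the pairing described above (illustrative only;
 * i915_gem_object_pin_map() below is a real in-tree caller):
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret)
 *		return ret;
 *	i915_gem_object_pin_pages(obj);
 *	... access obj->pages ...
 *	i915_gem_object_unpin_pages(obj);
 */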
2260int
2261i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2262{
fac5e23e 2263 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
37e680a1
CW
2264 const struct drm_i915_gem_object_ops *ops = obj->ops;
2265 int ret;
2266
2f745ad3 2267 if (obj->pages)
37e680a1
CW
2268 return 0;
2269
43e28f09 2270 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 2271 DRM_DEBUG("Attempting to obtain a purgeable object\n");
8c99e57d 2272 return -EFAULT;
43e28f09
CW
2273 }
2274
a5570178
CW
2275 BUG_ON(obj->pages_pin_count);
2276
37e680a1
CW
2277 ret = ops->get_pages(obj);
2278 if (ret)
2279 return ret;
2280
35c20a60 2281 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
ee286370
CW
2282
2283 obj->get_page.sg = obj->pages->sgl;
2284 obj->get_page.last = 0;
2285
37e680a1 2286 return 0;
673a394b
EA
2287}
2288
dd6034c6
DG
2289/* The 'mapping' part of i915_gem_object_pin_map() below */
2290static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2291{
2292 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2293 struct sg_table *sgt = obj->pages;
85d1225e
DG
2294 struct sgt_iter sgt_iter;
2295 struct page *page;
b338fa47
DG
2296 struct page *stack_pages[32];
2297 struct page **pages = stack_pages;
dd6034c6
DG
2298 unsigned long i = 0;
2299 void *addr;
2300
2301 /* A single page can always be kmapped */
2302 if (n_pages == 1)
2303 return kmap(sg_page(sgt->sgl));
2304
b338fa47
DG
2305 if (n_pages > ARRAY_SIZE(stack_pages)) {
2306 /* Too big for stack -- allocate temporary array instead */
2307 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2308 if (!pages)
2309 return NULL;
2310 }
dd6034c6 2311
85d1225e
DG
2312 for_each_sgt_page(page, sgt_iter, sgt)
2313 pages[i++] = page;
dd6034c6
DG
2314
2315 /* Check that we have the expected number of pages */
2316 GEM_BUG_ON(i != n_pages);
2317
2318 addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
2319
b338fa47
DG
2320 if (pages != stack_pages)
2321 drm_free_large(pages);
dd6034c6
DG
2322
2323 return addr;
2324}
2325
2326/* get, pin, and map the pages of the object into kernel space */
0a798eb9
CW
2327void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2328{
2329 int ret;
2330
2331 lockdep_assert_held(&obj->base.dev->struct_mutex);
2332
2333 ret = i915_gem_object_get_pages(obj);
2334 if (ret)
2335 return ERR_PTR(ret);
2336
2337 i915_gem_object_pin_pages(obj);
2338
dd6034c6
DG
2339 if (!obj->mapping) {
2340 obj->mapping = i915_gem_object_map(obj);
2341 if (!obj->mapping) {
0a798eb9
CW
2342 i915_gem_object_unpin_pages(obj);
2343 return ERR_PTR(-ENOMEM);
2344 }
2345 }
2346
2347 return obj->mapping;
2348}
2349
b4716185 2350static void
fa545cbf
CW
2351i915_gem_object_retire__write(struct i915_gem_active *active,
2352 struct drm_i915_gem_request *request)
e2d05a8b 2353{
fa545cbf
CW
2354 struct drm_i915_gem_object *obj =
2355 container_of(active, struct drm_i915_gem_object, last_write);
b4716185 2356
de152b62 2357 intel_fb_obj_flush(obj, true, ORIGIN_CS);
e2d05a8b
BW
2358}
2359
caea7476 2360static void
fa545cbf
CW
2361i915_gem_object_retire__read(struct i915_gem_active *active,
2362 struct drm_i915_gem_request *request)
ce44b0ea 2363{
fa545cbf
CW
2364 int idx = request->engine->id;
2365 struct drm_i915_gem_object *obj =
2366 container_of(active, struct drm_i915_gem_object, last_read[idx]);
ce44b0ea 2367
573adb39 2368 GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
b4716185 2369
573adb39
CW
2370 i915_gem_object_clear_active(obj, idx);
2371 if (i915_gem_object_is_active(obj))
b4716185 2372 return;
caea7476 2373
6c246959
CW
2374 /* Bump our place on the bound list to keep it roughly in LRU order
2375 * so that we don't steal from recently used but inactive objects
2376 * (unless we are forced to ofc!)
2377 */
b0decaf7
CW
2378 if (obj->bind_count)
2379 list_move_tail(&obj->global_list,
2380 &request->i915->mm.bound_list);
caea7476 2381
f8c417cd 2382 i915_gem_object_put(obj);
c8725f3d
CW
2383}
2384
7b4d3a16 2385static bool i915_context_is_banned(const struct i915_gem_context *ctx)
be62acb4 2386{
44e2c070 2387 unsigned long elapsed;
be62acb4 2388
44e2c070 2389 if (ctx->hang_stats.banned)
be62acb4
MK
2390 return true;
2391
7b4d3a16 2392 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
676fa572
CW
2393 if (ctx->hang_stats.ban_period_seconds &&
2394 elapsed <= ctx->hang_stats.ban_period_seconds) {
7b4d3a16
CW
2395 DRM_DEBUG("context hanging too fast, banning!\n");
2396 return true;
be62acb4
MK
2397 }
2398
2399 return false;
2400}
2401
7b4d3a16 2402static void i915_set_reset_status(struct i915_gem_context *ctx,
b6b0fac0 2403 const bool guilty)
aa60c664 2404{
7b4d3a16 2405 struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
44e2c070
MK
2406
2407 if (guilty) {
7b4d3a16 2408 hs->banned = i915_context_is_banned(ctx);
44e2c070
MK
2409 hs->batch_active++;
2410 hs->guilty_ts = get_seconds();
2411 } else {
2412 hs->batch_pending++;
aa60c664
MK
2413 }
2414}
2415
8d9fc7fd 2416struct drm_i915_gem_request *
0bc40be8 2417i915_gem_find_active_request(struct intel_engine_cs *engine)
9375e446 2418{
4db080f9
CW
2419 struct drm_i915_gem_request *request;
2420
f69a02c9
CW
2421 /* We are called by the error capture and reset at a random
2422 * point in time. In particular, note that neither is crucially
2423 * ordered with an interrupt. After a hang, the GPU is dead and we
2424 * assume that no more writes can happen (we waited long enough for
2425 * all writes that were in transaction to be flushed) - adding an
2426 * extra delay for a recent interrupt is pointless. Hence, we do
2427 * not need an engine->irq_seqno_barrier() before the seqno reads.
2428 */
efdf7c06 2429 list_for_each_entry(request, &engine->request_list, link) {
f69a02c9 2430 if (i915_gem_request_completed(request))
4db080f9 2431 continue;
aa60c664 2432
b6b0fac0 2433 return request;
4db080f9 2434 }
b6b0fac0
MK
2435
2436 return NULL;
2437}
2438
7b4d3a16 2439static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
b6b0fac0
MK
2440{
2441 struct drm_i915_gem_request *request;
2442 bool ring_hung;
2443
0bc40be8 2444 request = i915_gem_find_active_request(engine);
b6b0fac0
MK
2445 if (request == NULL)
2446 return;
2447
0bc40be8 2448 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
b6b0fac0 2449
7b4d3a16 2450 i915_set_reset_status(request->ctx, ring_hung);
efdf7c06 2451 list_for_each_entry_continue(request, &engine->request_list, link)
7b4d3a16 2452 i915_set_reset_status(request->ctx, false);
4db080f9 2453}
aa60c664 2454
7b4d3a16 2455static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
4db080f9 2456{
7e37f889 2457 struct intel_ring *ring;
608c1a52 2458
c4b0930b
CW
2459 /* Mark all pending requests as complete so that any concurrent
2460 * (lockless) lookup doesn't try and wait upon the request as we
2461 * reset it.
2462 */
7e37f889 2463 intel_engine_init_seqno(engine, engine->last_submitted_seqno);
c4b0930b 2464
dcb4c12a
OM
2465 /*
2466 * Clear the execlists queue up before freeing the requests, as those
2467 * are the ones that keep the context and ringbuffer backing objects
2468 * pinned in place.
2469 */
dcb4c12a 2470
7de1691a 2471 if (i915.enable_execlists) {
27af5eea
TU
2472 /* Ensure irq handler finishes or is cancelled. */
2473 tasklet_kill(&engine->irq_tasklet);
1197b4f2 2474
e39d42fa 2475 intel_execlists_cancel_requests(engine);
dcb4c12a
OM
2476 }
2477
1d62beea
BW
2478 /*
2479 * We must free the requests after all the corresponding objects have
2480 * been moved off the active lists, which is the same order the normal
2481 * retire_requests path uses. This is important if objects hold
2482 * implicit references on things such as ppgtt address spaces through
2483 * the request.
2484 */
05235c53 2485 if (!list_empty(&engine->request_list)) {
1d62beea
BW
2486 struct drm_i915_gem_request *request;
2487
05235c53
CW
2488 request = list_last_entry(&engine->request_list,
2489 struct drm_i915_gem_request,
efdf7c06 2490 link);
1d62beea 2491
05235c53 2492 i915_gem_request_retire_upto(request);
1d62beea 2493 }
608c1a52
CW
2494
2495 /* Having flushed all requests from all queues, we know that all
2496 * ringbuffers must now be empty. However, since we do not reclaim
2497 * all space when retiring the request (to prevent HEADs colliding
2498 * with rapid ringbuffer wraparound) the amount of available space
2499 * upon reset is less than when we start. Do one more pass over
2500 * all the ringbuffers to reset last_retired_head.
2501 */
7e37f889
CW
2502 list_for_each_entry(ring, &engine->buffers, link) {
2503 ring->last_retired_head = ring->tail;
2504 intel_ring_update_space(ring);
608c1a52 2505 }
2ed53a94 2506
b913b33c 2507 engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
673a394b
EA
2508}
2509
069efc1d 2510void i915_gem_reset(struct drm_device *dev)
673a394b 2511{
fac5e23e 2512 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 2513 struct intel_engine_cs *engine;
673a394b 2514
4db080f9
CW
2515 /*
2516 * Before we free the objects from the requests, we need to inspect
2517 * them for finding the guilty party. As the requests only borrow
2518 * their reference to the objects, the inspection must be done first.
2519 */
b4ac5afc 2520 for_each_engine(engine, dev_priv)
7b4d3a16 2521 i915_gem_reset_engine_status(engine);
4db080f9 2522
b4ac5afc 2523 for_each_engine(engine, dev_priv)
7b4d3a16 2524 i915_gem_reset_engine_cleanup(engine);
b913b33c 2525 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
dfaae392 2526
acce9ffa
BW
2527 i915_gem_context_reset(dev);
2528
19b2dbde 2529 i915_gem_restore_fences(dev);
673a394b
EA
2530}
2531
75ef9da2 2532static void
673a394b
EA
2533i915_gem_retire_work_handler(struct work_struct *work)
2534{
b29c19b6 2535 struct drm_i915_private *dev_priv =
67d97da3 2536 container_of(work, typeof(*dev_priv), gt.retire_work.work);
91c8a326 2537 struct drm_device *dev = &dev_priv->drm;
673a394b 2538
891b48cf 2539 /* Come back later if the device is busy... */
b29c19b6 2540 if (mutex_trylock(&dev->struct_mutex)) {
67d97da3 2541 i915_gem_retire_requests(dev_priv);
b29c19b6 2542 mutex_unlock(&dev->struct_mutex);
673a394b 2543 }
67d97da3
CW
2544
2545 /* Keep the retire handler running until we are finally idle.
2546 * We do not need to do this test under locking as in the worst-case
2547 * we queue the retire worker once too often.
2548 */
c9615613
CW
2549 if (READ_ONCE(dev_priv->gt.awake)) {
2550 i915_queue_hangcheck(dev_priv);
67d97da3
CW
2551 queue_delayed_work(dev_priv->wq,
2552 &dev_priv->gt.retire_work,
bcb45086 2553 round_jiffies_up_relative(HZ));
c9615613 2554 }
b29c19b6 2555}
0a58705b 2556
b29c19b6
CW
2557static void
2558i915_gem_idle_work_handler(struct work_struct *work)
2559{
2560 struct drm_i915_private *dev_priv =
67d97da3 2561 container_of(work, typeof(*dev_priv), gt.idle_work.work);
91c8a326 2562 struct drm_device *dev = &dev_priv->drm;
b4ac5afc 2563 struct intel_engine_cs *engine;
67d97da3
CW
2564 unsigned int stuck_engines;
2565 bool rearm_hangcheck;
2566
2567 if (!READ_ONCE(dev_priv->gt.awake))
2568 return;
2569
2570 if (READ_ONCE(dev_priv->gt.active_engines))
2571 return;
2572
2573 rearm_hangcheck =
2574 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2575
2576 if (!mutex_trylock(&dev->struct_mutex)) {
2577 /* Currently busy, come back later */
2578 mod_delayed_work(dev_priv->wq,
2579 &dev_priv->gt.idle_work,
2580 msecs_to_jiffies(50));
2581 goto out_rearm;
2582 }
2583
2584 if (dev_priv->gt.active_engines)
2585 goto out_unlock;
b29c19b6 2586
b4ac5afc 2587 for_each_engine(engine, dev_priv)
67d97da3 2588 i915_gem_batch_pool_fini(&engine->batch_pool);
35c94185 2589
67d97da3
CW
2590 GEM_BUG_ON(!dev_priv->gt.awake);
2591 dev_priv->gt.awake = false;
2592 rearm_hangcheck = false;
30ecad77 2593
2529d570
CW
2594 /* As we have disabled hangcheck, we need to unstick any waiters still
2595 * hanging around. However, as we may be racing against the interrupt
2596 * handler or the waiters themselves, we skip enabling the fake-irq.
2597 */
67d97da3 2598 stuck_engines = intel_kick_waiters(dev_priv);
2529d570
CW
2599 if (unlikely(stuck_engines))
2600 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
2601 stuck_engines);
35c94185 2602
67d97da3
CW
2603 if (INTEL_GEN(dev_priv) >= 6)
2604 gen6_rps_idle(dev_priv);
2605 intel_runtime_pm_put(dev_priv);
2606out_unlock:
2607 mutex_unlock(&dev->struct_mutex);
b29c19b6 2608
67d97da3
CW
2609out_rearm:
2610 if (rearm_hangcheck) {
2611 GEM_BUG_ON(!dev_priv->gt.awake);
2612 i915_queue_hangcheck(dev_priv);
35c94185 2613 }
673a394b
EA
2614}
2615
b1f788c6
CW
2616void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2617{
2618 struct drm_i915_gem_object *obj = to_intel_bo(gem);
2619 struct drm_i915_file_private *fpriv = file->driver_priv;
2620 struct i915_vma *vma, *vn;
2621
2622 mutex_lock(&obj->base.dev->struct_mutex);
2623 list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2624 if (vma->vm->file == fpriv)
2625 i915_vma_close(vma);
2626 mutex_unlock(&obj->base.dev->struct_mutex);
2627}
2628
23ba4fd0
BW
2629/**
2630 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
14bb2c11
TU
2631 * @dev: drm device pointer
2632 * @data: ioctl data blob
2633 * @file: drm file pointer
23ba4fd0
BW
2634 *
2635 * Returns 0 if successful, else an error is returned with the remaining time in
2636 * the timeout parameter.
2637 * -ETIME: object is still busy after timeout
2638 * -ERESTARTSYS: signal interrupted the wait
2639 * -ENOENT: object doesn't exist
2640 * Also possible, but rare:
2641 * -EAGAIN: GPU wedged
2642 * -ENOMEM: damn
2643 * -ENODEV: Internal IRQ fail
2644 * -E?: The add request failed
2645 *
2646 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2647 * non-zero timeout parameter the wait ioctl will wait for the given number of
2648 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2649 * without holding struct_mutex the object may become re-busied before this
2650 * function completes. A similar but shorter race condition exists in the busy
2651 * ioctl.
2652 */
2653int
2654i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2655{
2656 struct drm_i915_gem_wait *args = data;
2657 struct drm_i915_gem_object *obj;
27c01aae 2658 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
b4716185
CW
2659 int i, n = 0;
2660 int ret;
23ba4fd0 2661
11b5d511
DV
2662 if (args->flags != 0)
2663 return -EINVAL;
2664
23ba4fd0
BW
2665 ret = i915_mutex_lock_interruptible(dev);
2666 if (ret)
2667 return ret;
2668
03ac0642
CW
2669 obj = i915_gem_object_lookup(file, args->bo_handle);
2670 if (!obj) {
23ba4fd0
BW
2671 mutex_unlock(&dev->struct_mutex);
2672 return -ENOENT;
2673 }
2674
573adb39 2675 if (!i915_gem_object_is_active(obj))
97b2a6a1 2676 goto out;
23ba4fd0 2677
666796da 2678 for (i = 0; i < I915_NUM_ENGINES; i++) {
27c01aae 2679 struct drm_i915_gem_request *req;
b4716185 2680
d72d908b
CW
2681 req = i915_gem_active_get(&obj->last_read[i],
2682 &obj->base.dev->struct_mutex);
27c01aae
CW
2683 if (req)
2684 requests[n++] = req;
b4716185
CW
2685 }
2686
21c310f2
CW
2687out:
2688 i915_gem_object_put(obj);
23ba4fd0
BW
2689 mutex_unlock(&dev->struct_mutex);
2690
b4716185
CW
2691 for (i = 0; i < n; i++) {
2692 if (ret == 0)
776f3236
CW
2693 ret = i915_wait_request(requests[i], true,
2694 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
2695 to_rps_client(file));
27c01aae 2696 i915_gem_request_put(requests[i]);
b4716185 2697 }
ff865885 2698 return ret;
23ba4fd0
BW
2699}
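
/* A sketch of the corresponding userspace call (libdrm-style; error handling
 * omitted; fd and handle are assumed to come from earlier setup; timeout_ns
 * is a one second budget here):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000000000,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On return wait.timeout_ns holds the remaining budget; -ETIME means the
 * object was still busy when that budget expired.
 */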
2700
b4716185 2701static int
fa545cbf 2702__i915_gem_object_sync(struct drm_i915_gem_request *to,
8e637178 2703 struct drm_i915_gem_request *from)
b4716185 2704{
b4716185
CW
2705 int ret;
2706
8e637178 2707 if (to->engine == from->engine)
b4716185
CW
2708 return 0;
2709
39df9190 2710 if (!i915.semaphores) {
776f3236
CW
2711 ret = i915_wait_request(from,
2712 from->i915->mm.interruptible,
2713 NULL,
2714 NO_WAITBOOST);
b4716185
CW
2715 if (ret)
2716 return ret;
b4716185 2717 } else {
8e637178 2718 int idx = intel_engine_sync_index(from->engine, to->engine);
ddf07be7 2719 if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
b4716185
CW
2720 return 0;
2721
8e637178 2722 trace_i915_gem_ring_sync_to(to, from);
ddf07be7 2723 ret = to->engine->semaphore.sync_to(to, from);
b4716185
CW
2724 if (ret)
2725 return ret;
2726
ddf07be7 2727 from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
b4716185
CW
2728 }
2729
2730 return 0;
2731}
2732
5816d648
BW
2733/**
2734 * i915_gem_object_sync - sync an object to a ring.
2735 *
2736 * @obj: object which may be in use on another ring.
8e637178 2737 * @to: request we are wishing to use
5816d648
BW
2738 *
2739 * This code is meant to abstract object synchronization with the GPU.
8e637178
CW
2740 * Conceptually we serialise writes between engines inside the GPU.
2741 * We only allow one engine to write into a buffer at any time, but
2742 * multiple readers. To ensure each has a coherent view of memory, we must:
b4716185
CW
2743 *
2744 * - If there is an outstanding write request to the object, the new
2745 * request must wait for it to complete (either CPU or in hw, requests
2746 * on the same ring will be naturally ordered).
2747 *
2748 * - If we are a write request (pending_write_domain is set), the new
2749 * request must wait for outstanding read requests to complete.
5816d648
BW
2750 *
2751 * Returns 0 if successful, else propagates up the lower layer error.
2752 */
2911a35b
BW
2753int
2754i915_gem_object_sync(struct drm_i915_gem_object *obj,
8e637178 2755 struct drm_i915_gem_request *to)
2911a35b 2756{
8cac6f6c
CW
2757 struct i915_gem_active *active;
2758 unsigned long active_mask;
2759 int idx;
41c52415 2760
8cac6f6c 2761 lockdep_assert_held(&obj->base.dev->struct_mutex);
2911a35b 2762
573adb39 2763 active_mask = i915_gem_object_get_active(obj);
8cac6f6c
CW
2764 if (!active_mask)
2765 return 0;
27c01aae 2766
8cac6f6c
CW
2767 if (obj->base.pending_write_domain) {
2768 active = obj->last_read;
b4716185 2769 } else {
8cac6f6c
CW
2770 active_mask = 1;
2771 active = &obj->last_write;
b4716185 2772 }
8cac6f6c
CW
2773
2774 for_each_active(active_mask, idx) {
2775 struct drm_i915_gem_request *request;
2776 int ret;
2777
2778 request = i915_gem_active_peek(&active[idx],
2779 &obj->base.dev->struct_mutex);
2780 if (!request)
2781 continue;
2782
fa545cbf 2783 ret = __i915_gem_object_sync(to, request);
b4716185
CW
2784 if (ret)
2785 return ret;
2786 }
2911a35b 2787
b4716185 2788 return 0;
2911a35b
BW
2789}
2790
b5ffc9bc
CW
2791static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2792{
2793 u32 old_write_domain, old_read_domains;
2794
b5ffc9bc
CW
2795 /* Force a pagefault for domain tracking on next user access */
2796 i915_gem_release_mmap(obj);
2797
b97c3d9c
KP
2798 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2799 return;
2800
b5ffc9bc
CW
2801 old_read_domains = obj->base.read_domains;
2802 old_write_domain = obj->base.write_domain;
2803
2804 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2805 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2806
2807 trace_i915_gem_object_change_domain(obj,
2808 old_read_domains,
2809 old_write_domain);
2810}
2811
8ef8561f
CW
2812static void __i915_vma_iounmap(struct i915_vma *vma)
2813{
20dfbde4 2814 GEM_BUG_ON(i915_vma_is_pinned(vma));
8ef8561f
CW
2815
2816 if (vma->iomap == NULL)
2817 return;
2818
2819 io_mapping_unmap(vma->iomap);
2820 vma->iomap = NULL;
2821}
2822
df0e9a28 2823int i915_vma_unbind(struct i915_vma *vma)
673a394b 2824{
07fe0b12 2825 struct drm_i915_gem_object *obj = vma->obj;
b0decaf7 2826 unsigned long active;
43e28f09 2827 int ret;
673a394b 2828
b0decaf7
CW
2829 /* First wait upon any activity as retiring the request may
2830 * have side-effects such as unpinning or even unbinding this vma.
2831 */
2832 active = i915_vma_get_active(vma);
df0e9a28 2833 if (active) {
b0decaf7
CW
2834 int idx;
2835
b1f788c6
CW
2836 /* When a closed VMA is retired, it is unbound - eek.
2837 * In order to prevent it from being recursively closed,
2838 * take a pin on the vma so that the second unbind is
2839 * aborted.
2840 */
20dfbde4 2841 __i915_vma_pin(vma);
b1f788c6 2842
b0decaf7
CW
2843 for_each_active(active, idx) {
2844 ret = i915_gem_active_retire(&vma->last_read[idx],
2845 &vma->vm->dev->struct_mutex);
2846 if (ret)
b1f788c6 2847 break;
b0decaf7
CW
2848 }
2849
20dfbde4 2850 __i915_vma_unpin(vma);
b1f788c6
CW
2851 if (ret)
2852 return ret;
2853
b0decaf7
CW
2854 GEM_BUG_ON(i915_vma_is_active(vma));
2855 }
2856
20dfbde4 2857 if (i915_vma_is_pinned(vma))
b0decaf7
CW
2858 return -EBUSY;
2859
b1f788c6
CW
2860 if (!drm_mm_node_allocated(&vma->node))
2861 goto destroy;
433544bd 2862
15717de2
CW
2863 GEM_BUG_ON(obj->bind_count == 0);
2864 GEM_BUG_ON(!obj->pages);
c4670ad0 2865
3272db53
CW
2866 if (i915_vma_is_ggtt(vma) &&
2867 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
8b1bc9b4 2868 i915_gem_object_finish_gtt(obj);
5323fd04 2869
8b1bc9b4
DV
2870 /* release the fence reg _after_ flushing */
2871 ret = i915_gem_object_put_fence(obj);
2872 if (ret)
2873 return ret;
8ef8561f
CW
2874
2875 __i915_vma_iounmap(vma);
8b1bc9b4 2876 }
96b47b65 2877
50e046b6
CW
2878 if (likely(!vma->vm->closed)) {
2879 trace_i915_vma_unbind(vma);
2880 vma->vm->unbind_vma(vma);
2881 }
3272db53 2882 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
6f65e29a 2883
50e046b6
CW
2884 drm_mm_remove_node(&vma->node);
2885 list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
2886
3272db53 2887 if (i915_vma_is_ggtt(vma)) {
fe14d5f4
TU
2888 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2889 obj->map_and_fenceable = false;
2890 } else if (vma->ggtt_view.pages) {
2891 sg_free_table(vma->ggtt_view.pages);
2892 kfree(vma->ggtt_view.pages);
fe14d5f4 2893 }
016a65a3 2894 vma->ggtt_view.pages = NULL;
fe14d5f4 2895 }
673a394b 2896
2f633156 2897 /* Since the unbound list is global, only move to that list if
b93dab6e 2898 * no more VMAs exist. */
15717de2
CW
2899 if (--obj->bind_count == 0)
2900 list_move_tail(&obj->global_list,
2901 &to_i915(obj->base.dev)->mm.unbound_list);
673a394b 2902
70903c3b
CW
2903 /* And finally now the object is completely decoupled from this vma,
2904 * we can drop its hold on the backing storage and allow it to be
2905 * reaped by the shrinker.
2906 */
2907 i915_gem_object_unpin_pages(obj);
2908
b1f788c6 2909destroy:
3272db53 2910 if (unlikely(i915_vma_is_closed(vma)))
b1f788c6
CW
2911 i915_vma_destroy(vma);
2912
88241785 2913 return 0;
54cf91dc
CW
2914}
2915
6e5a5beb 2916int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
4df2faf4 2917{
e2f80391 2918 struct intel_engine_cs *engine;
b4ac5afc 2919 int ret;
4df2faf4 2920
91c8a326 2921 lockdep_assert_held(&dev_priv->drm.struct_mutex);
6e5a5beb 2922
b4ac5afc 2923 for_each_engine(engine, dev_priv) {
62e63007
CW
2924 if (engine->last_context == NULL)
2925 continue;
2926
666796da 2927 ret = intel_engine_idle(engine);
1ec14ad3
CW
2928 if (ret)
2929 return ret;
2930 }
4df2faf4 2931
8a1a49f9 2932 return 0;
4df2faf4
DV
2933}
2934
4144f9b5 2935static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
42d6ab48
CW
2936 unsigned long cache_level)
2937{
4144f9b5 2938 struct drm_mm_node *gtt_space = &vma->node;
42d6ab48
CW
2939 struct drm_mm_node *other;
2940
4144f9b5
CW
2941 /*
2942 * On some machines we have to be careful when putting differing types
2943 * of snoopable memory together to avoid the prefetcher crossing memory
2944 * domains and dying. During vm initialisation, we decide whether or not
2945 * these constraints apply and set the drm_mm.color_adjust
2946 * appropriately.
42d6ab48 2947 */
4144f9b5 2948 if (vma->vm->mm.color_adjust == NULL)
42d6ab48
CW
2949 return true;
2950
c6cfb325 2951 if (!drm_mm_node_allocated(gtt_space))
42d6ab48
CW
2952 return true;
2953
2954 if (list_empty(&gtt_space->node_list))
2955 return true;
2956
2957 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2958 if (other->allocated && !other->hole_follows && other->color != cache_level)
2959 return false;
2960
2961 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2962 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2963 return false;
2964
2965 return true;
2966}
2967
673a394b 2968/**
59bfa124
CW
2969 * i915_vma_insert - finds a slot for the vma in its address space
2970 * @vma: the vma
91b2db6f 2971 * @size: requested size in bytes (can be larger than the VMA)
59bfa124 2972 * @alignment: required alignment
14bb2c11 2973 * @flags: mask of PIN_* flags to use
59bfa124
CW
2974 *
2975 * First we try to allocate some free space that meets the requirements for
2976 * the VMA. Failing that, if the flags permit, it will evict an old VMA,
2977 * preferably the oldest idle entry to make room for the new VMA.
2978 *
2979 * Returns:
2980 * 0 on success, negative error code otherwise.
673a394b 2981 */
59bfa124
CW
2982static int
2983i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
673a394b 2984{
59bfa124
CW
2985 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
2986 struct drm_i915_gem_object *obj = vma->obj;
de180033
CW
2987 u64 start, end;
2988 u64 min_alignment;
07f73f69 2989 int ret;
673a394b 2990
3272db53 2991 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
59bfa124 2992 GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
de180033
CW
2993
2994 size = max(size, vma->size);
2995 if (flags & PIN_MAPPABLE)
2996 size = i915_gem_get_ggtt_size(dev_priv, size, obj->tiling_mode);
2997
2998 min_alignment =
2999 i915_gem_get_ggtt_alignment(dev_priv, size, obj->tiling_mode,
3000 flags & PIN_MAPPABLE);
3001 if (alignment == 0)
3002 alignment = min_alignment;
3003 if (alignment & (min_alignment - 1)) {
3004 DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
3005 alignment, min_alignment);
59bfa124 3006 return -EINVAL;
91e6711e 3007 }
a00b10c3 3008
101b506a 3009 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
de180033
CW
3010
3011 end = vma->vm->total;
101b506a 3012 if (flags & PIN_MAPPABLE)
91b2db6f 3013 end = min_t(u64, end, dev_priv->ggtt.mappable_end);
101b506a 3014 if (flags & PIN_ZONE_4G)
48ea1e32 3015 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
101b506a 3016
91e6711e
JL
3017 /* If binding the object/GGTT view requires more space than the entire
3018 * aperture has, reject it early before evicting everything in a vain
3019 * attempt to find space.
654fc607 3020 */
91e6711e 3021 if (size > end) {
de180033 3022 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
91b2db6f 3023 size, obj->base.size,
1ec9e26d 3024 flags & PIN_MAPPABLE ? "mappable" : "total",
d23db88c 3025 end);
59bfa124 3026 return -E2BIG;
654fc607
CW
3027 }
3028
37e680a1 3029 ret = i915_gem_object_get_pages(obj);
6c085a72 3030 if (ret)
59bfa124 3031 return ret;
6c085a72 3032
fbdda6fb
CW
3033 i915_gem_object_pin_pages(obj);
3034
506a8e87 3035 if (flags & PIN_OFFSET_FIXED) {
59bfa124 3036 u64 offset = flags & PIN_OFFSET_MASK;
de180033 3037 if (offset & (alignment - 1) || offset > end - size) {
506a8e87 3038 ret = -EINVAL;
de180033 3039 goto err_unpin;
506a8e87 3040 }
de180033 3041
506a8e87
CW
3042 vma->node.start = offset;
3043 vma->node.size = size;
3044 vma->node.color = obj->cache_level;
de180033 3045 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
506a8e87
CW
3046 if (ret) {
3047 ret = i915_gem_evict_for_vma(vma);
3048 if (ret == 0)
de180033
CW
3049 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3050 if (ret)
3051 goto err_unpin;
506a8e87 3052 }
101b506a 3053 } else {
de180033
CW
3054 u32 search_flag, alloc_flag;
3055
506a8e87
CW
3056 if (flags & PIN_HIGH) {
3057 search_flag = DRM_MM_SEARCH_BELOW;
3058 alloc_flag = DRM_MM_CREATE_TOP;
3059 } else {
3060 search_flag = DRM_MM_SEARCH_DEFAULT;
3061 alloc_flag = DRM_MM_CREATE_DEFAULT;
3062 }
101b506a 3063
954c4691
CW
3064 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3065 * so we know that we always have a minimum alignment of 4096.
3066 * The drm_mm range manager is optimised to return results
3067 * with zero alignment, so where possible use the optimal
3068 * path.
3069 */
3070 if (alignment <= 4096)
3071 alignment = 0;
3072
0a9ae0d7 3073search_free:
de180033
CW
3074 ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3075 &vma->node,
506a8e87
CW
3076 size, alignment,
3077 obj->cache_level,
3078 start, end,
3079 search_flag,
3080 alloc_flag);
3081 if (ret) {
de180033 3082 ret = i915_gem_evict_something(vma->vm, size, alignment,
506a8e87
CW
3083 obj->cache_level,
3084 start, end,
3085 flags);
3086 if (ret == 0)
3087 goto search_free;
9731129c 3088
de180033 3089 goto err_unpin;
506a8e87 3090 }
673a394b 3091 }
37508589 3092 GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
673a394b 3093
35c20a60 3094 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
de180033 3095 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
15717de2 3096 obj->bind_count++;
bf1a1092 3097
59bfa124 3098 return 0;
2f633156 3099
bc6bc15b 3100err_unpin:
2f633156 3101 i915_gem_object_unpin_pages(obj);
59bfa124 3102 return ret;
673a394b
EA
3103}
3104
000433b6 3105bool
2c22569b
CW
3106i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3107 bool force)
673a394b 3108{
673a394b
EA
3109 /* If we don't have a page list set up, then we're not pinned
3110 * to GPU, and we can ignore the cache flush because it'll happen
3111 * again at bind time.
3112 */
05394f39 3113 if (obj->pages == NULL)
000433b6 3114 return false;
673a394b 3115
769ce464
ID
3116 /*
3117 * Stolen memory is always coherent with the GPU as it is explicitly
3118 * marked as wc by the system, or the system is cache-coherent.
3119 */
6a2c4232 3120 if (obj->stolen || obj->phys_handle)
000433b6 3121 return false;
769ce464 3122
9c23f7fc
CW
3123 /* If the GPU is snooping the contents of the CPU cache,
3124 * we do not need to manually clear the CPU cache lines. However,
3125 * the caches are only snooped when the render cache is
3126 * flushed/invalidated. As we always have to emit invalidations
3127 * and flushes when moving into and out of the RENDER domain, correct
3128 * snooping behaviour occurs naturally as the result of our domain
3129 * tracking.
3130 */
0f71979a
CW
3131 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3132 obj->cache_dirty = true;
000433b6 3133 return false;
0f71979a 3134 }
9c23f7fc 3135
1c5d22f7 3136 trace_i915_gem_object_clflush(obj);
9da3da66 3137 drm_clflush_sg(obj->pages);
0f71979a 3138 obj->cache_dirty = false;
000433b6
CW
3139
3140 return true;
e47c68e9
EA
3141}
3142
3143/** Flushes the GTT write domain for the object if it's dirty. */
3144static void
05394f39 3145i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3146{
1c5d22f7
CW
3147 uint32_t old_write_domain;
3148
05394f39 3149 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
3150 return;
3151
63256ec5 3152 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
3153 * to it immediately go to main memory as far as we know, so there's
3154 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
3155 *
3156 * However, we do have to enforce the order so that all writes through
3157 * the GTT land before any writes to the device, such as updates to
3158 * the GATT itself.
e47c68e9 3159 */
63256ec5
CW
3160 wmb();
3161
05394f39
CW
3162 old_write_domain = obj->base.write_domain;
3163 obj->base.write_domain = 0;
1c5d22f7 3164
de152b62 3165 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
f99d7069 3166
1c5d22f7 3167 trace_i915_gem_object_change_domain(obj,
05394f39 3168 obj->base.read_domains,
1c5d22f7 3169 old_write_domain);
e47c68e9
EA
3170}
3171
3172/** Flushes the CPU write domain for the object if it's dirty. */
3173static void
e62b59e4 3174i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3175{
1c5d22f7 3176 uint32_t old_write_domain;
e47c68e9 3177
05394f39 3178 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
3179 return;
3180
e62b59e4 3181 if (i915_gem_clflush_object(obj, obj->pin_display))
c033666a 3182 i915_gem_chipset_flush(to_i915(obj->base.dev));
000433b6 3183
05394f39
CW
3184 old_write_domain = obj->base.write_domain;
3185 obj->base.write_domain = 0;
1c5d22f7 3186
de152b62 3187 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
f99d7069 3188
1c5d22f7 3189 trace_i915_gem_object_change_domain(obj,
05394f39 3190 obj->base.read_domains,
1c5d22f7 3191 old_write_domain);
e47c68e9
EA
3192}
3193
2ef7eeaa
EA
3194/**
3195 * Moves a single object to the GTT read, and possibly write domain.
14bb2c11
TU
3196 * @obj: object to act on
3197 * @write: ask for write access or read only
2ef7eeaa
EA
3198 *
3199 * This function returns when the move is complete, including waiting on
3200 * flushes to occur.
3201 */
79e53945 3202int
2021746e 3203i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 3204{
1c5d22f7 3205 uint32_t old_write_domain, old_read_domains;
43566ded 3206 struct i915_vma *vma;
e47c68e9 3207 int ret;
2ef7eeaa 3208
0201f1ec 3209 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3210 if (ret)
3211 return ret;
3212
c13d87ea
CW
3213 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3214 return 0;
3215
43566ded
CW
3216 /* Flush and acquire obj->pages so that we are coherent through
3217 * direct access in memory with previous cached writes through
3218 * shmemfs and that our cache domain tracking remains valid.
3219 * For example, if the obj->filp was moved to swap without us
3220 * being notified and releasing the pages, we would mistakenly
3221 * continue to assume that the obj remained out of the CPU cached
3222 * domain.
3223 */
3224 ret = i915_gem_object_get_pages(obj);
3225 if (ret)
3226 return ret;
3227
e62b59e4 3228 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 3229
d0a57789
CW
3230 /* Serialise direct access to this object with the barriers for
3231 * coherent writes from the GPU, by effectively invalidating the
3232 * GTT domain upon first access.
3233 */
3234 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3235 mb();
3236
05394f39
CW
3237 old_write_domain = obj->base.write_domain;
3238 old_read_domains = obj->base.read_domains;
1c5d22f7 3239
e47c68e9
EA
3240 /* It should now be out of any other write domains, and we can update
3241 * the domain values for our changes.
3242 */
05394f39
CW
3243 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3244 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 3245 if (write) {
05394f39
CW
3246 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3247 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3248 obj->dirty = 1;
2ef7eeaa
EA
3249 }
3250
1c5d22f7
CW
3251 trace_i915_gem_object_change_domain(obj,
3252 old_read_domains,
3253 old_write_domain);
3254
8325a09d 3255 /* And bump the LRU for this access */
43566ded 3256 vma = i915_gem_obj_to_ggtt(obj);
b0decaf7
CW
3257 if (vma &&
3258 drm_mm_node_allocated(&vma->node) &&
3259 !i915_vma_is_active(vma))
3260 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
8325a09d 3261
e47c68e9
EA
3262 return 0;
3263}
3264
ef55f92a
CW
3265/**
3266 * Changes the cache-level of an object across all VMA.
14bb2c11
TU
3267 * @obj: object to act on
3268 * @cache_level: new cache level to set for the object
ef55f92a
CW
3269 *
3270 * After this function returns, the object will be in the new cache-level
3271 * across all GTT and the contents of the backing storage will be coherent,
3272 * with respect to the new cache-level. In order to keep the backing storage
3273 * coherent for all users, we only allow a single cache level to be set
3274 * globally on the object and prevent it from being changed whilst the
3275 * hardware is reading from the object. That is if the object is currently
3276 * on the scanout it will be set to uncached (or equivalent display
3277 * cache coherency) and all non-MOCS GPU access will also be uncached so
3278 * that all direct access to the scanout remains coherent.
3279 */
e4ffd173
CW
3280int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3281 enum i915_cache_level cache_level)
3282{
aa653a68 3283 struct i915_vma *vma;
ed75a55b 3284 int ret = 0;
e4ffd173
CW
3285
3286 if (obj->cache_level == cache_level)
ed75a55b 3287 goto out;
e4ffd173 3288
ef55f92a
CW
3289 /* Inspect the list of currently bound VMA and unbind any that would
3290 * be invalid given the new cache-level. This is principally to
3291 * catch the issue of the CS prefetch crossing page boundaries and
3292 * reading an invalid PTE on older architectures.
3293 */
aa653a68
CW
3294restart:
3295 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3296 if (!drm_mm_node_allocated(&vma->node))
3297 continue;
3298
20dfbde4 3299 if (i915_vma_is_pinned(vma)) {
ef55f92a
CW
3300 DRM_DEBUG("can not change the cache level of pinned objects\n");
3301 return -EBUSY;
3302 }
3303
aa653a68
CW
3304 if (i915_gem_valid_gtt_space(vma, cache_level))
3305 continue;
3306
3307 ret = i915_vma_unbind(vma);
3308 if (ret)
3309 return ret;
3310
3311 /* As unbinding may affect other elements in the
3312 * obj->vma_list (due to side-effects from retiring
3313 * an active vma), play safe and restart the iterator.
3314 */
3315 goto restart;
42d6ab48
CW
3316 }
3317
ef55f92a
CW
3318 /* We can reuse the existing drm_mm nodes but need to change the
3319 * cache-level on the PTE. We could simply unbind them all and
3320 * rebind with the correct cache-level on next use. However since
3321 * we already have a valid slot, dma mapping, pages etc, we may as well
3322 * rewrite the PTE in the belief that doing so tramples upon less
3323 * state and so involves less work.
3324 */
15717de2 3325 if (obj->bind_count) {
ef55f92a
CW
3326 /* Before we change the PTE, the GPU must not be accessing it.
3327 * If we wait upon the object, we know that all the bound
3328 * VMA are no longer active.
3329 */
2e2f351d 3330 ret = i915_gem_object_wait_rendering(obj, false);
e4ffd173
CW
3331 if (ret)
3332 return ret;
3333
aa653a68 3334 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
ef55f92a
CW
3335 /* Access to snoopable pages through the GTT is
3336 * incoherent and on some machines causes a hard
3337 * lockup. Relinquish the CPU mmapping to force
3338 * userspace to refault in the pages and we can
3339 * then double check if the GTT mapping is still
3340 * valid for that pointer access.
3341 */
3342 i915_gem_release_mmap(obj);
3343
3344 /* As we no longer need a fence for GTT access,
3345 * we can relinquish it now (and so prevent having
3346 * to steal a fence from someone else on the next
3347 * fence request). Note GPU activity would have
3348 * dropped the fence as all snoopable access is
3349 * supposed to be linear.
3350 */
e4ffd173
CW
3351 ret = i915_gem_object_put_fence(obj);
3352 if (ret)
3353 return ret;
ef55f92a
CW
3354 } else {
3355 /* We either have incoherent backing store and
3356 * so no GTT access or the architecture is fully
3357 * coherent. In such cases, existing GTT mmaps
3358 * ignore the cache bit in the PTE and we can
3359 * rewrite it without confusing the GPU or having
3360 * to force userspace to fault back in its mmaps.
3361 */
e4ffd173
CW
3362 }
3363
1c7f4bca 3364 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3365 if (!drm_mm_node_allocated(&vma->node))
3366 continue;
3367
3368 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3369 if (ret)
3370 return ret;
3371 }
e4ffd173
CW
3372 }
3373
1c7f4bca 3374 list_for_each_entry(vma, &obj->vma_list, obj_link)
2c22569b
CW
3375 vma->node.color = cache_level;
3376 obj->cache_level = cache_level;
3377
ed75a55b 3378out:
ef55f92a
CW
3379 /* Flush the dirty CPU caches to the backing storage so that the
3380 * object is now coherent at its new cache level (with respect
3381 * to the access domain).
3382 */
b50a5371 3383 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
0f71979a 3384 if (i915_gem_clflush_object(obj, true))
c033666a 3385 i915_gem_chipset_flush(to_i915(obj->base.dev));
e4ffd173
CW
3386 }
3387
e4ffd173
CW
3388 return 0;
3389}
3390
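/*
 * Illustrative call only (not a real call site in this file): a driver
 * path that needs an uncached scanout buffer would use the helper above
 * as, for example:
 *
 *	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
 *	if (ret)
 *		return ret;
 *
 * i915_gem_object_pin_to_display_plane() below does exactly this,
 * preferring I915_CACHE_WT where HAS_WT() is true.
 */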
199adf40
BW
3391int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3392 struct drm_file *file)
e6994aee 3393{
199adf40 3394 struct drm_i915_gem_caching *args = data;
e6994aee 3395 struct drm_i915_gem_object *obj;
e6994aee 3396
03ac0642
CW
3397 obj = i915_gem_object_lookup(file, args->handle);
3398 if (!obj)
432be69d 3399 return -ENOENT;
e6994aee 3400
651d794f
CW
3401 switch (obj->cache_level) {
3402 case I915_CACHE_LLC:
3403 case I915_CACHE_L3_LLC:
3404 args->caching = I915_CACHING_CACHED;
3405 break;
3406
4257d3ba
CW
3407 case I915_CACHE_WT:
3408 args->caching = I915_CACHING_DISPLAY;
3409 break;
3410
651d794f
CW
3411 default:
3412 args->caching = I915_CACHING_NONE;
3413 break;
3414 }
e6994aee 3415
34911fd3 3416 i915_gem_object_put_unlocked(obj);
432be69d 3417 return 0;
e6994aee
CW
3418}
3419
199adf40
BW
3420int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3421 struct drm_file *file)
e6994aee 3422{
fac5e23e 3423 struct drm_i915_private *dev_priv = to_i915(dev);
199adf40 3424 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3425 struct drm_i915_gem_object *obj;
3426 enum i915_cache_level level;
3427 int ret;
3428
199adf40
BW
3429 switch (args->caching) {
3430 case I915_CACHING_NONE:
e6994aee
CW
3431 level = I915_CACHE_NONE;
3432 break;
199adf40 3433 case I915_CACHING_CACHED:
e5756c10
ID
3434 /*
3435 * Due to a HW issue on BXT A stepping, GPU stores via a
3436 * snooped mapping may leave stale data in a corresponding CPU
3437 * cacheline, whereas normally such cachelines would get
3438 * invalidated.
3439 */
ca377809 3440 if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
e5756c10
ID
3441 return -ENODEV;
3442
e6994aee
CW
3443 level = I915_CACHE_LLC;
3444 break;
4257d3ba
CW
3445 case I915_CACHING_DISPLAY:
3446 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3447 break;
e6994aee
CW
3448 default:
3449 return -EINVAL;
3450 }
3451
fd0fe6ac
ID
3452 intel_runtime_pm_get(dev_priv);
3453
3bc2913e
BW
3454 ret = i915_mutex_lock_interruptible(dev);
3455 if (ret)
fd0fe6ac 3456 goto rpm_put;
3bc2913e 3457
03ac0642
CW
3458 obj = i915_gem_object_lookup(file, args->handle);
3459 if (!obj) {
e6994aee
CW
3460 ret = -ENOENT;
3461 goto unlock;
3462 }
3463
3464 ret = i915_gem_object_set_cache_level(obj, level);
3465
f8c417cd 3466 i915_gem_object_put(obj);
e6994aee
CW
3467unlock:
3468 mutex_unlock(&dev->struct_mutex);
fd0fe6ac
ID
3469rpm_put:
3470 intel_runtime_pm_put(dev_priv);
3471
e6994aee
CW
3472 return ret;
3473}
3474
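/*
 * Sketch of the userspace side (illustrative, not part of the driver):
 * the get/set caching ioctls above are reached through the standard DRM
 * ioctl path. The drmIoctl() wrapper below is a libdrm-style assumption;
 * only struct drm_i915_gem_caching and the I915_CACHING_* values come
 * from the interface handled here.
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		return -errno;
 *
 * Note that I915_CACHING_CACHED is rejected with -ENODEV on platforms
 * that have neither LLC nor snooping, matching the check above.
 */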
b9241ea3 3475/*
2da3b9b9
CW
3476 * Prepare buffer for display plane (scanout, cursors, etc).
3477 * Can be called from an uninterruptible phase (modesetting) and allows
3478 * any flushes to be pipelined (for pageflips).
b9241ea3
ZW
3479 */
3480int
2da3b9b9
CW
3481i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3482 u32 alignment,
e6617330 3483 const struct i915_ggtt_view *view)
b9241ea3 3484{
2da3b9b9 3485 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
3486 int ret;
3487
cc98b413
CW
3488 /* Mark the pin_display early so that we account for the
3489 * display coherency whilst setting up the cache domains.
3490 */
8a0c39b1 3491 obj->pin_display++;
cc98b413 3492
a7ef0640
EA
3493 /* The display engine is not coherent with the LLC cache on gen6. As
3494 * a result, we make sure that the pinning that is about to occur is
3495 * done with uncached PTEs. This is the lowest common denominator for all
3496 * chipsets.
3497 *
3498 * However for gen6+, we could do better by using the GFDT bit instead
3499 * of uncaching, which would allow us to flush all the LLC-cached data
3500 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3501 */
651d794f
CW
3502 ret = i915_gem_object_set_cache_level(obj,
3503 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
a7ef0640 3504 if (ret)
cc98b413 3505 goto err_unpin_display;
a7ef0640 3506
2da3b9b9
CW
3507 /* As the user may map the buffer once pinned in the display plane
3508 * (e.g. libkms for the bootup splash), we have to ensure that we
3509 * always use map_and_fenceable for all scanout buffers.
3510 */
91b2db6f 3511 ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
50470bb0
TU
3512 view->type == I915_GGTT_VIEW_NORMAL ?
3513 PIN_MAPPABLE : 0);
2da3b9b9 3514 if (ret)
cc98b413 3515 goto err_unpin_display;
2da3b9b9 3516
e62b59e4 3517 i915_gem_object_flush_cpu_write_domain(obj);
b118c1e3 3518
2da3b9b9 3519 old_write_domain = obj->base.write_domain;
05394f39 3520 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
3521
3522 /* It should now be out of any other write domains, and we can update
3523 * the domain values for our changes.
3524 */
e5f1d962 3525 obj->base.write_domain = 0;
05394f39 3526 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
3527
3528 trace_i915_gem_object_change_domain(obj,
3529 old_read_domains,
2da3b9b9 3530 old_write_domain);
b9241ea3
ZW
3531
3532 return 0;
cc98b413
CW
3533
3534err_unpin_display:
8a0c39b1 3535 obj->pin_display--;
cc98b413
CW
3536 return ret;
3537}
3538
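/*
 * Illustrative pairing (not a real call site): obj->pin_display is a
 * counter, so every successful i915_gem_object_pin_to_display_plane()
 * must eventually be balanced by an unpin with the same view:
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, view);
 *	if (ret)
 *		return ret;
 *	... scan out from the object ...
 *	i915_gem_object_unpin_from_display_plane(obj, view);
 */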
3539void
e6617330
TU
3540i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3541 const struct i915_ggtt_view *view)
cc98b413 3542{
8a0c39b1
TU
3543 if (WARN_ON(obj->pin_display == 0))
3544 return;
3545
e6617330
TU
3546 i915_gem_object_ggtt_unpin_view(obj, view);
3547
8a0c39b1 3548 obj->pin_display--;
b9241ea3
ZW
3549}
3550
e47c68e9
EA
3551/**
3552 * Moves a single object to the CPU read, and possibly write domain.
14bb2c11
TU
3553 * @obj: object to act on
3554 * @write: requesting write or read-only access
e47c68e9
EA
3555 *
3556 * This function returns when the move is complete, including waiting on
3557 * flushes to occur.
3558 */
dabdfe02 3559int
919926ae 3560i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 3561{
1c5d22f7 3562 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
3563 int ret;
3564
0201f1ec 3565 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3566 if (ret)
3567 return ret;
3568
c13d87ea
CW
3569 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3570 return 0;
3571
e47c68e9 3572 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 3573
05394f39
CW
3574 old_write_domain = obj->base.write_domain;
3575 old_read_domains = obj->base.read_domains;
1c5d22f7 3576
e47c68e9 3577 /* Flush the CPU cache if it's still invalid. */
05394f39 3578 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2c22569b 3579 i915_gem_clflush_object(obj, false);
2ef7eeaa 3580
05394f39 3581 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
3582 }
3583
3584 /* It should now be out of any other write domains, and we can update
3585 * the domain values for our changes.
3586 */
05394f39 3587 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3588
3589 /* If we're writing through the CPU, then the GPU read domains will
3590 * need to be invalidated at next use.
3591 */
3592 if (write) {
05394f39
CW
3593 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3594 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3595 }
2ef7eeaa 3596
1c5d22f7
CW
3597 trace_i915_gem_object_change_domain(obj,
3598 old_read_domains,
3599 old_write_domain);
3600
2ef7eeaa
EA
3601 return 0;
3602}
3603
673a394b
EA
3604/* Throttle our rendering by waiting until the ring has completed our requests
3605 * emitted over 20 msec ago.
3606 *
b962442e
EA
3607 * Note that if we were to use the current jiffies each time around the loop,
3608 * we wouldn't escape the function with any frames outstanding if the time to
3609 * render a frame was over 20ms.
3610 *
673a394b
EA
3611 * This should get us reasonable parallelism between CPU and GPU but also
3612 * relatively low latency when blocking on a particular request to finish.
3613 */
40a5f0de 3614static int
f787a5f5 3615i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3616{
fac5e23e 3617 struct drm_i915_private *dev_priv = to_i915(dev);
f787a5f5 3618 struct drm_i915_file_private *file_priv = file->driver_priv;
d0bc54f2 3619 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
54fb2411 3620 struct drm_i915_gem_request *request, *target = NULL;
f787a5f5 3621 int ret;
93533c29 3622
308887aa
DV
3623 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3624 if (ret)
3625 return ret;
3626
f4457ae7
CW
3627 /* ABI: return -EIO if already wedged */
3628 if (i915_terminally_wedged(&dev_priv->gpu_error))
3629 return -EIO;
e110e8d6 3630
1c25595f 3631 spin_lock(&file_priv->mm.lock);
f787a5f5 3632 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3633 if (time_after_eq(request->emitted_jiffies, recent_enough))
3634 break;
40a5f0de 3635
fcfa423c
JH
3636 /*
3637 * Note that the request might not have been submitted yet,
3638 * in which case emitted_jiffies will be zero.
3639 */
3640 if (!request->emitted_jiffies)
3641 continue;
3642
54fb2411 3643 target = request;
b962442e 3644 }
ff865885 3645 if (target)
e8a261ea 3646 i915_gem_request_get(target);
1c25595f 3647 spin_unlock(&file_priv->mm.lock);
40a5f0de 3648
54fb2411 3649 if (target == NULL)
f787a5f5 3650 return 0;
2bc43b5c 3651
776f3236 3652 ret = i915_wait_request(target, true, NULL, NULL);
e8a261ea 3653 i915_gem_request_put(target);
ff865885 3654
40a5f0de
EA
3655 return ret;
3656}
3657
d23db88c 3658static bool
91b2db6f 3659i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
d23db88c
CW
3660{
3661 struct drm_i915_gem_object *obj = vma->obj;
3662
59bfa124
CW
3663 if (!drm_mm_node_allocated(&vma->node))
3664 return false;
3665
91b2db6f
CW
3666 if (vma->node.size < size)
3667 return true;
3668
3669 if (alignment && vma->node.start & (alignment - 1))
d23db88c
CW
3670 return true;
3671
3672 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3673 return true;
3674
3675 if (flags & PIN_OFFSET_BIAS &&
3676 vma->node.start < (flags & PIN_OFFSET_MASK))
3677 return true;
3678
506a8e87
CW
3679 if (flags & PIN_OFFSET_FIXED &&
3680 vma->node.start != (flags & PIN_OFFSET_MASK))
3681 return true;
3682
d23db88c
CW
3683 return false;
3684}
3685
d0710abb
CW
3686void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3687{
3688 struct drm_i915_gem_object *obj = vma->obj;
a9f1481f 3689 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
d0710abb
CW
3690 bool mappable, fenceable;
3691 u32 fence_size, fence_alignment;
3692
a9f1481f 3693 fence_size = i915_gem_get_ggtt_size(dev_priv,
ad1a7d20
CW
3694 obj->base.size,
3695 obj->tiling_mode);
a9f1481f 3696 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
ad1a7d20
CW
3697 obj->base.size,
3698 obj->tiling_mode,
3699 true);
d0710abb
CW
3700
3701 fenceable = (vma->node.size == fence_size &&
3702 (vma->node.start & (fence_alignment - 1)) == 0);
3703
3704 mappable = (vma->node.start + fence_size <=
a9f1481f 3705 dev_priv->ggtt.mappable_end);
d0710abb
CW
3706
3707 obj->map_and_fenceable = mappable && fenceable;
3708}
3709
305bc234
CW
3710int __i915_vma_do_pin(struct i915_vma *vma,
3711 u64 size, u64 alignment, u64 flags)
673a394b 3712{
305bc234 3713 unsigned int bound = vma->flags;
673a394b
EA
3714 int ret;
3715
59bfa124 3716 GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
3272db53 3717 GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
d7f46fc4 3718
305bc234
CW
3719 if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
3720 ret = -EBUSY;
3721 goto err;
3722 }
ac0c6b5a 3723
de895082 3724 if ((bound & I915_VMA_BIND_MASK) == 0) {
59bfa124
CW
3725 ret = i915_vma_insert(vma, size, alignment, flags);
3726 if (ret)
3727 goto err;
fe14d5f4 3728 }
74898d7e 3729
59bfa124 3730 ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
3b16525c 3731 if (ret)
59bfa124 3732 goto err;
3b16525c 3733
3272db53 3734 if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
d0710abb 3735 __i915_vma_set_map_and_fenceable(vma);
ef79e17c 3736
3b16525c 3737 GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
673a394b 3738 return 0;
673a394b 3739
59bfa124
CW
3740err:
3741 __i915_vma_unpin(vma);
3742 return ret;
ec7adb6e
JL
3743}
3744
3745int
3746i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3747 const struct i915_ggtt_view *view,
91b2db6f 3748 u64 size,
2ffffd0f
CW
3749 u64 alignment,
3750 u64 flags)
ec7adb6e 3751{
59bfa124
CW
3752 struct i915_vma *vma;
3753 int ret;
72e96d64 3754
de895082
CW
3755 if (!view)
3756 view = &i915_ggtt_view_normal;
ec7adb6e 3757
59bfa124
CW
3758 vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
3759 if (IS_ERR(vma))
3760 return PTR_ERR(vma);
3761
3762 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3763 if (flags & PIN_NONBLOCK &&
3764 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3765 return -ENOSPC;
3766
3767 WARN(i915_vma_is_pinned(vma),
3768 "bo is already pinned in ggtt with incorrect alignment:"
3769 " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
3770 " obj->map_and_fenceable=%d\n",
3771 upper_32_bits(vma->node.start),
3772 lower_32_bits(vma->node.start),
3773 alignment,
3774 !!(flags & PIN_MAPPABLE),
3775 obj->map_and_fenceable);
3776 ret = i915_vma_unbind(vma);
3777 if (ret)
3778 return ret;
3779 }
3780
3781 return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
ec7adb6e
JL
3782}
3783
673a394b 3784void
e6617330
TU
3785i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3786 const struct i915_ggtt_view *view)
673a394b 3787{
de895082 3788 i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
673a394b
EA
3789}
3790
673a394b
EA
3791int
3792i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 3793 struct drm_file *file)
673a394b
EA
3794{
3795 struct drm_i915_gem_busy *args = data;
05394f39 3796 struct drm_i915_gem_object *obj;
30dbf0c0
CW
3797 int ret;
3798
76c1dec1 3799 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 3800 if (ret)
76c1dec1 3801 return ret;
673a394b 3802
03ac0642
CW
3803 obj = i915_gem_object_lookup(file, args->handle);
3804 if (!obj) {
1d7cfea1
CW
3805 ret = -ENOENT;
3806 goto unlock;
673a394b 3807 }
d1b851fc 3808
0be555b6
CW
3809 /* Count all active objects as busy, even if they are currently not used
3810 * by the gpu. Users of this interface expect objects to eventually
21c310f2 3811 * become non-busy without any further actions.
c4de0a5d 3812 */
426960be 3813 args->busy = 0;
573adb39 3814 if (i915_gem_object_is_active(obj)) {
27c01aae 3815 struct drm_i915_gem_request *req;
426960be
CW
3816 int i;
3817
666796da 3818 for (i = 0; i < I915_NUM_ENGINES; i++) {
d72d908b
CW
3819 req = i915_gem_active_peek(&obj->last_read[i],
3820 &obj->base.dev->struct_mutex);
426960be 3821 if (req)
4a570db5 3822 args->busy |= 1 << (16 + req->engine->exec_id);
426960be 3823 }
d72d908b
CW
3824 req = i915_gem_active_peek(&obj->last_write,
3825 &obj->base.dev->struct_mutex);
27c01aae
CW
3826 if (req)
3827 args->busy |= req->engine->exec_id;
426960be 3828 }
673a394b 3829
f8c417cd 3830 i915_gem_object_put(obj);
1d7cfea1 3831unlock:
673a394b 3832 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3833 return ret;
673a394b
EA
3834}
3835
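/*
 * Reference note on the encoding built above: the low 16 bits of
 * args->busy carry the exec_id of the engine with the last outstanding
 * write (zero if there is none), while bit (16 + exec_id) is set for
 * every engine that still has an outstanding read. A hypothetical
 * userspace consumer can therefore test:
 *
 *	if (args.busy & 0xffff)
 *		... last write still in flight ...
 *	if (args.busy >> 16)
 *		... at least one engine still reading ...
 */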
3836int
3837i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3838 struct drm_file *file_priv)
3839{
0206e353 3840 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
3841}
3842
3ef94daa
CW
3843int
3844i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3845 struct drm_file *file_priv)
3846{
fac5e23e 3847 struct drm_i915_private *dev_priv = to_i915(dev);
3ef94daa 3848 struct drm_i915_gem_madvise *args = data;
05394f39 3849 struct drm_i915_gem_object *obj;
76c1dec1 3850 int ret;
3ef94daa
CW
3851
3852 switch (args->madv) {
3853 case I915_MADV_DONTNEED:
3854 case I915_MADV_WILLNEED:
3855 break;
3856 default:
3857 return -EINVAL;
3858 }
3859
1d7cfea1
CW
3860 ret = i915_mutex_lock_interruptible(dev);
3861 if (ret)
3862 return ret;
3863
03ac0642
CW
3864 obj = i915_gem_object_lookup(file_priv, args->handle);
3865 if (!obj) {
1d7cfea1
CW
3866 ret = -ENOENT;
3867 goto unlock;
3ef94daa 3868 }
3ef94daa 3869
d7f46fc4 3870 if (i915_gem_obj_is_pinned(obj)) {
1d7cfea1
CW
3871 ret = -EINVAL;
3872 goto out;
3ef94daa
CW
3873 }
3874
656bfa3a
DV
3875 if (obj->pages &&
3876 obj->tiling_mode != I915_TILING_NONE &&
3877 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3878 if (obj->madv == I915_MADV_WILLNEED)
3879 i915_gem_object_unpin_pages(obj);
3880 if (args->madv == I915_MADV_WILLNEED)
3881 i915_gem_object_pin_pages(obj);
3882 }
3883
05394f39
CW
3884 if (obj->madv != __I915_MADV_PURGED)
3885 obj->madv = args->madv;
3ef94daa 3886
6c085a72 3887 /* if the object is no longer attached, discard its backing storage */
be6a0376 3888 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
2d7ef395
CW
3889 i915_gem_object_truncate(obj);
3890
05394f39 3891 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 3892
1d7cfea1 3893out:
f8c417cd 3894 i915_gem_object_put(obj);
1d7cfea1 3895unlock:
3ef94daa 3896 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3897 return ret;
3ef94daa
CW
3898}
3899
37e680a1
CW
3900void i915_gem_object_init(struct drm_i915_gem_object *obj,
3901 const struct drm_i915_gem_object_ops *ops)
0327d6ba 3902{
b4716185
CW
3903 int i;
3904
35c20a60 3905 INIT_LIST_HEAD(&obj->global_list);
666796da 3906 for (i = 0; i < I915_NUM_ENGINES; i++)
fa545cbf
CW
3907 init_request_active(&obj->last_read[i],
3908 i915_gem_object_retire__read);
3909 init_request_active(&obj->last_write,
3910 i915_gem_object_retire__write);
3911 init_request_active(&obj->last_fence, NULL);
b25cb2f8 3912 INIT_LIST_HEAD(&obj->obj_exec_link);
2f633156 3913 INIT_LIST_HEAD(&obj->vma_list);
8d9d5744 3914 INIT_LIST_HEAD(&obj->batch_pool_link);
0327d6ba 3915
37e680a1
CW
3916 obj->ops = ops;
3917
0327d6ba
CW
3918 obj->fence_reg = I915_FENCE_REG_NONE;
3919 obj->madv = I915_MADV_WILLNEED;
0327d6ba 3920
f19ec8cb 3921 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
0327d6ba
CW
3922}
3923
37e680a1 3924static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
de472664 3925 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
37e680a1
CW
3926 .get_pages = i915_gem_object_get_pages_gtt,
3927 .put_pages = i915_gem_object_put_pages_gtt,
3928};
3929
d37cd8a8 3930struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
05394f39 3931 size_t size)
ac52bc56 3932{
c397b908 3933 struct drm_i915_gem_object *obj;
5949eac4 3934 struct address_space *mapping;
1a240d4d 3935 gfp_t mask;
fe3db79b 3936 int ret;
ac52bc56 3937
42dcedd4 3938 obj = i915_gem_object_alloc(dev);
c397b908 3939 if (obj == NULL)
fe3db79b 3940 return ERR_PTR(-ENOMEM);
673a394b 3941
fe3db79b
CW
3942 ret = drm_gem_object_init(dev, &obj->base, size);
3943 if (ret)
3944 goto fail;
673a394b 3945
bed1ea95
CW
3946 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3947 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
3948 /* 965gm cannot relocate objects above 4GiB. */
3949 mask &= ~__GFP_HIGHMEM;
3950 mask |= __GFP_DMA32;
3951 }
3952
496ad9aa 3953 mapping = file_inode(obj->base.filp)->i_mapping;
bed1ea95 3954 mapping_set_gfp_mask(mapping, mask);
5949eac4 3955
37e680a1 3956 i915_gem_object_init(obj, &i915_gem_object_ops);
73aa808f 3957
c397b908
DV
3958 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3959 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 3960
3d29b842
ED
3961 if (HAS_LLC(dev)) {
3962 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
3963 * cache) for about a 10% performance improvement
3964 * compared to uncached. Graphics requests other than
3965 * display scanout are coherent with the CPU in
3966 * accessing this cache. This means in this mode we
3967 * don't need to clflush on the CPU side, and on the
3968 * GPU side we only need to flush internal caches to
3969 * get data visible to the CPU.
3970 *
3971 * However, we maintain the display planes as UC, and so
3972 * need to rebind when first used as such.
3973 */
3974 obj->cache_level = I915_CACHE_LLC;
3975 } else
3976 obj->cache_level = I915_CACHE_NONE;
3977
d861e338
DV
3978 trace_i915_gem_object_create(obj);
3979
05394f39 3980 return obj;
fe3db79b
CW
3981
3982fail:
3983 i915_gem_object_free(obj);
3984
3985 return ERR_PTR(ret);
c397b908
DV
3986}
3987
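/*
 * Usage sketch (illustrative only): i915_gem_object_create() reports
 * failure via ERR_PTR() rather than NULL, so callers check it with
 * IS_ERR()/PTR_ERR():
 *
 *	obj = i915_gem_object_create(dev, PAGE_SIZE);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 * Sizes are expected to be page-aligned; see the round_up() in
 * i915_gem_object_create_from_data() at the end of this file.
 */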
340fbd8c
CW
3988static bool discard_backing_storage(struct drm_i915_gem_object *obj)
3989{
3990 /* If we are the last user of the backing storage (be it shmemfs
3991 * pages or stolen etc), we know that the pages are going to be
3992 * immediately released. In this case, we can then skip copying
3993 * back the contents from the GPU.
3994 */
3995
3996 if (obj->madv != I915_MADV_WILLNEED)
3997 return false;
3998
3999 if (obj->base.filp == NULL)
4000 return true;
4001
4002 /* At first glance, this looks racy, but then again so would be
4003 * userspace racing mmap against close. However, the first external
4004 * reference to the filp can only be obtained through the
4005 * i915_gem_mmap_ioctl() which safeguards us against the user
4006 * acquiring such a reference whilst we are in the middle of
4007 * freeing the object.
4008 */
4009 return atomic_long_read(&obj->base.filp->f_count) == 1;
4010}
4011
1488fc08 4012void i915_gem_free_object(struct drm_gem_object *gem_obj)
673a394b 4013{
1488fc08 4014 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
05394f39 4015 struct drm_device *dev = obj->base.dev;
fac5e23e 4016 struct drm_i915_private *dev_priv = to_i915(dev);
07fe0b12 4017 struct i915_vma *vma, *next;
673a394b 4018
f65c9168
PZ
4019 intel_runtime_pm_get(dev_priv);
4020
26e12f89
CW
4021 trace_i915_gem_object_destroy(obj);
4022
b1f788c6
CW
4023 /* All file-owned VMA should have been released by this point through
4024 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4025 * However, the object may also be bound into the global GTT (e.g.
4026 * older GPUs without per-process support, or for direct access through
4027 * the GTT either for the user or for scanout). Those VMA still need to
4028 * be unbound now.
4029 */
1c7f4bca 4030 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
3272db53 4031 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
b1f788c6 4032 GEM_BUG_ON(i915_vma_is_active(vma));
3272db53 4033 vma->flags &= ~I915_VMA_PIN_MASK;
b1f788c6 4034 i915_vma_close(vma);
1488fc08 4035 }
15717de2 4036 GEM_BUG_ON(obj->bind_count);
1488fc08 4037
1d64ae71
BW
4038 /* Stolen objects don't hold a ref, but do hold a pin count. Fix that up
4039 * before progressing. */
4040 if (obj->stolen)
4041 i915_gem_object_unpin_pages(obj);
4042
faf5bf0a 4043 WARN_ON(atomic_read(&obj->frontbuffer_bits));
a071fa00 4044
656bfa3a
DV
4045 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4046 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4047 obj->tiling_mode != I915_TILING_NONE)
4048 i915_gem_object_unpin_pages(obj);
4049
401c29f6
BW
4050 if (WARN_ON(obj->pages_pin_count))
4051 obj->pages_pin_count = 0;
340fbd8c 4052 if (discard_backing_storage(obj))
5537252b 4053 obj->madv = I915_MADV_DONTNEED;
37e680a1 4054 i915_gem_object_put_pages(obj);
de151cf6 4055
9da3da66
CW
4056 BUG_ON(obj->pages);
4057
2f745ad3
CW
4058 if (obj->base.import_attach)
4059 drm_prime_gem_destroy(&obj->base, NULL);
de151cf6 4060
5cc9ed4b
CW
4061 if (obj->ops->release)
4062 obj->ops->release(obj);
4063
05394f39
CW
4064 drm_gem_object_release(&obj->base);
4065 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 4066
05394f39 4067 kfree(obj->bit_17);
42dcedd4 4068 i915_gem_object_free(obj);
f65c9168
PZ
4069
4070 intel_runtime_pm_put(dev_priv);
673a394b
EA
4071}
4072
ec7adb6e
JL
4073struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4074 struct i915_address_space *vm)
e656a6cb
DV
4075{
4076 struct i915_vma *vma;
1c7f4bca 4077 list_for_each_entry(vma, &obj->vma_list, obj_link) {
1b683729
TU
4078 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4079 vma->vm == vm)
e656a6cb 4080 return vma;
ec7adb6e
JL
4081 }
4082 return NULL;
4083}
4084
4085struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4086 const struct i915_ggtt_view *view)
4087{
ec7adb6e 4088 struct i915_vma *vma;
e656a6cb 4089
598b9ec8 4090 GEM_BUG_ON(!view);
ec7adb6e 4091
1c7f4bca 4092 list_for_each_entry(vma, &obj->vma_list, obj_link)
3272db53
CW
4093 if (i915_vma_is_ggtt(vma) &&
4094 i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e 4095 return vma;
e656a6cb
DV
4096 return NULL;
4097}
4098
e3efda49 4099static void
117897f4 4100i915_gem_stop_engines(struct drm_device *dev)
e3efda49 4101{
fac5e23e 4102 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4103 struct intel_engine_cs *engine;
e3efda49 4104
b4ac5afc 4105 for_each_engine(engine, dev_priv)
117897f4 4106 dev_priv->gt.stop_engine(engine);
e3efda49
CW
4107}
4108
29105ccc 4109int
45c5f202 4110i915_gem_suspend(struct drm_device *dev)
29105ccc 4111{
fac5e23e 4112 struct drm_i915_private *dev_priv = to_i915(dev);
45c5f202 4113 int ret = 0;
28dfe52a 4114
54b4f68f
CW
4115 intel_suspend_gt_powersave(dev_priv);
4116
45c5f202 4117 mutex_lock(&dev->struct_mutex);
5ab57c70
CW
4118
4119 /* We have to flush all the executing contexts to main memory so
4120 * that they can be saved in the hibernation image. To ensure the last
4121 * context image is coherent, we have to switch away from it. That
4122 * leaves the dev_priv->kernel_context still active when
4123 * we actually suspend, and its image in memory may not match the GPU
4124 * state. Fortunately, the kernel_context is disposable and we do
4125 * not rely on its state.
4126 */
4127 ret = i915_gem_switch_to_kernel_context(dev_priv);
4128 if (ret)
4129 goto err;
4130
6e5a5beb 4131 ret = i915_gem_wait_for_idle(dev_priv);
f7403347 4132 if (ret)
45c5f202 4133 goto err;
f7403347 4134
c033666a 4135 i915_gem_retire_requests(dev_priv);
673a394b 4136
5ab57c70
CW
4137 /* Note that rather than stopping the engines, all we have to do
4138 * is assert that every RING_HEAD == RING_TAIL (all execution complete)
4139 * and similar for all logical context images (to ensure they are
4140 * all ready for hibernation).
4141 */
117897f4 4142 i915_gem_stop_engines(dev);
b2e862d0 4143 i915_gem_context_lost(dev_priv);
45c5f202
CW
4144 mutex_unlock(&dev->struct_mutex);
4145
737b1506 4146 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
67d97da3
CW
4147 cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4148 flush_delayed_work(&dev_priv->gt.idle_work);
29105ccc 4149
bdcf120b
CW
4150 /* Assert that we successfully flushed all the work and
4151 * reset the GPU back to its idle, low power state.
4152 */
67d97da3 4153 WARN_ON(dev_priv->gt.awake);
bdcf120b 4154
673a394b 4155 return 0;
45c5f202
CW
4156
4157err:
4158 mutex_unlock(&dev->struct_mutex);
4159 return ret;
673a394b
EA
4160}
4161
5ab57c70
CW
4162void i915_gem_resume(struct drm_device *dev)
4163{
4164 struct drm_i915_private *dev_priv = to_i915(dev);
4165
4166 mutex_lock(&dev->struct_mutex);
4167 i915_gem_restore_gtt_mappings(dev);
4168
4169 /* As we didn't flush the kernel context before suspend, we cannot
4170 * guarantee that the context image is complete. So let's just reset
4171 * it and start again.
4172 */
4173 if (i915.enable_execlists)
4174 intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
4175
4176 mutex_unlock(&dev->struct_mutex);
4177}
4178
f691e2f4
DV
4179void i915_gem_init_swizzling(struct drm_device *dev)
4180{
fac5e23e 4181 struct drm_i915_private *dev_priv = to_i915(dev);
f691e2f4 4182
11782b02 4183 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
4184 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4185 return;
4186
4187 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4188 DISP_TILE_SURFACE_SWIZZLING);
4189
11782b02
DV
4190 if (IS_GEN5(dev))
4191 return;
4192
f691e2f4
DV
4193 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4194 if (IS_GEN6(dev))
6b26c86d 4195 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
8782e26c 4196 else if (IS_GEN7(dev))
6b26c86d 4197 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
31a5336e
BW
4198 else if (IS_GEN8(dev))
4199 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
8782e26c
BW
4200 else
4201 BUG();
f691e2f4 4202}
e21af88d 4203
81e7f200
VS
4204static void init_unused_ring(struct drm_device *dev, u32 base)
4205{
fac5e23e 4206 struct drm_i915_private *dev_priv = to_i915(dev);
81e7f200
VS
4207
4208 I915_WRITE(RING_CTL(base), 0);
4209 I915_WRITE(RING_HEAD(base), 0);
4210 I915_WRITE(RING_TAIL(base), 0);
4211 I915_WRITE(RING_START(base), 0);
4212}
4213
4214static void init_unused_rings(struct drm_device *dev)
4215{
4216 if (IS_I830(dev)) {
4217 init_unused_ring(dev, PRB1_BASE);
4218 init_unused_ring(dev, SRB0_BASE);
4219 init_unused_ring(dev, SRB1_BASE);
4220 init_unused_ring(dev, SRB2_BASE);
4221 init_unused_ring(dev, SRB3_BASE);
4222 } else if (IS_GEN2(dev)) {
4223 init_unused_ring(dev, SRB0_BASE);
4224 init_unused_ring(dev, SRB1_BASE);
4225 } else if (IS_GEN3(dev)) {
4226 init_unused_ring(dev, PRB1_BASE);
4227 init_unused_ring(dev, PRB2_BASE);
4228 }
4229}
4230
4fc7c971
BW
4231int
4232i915_gem_init_hw(struct drm_device *dev)
4233{
fac5e23e 4234 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4235 struct intel_engine_cs *engine;
d200cda6 4236 int ret;
4fc7c971 4237
5e4f5189
CW
4238 /* Double layer security blanket, see i915_gem_init() */
4239 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4240
3accaf7e 4241 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
05e21cc4 4242 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4fc7c971 4243
0bf21347
VS
4244 if (IS_HASWELL(dev))
4245 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4246 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
9435373e 4247
88a2b2a3 4248 if (HAS_PCH_NOP(dev)) {
6ba844b0
DV
4249 if (IS_IVYBRIDGE(dev)) {
4250 u32 temp = I915_READ(GEN7_MSG_CTL);
4251 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4252 I915_WRITE(GEN7_MSG_CTL, temp);
4253 } else if (INTEL_INFO(dev)->gen >= 7) {
4254 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4255 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4256 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4257 }
88a2b2a3
BW
4258 }
4259
4fc7c971
BW
4260 i915_gem_init_swizzling(dev);
4261
d5abdfda
DV
4262 /*
4263 * At least 830 can leave some of the unused rings
4264 * "active" (i.e. head != tail) after resume, which
4265 * will prevent c3 entry. Make sure all unused rings
4266 * are totally idle.
4267 */
4268 init_unused_rings(dev);
4269
ed54c1a1 4270 BUG_ON(!dev_priv->kernel_context);
90638cc1 4271
4ad2fd88
JH
4272 ret = i915_ppgtt_init_hw(dev);
4273 if (ret) {
4274 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4275 goto out;
4276 }
4277
4278 /* Need to do basic initialisation of all rings first: */
b4ac5afc 4279 for_each_engine(engine, dev_priv) {
e2f80391 4280 ret = engine->init_hw(engine);
35a57ffb 4281 if (ret)
5e4f5189 4282 goto out;
35a57ffb 4283 }
99433931 4284
0ccdacf6
PA
4285 intel_mocs_init_l3cc_table(dev);
4286
33a732f4 4287 /* We can't enable contexts until all firmware is loaded */
e556f7c1
DG
4288 ret = intel_guc_setup(dev);
4289 if (ret)
4290 goto out;
33a732f4 4291
5e4f5189
CW
4292out:
4293 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2fa48d8d 4294 return ret;
8187a2b7
ZN
4295}
4296
39df9190
CW
4297bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4298{
4299 if (INTEL_INFO(dev_priv)->gen < 6)
4300 return false;
4301
4302 /* TODO: make semaphores and Execlists play nicely together */
4303 if (i915.enable_execlists)
4304 return false;
4305
4306 if (value >= 0)
4307 return value;
4308
4309#ifdef CONFIG_INTEL_IOMMU
4310 /* Enable semaphores on SNB when IO remapping is off */
4311 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4312 return false;
4313#endif
4314
4315 return true;
4316}
4317
1070a42b
CW
4318int i915_gem_init(struct drm_device *dev)
4319{
fac5e23e 4320 struct drm_i915_private *dev_priv = to_i915(dev);
1070a42b
CW
4321 int ret;
4322
1070a42b 4323 mutex_lock(&dev->struct_mutex);
d62b4892 4324
a83014d3 4325 if (!i915.enable_execlists) {
7e37f889
CW
4326 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4327 dev_priv->gt.stop_engine = intel_engine_stop;
454afebd 4328 } else {
117897f4
TU
4329 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4330 dev_priv->gt.stop_engine = intel_logical_ring_stop;
a83014d3
OM
4331 }
4332
5e4f5189
CW
4333 /* This is just a security blanket to placate dragons.
4334 * On some systems, we very sporadically observe that the first TLBs
4335 * used by the CS may be stale, despite us poking the TLB reset. If
4336 * we hold the forcewake during initialisation these problems
4337 * just magically go away.
4338 */
4339 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4340
72778cb2 4341 i915_gem_init_userptr(dev_priv);
f6b9d5ca
CW
4342
4343 ret = i915_gem_init_ggtt(dev_priv);
4344 if (ret)
4345 goto out_unlock;
d62b4892 4346
2fa48d8d 4347 ret = i915_gem_context_init(dev);
7bcc3777
JN
4348 if (ret)
4349 goto out_unlock;
2fa48d8d 4350
8b3e2d36 4351 ret = intel_engines_init(dev);
35a57ffb 4352 if (ret)
7bcc3777 4353 goto out_unlock;
2fa48d8d 4354
1070a42b 4355 ret = i915_gem_init_hw(dev);
60990320 4356 if (ret == -EIO) {
7e21d648 4357 /* Allow engine initialisation to fail by marking the GPU as
60990320
CW
4358 * wedged. But we only want to do this where the GPU is angry;
4359 * for all other failures, such as an allocation failure, bail.
4360 */
4361 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
805de8f4 4362 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
60990320 4363 ret = 0;
1070a42b 4364 }
7bcc3777
JN
4365
4366out_unlock:
5e4f5189 4367 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
60990320 4368 mutex_unlock(&dev->struct_mutex);
1070a42b 4369
60990320 4370 return ret;
1070a42b
CW
4371}
4372
8187a2b7 4373void
117897f4 4374i915_gem_cleanup_engines(struct drm_device *dev)
8187a2b7 4375{
fac5e23e 4376 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4377 struct intel_engine_cs *engine;
8187a2b7 4378
b4ac5afc 4379 for_each_engine(engine, dev_priv)
117897f4 4380 dev_priv->gt.cleanup_engine(engine);
8187a2b7
ZN
4381}
4382
64193406 4383static void
666796da 4384init_engine_lists(struct intel_engine_cs *engine)
64193406 4385{
0bc40be8 4386 INIT_LIST_HEAD(&engine->request_list);
64193406
CW
4387}
4388
40ae4e16
ID
4389void
4390i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4391{
91c8a326 4392 struct drm_device *dev = &dev_priv->drm;
40ae4e16
ID
4393
4394 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4395 !IS_CHERRYVIEW(dev_priv))
4396 dev_priv->num_fence_regs = 32;
4397 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4398 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4399 dev_priv->num_fence_regs = 16;
4400 else
4401 dev_priv->num_fence_regs = 8;
4402
c033666a 4403 if (intel_vgpu_active(dev_priv))
40ae4e16
ID
4404 dev_priv->num_fence_regs =
4405 I915_READ(vgtif_reg(avail_rs.fence_num));
4406
4407 /* Initialize fence registers to zero */
4408 i915_gem_restore_fences(dev);
4409
4410 i915_gem_detect_bit_6_swizzle(dev);
4411}
4412
673a394b 4413void
d64aa096 4414i915_gem_load_init(struct drm_device *dev)
673a394b 4415{
fac5e23e 4416 struct drm_i915_private *dev_priv = to_i915(dev);
42dcedd4
CW
4417 int i;
4418
efab6d8d 4419 dev_priv->objects =
42dcedd4
CW
4420 kmem_cache_create("i915_gem_object",
4421 sizeof(struct drm_i915_gem_object), 0,
4422 SLAB_HWCACHE_ALIGN,
4423 NULL);
e20d2ab7
CW
4424 dev_priv->vmas =
4425 kmem_cache_create("i915_gem_vma",
4426 sizeof(struct i915_vma), 0,
4427 SLAB_HWCACHE_ALIGN,
4428 NULL);
efab6d8d
CW
4429 dev_priv->requests =
4430 kmem_cache_create("i915_gem_request",
4431 sizeof(struct drm_i915_gem_request), 0,
4432 SLAB_HWCACHE_ALIGN,
4433 NULL);
673a394b 4434
a33afea5 4435 INIT_LIST_HEAD(&dev_priv->context_list);
6c085a72
CW
4436 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4437 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
a09ba7fa 4438 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
666796da
TU
4439 for (i = 0; i < I915_NUM_ENGINES; i++)
4440 init_engine_lists(&dev_priv->engine[i]);
4b9de737 4441 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 4442 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
67d97da3 4443 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
673a394b 4444 i915_gem_retire_work_handler);
67d97da3 4445 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
b29c19b6 4446 i915_gem_idle_work_handler);
1f15b76f 4447 init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
1f83fee0 4448 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
31169714 4449
72bfa19c
CW
4450 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4451
19b2dbde 4452 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
10ed13e4 4453
6b95a207 4454 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 4455
ce453d81
CW
4456 dev_priv->mm.interruptible = true;
4457
b5add959 4458 spin_lock_init(&dev_priv->fb_tracking.lock);
673a394b 4459}
71acb5eb 4460
d64aa096
ID
4461void i915_gem_load_cleanup(struct drm_device *dev)
4462{
4463 struct drm_i915_private *dev_priv = to_i915(dev);
4464
4465 kmem_cache_destroy(dev_priv->requests);
4466 kmem_cache_destroy(dev_priv->vmas);
4467 kmem_cache_destroy(dev_priv->objects);
4468}
4469
461fb99c
CW
4470int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4471{
4472 struct drm_i915_gem_object *obj;
4473
4474 /* Called just before we write the hibernation image.
4475 *
4476 * We need to update the domain tracking to reflect that the CPU
4477 * will be accessing all the pages to create and restore from the
4478 * hibernation image, and so upon restoration those pages will be in the
4479 * CPU domain.
4480 *
4481 * To make sure the hibernation image contains the latest state,
4482 * we update that state just before writing out the image.
4483 */
4484
4485 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
4486 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4487 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4488 }
4489
4490 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4491 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4492 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4493 }
4494
4495 return 0;
4496}
4497
f787a5f5 4498void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 4499{
f787a5f5 4500 struct drm_i915_file_private *file_priv = file->driver_priv;
15f7bbc7 4501 struct drm_i915_gem_request *request;
b962442e
EA
4502
4503 /* Clean up our request list when the client is going away, so that
4504 * later retire_requests won't dereference our soon-to-be-gone
4505 * file_priv.
4506 */
1c25595f 4507 spin_lock(&file_priv->mm.lock);
15f7bbc7 4508 list_for_each_entry(request, &file_priv->mm.request_list, client_list)
f787a5f5 4509 request->file_priv = NULL;
1c25595f 4510 spin_unlock(&file_priv->mm.lock);
b29c19b6 4511
2e1b8730 4512 if (!list_empty(&file_priv->rps.link)) {
8d3afd7d 4513 spin_lock(&to_i915(dev)->rps.client_lock);
2e1b8730 4514 list_del(&file_priv->rps.link);
8d3afd7d 4515 spin_unlock(&to_i915(dev)->rps.client_lock);
1854d5ca 4516 }
b29c19b6
CW
4517}
4518
4519int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4520{
4521 struct drm_i915_file_private *file_priv;
e422b888 4522 int ret;
b29c19b6
CW
4523
4524 DRM_DEBUG_DRIVER("\n");
4525
4526 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4527 if (!file_priv)
4528 return -ENOMEM;
4529
4530 file->driver_priv = file_priv;
f19ec8cb 4531 file_priv->dev_priv = to_i915(dev);
ab0e7ff9 4532 file_priv->file = file;
2e1b8730 4533 INIT_LIST_HEAD(&file_priv->rps.link);
b29c19b6
CW
4534
4535 spin_lock_init(&file_priv->mm.lock);
4536 INIT_LIST_HEAD(&file_priv->mm.request_list);
b29c19b6 4537
c80ff16e 4538 file_priv->bsd_engine = -1;
de1add36 4539
e422b888
BW
4540 ret = i915_gem_context_open(dev, file);
4541 if (ret)
4542 kfree(file_priv);
b29c19b6 4543
e422b888 4544 return ret;
b29c19b6
CW
4545}
4546
b680c37a
DV
4547/**
4548 * i915_gem_track_fb - update frontbuffer tracking
d9072a3e
GT
4549 * @old: current GEM buffer for the frontbuffer slots
4550 * @new: new GEM buffer for the frontbuffer slots
4551 * @frontbuffer_bits: bitmask of frontbuffer slots
b680c37a
DV
4552 *
4553 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4554 * from @old and setting them in @new. Both @old and @new can be NULL.
4555 */
a071fa00
DV
4556void i915_gem_track_fb(struct drm_i915_gem_object *old,
4557 struct drm_i915_gem_object *new,
4558 unsigned frontbuffer_bits)
4559{
faf5bf0a
CW
4560 /* Control of individual bits within the mask is guarded by
4561 * the owning plane->mutex, i.e. we can never see concurrent
4562 * manipulation of individual bits. But since the bitfield as a whole
4563 * is updated using RMW, we need to use atomics in order to update
4564 * the bits.
4565 */
4566 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4567 sizeof(atomic_t) * BITS_PER_BYTE);
4568
a071fa00 4569 if (old) {
faf5bf0a
CW
4570 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4571 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
a071fa00
DV
4572 }
4573
4574 if (new) {
faf5bf0a
CW
4575 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4576 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
a071fa00
DV
4577 }
4578}
4579
a70a3148 4580/* All the new VM stuff */
088e0df4
MT
4581u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4582 struct i915_address_space *vm)
a70a3148 4583{
fac5e23e 4584 struct drm_i915_private *dev_priv = to_i915(o->base.dev);
a70a3148
BW
4585 struct i915_vma *vma;
4586
896ab1a5 4587 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
a70a3148 4588
1c7f4bca 4589 list_for_each_entry(vma, &o->vma_list, obj_link) {
3272db53 4590 if (i915_vma_is_ggtt(vma) &&
ec7adb6e
JL
4591 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4592 continue;
4593 if (vma->vm == vm)
a70a3148 4594 return vma->node.start;
a70a3148 4595 }
ec7adb6e 4596
f25748ea
DV
4597 WARN(1, "%s vma for this object not found.\n",
4598 i915_is_ggtt(vm) ? "global" : "ppgtt");
a70a3148
BW
4599 return -1;
4600}
4601
088e0df4
MT
4602u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4603 const struct i915_ggtt_view *view)
a70a3148
BW
4604{
4605 struct i915_vma *vma;
4606
1c7f4bca 4607 list_for_each_entry(vma, &o->vma_list, obj_link)
3272db53
CW
4608 if (i915_vma_is_ggtt(vma) &&
4609 i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e
JL
4610 return vma->node.start;
4611
5678ad73 4612 WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
ec7adb6e
JL
4613 return -1;
4614}
4615
4616bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4617 struct i915_address_space *vm)
4618{
4619 struct i915_vma *vma;
4620
1c7f4bca 4621 list_for_each_entry(vma, &o->vma_list, obj_link) {
3272db53 4622 if (i915_vma_is_ggtt(vma) &&
ec7adb6e
JL
4623 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4624 continue;
4625 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4626 return true;
4627 }
4628
4629 return false;
4630}
4631
4632bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
9abc4648 4633 const struct i915_ggtt_view *view)
ec7adb6e 4634{
ec7adb6e
JL
4635 struct i915_vma *vma;
4636
1c7f4bca 4637 list_for_each_entry(vma, &o->vma_list, obj_link)
3272db53 4638 if (i915_vma_is_ggtt(vma) &&
9abc4648 4639 i915_ggtt_view_equal(&vma->ggtt_view, view) &&
fe14d5f4 4640 drm_mm_node_allocated(&vma->node))
a70a3148
BW
4641 return true;
4642
4643 return false;
4644}
4645
8da32727 4646unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
a70a3148 4647{
a70a3148
BW
4648 struct i915_vma *vma;
4649
8da32727 4650 GEM_BUG_ON(list_empty(&o->vma_list));
a70a3148 4651
1c7f4bca 4652 list_for_each_entry(vma, &o->vma_list, obj_link) {
3272db53 4653 if (i915_vma_is_ggtt(vma) &&
8da32727 4654 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
a70a3148 4655 return vma->node.size;
ec7adb6e 4656 }
8da32727 4657
a70a3148
BW
4658 return 0;
4659}
4660
ec7adb6e 4661bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5c2abbea
BW
4662{
4663 struct i915_vma *vma;
1c7f4bca 4664 list_for_each_entry(vma, &obj->vma_list, obj_link)
20dfbde4 4665 if (i915_vma_is_pinned(vma))
ec7adb6e 4666 return true;
a6631ae1 4667
ec7adb6e 4668 return false;
5c2abbea 4669}
ea70299d 4670
033908ae
DG
4671/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4672struct page *
4673i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4674{
4675 struct page *page;
4676
4677 /* Only default objects have per-page dirty tracking */
b9bcd14a 4678 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
033908ae
DG
4679 return NULL;
4680
4681 page = i915_gem_object_get_page(obj, n);
4682 set_page_dirty(page);
4683 return page;
4684}
4685
ea70299d
DG
4686/* Allocate a new GEM object and fill it with the supplied data */
4687struct drm_i915_gem_object *
4688i915_gem_object_create_from_data(struct drm_device *dev,
4689 const void *data, size_t size)
4690{
4691 struct drm_i915_gem_object *obj;
4692 struct sg_table *sg;
4693 size_t bytes;
4694 int ret;
4695
d37cd8a8 4696 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
fe3db79b 4697 if (IS_ERR(obj))
ea70299d
DG
4698 return obj;
4699
4700 ret = i915_gem_object_set_to_cpu_domain(obj, true);
4701 if (ret)
4702 goto fail;
4703
4704 ret = i915_gem_object_get_pages(obj);
4705 if (ret)
4706 goto fail;
4707
4708 i915_gem_object_pin_pages(obj);
4709 sg = obj->pages;
4710 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
9e7d18c0 4711 obj->dirty = 1; /* Backing store is now out of date */
ea70299d
DG
4712 i915_gem_object_unpin_pages(obj);
4713
4714 if (WARN_ON(bytes != size)) {
4715 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4716 ret = -EFAULT;
4717 goto fail;
4718 }
4719
4720 return obj;
4721
4722fail:
f8c417cd 4723 i915_gem_object_put(obj);
ea70299d
DG
4724 return ERR_PTR(ret);
4725}