/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

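/* Common ioctl prologue: wait (interruptibly) for any pending GPU error
 * handling to finish, then take struct_mutex interruptibly.
 */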
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

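/* Report the total size of the global GTT and how much of it is still
 * available, i.e. not taken up by pinned (unevictable) objects.
 */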
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
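	/* For example, a 1920x1080 dumb buffer at 32 bpp works out to
	 * (32 + 7) / 8 = 4 bytes per pixel, pitch = ALIGN(1920 * 4, 64) = 7680
	 * and size = 7680 * 1080 bytes. */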
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

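/* Helpers for objects that need bit-17 swizzle fixups: on chipsets where the
 * memory controller folds physical address bit 17 into bit 6 of a tiled
 * access, userspace cannot know bit 17 of a page's physical address, so the
 * kernel swaps the two 64-byte halves of every 128-byte span while copying
 * (gpu_offset ^ 64). ALIGN(gpu_offset + 1, 64) caps each chunk so that no
 * single copy crosses a 64-byte boundary.
 */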
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}
310
d174bd64
DV
311/* Per-page copy function for the shmem pread fastpath.
312 * Flushes invalid cachelines before reading the target if
313 * needs_clflush is set. */
eb01459f 314static int
d174bd64
DV
315shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
316 char __user *user_data,
317 bool page_do_bit17_swizzling, bool needs_clflush)
318{
319 char *vaddr;
320 int ret;
321
e7e58eb5 322 if (unlikely(page_do_bit17_swizzling))
d174bd64
DV
323 return -EINVAL;
324
325 vaddr = kmap_atomic(page);
326 if (needs_clflush)
327 drm_clflush_virt_range(vaddr + shmem_page_offset,
328 page_length);
329 ret = __copy_to_user_inatomic(user_data,
330 vaddr + shmem_page_offset,
331 page_length);
332 kunmap_atomic(vaddr);
333
334 return ret;
335}
336
23c18c71
DV
337static void
338shmem_clflush_swizzled_range(char *addr, unsigned long length,
339 bool swizzled)
340{
e7e58eb5 341 if (unlikely(swizzled)) {
23c18c71
DV
342 unsigned long start = (unsigned long) addr;
343 unsigned long end = (unsigned long) addr + length;
344
345 /* For swizzling simply ensure that we always flush both
346 * channels. Lame, but simple and it works. Swizzled
347 * pwrite/pread is far from a hotpath - current userspace
348 * doesn't use it at all. */
349 start = round_down(start, 128);
350 end = round_up(end, 128);
351
352 drm_clflush_virt_range((void *)start, end - start);
353 } else {
354 drm_clflush_virt_range(addr, length);
355 }
356
357}
358
d174bd64
DV
359/* Only difference to the fast-path function is that this can handle bit17
360 * and uses non-atomic copy and kmap functions. */
361static int
362shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
363 char __user *user_data,
364 bool page_do_bit17_swizzling, bool needs_clflush)
365{
366 char *vaddr;
367 int ret;
368
369 vaddr = kmap(page);
370 if (needs_clflush)
23c18c71
DV
371 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
372 page_length,
373 page_do_bit17_swizzling);
d174bd64
DV
374
375 if (page_do_bit17_swizzling)
376 ret = __copy_to_user_swizzled(user_data,
377 vaddr, shmem_page_offset,
378 page_length);
379 else
380 ret = __copy_to_user(user_data,
381 vaddr + shmem_page_offset,
382 page_length);
383 kunmap(page);
384
385 return ret;
386}
387
eb01459f 388static int
dbf7bff0
DV
389i915_gem_shmem_pread(struct drm_device *dev,
390 struct drm_i915_gem_object *obj,
391 struct drm_i915_gem_pread *args,
392 struct drm_file *file)
eb01459f 393{
05394f39 394 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
8461d226 395 char __user *user_data;
eb01459f 396 ssize_t remain;
8461d226 397 loff_t offset;
eb2c0c81 398 int shmem_page_offset, page_length, ret = 0;
8461d226 399 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
dbf7bff0 400 int hit_slowpath = 0;
96d79b52 401 int prefaulted = 0;
8489731c 402 int needs_clflush = 0;
692a576b 403 int release_page;
eb01459f 404
8461d226 405 user_data = (char __user *) (uintptr_t) args->data_ptr;
eb01459f
EA
406 remain = args->size;
407
8461d226 408 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
eb01459f 409
8489731c
DV
410 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
411 /* If we're not in the cpu read domain, set ourself into the gtt
412 * read domain and manually flush cachelines (if required). This
413 * optimizes for the case when the gpu will dirty the data
414 * anyway again before the next pread happens. */
415 if (obj->cache_level == I915_CACHE_NONE)
416 needs_clflush = 1;
417 ret = i915_gem_object_set_to_gtt_domain(obj, false);
418 if (ret)
419 return ret;
420 }
eb01459f 421
8461d226 422 offset = args->offset;
eb01459f
EA
423
424 while (remain > 0) {
e5281ccd
CW
425 struct page *page;
426
eb01459f
EA
427 /* Operation in this page
428 *
eb01459f 429 * shmem_page_offset = offset within page in shmem file
eb01459f
EA
430 * page_length = bytes to copy for this page
431 */
c8cbbb8b 432 shmem_page_offset = offset_in_page(offset);
eb01459f
EA
433 page_length = remain;
434 if ((shmem_page_offset + page_length) > PAGE_SIZE)
435 page_length = PAGE_SIZE - shmem_page_offset;
eb01459f 436
692a576b
DV
437 if (obj->pages) {
438 page = obj->pages[offset >> PAGE_SHIFT];
439 release_page = 0;
440 } else {
441 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
442 if (IS_ERR(page)) {
443 ret = PTR_ERR(page);
444 goto out;
445 }
446 release_page = 1;
b65552f0 447 }
e5281ccd 448
8461d226
DV
449 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
450 (page_to_phys(page) & (1 << 17)) != 0;
451
d174bd64
DV
452 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
453 user_data, page_do_bit17_swizzling,
454 needs_clflush);
455 if (ret == 0)
456 goto next_page;
dbf7bff0
DV
457
458 hit_slowpath = 1;
692a576b 459 page_cache_get(page);
dbf7bff0
DV
460 mutex_unlock(&dev->struct_mutex);
461
96d79b52 462 if (!prefaulted) {
f56f821f 463 ret = fault_in_multipages_writeable(user_data, remain);
96d79b52
DV
464 /* Userspace is tricking us, but we've already clobbered
465 * its pages with the prefault and promised to write the
466 * data up to the first fault. Hence ignore any errors
467 * and just continue. */
468 (void)ret;
469 prefaulted = 1;
470 }
eb01459f 471
d174bd64
DV
472 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
473 user_data, page_do_bit17_swizzling,
474 needs_clflush);
eb01459f 475
dbf7bff0 476 mutex_lock(&dev->struct_mutex);
e5281ccd 477 page_cache_release(page);
dbf7bff0 478next_page:
e5281ccd 479 mark_page_accessed(page);
692a576b
DV
480 if (release_page)
481 page_cache_release(page);
e5281ccd 482
8461d226
DV
483 if (ret) {
484 ret = -EFAULT;
485 goto out;
486 }
487
eb01459f 488 remain -= page_length;
8461d226 489 user_data += page_length;
eb01459f
EA
490 offset += page_length;
491 }
492
4f27b75d 493out:
dbf7bff0
DV
494 if (hit_slowpath) {
495 /* Fixup: Kill any reinstated backing storage pages */
496 if (obj->madv == __I915_MADV_PURGED)
497 i915_gem_object_truncate(obj);
498 }
eb01459f
EA
499
500 return ret;
501}
502
673a394b
EA
503/**
504 * Reads data from the object referenced by handle.
505 *
506 * On error, the contents of *data are undefined.
507 */
508int
509i915_gem_pread_ioctl(struct drm_device *dev, void *data,
05394f39 510 struct drm_file *file)
673a394b
EA
511{
512 struct drm_i915_gem_pread *args = data;
05394f39 513 struct drm_i915_gem_object *obj;
35b62a89 514 int ret = 0;
673a394b 515
51311d0a
CW
516 if (args->size == 0)
517 return 0;
518
519 if (!access_ok(VERIFY_WRITE,
520 (char __user *)(uintptr_t)args->data_ptr,
521 args->size))
522 return -EFAULT;
523
4f27b75d 524 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 525 if (ret)
4f27b75d 526 return ret;
673a394b 527
05394f39 528 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 529 if (&obj->base == NULL) {
1d7cfea1
CW
530 ret = -ENOENT;
531 goto unlock;
4f27b75d 532 }
673a394b 533
7dcd2499 534 /* Bounds check source. */
05394f39
CW
535 if (args->offset > obj->base.size ||
536 args->size > obj->base.size - args->offset) {
ce9d419d 537 ret = -EINVAL;
35b62a89 538 goto out;
ce9d419d
CW
539 }
540
db53a302
CW
541 trace_i915_gem_object_pread(obj, args->offset, args->size);
542
dbf7bff0 543 ret = i915_gem_shmem_pread(dev, obj, args, file);
673a394b 544
35b62a89 545out:
05394f39 546 drm_gem_object_unreference(&obj->base);
1d7cfea1 547unlock:
4f27b75d 548 mutex_unlock(&dev->struct_mutex);
eb01459f 549 return ret;
673a394b
EA
550}
551
0839ccb8
KP
552/* This is the fast write path which cannot handle
553 * page faults in the source data
9b7530cc 554 */
0839ccb8
KP
555
556static inline int
557fast_user_write(struct io_mapping *mapping,
558 loff_t page_base, int page_offset,
559 char __user *user_data,
560 int length)
9b7530cc 561{
4f0c7cfb
BW
562 void __iomem *vaddr_atomic;
563 void *vaddr;
0839ccb8 564 unsigned long unwritten;
9b7530cc 565
3e4d3af5 566 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
4f0c7cfb
BW
567 /* We can use the cpu mem copy function because this is X86. */
568 vaddr = (void __force*)vaddr_atomic + page_offset;
569 unwritten = __copy_from_user_inatomic_nocache(vaddr,
0839ccb8 570 user_data, length);
3e4d3af5 571 io_mapping_unmap_atomic(vaddr_atomic);
fbd5a26d 572 return unwritten;
0839ccb8
KP
573}
574
3de09aa3
EA
575/**
576 * This is the fast pwrite path, where we copy the data directly from the
577 * user into the GTT, uncached.
578 */
673a394b 579static int
05394f39
CW
580i915_gem_gtt_pwrite_fast(struct drm_device *dev,
581 struct drm_i915_gem_object *obj,
3de09aa3 582 struct drm_i915_gem_pwrite *args,
05394f39 583 struct drm_file *file)
673a394b 584{
0839ccb8 585 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 586 ssize_t remain;
0839ccb8 587 loff_t offset, page_base;
673a394b 588 char __user *user_data;
935aaa69
DV
589 int page_offset, page_length, ret;
590
591 ret = i915_gem_object_pin(obj, 0, true);
592 if (ret)
593 goto out;
594
595 ret = i915_gem_object_set_to_gtt_domain(obj, true);
596 if (ret)
597 goto out_unpin;
598
599 ret = i915_gem_object_put_fence(obj);
600 if (ret)
601 goto out_unpin;
673a394b
EA
602
603 user_data = (char __user *) (uintptr_t) args->data_ptr;
604 remain = args->size;
673a394b 605
05394f39 606 offset = obj->gtt_offset + args->offset;
673a394b
EA
607
608 while (remain > 0) {
609 /* Operation in this page
610 *
0839ccb8
KP
611 * page_base = page offset within aperture
612 * page_offset = offset within page
613 * page_length = bytes to copy for this page
673a394b 614 */
c8cbbb8b
CW
615 page_base = offset & PAGE_MASK;
616 page_offset = offset_in_page(offset);
0839ccb8
KP
617 page_length = remain;
618 if ((page_offset + remain) > PAGE_SIZE)
619 page_length = PAGE_SIZE - page_offset;
620
0839ccb8 621 /* If we get a fault while copying data, then (presumably) our
3de09aa3
EA
622 * source page isn't available. Return the error and we'll
623 * retry in the slow path.
0839ccb8 624 */
fbd5a26d 625 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
935aaa69
DV
626 page_offset, user_data, page_length)) {
627 ret = -EFAULT;
628 goto out_unpin;
629 }
673a394b 630
0839ccb8
KP
631 remain -= page_length;
632 user_data += page_length;
633 offset += page_length;
673a394b 634 }
673a394b 635
935aaa69
DV
636out_unpin:
637 i915_gem_object_unpin(obj);
638out:
3de09aa3 639 return ret;
673a394b
EA
640}
641
d174bd64
DV
642/* Per-page copy function for the shmem pwrite fastpath.
643 * Flushes invalid cachelines before writing to the target if
644 * needs_clflush_before is set and flushes out any written cachelines after
645 * writing if needs_clflush is set. */
3043c60c 646static int
d174bd64
DV
647shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
648 char __user *user_data,
649 bool page_do_bit17_swizzling,
650 bool needs_clflush_before,
651 bool needs_clflush_after)
673a394b 652{
d174bd64 653 char *vaddr;
673a394b 654 int ret;
3de09aa3 655
e7e58eb5 656 if (unlikely(page_do_bit17_swizzling))
d174bd64 657 return -EINVAL;
3de09aa3 658
d174bd64
DV
659 vaddr = kmap_atomic(page);
660 if (needs_clflush_before)
661 drm_clflush_virt_range(vaddr + shmem_page_offset,
662 page_length);
663 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
664 user_data,
665 page_length);
666 if (needs_clflush_after)
667 drm_clflush_virt_range(vaddr + shmem_page_offset,
668 page_length);
669 kunmap_atomic(vaddr);
3de09aa3
EA
670
671 return ret;
672}
673
d174bd64
DV
674/* Only difference to the fast-path function is that this can handle bit17
675 * and uses non-atomic copy and kmap functions. */
3043c60c 676static int
d174bd64
DV
677shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
678 char __user *user_data,
679 bool page_do_bit17_swizzling,
680 bool needs_clflush_before,
681 bool needs_clflush_after)
673a394b 682{
d174bd64
DV
683 char *vaddr;
684 int ret;
e5281ccd 685
d174bd64 686 vaddr = kmap(page);
e7e58eb5 687 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
23c18c71
DV
688 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
689 page_length,
690 page_do_bit17_swizzling);
d174bd64
DV
691 if (page_do_bit17_swizzling)
692 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
e5281ccd
CW
693 user_data,
694 page_length);
d174bd64
DV
695 else
696 ret = __copy_from_user(vaddr + shmem_page_offset,
697 user_data,
698 page_length);
699 if (needs_clflush_after)
23c18c71
DV
700 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
701 page_length,
702 page_do_bit17_swizzling);
d174bd64 703 kunmap(page);
40123c1f 704
d174bd64 705 return ret;
40123c1f
EA
706}
707
40123c1f 708static int
e244a443
DV
709i915_gem_shmem_pwrite(struct drm_device *dev,
710 struct drm_i915_gem_object *obj,
711 struct drm_i915_gem_pwrite *args,
712 struct drm_file *file)
40123c1f 713{
05394f39 714 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
40123c1f 715 ssize_t remain;
8c59967c
DV
716 loff_t offset;
717 char __user *user_data;
eb2c0c81 718 int shmem_page_offset, page_length, ret = 0;
8c59967c 719 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
e244a443 720 int hit_slowpath = 0;
58642885
DV
721 int needs_clflush_after = 0;
722 int needs_clflush_before = 0;
692a576b 723 int release_page;
40123c1f 724
8c59967c 725 user_data = (char __user *) (uintptr_t) args->data_ptr;
40123c1f
EA
726 remain = args->size;
727
8c59967c 728 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
40123c1f 729
58642885
DV
730 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
731 /* If we're not in the cpu write domain, set ourself into the gtt
732 * write domain and manually flush cachelines (if required). This
733 * optimizes for the case when the gpu will use the data
734 * right away and we therefore have to clflush anyway. */
735 if (obj->cache_level == I915_CACHE_NONE)
736 needs_clflush_after = 1;
737 ret = i915_gem_object_set_to_gtt_domain(obj, true);
738 if (ret)
739 return ret;
740 }
741 /* Same trick applies for invalidate partially written cachelines before
742 * writing. */
743 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
744 && obj->cache_level == I915_CACHE_NONE)
745 needs_clflush_before = 1;
746
673a394b 747 offset = args->offset;
05394f39 748 obj->dirty = 1;
673a394b 749
40123c1f 750 while (remain > 0) {
e5281ccd 751 struct page *page;
58642885 752 int partial_cacheline_write;
e5281ccd 753
40123c1f
EA
754 /* Operation in this page
755 *
40123c1f 756 * shmem_page_offset = offset within page in shmem file
40123c1f
EA
757 * page_length = bytes to copy for this page
758 */
c8cbbb8b 759 shmem_page_offset = offset_in_page(offset);
40123c1f
EA
760
761 page_length = remain;
762 if ((shmem_page_offset + page_length) > PAGE_SIZE)
763 page_length = PAGE_SIZE - shmem_page_offset;
40123c1f 764
58642885
DV
765 /* If we don't overwrite a cacheline completely we need to be
766 * careful to have up-to-date data by first clflushing. Don't
767 * overcomplicate things and flush the entire patch. */
768 partial_cacheline_write = needs_clflush_before &&
769 ((shmem_page_offset | page_length)
770 & (boot_cpu_data.x86_clflush_size - 1));
771
692a576b
DV
772 if (obj->pages) {
773 page = obj->pages[offset >> PAGE_SHIFT];
774 release_page = 0;
775 } else {
776 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
777 if (IS_ERR(page)) {
778 ret = PTR_ERR(page);
779 goto out;
780 }
781 release_page = 1;
e5281ccd
CW
782 }
783
8c59967c
DV
784 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
785 (page_to_phys(page) & (1 << 17)) != 0;
786
d174bd64
DV
787 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
788 user_data, page_do_bit17_swizzling,
789 partial_cacheline_write,
790 needs_clflush_after);
791 if (ret == 0)
792 goto next_page;
e244a443
DV
793
794 hit_slowpath = 1;
692a576b 795 page_cache_get(page);
e244a443
DV
796 mutex_unlock(&dev->struct_mutex);
797
d174bd64
DV
798 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
799 user_data, page_do_bit17_swizzling,
800 partial_cacheline_write,
801 needs_clflush_after);
40123c1f 802
e244a443 803 mutex_lock(&dev->struct_mutex);
692a576b 804 page_cache_release(page);
e244a443 805next_page:
e5281ccd
CW
806 set_page_dirty(page);
807 mark_page_accessed(page);
692a576b
DV
808 if (release_page)
809 page_cache_release(page);
e5281ccd 810
8c59967c
DV
811 if (ret) {
812 ret = -EFAULT;
813 goto out;
814 }
815
40123c1f 816 remain -= page_length;
8c59967c 817 user_data += page_length;
40123c1f 818 offset += page_length;
673a394b
EA
819 }
820
fbd5a26d 821out:
e244a443
DV
822 if (hit_slowpath) {
823 /* Fixup: Kill any reinstated backing storage pages */
824 if (obj->madv == __I915_MADV_PURGED)
825 i915_gem_object_truncate(obj);
826 /* and flush dirty cachelines in case the object isn't in the cpu write
827 * domain anymore. */
828 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
829 i915_gem_clflush_object(obj);
830 intel_gtt_chipset_flush();
831 }
8c59967c 832 }
673a394b 833
58642885
DV
834 if (needs_clflush_after)
835 intel_gtt_chipset_flush();
836
40123c1f 837 return ret;
673a394b
EA
838}
839
840/**
841 * Writes data to the object referenced by handle.
842 *
843 * On error, the contents of the buffer that were to be modified are undefined.
844 */
845int
846i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
fbd5a26d 847 struct drm_file *file)
673a394b
EA
848{
849 struct drm_i915_gem_pwrite *args = data;
05394f39 850 struct drm_i915_gem_object *obj;
51311d0a
CW
851 int ret;
852
853 if (args->size == 0)
854 return 0;
855
856 if (!access_ok(VERIFY_READ,
857 (char __user *)(uintptr_t)args->data_ptr,
858 args->size))
859 return -EFAULT;
860
f56f821f
DV
861 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
862 args->size);
51311d0a
CW
863 if (ret)
864 return -EFAULT;
673a394b 865
fbd5a26d 866 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 867 if (ret)
fbd5a26d 868 return ret;
1d7cfea1 869
05394f39 870 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 871 if (&obj->base == NULL) {
1d7cfea1
CW
872 ret = -ENOENT;
873 goto unlock;
fbd5a26d 874 }
673a394b 875
7dcd2499 876 /* Bounds check destination. */
05394f39
CW
877 if (args->offset > obj->base.size ||
878 args->size > obj->base.size - args->offset) {
ce9d419d 879 ret = -EINVAL;
35b62a89 880 goto out;
ce9d419d
CW
881 }
882
db53a302
CW
883 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
884
935aaa69 885 ret = -EFAULT;
673a394b
EA
886 /* We can only do the GTT pwrite on untiled buffers, as otherwise
887 * it would end up going through the fenced access, and we'll get
888 * different detiling behavior between reading and writing.
889 * pread/pwrite currently are reading and writing from the CPU
890 * perspective, requiring manual detiling by the client.
891 */
5c0480f2 892 if (obj->phys_obj) {
fbd5a26d 893 ret = i915_gem_phys_pwrite(dev, obj, args, file);
5c0480f2
DV
894 goto out;
895 }
896
897 if (obj->gtt_space &&
3ae53783 898 obj->cache_level == I915_CACHE_NONE &&
c07496fa 899 obj->tiling_mode == I915_TILING_NONE &&
ffc62976 900 obj->map_and_fenceable &&
5c0480f2 901 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
fbd5a26d 902 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
935aaa69
DV
903 /* Note that the gtt paths might fail with non-page-backed user
904 * pointers (e.g. gtt mappings when moving data between
905 * textures). Fallback to the shmem path in that case. */
fbd5a26d 906 }
673a394b 907
5c0480f2 908 if (ret == -EFAULT)
935aaa69 909 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
5c0480f2 910
35b62a89 911out:
05394f39 912 drm_gem_object_unreference(&obj->base);
1d7cfea1 913unlock:
fbd5a26d 914 mutex_unlock(&dev->struct_mutex);
673a394b
EA
915 return ret;
916}
917
918/**
2ef7eeaa
EA
919 * Called when user space prepares to use an object with the CPU, either
920 * through the mmap ioctl's mapping or a GTT mapping.
673a394b
EA
921 */
922int
923i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
05394f39 924 struct drm_file *file)
673a394b
EA
925{
926 struct drm_i915_gem_set_domain *args = data;
05394f39 927 struct drm_i915_gem_object *obj;
2ef7eeaa
EA
928 uint32_t read_domains = args->read_domains;
929 uint32_t write_domain = args->write_domain;
673a394b
EA
930 int ret;
931
932 if (!(dev->driver->driver_features & DRIVER_GEM))
933 return -ENODEV;
934
2ef7eeaa 935 /* Only handle setting domains to types used by the CPU. */
21d509e3 936 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
937 return -EINVAL;
938
21d509e3 939 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
940 return -EINVAL;
941
942 /* Having something in the write domain implies it's in the read
943 * domain, and only that read domain. Enforce that in the request.
944 */
945 if (write_domain != 0 && read_domains != write_domain)
946 return -EINVAL;
947
76c1dec1 948 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 949 if (ret)
76c1dec1 950 return ret;
1d7cfea1 951
05394f39 952 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 953 if (&obj->base == NULL) {
1d7cfea1
CW
954 ret = -ENOENT;
955 goto unlock;
76c1dec1 956 }
673a394b 957
2ef7eeaa
EA
958 if (read_domains & I915_GEM_DOMAIN_GTT) {
959 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
02354392
EA
960
961 /* Silently promote "you're not bound, there was nothing to do"
962 * to success, since the client was just asking us to
963 * make sure everything was done.
964 */
965 if (ret == -EINVAL)
966 ret = 0;
2ef7eeaa 967 } else {
e47c68e9 968 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa
EA
969 }
970
05394f39 971 drm_gem_object_unreference(&obj->base);
1d7cfea1 972unlock:
673a394b
EA
973 mutex_unlock(&dev->struct_mutex);
974 return ret;
975}
976
977/**
978 * Called when user space has done writes to this buffer
979 */
980int
981i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
05394f39 982 struct drm_file *file)
673a394b
EA
983{
984 struct drm_i915_gem_sw_finish *args = data;
05394f39 985 struct drm_i915_gem_object *obj;
673a394b
EA
986 int ret = 0;
987
988 if (!(dev->driver->driver_features & DRIVER_GEM))
989 return -ENODEV;
990
76c1dec1 991 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 992 if (ret)
76c1dec1 993 return ret;
1d7cfea1 994
05394f39 995 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 996 if (&obj->base == NULL) {
1d7cfea1
CW
997 ret = -ENOENT;
998 goto unlock;
673a394b
EA
999 }
1000
673a394b 1001 /* Pinned buffers may be scanout, so flush the cache */
05394f39 1002 if (obj->pin_count)
e47c68e9
EA
1003 i915_gem_object_flush_cpu_write_domain(obj);
1004
05394f39 1005 drm_gem_object_unreference(&obj->base);
1d7cfea1 1006unlock:
673a394b
EA
1007 mutex_unlock(&dev->struct_mutex);
1008 return ret;
1009}
1010
1011/**
1012 * Maps the contents of an object, returning the address it is mapped
1013 * into.
1014 *
1015 * While the mapping holds a reference on the contents of the object, it doesn't
1016 * imply a ref on the object itself.
1017 */
1018int
1019i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
05394f39 1020 struct drm_file *file)
673a394b
EA
1021{
1022 struct drm_i915_gem_mmap *args = data;
1023 struct drm_gem_object *obj;
673a394b
EA
1024 unsigned long addr;
1025
1026 if (!(dev->driver->driver_features & DRIVER_GEM))
1027 return -ENODEV;
1028
05394f39 1029 obj = drm_gem_object_lookup(dev, file, args->handle);
673a394b 1030 if (obj == NULL)
bf79cb91 1031 return -ENOENT;
673a394b 1032
673a394b
EA
1033 down_write(&current->mm->mmap_sem);
1034 addr = do_mmap(obj->filp, 0, args->size,
1035 PROT_READ | PROT_WRITE, MAP_SHARED,
1036 args->offset);
1037 up_write(&current->mm->mmap_sem);
bc9025bd 1038 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
1039 if (IS_ERR((void *)addr))
1040 return addr;
1041
1042 args->addr_ptr = (uint64_t) addr;
1043
1044 return 0;
1045}
1046
de151cf6
JB
1047/**
1048 * i915_gem_fault - fault a page into the GTT
1049 * vma: VMA in question
1050 * vmf: fault info
1051 *
1052 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1053 * from userspace. The fault handler takes care of binding the object to
1054 * the GTT (if needed), allocating and programming a fence register (again,
1055 * only if needed based on whether the old reg is still valid or the object
1056 * is tiled) and inserting a new PTE into the faulting process.
1057 *
1058 * Note that the faulting process may involve evicting existing objects
1059 * from the GTT and/or fence registers to make room. So performance may
1060 * suffer if the GTT working set is large or there are few fence registers
1061 * left.
1062 */
1063int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1064{
05394f39
CW
1065 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1066 struct drm_device *dev = obj->base.dev;
7d1c4804 1067 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6
JB
1068 pgoff_t page_offset;
1069 unsigned long pfn;
1070 int ret = 0;
0f973f27 1071 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6
JB
1072
1073 /* We don't use vmf->pgoff since that has the fake offset */
1074 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1075 PAGE_SHIFT;
1076
d9bc7e9f
CW
1077 ret = i915_mutex_lock_interruptible(dev);
1078 if (ret)
1079 goto out;
a00b10c3 1080
db53a302
CW
1081 trace_i915_gem_object_fault(obj, page_offset, true, write);
1082
d9bc7e9f 1083 /* Now bind it into the GTT if needed */
919926ae
CW
1084 if (!obj->map_and_fenceable) {
1085 ret = i915_gem_object_unbind(obj);
1086 if (ret)
1087 goto unlock;
a00b10c3 1088 }
05394f39 1089 if (!obj->gtt_space) {
75e9e915 1090 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
c715089f
CW
1091 if (ret)
1092 goto unlock;
de151cf6 1093
e92d03bf
EA
1094 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1095 if (ret)
1096 goto unlock;
1097 }
4a684a41 1098
74898d7e
DV
1099 if (!obj->has_global_gtt_mapping)
1100 i915_gem_gtt_bind_object(obj, obj->cache_level);
1101
06d98131 1102 ret = i915_gem_object_get_fence(obj);
d9e86c0e
CW
1103 if (ret)
1104 goto unlock;
de151cf6 1105
05394f39
CW
1106 if (i915_gem_object_is_inactive(obj))
1107 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
7d1c4804 1108
6299f992
CW
1109 obj->fault_mappable = true;
1110
05394f39 1111 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
de151cf6
JB
1112 page_offset;
1113
1114 /* Finally, remap it using the new GTT offset */
1115 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
c715089f 1116unlock:
de151cf6 1117 mutex_unlock(&dev->struct_mutex);
d9bc7e9f 1118out:
de151cf6 1119 switch (ret) {
d9bc7e9f 1120 case -EIO:
045e769a 1121 case -EAGAIN:
d9bc7e9f
CW
1122 /* Give the error handler a chance to run and move the
1123 * objects off the GPU active list. Next time we service the
1124 * fault, we should be able to transition the page into the
1125 * GTT without touching the GPU (and so avoid further
1126 * EIO/EGAIN). If the GPU is wedged, then there is no issue
1127 * with coherency, just lost writes.
1128 */
045e769a 1129 set_need_resched();
c715089f
CW
1130 case 0:
1131 case -ERESTARTSYS:
bed636ab 1132 case -EINTR:
c715089f 1133 return VM_FAULT_NOPAGE;
de151cf6 1134 case -ENOMEM:
de151cf6 1135 return VM_FAULT_OOM;
de151cf6 1136 default:
c715089f 1137 return VM_FAULT_SIGBUS;
de151cf6
JB
1138 }
1139}
1140
901782b2
CW
1141/**
1142 * i915_gem_release_mmap - remove physical page mappings
1143 * @obj: obj in question
1144 *
af901ca1 1145 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1146 * relinquish ownership of the pages back to the system.
1147 *
1148 * It is vital that we remove the page mapping if we have mapped a tiled
1149 * object through the GTT and then lose the fence register due to
1150 * resource pressure. Similarly if the object has been moved out of the
1151 * aperture, than pages mapped into userspace must be revoked. Removing the
1152 * mapping will then trigger a page fault on the next user access, allowing
1153 * fixup by i915_gem_fault().
1154 */
d05ca301 1155void
05394f39 1156i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1157{
6299f992
CW
1158 if (!obj->fault_mappable)
1159 return;
901782b2 1160
f6e47884
CW
1161 if (obj->base.dev->dev_mapping)
1162 unmap_mapping_range(obj->base.dev->dev_mapping,
1163 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1164 obj->base.size, 1);
fb7d516a 1165
6299f992 1166 obj->fault_mappable = false;
901782b2
CW
1167}
1168
static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
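	/* e.g. a 700KB tiled object needs a 1MB fence region on gen3
	 * (minimum 1MB), and likewise 1MB on gen2 (512KB doubled once). */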
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
1216
5e783301
DV
1217/**
1218 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1219 * unfenced object
e28f8711
CW
1220 * @dev: the device
1221 * @size: size of the object
1222 * @tiling_mode: tiling mode of the object
5e783301
DV
1223 *
1224 * Return the required GTT alignment for an object, only taking into account
1225 * unfenced tiled surface requirements.
1226 */
467cffba 1227uint32_t
e28f8711
CW
1228i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1229 uint32_t size,
1230 int tiling_mode)
5e783301 1231{
5e783301
DV
1232 /*
1233 * Minimum alignment is 4k (GTT page size) for sane hw.
1234 */
1235 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
e28f8711 1236 tiling_mode == I915_TILING_NONE)
5e783301
DV
1237 return 4096;
1238
e28f8711
CW
1239 /* Previous hardware however needs to be aligned to a power-of-two
1240 * tile height. The simplest method for determining this is to reuse
1241 * the power-of-tile object size.
5e783301 1242 */
e28f8711 1243 return i915_gem_get_gtt_size(dev, size, tiling_mode);
5e783301
DV
1244}
1245
de151cf6 1246int
ff72145b
DA
1247i915_gem_mmap_gtt(struct drm_file *file,
1248 struct drm_device *dev,
1249 uint32_t handle,
1250 uint64_t *offset)
de151cf6 1251{
da761a6e 1252 struct drm_i915_private *dev_priv = dev->dev_private;
05394f39 1253 struct drm_i915_gem_object *obj;
de151cf6
JB
1254 int ret;
1255
1256 if (!(dev->driver->driver_features & DRIVER_GEM))
1257 return -ENODEV;
1258
76c1dec1 1259 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1260 if (ret)
76c1dec1 1261 return ret;
de151cf6 1262
ff72145b 1263 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
c8725226 1264 if (&obj->base == NULL) {
1d7cfea1
CW
1265 ret = -ENOENT;
1266 goto unlock;
1267 }
de151cf6 1268
05394f39 1269 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
da761a6e 1270 ret = -E2BIG;
ff56b0bc 1271 goto out;
da761a6e
CW
1272 }
1273
05394f39 1274 if (obj->madv != I915_MADV_WILLNEED) {
ab18282d 1275 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1d7cfea1
CW
1276 ret = -EINVAL;
1277 goto out;
ab18282d
CW
1278 }
1279
05394f39 1280 if (!obj->base.map_list.map) {
b464e9a2 1281 ret = drm_gem_create_mmap_offset(&obj->base);
1d7cfea1
CW
1282 if (ret)
1283 goto out;
de151cf6
JB
1284 }
1285
ff72145b 1286 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
de151cf6 1287
1d7cfea1 1288out:
05394f39 1289 drm_gem_object_unreference(&obj->base);
1d7cfea1 1290unlock:
de151cf6 1291 mutex_unlock(&dev->struct_mutex);
1d7cfea1 1292 return ret;
de151cf6
JB
1293}
1294
ff72145b
DA
1295/**
1296 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1297 * @dev: DRM device
1298 * @data: GTT mapping ioctl data
1299 * @file: GEM object info
1300 *
1301 * Simply returns the fake offset to userspace so it can mmap it.
1302 * The mmap call will end up in drm_gem_mmap(), which will set things
1303 * up so we can get faults in the handler above.
1304 *
1305 * The fault handler will take care of binding the object into the GTT
1306 * (since it may have been evicted to make room for something), allocating
1307 * a fence register, and mapping the appropriate aperture address into
1308 * userspace.
1309 */
1310int
1311i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1312 struct drm_file *file)
1313{
1314 struct drm_i915_gem_mmap_gtt *args = data;
1315
1316 if (!(dev->driver->driver_features & DRIVER_GEM))
1317 return -ENODEV;
1318
1319 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1320}
1321
1322
e5281ccd 1323static int
05394f39 1324i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
e5281ccd
CW
1325 gfp_t gfpmask)
1326{
e5281ccd
CW
1327 int page_count, i;
1328 struct address_space *mapping;
1329 struct inode *inode;
1330 struct page *page;
1331
1332 /* Get the list of pages out of our struct file. They'll be pinned
1333 * at this point until we release them.
1334 */
05394f39
CW
1335 page_count = obj->base.size / PAGE_SIZE;
1336 BUG_ON(obj->pages != NULL);
1337 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1338 if (obj->pages == NULL)
e5281ccd
CW
1339 return -ENOMEM;
1340
05394f39 1341 inode = obj->base.filp->f_path.dentry->d_inode;
e5281ccd 1342 mapping = inode->i_mapping;
5949eac4
HD
1343 gfpmask |= mapping_gfp_mask(mapping);
1344
e5281ccd 1345 for (i = 0; i < page_count; i++) {
5949eac4 1346 page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
e5281ccd
CW
1347 if (IS_ERR(page))
1348 goto err_pages;
1349
05394f39 1350 obj->pages[i] = page;
e5281ccd
CW
1351 }
1352
6dacfd2f 1353 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
1354 i915_gem_object_do_bit_17_swizzle(obj);
1355
1356 return 0;
1357
1358err_pages:
1359 while (i--)
05394f39 1360 page_cache_release(obj->pages[i]);
e5281ccd 1361
05394f39
CW
1362 drm_free_large(obj->pages);
1363 obj->pages = NULL;
e5281ccd
CW
1364 return PTR_ERR(page);
1365}
1366
5cdf5881 1367static void
05394f39 1368i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 1369{
05394f39 1370 int page_count = obj->base.size / PAGE_SIZE;
673a394b
EA
1371 int i;
1372
05394f39 1373 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 1374
6dacfd2f 1375 if (i915_gem_object_needs_bit17_swizzle(obj))
280b713b
EA
1376 i915_gem_object_save_bit_17_swizzle(obj);
1377
05394f39
CW
1378 if (obj->madv == I915_MADV_DONTNEED)
1379 obj->dirty = 0;
3ef94daa
CW
1380
1381 for (i = 0; i < page_count; i++) {
05394f39
CW
1382 if (obj->dirty)
1383 set_page_dirty(obj->pages[i]);
3ef94daa 1384
05394f39
CW
1385 if (obj->madv == I915_MADV_WILLNEED)
1386 mark_page_accessed(obj->pages[i]);
3ef94daa 1387
05394f39 1388 page_cache_release(obj->pages[i]);
3ef94daa 1389 }
05394f39 1390 obj->dirty = 0;
673a394b 1391
05394f39
CW
1392 drm_free_large(obj->pages);
1393 obj->pages = NULL;
673a394b
EA
1394}
1395
54cf91dc 1396void
05394f39 1397i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1ec14ad3
CW
1398 struct intel_ring_buffer *ring,
1399 u32 seqno)
673a394b 1400{
05394f39 1401 struct drm_device *dev = obj->base.dev;
69dc4987 1402 struct drm_i915_private *dev_priv = dev->dev_private;
617dbe27 1403
852835f3 1404 BUG_ON(ring == NULL);
05394f39 1405 obj->ring = ring;
673a394b
EA
1406
1407 /* Add a reference if we're newly entering the active list. */
05394f39
CW
1408 if (!obj->active) {
1409 drm_gem_object_reference(&obj->base);
1410 obj->active = 1;
673a394b 1411 }
e35a41de 1412
673a394b 1413 /* Move from whatever list we were on to the tail of execution. */
05394f39
CW
1414 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1415 list_move_tail(&obj->ring_list, &ring->active_list);
caea7476 1416
05394f39 1417 obj->last_rendering_seqno = seqno;
caea7476 1418
7dd49065 1419 if (obj->fenced_gpu_access) {
caea7476 1420 obj->last_fenced_seqno = seqno;
caea7476 1421
7dd49065
CW
1422 /* Bump MRU to take account of the delayed flush */
1423 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1424 struct drm_i915_fence_reg *reg;
1425
1426 reg = &dev_priv->fence_regs[obj->fence_reg];
1427 list_move_tail(&reg->lru_list,
1428 &dev_priv->mm.fence_list);
1429 }
caea7476
CW
1430 }
1431}
1432
1433static void
1434i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1435{
1436 list_del_init(&obj->ring_list);
1437 obj->last_rendering_seqno = 0;
15a13bbd 1438 obj->last_fenced_seqno = 0;
673a394b
EA
1439}
1440
ce44b0ea 1441static void
05394f39 1442i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
ce44b0ea 1443{
05394f39 1444 struct drm_device *dev = obj->base.dev;
ce44b0ea 1445 drm_i915_private_t *dev_priv = dev->dev_private;
ce44b0ea 1446
05394f39
CW
1447 BUG_ON(!obj->active);
1448 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
caea7476
CW
1449
1450 i915_gem_object_move_off_active(obj);
1451}
1452
1453static void
1454i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1455{
1456 struct drm_device *dev = obj->base.dev;
1457 struct drm_i915_private *dev_priv = dev->dev_private;
1458
1b50247a 1459 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
caea7476
CW
1460
1461 BUG_ON(!list_empty(&obj->gpu_write_list));
1462 BUG_ON(!obj->active);
1463 obj->ring = NULL;
1464
1465 i915_gem_object_move_off_active(obj);
1466 obj->fenced_gpu_access = false;
caea7476
CW
1467
1468 obj->active = 0;
87ca9c8a 1469 obj->pending_gpu_write = false;
caea7476
CW
1470 drm_gem_object_unreference(&obj->base);
1471
1472 WARN_ON(i915_verify_lists(dev));
ce44b0ea 1473}
673a394b 1474
963b4836
CW
1475/* Immediately discard the backing storage */
1476static void
05394f39 1477i915_gem_object_truncate(struct drm_i915_gem_object *obj)
963b4836 1478{
bb6baf76 1479 struct inode *inode;
963b4836 1480
ae9fed6b
CW
1481 /* Our goal here is to return as much of the memory as
1482 * is possible back to the system as we are called from OOM.
1483 * To do this we must instruct the shmfs to drop all of its
e2377fe0 1484 * backing pages, *now*.
ae9fed6b 1485 */
05394f39 1486 inode = obj->base.filp->f_path.dentry->d_inode;
e2377fe0 1487 shmem_truncate_range(inode, 0, (loff_t)-1);
bb6baf76 1488
a14917ee
CW
1489 if (obj->base.map_list.map)
1490 drm_gem_free_mmap_offset(&obj->base);
1491
05394f39 1492 obj->madv = __I915_MADV_PURGED;
963b4836
CW
1493}
1494
1495static inline int
05394f39 1496i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
963b4836 1497{
05394f39 1498 return obj->madv == I915_MADV_DONTNEED;
963b4836
CW
1499}
1500
63560396 1501static void
db53a302
CW
1502i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
1503 uint32_t flush_domains)
63560396 1504{
05394f39 1505 struct drm_i915_gem_object *obj, *next;
63560396 1506
05394f39 1507 list_for_each_entry_safe(obj, next,
64193406 1508 &ring->gpu_write_list,
63560396 1509 gpu_write_list) {
05394f39
CW
1510 if (obj->base.write_domain & flush_domains) {
1511 uint32_t old_write_domain = obj->base.write_domain;
63560396 1512
05394f39
CW
1513 obj->base.write_domain = 0;
1514 list_del_init(&obj->gpu_write_list);
1ec14ad3 1515 i915_gem_object_move_to_active(obj, ring,
db53a302 1516 i915_gem_next_request_seqno(ring));
63560396 1517
63560396 1518 trace_i915_gem_object_change_domain(obj,
05394f39 1519 obj->base.read_domains,
63560396
DV
1520 old_write_domain);
1521 }
1522 }
1523}
8187a2b7 1524
static u32
i915_gem_get_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 seqno = dev_priv->next_seqno;

	/* reserve 0 for non-seqno */
	if (++dev_priv->next_seqno == 0)
		dev_priv->next_seqno = 1;

	return seqno;
}

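/* Return the seqno that the next request on this ring will use, allocating
 * one lazily on first use; it stays cached in outstanding_lazy_request until
 * i915_add_request() emits the request and clears it.
 */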
u32
i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
{
	if (ring->outstanding_lazy_request == 0)
		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);

	return ring->outstanding_lazy_request;
}

3cce469c 1547int
db53a302 1548i915_add_request(struct intel_ring_buffer *ring,
f787a5f5 1549 struct drm_file *file,
db53a302 1550 struct drm_i915_gem_request *request)
673a394b 1551{
db53a302 1552 drm_i915_private_t *dev_priv = ring->dev->dev_private;
673a394b 1553 uint32_t seqno;
a71d8d94 1554 u32 request_ring_position;
673a394b 1555 int was_empty;
3cce469c
CW
1556 int ret;
1557
1558 BUG_ON(request == NULL);
53d227f2 1559 seqno = i915_gem_next_request_seqno(ring);
673a394b 1560
a71d8d94
CW
1561 /* Record the position of the start of the request so that
1562 * should we detect the updated seqno part-way through the
1563 * GPU processing the request, we never over-estimate the
1564 * position of the head.
1565 */
1566 request_ring_position = intel_ring_get_tail(ring);
1567
3cce469c
CW
1568 ret = ring->add_request(ring, &seqno);
1569 if (ret)
1570 return ret;
673a394b 1571
db53a302 1572 trace_i915_gem_request_add(ring, seqno);
673a394b
EA
1573
1574 request->seqno = seqno;
852835f3 1575 request->ring = ring;
a71d8d94 1576 request->tail = request_ring_position;
673a394b 1577 request->emitted_jiffies = jiffies;
852835f3
ZN
1578 was_empty = list_empty(&ring->request_list);
1579 list_add_tail(&request->list, &ring->request_list);
1580
db53a302
CW
1581 if (file) {
1582 struct drm_i915_file_private *file_priv = file->driver_priv;
1583
1c25595f 1584 spin_lock(&file_priv->mm.lock);
f787a5f5 1585 request->file_priv = file_priv;
b962442e 1586 list_add_tail(&request->client_list,
f787a5f5 1587 &file_priv->mm.request_list);
1c25595f 1588 spin_unlock(&file_priv->mm.lock);
b962442e 1589 }
673a394b 1590
5391d0cf 1591 ring->outstanding_lazy_request = 0;
db53a302 1592
f65d9421 1593 if (!dev_priv->mm.suspended) {
3e0dc6b0
BW
1594 if (i915_enable_hangcheck) {
1595 mod_timer(&dev_priv->hangcheck_timer,
1596 jiffies +
1597 msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1598 }
f65d9421 1599 if (was_empty)
b3b079db
CW
1600 queue_delayed_work(dev_priv->wq,
1601 &dev_priv->mm.retire_work, HZ);
f65d9421 1602 }
3cce469c 1603 return 0;
673a394b
EA
1604}
1605
f787a5f5
CW
1606static inline void
1607i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
673a394b 1608{
1c25595f 1609 struct drm_i915_file_private *file_priv = request->file_priv;
673a394b 1610
1c25595f
CW
1611 if (!file_priv)
1612 return;
1c5d22f7 1613
1c25595f 1614 spin_lock(&file_priv->mm.lock);
09bfa517
HRK
1615 if (request->file_priv) {
1616 list_del(&request->client_list);
1617 request->file_priv = NULL;
1618 }
1c25595f 1619 spin_unlock(&file_priv->mm.lock);
673a394b 1620}
673a394b 1621
dfaae392
CW
1622static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1623 struct intel_ring_buffer *ring)
9375e446 1624{
dfaae392
CW
1625 while (!list_empty(&ring->request_list)) {
1626 struct drm_i915_gem_request *request;
673a394b 1627
dfaae392
CW
1628 request = list_first_entry(&ring->request_list,
1629 struct drm_i915_gem_request,
1630 list);
de151cf6 1631
dfaae392 1632 list_del(&request->list);
f787a5f5 1633 i915_gem_request_remove_from_client(request);
dfaae392
CW
1634 kfree(request);
1635 }
673a394b 1636
dfaae392 1637 while (!list_empty(&ring->active_list)) {
05394f39 1638 struct drm_i915_gem_object *obj;
9375e446 1639
05394f39
CW
1640 obj = list_first_entry(&ring->active_list,
1641 struct drm_i915_gem_object,
1642 ring_list);
9375e446 1643
05394f39
CW
1644 obj->base.write_domain = 0;
1645 list_del_init(&obj->gpu_write_list);
1646 i915_gem_object_move_to_inactive(obj);
673a394b
EA
1647 }
1648}
1649
312817a3
CW
1650static void i915_gem_reset_fences(struct drm_device *dev)
1651{
1652 struct drm_i915_private *dev_priv = dev->dev_private;
1653 int i;
1654
4b9de737 1655 for (i = 0; i < dev_priv->num_fence_regs; i++) {
312817a3 1656 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
7d2cb39c 1657
ada726c7 1658 i915_gem_write_fence(dev, i, NULL);
7d2cb39c 1659
ada726c7
CW
1660 if (reg->obj)
1661 i915_gem_object_fence_lost(reg->obj);
7d2cb39c 1662
ada726c7
CW
1663 reg->pin_count = 0;
1664 reg->obj = NULL;
1665 INIT_LIST_HEAD(&reg->lru_list);
312817a3 1666 }
ada726c7
CW
1667
1668 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
312817a3
CW
1669}
1670
069efc1d 1671void i915_gem_reset(struct drm_device *dev)
673a394b 1672{
77f01230 1673 struct drm_i915_private *dev_priv = dev->dev_private;
05394f39 1674 struct drm_i915_gem_object *obj;
1ec14ad3 1675 int i;
673a394b 1676
1ec14ad3
CW
1677 for (i = 0; i < I915_NUM_RINGS; i++)
1678 i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);
dfaae392
CW
1679
1680 /* Remove anything from the flushing lists. The GPU cache is likely
1681 * to be lost on reset along with the data, so simply move the
1682 * lost bo to the inactive list.
1683 */
1684 while (!list_empty(&dev_priv->mm.flushing_list)) {
0206e353 1685 obj = list_first_entry(&dev_priv->mm.flushing_list,
05394f39
CW
1686 struct drm_i915_gem_object,
1687 mm_list);
dfaae392 1688
05394f39
CW
1689 obj->base.write_domain = 0;
1690 list_del_init(&obj->gpu_write_list);
1691 i915_gem_object_move_to_inactive(obj);
dfaae392
CW
1692 }
1693
1694 /* Move everything out of the GPU domains to ensure we do any
1695 * necessary invalidation upon reuse.
1696 */
05394f39 1697 list_for_each_entry(obj,
77f01230 1698 &dev_priv->mm.inactive_list,
69dc4987 1699 mm_list)
77f01230 1700 {
05394f39 1701 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
77f01230 1702 }
069efc1d
CW
1703
1704 /* The fence registers are invalidated so clear them out */
312817a3 1705 i915_gem_reset_fences(dev);
673a394b
EA
1706}
1707
1708/**
1709 * This function clears the request list as sequence numbers are passed.
1710 */
a71d8d94 1711void
db53a302 1712i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
673a394b 1713{
673a394b 1714 uint32_t seqno;
1ec14ad3 1715 int i;
673a394b 1716
db53a302 1717 if (list_empty(&ring->request_list))
6c0594a3
KW
1718 return;
1719
db53a302 1720 WARN_ON(i915_verify_lists(ring->dev));
673a394b 1721
78501eac 1722 seqno = ring->get_seqno(ring);
1ec14ad3 1723
076e2c0e 1724 for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1ec14ad3
CW
1725 if (seqno >= ring->sync_seqno[i])
1726 ring->sync_seqno[i] = 0;
1727
852835f3 1728 while (!list_empty(&ring->request_list)) {
673a394b 1729 struct drm_i915_gem_request *request;
673a394b 1730
852835f3 1731 request = list_first_entry(&ring->request_list,
673a394b
EA
1732 struct drm_i915_gem_request,
1733 list);
673a394b 1734
dfaae392 1735 if (!i915_seqno_passed(seqno, request->seqno))
b84d5f0c
CW
1736 break;
1737
db53a302 1738 trace_i915_gem_request_retire(ring, request->seqno);
a71d8d94
CW
1739 /* We know the GPU must have read the request to have
1740 * sent us the seqno + interrupt, so use the position
 1741 * of the tail of the request to update the last known position
1742 * of the GPU head.
1743 */
1744 ring->last_retired_head = request->tail;
b84d5f0c
CW
1745
1746 list_del(&request->list);
f787a5f5 1747 i915_gem_request_remove_from_client(request);
b84d5f0c
CW
1748 kfree(request);
1749 }
673a394b 1750
b84d5f0c
CW
1751 /* Move any buffers on the active list that are no longer referenced
1752 * by the ringbuffer to the flushing/inactive lists as appropriate.
1753 */
1754 while (!list_empty(&ring->active_list)) {
05394f39 1755 struct drm_i915_gem_object *obj;
b84d5f0c 1756
0206e353 1757 obj = list_first_entry(&ring->active_list,
05394f39
CW
1758 struct drm_i915_gem_object,
1759 ring_list);
673a394b 1760
05394f39 1761 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
673a394b 1762 break;
b84d5f0c 1763
05394f39 1764 if (obj->base.write_domain != 0)
b84d5f0c
CW
1765 i915_gem_object_move_to_flushing(obj);
1766 else
1767 i915_gem_object_move_to_inactive(obj);
673a394b 1768 }
9d34e5db 1769
db53a302
CW
1770 if (unlikely(ring->trace_irq_seqno &&
1771 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1ec14ad3 1772 ring->irq_put(ring);
db53a302 1773 ring->trace_irq_seqno = 0;
9d34e5db 1774 }
23bc5982 1775
db53a302 1776 WARN_ON(i915_verify_lists(ring->dev));
673a394b
EA
1777}
1778
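/*
 * Illustrative sketch, not the driver's code: the retire loop above stops at
 * the first request whose seqno has not yet been reached.  That comparison
 * must survive 32-bit wraparound, which a signed difference handles as long
 * as the two values are less than 2^31 apart (essentially the form used by
 * the driver's i915_seqno_passed() helper).  Assumes a 32-bit unsigned int.
 */
static int seqno_passed_sketch(unsigned int completed, unsigned int target)
{
        return (int)(completed - target) >= 0;
}

/* e.g. seqno_passed_sketch(0x00000002, 0xfffffffe) is true after wraparound. */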
b09a1fec
CW
1779void
1780i915_gem_retire_requests(struct drm_device *dev)
1781{
1782 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 1783 int i;
b09a1fec 1784
be72615b 1785 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
05394f39 1786 struct drm_i915_gem_object *obj, *next;
be72615b
CW
1787
1788 /* We must be careful that during unbind() we do not
1789 * accidentally infinitely recurse into retire requests.
1790 * Currently:
1791 * retire -> free -> unbind -> wait -> retire_ring
1792 */
05394f39 1793 list_for_each_entry_safe(obj, next,
be72615b 1794 &dev_priv->mm.deferred_free_list,
69dc4987 1795 mm_list)
05394f39 1796 i915_gem_free_object_tail(obj);
be72615b
CW
1797 }
1798
1ec14ad3 1799 for (i = 0; i < I915_NUM_RINGS; i++)
db53a302 1800 i915_gem_retire_requests_ring(&dev_priv->ring[i]);
b09a1fec
CW
1801}
1802
75ef9da2 1803static void
673a394b
EA
1804i915_gem_retire_work_handler(struct work_struct *work)
1805{
1806 drm_i915_private_t *dev_priv;
1807 struct drm_device *dev;
0a58705b
CW
1808 bool idle;
1809 int i;
673a394b
EA
1810
1811 dev_priv = container_of(work, drm_i915_private_t,
1812 mm.retire_work.work);
1813 dev = dev_priv->dev;
1814
891b48cf
CW
1815 /* Come back later if the device is busy... */
1816 if (!mutex_trylock(&dev->struct_mutex)) {
1817 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1818 return;
1819 }
1820
b09a1fec 1821 i915_gem_retire_requests(dev);
d1b851fc 1822
0a58705b
CW
1823 /* Send a periodic flush down the ring so we don't hold onto GEM
1824 * objects indefinitely.
1825 */
1826 idle = true;
1827 for (i = 0; i < I915_NUM_RINGS; i++) {
1828 struct intel_ring_buffer *ring = &dev_priv->ring[i];
1829
1830 if (!list_empty(&ring->gpu_write_list)) {
1831 struct drm_i915_gem_request *request;
1832 int ret;
1833
db53a302
CW
1834 ret = i915_gem_flush_ring(ring,
1835 0, I915_GEM_GPU_DOMAINS);
0a58705b
CW
1836 request = kzalloc(sizeof(*request), GFP_KERNEL);
1837 if (ret || request == NULL ||
db53a302 1838 i915_add_request(ring, NULL, request))
0a58705b
CW
1839 kfree(request);
1840 }
1841
1842 idle &= list_empty(&ring->request_list);
1843 }
1844
1845 if (!dev_priv->mm.suspended && !idle)
9c9fe1f8 1846 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
0a58705b 1847
673a394b
EA
1848 mutex_unlock(&dev->struct_mutex);
1849}
1850
db53a302
CW
1851/**
1852 * Waits for a sequence number to be signaled, and cleans up the
1853 * request and object lists appropriately for that event.
1854 */
5a5a0c64 1855int
db53a302 1856i915_wait_request(struct intel_ring_buffer *ring,
b93f9cf1
BW
1857 uint32_t seqno,
1858 bool do_retire)
673a394b 1859{
db53a302 1860 drm_i915_private_t *dev_priv = ring->dev->dev_private;
802c7eb6 1861 u32 ier;
673a394b
EA
1862 int ret = 0;
1863
1864 BUG_ON(seqno == 0);
1865
d9bc7e9f
CW
1866 if (atomic_read(&dev_priv->mm.wedged)) {
1867 struct completion *x = &dev_priv->error_completion;
1868 bool recovery_complete;
1869 unsigned long flags;
1870
1871 /* Give the error handler a chance to run. */
1872 spin_lock_irqsave(&x->wait.lock, flags);
1873 recovery_complete = x->done > 0;
1874 spin_unlock_irqrestore(&x->wait.lock, flags);
1875
1876 return recovery_complete ? -EIO : -EAGAIN;
1877 }
30dbf0c0 1878
5d97eb69 1879 if (seqno == ring->outstanding_lazy_request) {
3cce469c
CW
1880 struct drm_i915_gem_request *request;
1881
1882 request = kzalloc(sizeof(*request), GFP_KERNEL);
1883 if (request == NULL)
e35a41de 1884 return -ENOMEM;
3cce469c 1885
db53a302 1886 ret = i915_add_request(ring, NULL, request);
3cce469c
CW
1887 if (ret) {
1888 kfree(request);
1889 return ret;
1890 }
1891
1892 seqno = request->seqno;
e35a41de 1893 }
ffed1d09 1894
78501eac 1895 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
db53a302 1896 if (HAS_PCH_SPLIT(ring->dev))
036a4a7d 1897 ier = I915_READ(DEIER) | I915_READ(GTIER);
23e3f9b3
JB
1898 else if (IS_VALLEYVIEW(ring->dev))
1899 ier = I915_READ(GTIER) | I915_READ(VLV_IER);
036a4a7d
ZW
1900 else
1901 ier = I915_READ(IER);
802c7eb6
JB
1902 if (!ier) {
1903 DRM_ERROR("something (likely vbetool) disabled "
1904 "interrupts, re-enabling\n");
f01c22fd
CW
1905 ring->dev->driver->irq_preinstall(ring->dev);
1906 ring->dev->driver->irq_postinstall(ring->dev);
802c7eb6
JB
1907 }
1908
db53a302 1909 trace_i915_gem_request_wait_begin(ring, seqno);
1c5d22f7 1910
b2223497 1911 ring->waiting_seqno = seqno;
b13c2b96 1912 if (ring->irq_get(ring)) {
ce453d81 1913 if (dev_priv->mm.interruptible)
b13c2b96
CW
1914 ret = wait_event_interruptible(ring->irq_queue,
1915 i915_seqno_passed(ring->get_seqno(ring), seqno)
1916 || atomic_read(&dev_priv->mm.wedged));
1917 else
1918 wait_event(ring->irq_queue,
1919 i915_seqno_passed(ring->get_seqno(ring), seqno)
1920 || atomic_read(&dev_priv->mm.wedged));
1921
1922 ring->irq_put(ring);
e959b5db
EA
1923 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
1924 seqno) ||
1925 atomic_read(&dev_priv->mm.wedged), 3000))
b5ba177d 1926 ret = -EBUSY;
b2223497 1927 ring->waiting_seqno = 0;
1c5d22f7 1928
db53a302 1929 trace_i915_gem_request_wait_end(ring, seqno);
673a394b 1930 }
ba1234d1 1931 if (atomic_read(&dev_priv->mm.wedged))
30dbf0c0 1932 ret = -EAGAIN;
673a394b 1933
673a394b
EA
1934 /* Directly dispatch request retiring. While we have the work queue
1935 * to handle this, the waiter on a request often wants an associated
1936 * buffer to have made it to the inactive list, and we would need
1937 * a separate wait queue to handle that.
1938 */
b93f9cf1 1939 if (ret == 0 && do_retire)
db53a302 1940 i915_gem_retire_requests_ring(ring);
673a394b
EA
1941
1942 return ret;
1943}
1944
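/*
 * Illustrative sketch, not the driver's code: the wedged check at the top of
 * the wait above distinguishes "reset still in flight" from "reset already
 * ran and the GPU is still dead".  recovery_done is a hypothetical stand-in
 * for the error_completion counter read under its wait-queue lock.  Assumes
 * <errno.h>.
 */
#include <errno.h>

static int check_wedged_sketch(int wedged, int recovery_done)
{
        if (!wedged)
                return 0;
        /* -EIO: give up, the error handler already had its chance.
         * -EAGAIN: the reset is still running; the caller should retry. */
        return recovery_done ? -EIO : -EAGAIN;
}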
673a394b
EA
1945/**
1946 * Ensures that all rendering to the object has completed and the object is
1947 * safe to unbind from the GTT or access from the CPU.
1948 */
54cf91dc 1949int
ce453d81 1950i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
673a394b 1951{
673a394b
EA
1952 int ret;
1953
e47c68e9
EA
1954 /* This function only exists to support waiting for existing rendering,
1955 * not for emitting required flushes.
673a394b 1956 */
05394f39 1957 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
673a394b
EA
1958
1959 /* If there is rendering queued on the buffer being evicted, wait for
1960 * it.
1961 */
05394f39 1962 if (obj->active) {
b93f9cf1
BW
1963 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno,
1964 true);
2cf34d7b 1965 if (ret)
673a394b
EA
1966 return ret;
1967 }
1968
1969 return 0;
1970}
1971
5816d648
BW
1972/**
1973 * i915_gem_object_sync - sync an object to a ring.
1974 *
1975 * @obj: object which may be in use on another ring.
1976 * @to: ring we wish to use the object on. May be NULL.
1977 *
1978 * This code is meant to abstract object synchronization with the GPU.
1979 * Calling with NULL implies synchronizing the object with the CPU
1980 * rather than a particular GPU ring.
1981 *
1982 * Returns 0 if successful, else propagates up the lower layer error.
1983 */
2911a35b
BW
1984int
1985i915_gem_object_sync(struct drm_i915_gem_object *obj,
1986 struct intel_ring_buffer *to)
1987{
1988 struct intel_ring_buffer *from = obj->ring;
1989 u32 seqno;
1990 int ret, idx;
1991
1992 if (from == NULL || to == from)
1993 return 0;
1994
5816d648 1995 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2911a35b
BW
1996 return i915_gem_object_wait_rendering(obj);
1997
1998 idx = intel_ring_sync_index(from, to);
1999
2000 seqno = obj->last_rendering_seqno;
2001 if (seqno <= from->sync_seqno[idx])
2002 return 0;
2003
2004 if (seqno == from->outstanding_lazy_request) {
2005 struct drm_i915_gem_request *request;
2006
2007 request = kzalloc(sizeof(*request), GFP_KERNEL);
2008 if (request == NULL)
2009 return -ENOMEM;
2010
2011 ret = i915_add_request(from, NULL, request);
2012 if (ret) {
2013 kfree(request);
2014 return ret;
2015 }
2016
2017 seqno = request->seqno;
2018 }
2019
2911a35b 2020
1500f7ea 2021 ret = to->sync_to(to, from, seqno);
e3a5a225
BW
2022 if (!ret)
2023 from->sync_seqno[idx] = seqno;
2911a35b 2024
e3a5a225 2025 return ret;
2911a35b
BW
2026}
2027
b5ffc9bc
CW
2028static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2029{
2030 u32 old_write_domain, old_read_domains;
2031
b5ffc9bc
CW
 2032 /* Act as a barrier for all accesses through the GTT */
2033 mb();
2034
2035 /* Force a pagefault for domain tracking on next user access */
2036 i915_gem_release_mmap(obj);
2037
b97c3d9c
KP
2038 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2039 return;
2040
b5ffc9bc
CW
2041 old_read_domains = obj->base.read_domains;
2042 old_write_domain = obj->base.write_domain;
2043
2044 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2045 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2046
2047 trace_i915_gem_object_change_domain(obj,
2048 old_read_domains,
2049 old_write_domain);
2050}
2051
673a394b
EA
2052/**
2053 * Unbinds an object from the GTT aperture.
2054 */
0f973f27 2055int
05394f39 2056i915_gem_object_unbind(struct drm_i915_gem_object *obj)
673a394b 2057{
7bddb01f 2058 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
673a394b
EA
2059 int ret = 0;
2060
05394f39 2061 if (obj->gtt_space == NULL)
673a394b
EA
2062 return 0;
2063
05394f39 2064 if (obj->pin_count != 0) {
673a394b
EA
2065 DRM_ERROR("Attempting to unbind pinned buffer\n");
2066 return -EINVAL;
2067 }
2068
a8198eea
CW
2069 ret = i915_gem_object_finish_gpu(obj);
2070 if (ret == -ERESTARTSYS)
2071 return ret;
2072 /* Continue on if we fail due to EIO, the GPU is hung so we
2073 * should be safe and we need to cleanup or else we might
2074 * cause memory corruption through use-after-free.
2075 */
2076
b5ffc9bc 2077 i915_gem_object_finish_gtt(obj);
5323fd04 2078
673a394b
EA
2079 /* Move the object to the CPU domain to ensure that
2080 * any possible CPU writes while it's not in the GTT
a8198eea 2081 * are flushed when we go to remap it.
673a394b 2082 */
a8198eea
CW
2083 if (ret == 0)
2084 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
8dc1775d 2085 if (ret == -ERESTARTSYS)
673a394b 2086 return ret;
812ed492 2087 if (ret) {
a8198eea
CW
2088 /* In the event of a disaster, abandon all caches and
2089 * hope for the best.
2090 */
812ed492 2091 i915_gem_clflush_object(obj);
05394f39 2092 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
812ed492 2093 }
673a394b 2094
96b47b65 2095 /* release the fence reg _after_ flushing */
d9e86c0e
CW
2096 ret = i915_gem_object_put_fence(obj);
2097 if (ret == -ERESTARTSYS)
2098 return ret;
96b47b65 2099
db53a302
CW
2100 trace_i915_gem_object_unbind(obj);
2101
74898d7e
DV
2102 if (obj->has_global_gtt_mapping)
2103 i915_gem_gtt_unbind_object(obj);
7bddb01f
DV
2104 if (obj->has_aliasing_ppgtt_mapping) {
2105 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2106 obj->has_aliasing_ppgtt_mapping = 0;
2107 }
74163907 2108 i915_gem_gtt_finish_object(obj);
7bddb01f 2109
e5281ccd 2110 i915_gem_object_put_pages_gtt(obj);
673a394b 2111
6299f992 2112 list_del_init(&obj->gtt_list);
05394f39 2113 list_del_init(&obj->mm_list);
75e9e915 2114 /* Avoid an unnecessary call to unbind on rebind. */
05394f39 2115 obj->map_and_fenceable = true;
673a394b 2116
05394f39
CW
2117 drm_mm_put_block(obj->gtt_space);
2118 obj->gtt_space = NULL;
2119 obj->gtt_offset = 0;
673a394b 2120
05394f39 2121 if (i915_gem_object_is_purgeable(obj))
963b4836
CW
2122 i915_gem_object_truncate(obj);
2123
8dc1775d 2124 return ret;
673a394b
EA
2125}
2126
88241785 2127int
db53a302 2128i915_gem_flush_ring(struct intel_ring_buffer *ring,
54cf91dc
CW
2129 uint32_t invalidate_domains,
2130 uint32_t flush_domains)
2131{
88241785
CW
2132 int ret;
2133
36d527de
CW
2134 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2135 return 0;
2136
db53a302
CW
2137 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
2138
88241785
CW
2139 ret = ring->flush(ring, invalidate_domains, flush_domains);
2140 if (ret)
2141 return ret;
2142
36d527de
CW
2143 if (flush_domains & I915_GEM_GPU_DOMAINS)
2144 i915_gem_process_flushing_list(ring, flush_domains);
2145
88241785 2146 return 0;
54cf91dc
CW
2147}
2148
b93f9cf1 2149static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)
a56ba56c 2150{
88241785
CW
2151 int ret;
2152
395b70be 2153 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
64193406
CW
2154 return 0;
2155
88241785 2156 if (!list_empty(&ring->gpu_write_list)) {
db53a302 2157 ret = i915_gem_flush_ring(ring,
0ac74c6b 2158 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
88241785
CW
2159 if (ret)
2160 return ret;
2161 }
2162
b93f9cf1
BW
2163 return i915_wait_request(ring, i915_gem_next_request_seqno(ring),
2164 do_retire);
a56ba56c
CW
2165}
2166
b93f9cf1 2167int i915_gpu_idle(struct drm_device *dev, bool do_retire)
4df2faf4
DV
2168{
2169 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 2170 int ret, i;
4df2faf4 2171
4df2faf4 2172 /* Flush everything onto the inactive list. */
1ec14ad3 2173 for (i = 0; i < I915_NUM_RINGS; i++) {
b93f9cf1 2174 ret = i915_ring_idle(&dev_priv->ring[i], do_retire);
1ec14ad3
CW
2175 if (ret)
2176 return ret;
2177 }
4df2faf4 2178
8a1a49f9 2179 return 0;
4df2faf4
DV
2180}
2181
9ce079e4
CW
2182static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2183 struct drm_i915_gem_object *obj)
4e901fdc 2184{
4e901fdc 2185 drm_i915_private_t *dev_priv = dev->dev_private;
4e901fdc
EA
2186 uint64_t val;
2187
9ce079e4
CW
2188 if (obj) {
2189 u32 size = obj->gtt_space->size;
4e901fdc 2190
9ce079e4
CW
2191 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2192 0xfffff000) << 32;
2193 val |= obj->gtt_offset & 0xfffff000;
2194 val |= (uint64_t)((obj->stride / 128) - 1) <<
2195 SANDYBRIDGE_FENCE_PITCH_SHIFT;
4e901fdc 2196
9ce079e4
CW
2197 if (obj->tiling_mode == I915_TILING_Y)
2198 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2199 val |= I965_FENCE_REG_VALID;
2200 } else
2201 val = 0;
c6642782 2202
9ce079e4
CW
2203 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2204 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
4e901fdc
EA
2205}
2206
9ce079e4
CW
2207static void i965_write_fence_reg(struct drm_device *dev, int reg,
2208 struct drm_i915_gem_object *obj)
de151cf6 2209{
de151cf6 2210 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6
JB
2211 uint64_t val;
2212
9ce079e4
CW
2213 if (obj) {
2214 u32 size = obj->gtt_space->size;
de151cf6 2215
9ce079e4
CW
2216 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2217 0xfffff000) << 32;
2218 val |= obj->gtt_offset & 0xfffff000;
2219 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2220 if (obj->tiling_mode == I915_TILING_Y)
2221 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2222 val |= I965_FENCE_REG_VALID;
2223 } else
2224 val = 0;
c6642782 2225
9ce079e4
CW
2226 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2227 POSTING_READ(FENCE_REG_965_0 + reg * 8);
de151cf6
JB
2228}
2229
9ce079e4
CW
2230static void i915_write_fence_reg(struct drm_device *dev, int reg,
2231 struct drm_i915_gem_object *obj)
de151cf6 2232{
de151cf6 2233 drm_i915_private_t *dev_priv = dev->dev_private;
9ce079e4 2234 u32 val;
de151cf6 2235
9ce079e4
CW
2236 if (obj) {
2237 u32 size = obj->gtt_space->size;
2238 int pitch_val;
2239 int tile_width;
c6642782 2240
9ce079e4
CW
2241 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2242 (size & -size) != size ||
2243 (obj->gtt_offset & (size - 1)),
2244 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2245 obj->gtt_offset, obj->map_and_fenceable, size);
c6642782 2246
9ce079e4
CW
2247 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2248 tile_width = 128;
2249 else
2250 tile_width = 512;
2251
2252 /* Note: pitch better be a power of two tile widths */
2253 pitch_val = obj->stride / tile_width;
2254 pitch_val = ffs(pitch_val) - 1;
2255
2256 val = obj->gtt_offset;
2257 if (obj->tiling_mode == I915_TILING_Y)
2258 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2259 val |= I915_FENCE_SIZE_BITS(size);
2260 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2261 val |= I830_FENCE_REG_VALID;
2262 } else
2263 val = 0;
2264
2265 if (reg < 8)
2266 reg = FENCE_REG_830_0 + reg * 4;
2267 else
2268 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2269
2270 I915_WRITE(reg, val);
2271 POSTING_READ(reg);
de151cf6
JB
2272}
2273
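/*
 * Illustrative sketch, not the driver's code: the pre-965 fence pitch field
 * above stores log2 of the pitch measured in tile widths.  Because the pitch
 * is required to be a power-of-two number of tiles (see the WARN above),
 * ffs(pitch_val) - 1 is exactly that log2.  A stand-alone equivalent that
 * does not rely on ffs():
 */
static unsigned int fence_pitch_bits_sketch(unsigned int stride,
                                            unsigned int tile_width)
{
        unsigned int tiles = stride / tile_width;       /* assumed power of two */
        unsigned int log2 = 0;

        while (tiles > 1) {
                tiles >>= 1;
                log2++;
        }
        return log2;    /* e.g. stride 4096, tile 512 -> 8 tiles -> 3 */
}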
9ce079e4
CW
2274static void i830_write_fence_reg(struct drm_device *dev, int reg,
2275 struct drm_i915_gem_object *obj)
de151cf6 2276{
de151cf6 2277 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6 2278 uint32_t val;
de151cf6 2279
9ce079e4
CW
2280 if (obj) {
2281 u32 size = obj->gtt_space->size;
2282 uint32_t pitch_val;
de151cf6 2283
9ce079e4
CW
2284 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2285 (size & -size) != size ||
2286 (obj->gtt_offset & (size - 1)),
2287 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2288 obj->gtt_offset, size);
e76a16de 2289
9ce079e4
CW
2290 pitch_val = obj->stride / 128;
2291 pitch_val = ffs(pitch_val) - 1;
de151cf6 2292
9ce079e4
CW
2293 val = obj->gtt_offset;
2294 if (obj->tiling_mode == I915_TILING_Y)
2295 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2296 val |= I830_FENCE_SIZE_BITS(size);
2297 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2298 val |= I830_FENCE_REG_VALID;
2299 } else
2300 val = 0;
c6642782 2301
9ce079e4
CW
2302 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2303 POSTING_READ(FENCE_REG_830_0 + reg * 4);
2304}
2305
2306static void i915_gem_write_fence(struct drm_device *dev, int reg,
2307 struct drm_i915_gem_object *obj)
2308{
2309 switch (INTEL_INFO(dev)->gen) {
2310 case 7:
2311 case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2312 case 5:
2313 case 4: i965_write_fence_reg(dev, reg, obj); break;
2314 case 3: i915_write_fence_reg(dev, reg, obj); break;
2315 case 2: i830_write_fence_reg(dev, reg, obj); break;
2316 default: break;
2317 }
de151cf6
JB
2318}
2319
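/*
 * Illustrative sketch, not the driver's code: the two address halves of the
 * 64-bit gen4+/gen6 fence values assembled above.  Both the start and the
 * last page of the fenced range are stored page-aligned; the pitch, tiling
 * and valid bits are then OR'd in exactly as the per-generation functions
 * above show.
 */
static unsigned long long fence_address_bits_sketch(unsigned int gtt_offset,
                                                    unsigned int size)
{
        unsigned long long val;

        /* upper dword: address of the last page covered by the fence */
        val = (unsigned long long)((gtt_offset + size - 4096) & 0xfffff000u) << 32;
        /* lower dword: address of the first page */
        val |= gtt_offset & 0xfffff000u;
        return val;
}

/* e.g. a 64 KiB object at offset 0x00100000 yields 0x0010f00000100000. */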
61050808
CW
2320static inline int fence_number(struct drm_i915_private *dev_priv,
2321 struct drm_i915_fence_reg *fence)
2322{
2323 return fence - dev_priv->fence_regs;
2324}
2325
2326static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2327 struct drm_i915_fence_reg *fence,
2328 bool enable)
2329{
2330 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2331 int reg = fence_number(dev_priv, fence);
2332
2333 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2334
2335 if (enable) {
2336 obj->fence_reg = reg;
2337 fence->obj = obj;
2338 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2339 } else {
2340 obj->fence_reg = I915_FENCE_REG_NONE;
2341 fence->obj = NULL;
2342 list_del_init(&fence->lru_list);
2343 }
2344}
2345
d9e86c0e 2346static int
a360bb1a 2347i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
d9e86c0e
CW
2348{
2349 int ret;
2350
2351 if (obj->fenced_gpu_access) {
88241785 2352 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
1c293ea3 2353 ret = i915_gem_flush_ring(obj->ring,
88241785
CW
2354 0, obj->base.write_domain);
2355 if (ret)
2356 return ret;
2357 }
d9e86c0e
CW
2358
2359 obj->fenced_gpu_access = false;
2360 }
2361
1c293ea3 2362 if (obj->last_fenced_seqno) {
18991845
CW
2363 ret = i915_wait_request(obj->ring,
2364 obj->last_fenced_seqno,
14415745 2365 false);
18991845
CW
2366 if (ret)
2367 return ret;
d9e86c0e
CW
2368
2369 obj->last_fenced_seqno = 0;
d9e86c0e
CW
2370 }
2371
63256ec5
CW
2372 /* Ensure that all CPU reads are completed before installing a fence
2373 * and all writes before removing the fence.
2374 */
2375 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2376 mb();
2377
d9e86c0e
CW
2378 return 0;
2379}
2380
2381int
2382i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2383{
61050808 2384 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
d9e86c0e
CW
2385 int ret;
2386
a360bb1a 2387 ret = i915_gem_object_flush_fence(obj);
d9e86c0e
CW
2388 if (ret)
2389 return ret;
2390
61050808
CW
2391 if (obj->fence_reg == I915_FENCE_REG_NONE)
2392 return 0;
d9e86c0e 2393
61050808
CW
2394 i915_gem_object_update_fence(obj,
2395 &dev_priv->fence_regs[obj->fence_reg],
2396 false);
2397 i915_gem_object_fence_lost(obj);
d9e86c0e
CW
2398
2399 return 0;
2400}
2401
2402static struct drm_i915_fence_reg *
a360bb1a 2403i915_find_fence_reg(struct drm_device *dev)
ae3db24a 2404{
ae3db24a 2405 struct drm_i915_private *dev_priv = dev->dev_private;
8fe301ad 2406 struct drm_i915_fence_reg *reg, *avail;
d9e86c0e 2407 int i;
ae3db24a
DV
2408
2409 /* First try to find a free reg */
d9e86c0e 2410 avail = NULL;
ae3db24a
DV
2411 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2412 reg = &dev_priv->fence_regs[i];
2413 if (!reg->obj)
d9e86c0e 2414 return reg;
ae3db24a 2415
1690e1eb 2416 if (!reg->pin_count)
d9e86c0e 2417 avail = reg;
ae3db24a
DV
2418 }
2419
d9e86c0e
CW
2420 if (avail == NULL)
2421 return NULL;
ae3db24a
DV
2422
2423 /* None available, try to steal one or wait for a user to finish */
d9e86c0e 2424 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
1690e1eb 2425 if (reg->pin_count)
ae3db24a
DV
2426 continue;
2427
8fe301ad 2428 return reg;
ae3db24a
DV
2429 }
2430
8fe301ad 2431 return NULL;
ae3db24a
DV
2432}
2433
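/*
 * Illustrative sketch, not the driver's code: the shape of the selection done
 * by i915_find_fence_reg() above — prefer a completely free register, and
 * only fall back to stealing one whose current user is not pinned.  The
 * struct and field names here are simplified stand-ins.
 */
struct fence_slot_sketch {
        void *owner;            /* NULL when the slot is free */
        int pin_count;          /* non-zero while the owner may not lose it */
};

static struct fence_slot_sketch *
find_fence_slot_sketch(struct fence_slot_sketch *slots, int nslots)
{
        int i;

        /* First pass: a free slot costs nothing. */
        for (i = 0; i < nslots; i++)
                if (slots[i].owner == NULL)
                        return &slots[i];

        /* Second pass: steal any slot that is not pinned (the driver walks
         * its LRU list here, so the least recently used fence is taken). */
        for (i = 0; i < nslots; i++)
                if (slots[i].pin_count == 0)
                        return &slots[i];

        return NULL;            /* everything pinned: the caller sees -EDEADLK */
}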
de151cf6 2434/**
9a5a53b3 2435 * i915_gem_object_get_fence - set up fencing for an object
de151cf6
JB
2436 * @obj: object to map through a fence reg
2437 *
2438 * When mapping objects through the GTT, userspace wants to be able to write
2439 * to them without having to worry about swizzling if the object is tiled.
de151cf6
JB
2440 * This function walks the fence regs looking for a free one for @obj,
2441 * stealing one if it can't find any.
2442 *
2443 * It then sets up the reg based on the object's properties: address, pitch
2444 * and tiling format.
9a5a53b3
CW
2445 *
2446 * For an untiled surface, this removes any existing fence.
de151cf6 2447 */
8c4b8c3f 2448int
06d98131 2449i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
de151cf6 2450{
05394f39 2451 struct drm_device *dev = obj->base.dev;
79e53945 2452 struct drm_i915_private *dev_priv = dev->dev_private;
14415745 2453 bool enable = obj->tiling_mode != I915_TILING_NONE;
d9e86c0e 2454 struct drm_i915_fence_reg *reg;
ae3db24a 2455 int ret;
de151cf6 2456
14415745
CW
2457 /* Have we updated the tiling parameters upon the object and so
2458 * will need to serialise the write to the associated fence register?
2459 */
5d82e3e6 2460 if (obj->fence_dirty) {
14415745
CW
2461 ret = i915_gem_object_flush_fence(obj);
2462 if (ret)
2463 return ret;
2464 }
9a5a53b3 2465
d9e86c0e 2466 /* Just update our place in the LRU if our fence is getting reused. */
05394f39
CW
2467 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2468 reg = &dev_priv->fence_regs[obj->fence_reg];
5d82e3e6 2469 if (!obj->fence_dirty) {
14415745
CW
2470 list_move_tail(&reg->lru_list,
2471 &dev_priv->mm.fence_list);
2472 return 0;
2473 }
2474 } else if (enable) {
2475 reg = i915_find_fence_reg(dev);
2476 if (reg == NULL)
2477 return -EDEADLK;
d9e86c0e 2478
14415745
CW
2479 if (reg->obj) {
2480 struct drm_i915_gem_object *old = reg->obj;
2481
2482 ret = i915_gem_object_flush_fence(old);
29c5a587
CW
2483 if (ret)
2484 return ret;
2485
14415745 2486 i915_gem_object_fence_lost(old);
29c5a587 2487 }
14415745 2488 } else
a09ba7fa 2489 return 0;
a09ba7fa 2490
14415745 2491 i915_gem_object_update_fence(obj, reg, enable);
5d82e3e6 2492 obj->fence_dirty = false;
14415745 2493
9ce079e4 2494 return 0;
de151cf6
JB
2495}
2496
673a394b
EA
2497/**
2498 * Finds free space in the GTT aperture and binds the object there.
2499 */
2500static int
05394f39 2501i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
920afa77 2502 unsigned alignment,
75e9e915 2503 bool map_and_fenceable)
673a394b 2504{
05394f39 2505 struct drm_device *dev = obj->base.dev;
673a394b 2506 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 2507 struct drm_mm_node *free_space;
a00b10c3 2508 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
5e783301 2509 u32 size, fence_size, fence_alignment, unfenced_alignment;
75e9e915 2510 bool mappable, fenceable;
07f73f69 2511 int ret;
673a394b 2512
05394f39 2513 if (obj->madv != I915_MADV_WILLNEED) {
3ef94daa
CW
2514 DRM_ERROR("Attempting to bind a purgeable object\n");
2515 return -EINVAL;
2516 }
2517
e28f8711
CW
2518 fence_size = i915_gem_get_gtt_size(dev,
2519 obj->base.size,
2520 obj->tiling_mode);
2521 fence_alignment = i915_gem_get_gtt_alignment(dev,
2522 obj->base.size,
2523 obj->tiling_mode);
2524 unfenced_alignment =
2525 i915_gem_get_unfenced_gtt_alignment(dev,
2526 obj->base.size,
2527 obj->tiling_mode);
a00b10c3 2528
673a394b 2529 if (alignment == 0)
5e783301
DV
2530 alignment = map_and_fenceable ? fence_alignment :
2531 unfenced_alignment;
75e9e915 2532 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
673a394b
EA
2533 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2534 return -EINVAL;
2535 }
2536
05394f39 2537 size = map_and_fenceable ? fence_size : obj->base.size;
a00b10c3 2538
654fc607
CW
2539 /* If the object is bigger than the entire aperture, reject it early
2540 * before evicting everything in a vain attempt to find space.
2541 */
05394f39 2542 if (obj->base.size >
75e9e915 2543 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
654fc607
CW
2544 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2545 return -E2BIG;
2546 }
2547
673a394b 2548 search_free:
75e9e915 2549 if (map_and_fenceable)
920afa77
DV
2550 free_space =
2551 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
a00b10c3 2552 size, alignment, 0,
920afa77
DV
2553 dev_priv->mm.gtt_mappable_end,
2554 0);
2555 else
2556 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
a00b10c3 2557 size, alignment, 0);
920afa77
DV
2558
2559 if (free_space != NULL) {
75e9e915 2560 if (map_and_fenceable)
05394f39 2561 obj->gtt_space =
920afa77 2562 drm_mm_get_block_range_generic(free_space,
a00b10c3 2563 size, alignment, 0,
920afa77
DV
2564 dev_priv->mm.gtt_mappable_end,
2565 0);
2566 else
05394f39 2567 obj->gtt_space =
a00b10c3 2568 drm_mm_get_block(free_space, size, alignment);
920afa77 2569 }
05394f39 2570 if (obj->gtt_space == NULL) {
673a394b
EA
2571 /* If the gtt is empty and we're still having trouble
2572 * fitting our object in, we're out of memory.
2573 */
75e9e915
DV
2574 ret = i915_gem_evict_something(dev, size, alignment,
2575 map_and_fenceable);
9731129c 2576 if (ret)
673a394b 2577 return ret;
9731129c 2578
673a394b
EA
2579 goto search_free;
2580 }
2581
e5281ccd 2582 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
673a394b 2583 if (ret) {
05394f39
CW
2584 drm_mm_put_block(obj->gtt_space);
2585 obj->gtt_space = NULL;
07f73f69
CW
2586
2587 if (ret == -ENOMEM) {
809b6334
CW
2588 /* first try to reclaim some memory by clearing the GTT */
2589 ret = i915_gem_evict_everything(dev, false);
07f73f69 2590 if (ret) {
07f73f69 2591 /* now try to shrink everyone else */
4bdadb97
CW
2592 if (gfpmask) {
2593 gfpmask = 0;
2594 goto search_free;
07f73f69
CW
2595 }
2596
809b6334 2597 return -ENOMEM;
07f73f69
CW
2598 }
2599
2600 goto search_free;
2601 }
2602
673a394b
EA
2603 return ret;
2604 }
2605
74163907 2606 ret = i915_gem_gtt_prepare_object(obj);
7c2e6fdf 2607 if (ret) {
e5281ccd 2608 i915_gem_object_put_pages_gtt(obj);
05394f39
CW
2609 drm_mm_put_block(obj->gtt_space);
2610 obj->gtt_space = NULL;
07f73f69 2611
809b6334 2612 if (i915_gem_evict_everything(dev, false))
07f73f69 2613 return ret;
07f73f69
CW
2614
2615 goto search_free;
673a394b 2616 }
673a394b 2617
0ebb9829
DV
2618 if (!dev_priv->mm.aliasing_ppgtt)
2619 i915_gem_gtt_bind_object(obj, obj->cache_level);
673a394b 2620
6299f992 2621 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
05394f39 2622 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
bf1a1092 2623
673a394b
EA
2624 /* Assert that the object is not currently in any GPU domain. As it
2625 * wasn't in the GTT, there shouldn't be any way it could have been in
2626 * a GPU cache
2627 */
05394f39
CW
2628 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2629 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
673a394b 2630
6299f992 2631 obj->gtt_offset = obj->gtt_space->start;
1c5d22f7 2632
75e9e915 2633 fenceable =
05394f39 2634 obj->gtt_space->size == fence_size &&
0206e353 2635 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
a00b10c3 2636
75e9e915 2637 mappable =
05394f39 2638 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
a00b10c3 2639
05394f39 2640 obj->map_and_fenceable = mappable && fenceable;
75e9e915 2641
db53a302 2642 trace_i915_gem_object_bind(obj, map_and_fenceable);
673a394b
EA
2643 return 0;
2644}
2645
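/*
 * Illustrative sketch, not the driver's code: the bind above reduced to its
 * retry loop.  gtt_alloc() and evict_something() are hypothetical stand-ins
 * for the drm_mm search and the eviction pass (the goto search_free path).
 */
static void *bind_retry_sketch(unsigned long size, unsigned long align,
                               void *(*gtt_alloc)(unsigned long, unsigned long),
                               int (*evict_something)(unsigned long, unsigned long))
{
        void *node;

        for (;;) {
                node = gtt_alloc(size, align);
                if (node != NULL)
                        return node;

                /* No hole large enough: make room, then search again. */
                if (evict_something(size, align) != 0)
                        return NULL;    /* nothing evictable: out of memory */
        }
}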
2646void
05394f39 2647i915_gem_clflush_object(struct drm_i915_gem_object *obj)
673a394b 2648{
673a394b
EA
2649 /* If we don't have a page list set up, then we're not pinned
2650 * to GPU, and we can ignore the cache flush because it'll happen
2651 * again at bind time.
2652 */
05394f39 2653 if (obj->pages == NULL)
673a394b
EA
2654 return;
2655
9c23f7fc
CW
2656 /* If the GPU is snooping the contents of the CPU cache,
2657 * we do not need to manually clear the CPU cache lines. However,
2658 * the caches are only snooped when the render cache is
2659 * flushed/invalidated. As we always have to emit invalidations
2660 * and flushes when moving into and out of the RENDER domain, correct
2661 * snooping behaviour occurs naturally as the result of our domain
2662 * tracking.
2663 */
2664 if (obj->cache_level != I915_CACHE_NONE)
2665 return;
2666
1c5d22f7 2667 trace_i915_gem_object_clflush(obj);
cfa16a0d 2668
05394f39 2669 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
673a394b
EA
2670}
2671
e47c68e9 2672/** Flushes any GPU write domain for the object if it's dirty. */
88241785 2673static int
3619df03 2674i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2675{
05394f39 2676 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
88241785 2677 return 0;
e47c68e9
EA
2678
2679 /* Queue the GPU write cache flushing we need. */
db53a302 2680 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
e47c68e9
EA
2681}
2682
2683/** Flushes the GTT write domain for the object if it's dirty. */
2684static void
05394f39 2685i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2686{
1c5d22f7
CW
2687 uint32_t old_write_domain;
2688
05394f39 2689 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
2690 return;
2691
63256ec5 2692 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
2693 * to it immediately go to main memory as far as we know, so there's
2694 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
2695 *
2696 * However, we do have to enforce the order so that all writes through
2697 * the GTT land before any writes to the device, such as updates to
2698 * the GATT itself.
e47c68e9 2699 */
63256ec5
CW
2700 wmb();
2701
05394f39
CW
2702 old_write_domain = obj->base.write_domain;
2703 obj->base.write_domain = 0;
1c5d22f7
CW
2704
2705 trace_i915_gem_object_change_domain(obj,
05394f39 2706 obj->base.read_domains,
1c5d22f7 2707 old_write_domain);
e47c68e9
EA
2708}
2709
2710/** Flushes the CPU write domain for the object if it's dirty. */
2711static void
05394f39 2712i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2713{
1c5d22f7 2714 uint32_t old_write_domain;
e47c68e9 2715
05394f39 2716 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
2717 return;
2718
2719 i915_gem_clflush_object(obj);
40ce6575 2720 intel_gtt_chipset_flush();
05394f39
CW
2721 old_write_domain = obj->base.write_domain;
2722 obj->base.write_domain = 0;
1c5d22f7
CW
2723
2724 trace_i915_gem_object_change_domain(obj,
05394f39 2725 obj->base.read_domains,
1c5d22f7 2726 old_write_domain);
e47c68e9
EA
2727}
2728
2ef7eeaa
EA
2729/**
2730 * Moves a single object to the GTT read, and possibly write domain.
2731 *
2732 * This function returns when the move is complete, including waiting on
2733 * flushes to occur.
2734 */
79e53945 2735int
2021746e 2736i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 2737{
8325a09d 2738 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
1c5d22f7 2739 uint32_t old_write_domain, old_read_domains;
e47c68e9 2740 int ret;
2ef7eeaa 2741
02354392 2742 /* Not valid to be called on unbound objects. */
05394f39 2743 if (obj->gtt_space == NULL)
02354392
EA
2744 return -EINVAL;
2745
8d7e3de1
CW
2746 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2747 return 0;
2748
88241785
CW
2749 ret = i915_gem_object_flush_gpu_write_domain(obj);
2750 if (ret)
2751 return ret;
2752
87ca9c8a 2753 if (obj->pending_gpu_write || write) {
ce453d81 2754 ret = i915_gem_object_wait_rendering(obj);
87ca9c8a
CW
2755 if (ret)
2756 return ret;
2757 }
2dafb1e0 2758
7213342d 2759 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 2760
05394f39
CW
2761 old_write_domain = obj->base.write_domain;
2762 old_read_domains = obj->base.read_domains;
1c5d22f7 2763
e47c68e9
EA
2764 /* It should now be out of any other write domains, and we can update
2765 * the domain values for our changes.
2766 */
05394f39
CW
2767 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2768 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 2769 if (write) {
05394f39
CW
2770 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2771 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2772 obj->dirty = 1;
2ef7eeaa
EA
2773 }
2774
1c5d22f7
CW
2775 trace_i915_gem_object_change_domain(obj,
2776 old_read_domains,
2777 old_write_domain);
2778
8325a09d
CW
2779 /* And bump the LRU for this access */
2780 if (i915_gem_object_is_inactive(obj))
2781 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2782
e47c68e9
EA
2783 return 0;
2784}
2785
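/*
 * Illustrative sketch, not the driver's code: the domain bookkeeping shared by
 * the GTT path above and the CPU path later in the file.  Read domains
 * accumulate, but a write collapses both masks to the single new domain, which
 * is what forces the flush or invalidate on the next domain change.
 */
struct domains_sketch {
        unsigned int read_domains;
        unsigned int write_domain;
};

static void move_to_domain_sketch(struct domains_sketch *d,
                                  unsigned int new_domain, int write)
{
        d->read_domains |= new_domain;
        if (write) {
                d->read_domains = new_domain;   /* sole reader ... */
                d->write_domain = new_domain;   /* ... and sole writer */
        }
}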
e4ffd173
CW
2786int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2787 enum i915_cache_level cache_level)
2788{
7bddb01f
DV
2789 struct drm_device *dev = obj->base.dev;
2790 drm_i915_private_t *dev_priv = dev->dev_private;
e4ffd173
CW
2791 int ret;
2792
2793 if (obj->cache_level == cache_level)
2794 return 0;
2795
2796 if (obj->pin_count) {
2797 DRM_DEBUG("can not change the cache level of pinned objects\n");
2798 return -EBUSY;
2799 }
2800
2801 if (obj->gtt_space) {
2802 ret = i915_gem_object_finish_gpu(obj);
2803 if (ret)
2804 return ret;
2805
2806 i915_gem_object_finish_gtt(obj);
2807
2808 /* Before SandyBridge, you could not use tiling or fence
2809 * registers with snooped memory, so relinquish any fences
2810 * currently pointing to our region in the aperture.
2811 */
2812 if (INTEL_INFO(obj->base.dev)->gen < 6) {
2813 ret = i915_gem_object_put_fence(obj);
2814 if (ret)
2815 return ret;
2816 }
2817
74898d7e
DV
2818 if (obj->has_global_gtt_mapping)
2819 i915_gem_gtt_bind_object(obj, cache_level);
7bddb01f
DV
2820 if (obj->has_aliasing_ppgtt_mapping)
2821 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2822 obj, cache_level);
e4ffd173
CW
2823 }
2824
2825 if (cache_level == I915_CACHE_NONE) {
2826 u32 old_read_domains, old_write_domain;
2827
2828 /* If we're coming from LLC cached, then we haven't
2829 * actually been tracking whether the data is in the
2830 * CPU cache or not, since we only allow one bit set
2831 * in obj->write_domain and have been skipping the clflushes.
2832 * Just set it to the CPU cache for now.
2833 */
2834 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
2835 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
2836
2837 old_read_domains = obj->base.read_domains;
2838 old_write_domain = obj->base.write_domain;
2839
2840 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2841 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2842
2843 trace_i915_gem_object_change_domain(obj,
2844 old_read_domains,
2845 old_write_domain);
2846 }
2847
2848 obj->cache_level = cache_level;
2849 return 0;
2850}
2851
b9241ea3 2852/*
2da3b9b9
CW
2853 * Prepare buffer for display plane (scanout, cursors, etc).
2854 * Can be called from an uninterruptible phase (modesetting) and allows
2855 * any flushes to be pipelined (for pageflips).
b9241ea3
ZW
2856 */
2857int
2da3b9b9
CW
2858i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2859 u32 alignment,
919926ae 2860 struct intel_ring_buffer *pipelined)
b9241ea3 2861{
2da3b9b9 2862 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
2863 int ret;
2864
88241785
CW
2865 ret = i915_gem_object_flush_gpu_write_domain(obj);
2866 if (ret)
2867 return ret;
2868
0be73284 2869 if (pipelined != obj->ring) {
2911a35b
BW
2870 ret = i915_gem_object_sync(obj, pipelined);
2871 if (ret)
b9241ea3
ZW
2872 return ret;
2873 }
2874
a7ef0640
EA
2875 /* The display engine is not coherent with the LLC cache on gen6. As
2876 * a result, we make sure that the pinning that is about to occur is
 2877 * done with uncached PTEs. This is the lowest common denominator for all
2878 * chipsets.
2879 *
2880 * However for gen6+, we could do better by using the GFDT bit instead
2881 * of uncaching, which would allow us to flush all the LLC-cached data
2882 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2883 */
2884 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
2885 if (ret)
2886 return ret;
2887
2da3b9b9
CW
2888 /* As the user may map the buffer once pinned in the display plane
2889 * (e.g. libkms for the bootup splash), we have to ensure that we
2890 * always use map_and_fenceable for all scanout buffers.
2891 */
2892 ret = i915_gem_object_pin(obj, alignment, true);
2893 if (ret)
2894 return ret;
2895
b118c1e3
CW
2896 i915_gem_object_flush_cpu_write_domain(obj);
2897
2da3b9b9 2898 old_write_domain = obj->base.write_domain;
05394f39 2899 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
2900
2901 /* It should now be out of any other write domains, and we can update
2902 * the domain values for our changes.
2903 */
2904 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
05394f39 2905 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
2906
2907 trace_i915_gem_object_change_domain(obj,
2908 old_read_domains,
2da3b9b9 2909 old_write_domain);
b9241ea3
ZW
2910
2911 return 0;
2912}
2913
85345517 2914int
a8198eea 2915i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
85345517 2916{
88241785
CW
2917 int ret;
2918
a8198eea 2919 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
85345517
CW
2920 return 0;
2921
88241785 2922 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
db53a302 2923 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
88241785
CW
2924 if (ret)
2925 return ret;
2926 }
85345517 2927
c501ae7f
CW
2928 ret = i915_gem_object_wait_rendering(obj);
2929 if (ret)
2930 return ret;
2931
a8198eea
CW
2932 /* Ensure that we invalidate the GPU's caches and TLBs. */
2933 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
c501ae7f 2934 return 0;
85345517
CW
2935}
2936
e47c68e9
EA
2937/**
2938 * Moves a single object to the CPU read, and possibly write domain.
2939 *
2940 * This function returns when the move is complete, including waiting on
2941 * flushes to occur.
2942 */
dabdfe02 2943int
919926ae 2944i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 2945{
1c5d22f7 2946 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
2947 int ret;
2948
8d7e3de1
CW
2949 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
2950 return 0;
2951
88241785
CW
2952 ret = i915_gem_object_flush_gpu_write_domain(obj);
2953 if (ret)
2954 return ret;
2955
f8413190
CW
2956 if (write || obj->pending_gpu_write) {
2957 ret = i915_gem_object_wait_rendering(obj);
2958 if (ret)
2959 return ret;
2960 }
2ef7eeaa 2961
e47c68e9 2962 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 2963
05394f39
CW
2964 old_write_domain = obj->base.write_domain;
2965 old_read_domains = obj->base.read_domains;
1c5d22f7 2966
e47c68e9 2967 /* Flush the CPU cache if it's still invalid. */
05394f39 2968 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 2969 i915_gem_clflush_object(obj);
2ef7eeaa 2970
05394f39 2971 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
2972 }
2973
2974 /* It should now be out of any other write domains, and we can update
2975 * the domain values for our changes.
2976 */
05394f39 2977 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
2978
2979 /* If we're writing through the CPU, then the GPU read domains will
2980 * need to be invalidated at next use.
2981 */
2982 if (write) {
05394f39
CW
2983 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2984 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 2985 }
2ef7eeaa 2986
1c5d22f7
CW
2987 trace_i915_gem_object_change_domain(obj,
2988 old_read_domains,
2989 old_write_domain);
2990
2ef7eeaa
EA
2991 return 0;
2992}
2993
673a394b
EA
2994/* Throttle our rendering by waiting until the ring has completed our requests
2995 * emitted over 20 msec ago.
2996 *
b962442e
EA
2997 * Note that if we were to use the current jiffies each time around the loop,
2998 * we wouldn't escape the function with any frames outstanding if the time to
2999 * render a frame was over 20ms.
3000 *
673a394b
EA
3001 * This should get us reasonable parallelism between CPU and GPU but also
3002 * relatively low latency when blocking on a particular request to finish.
3003 */
40a5f0de 3004static int
f787a5f5 3005i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3006{
f787a5f5
CW
3007 struct drm_i915_private *dev_priv = dev->dev_private;
3008 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e 3009 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
f787a5f5
CW
3010 struct drm_i915_gem_request *request;
3011 struct intel_ring_buffer *ring = NULL;
3012 u32 seqno = 0;
3013 int ret;
93533c29 3014
e110e8d6
CW
3015 if (atomic_read(&dev_priv->mm.wedged))
3016 return -EIO;
3017
1c25595f 3018 spin_lock(&file_priv->mm.lock);
f787a5f5 3019 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3020 if (time_after_eq(request->emitted_jiffies, recent_enough))
3021 break;
40a5f0de 3022
f787a5f5
CW
3023 ring = request->ring;
3024 seqno = request->seqno;
b962442e 3025 }
1c25595f 3026 spin_unlock(&file_priv->mm.lock);
40a5f0de 3027
f787a5f5
CW
3028 if (seqno == 0)
3029 return 0;
2bc43b5c 3030
f787a5f5 3031 ret = 0;
78501eac 3032 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
f787a5f5
CW
3033 /* And wait for the seqno passing without holding any locks and
3034 * causing extra latency for others. This is safe as the irq
3035 * generation is designed to be run atomically and so is
3036 * lockless.
3037 */
b13c2b96
CW
3038 if (ring->irq_get(ring)) {
3039 ret = wait_event_interruptible(ring->irq_queue,
3040 i915_seqno_passed(ring->get_seqno(ring), seqno)
3041 || atomic_read(&dev_priv->mm.wedged));
3042 ring->irq_put(ring);
40a5f0de 3043
b13c2b96
CW
3044 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3045 ret = -EIO;
e959b5db
EA
3046 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
3047 seqno) ||
7ea29b13
EA
3048 atomic_read(&dev_priv->mm.wedged), 3000)) {
3049 ret = -EBUSY;
b13c2b96 3050 }
40a5f0de
EA
3051 }
3052
f787a5f5
CW
3053 if (ret == 0)
3054 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
40a5f0de
EA
3055
3056 return ret;
3057}
3058
673a394b 3059int
05394f39
CW
3060i915_gem_object_pin(struct drm_i915_gem_object *obj,
3061 uint32_t alignment,
75e9e915 3062 bool map_and_fenceable)
673a394b 3063{
673a394b
EA
3064 int ret;
3065
05394f39 3066 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
ac0c6b5a 3067
05394f39
CW
3068 if (obj->gtt_space != NULL) {
3069 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3070 (map_and_fenceable && !obj->map_and_fenceable)) {
3071 WARN(obj->pin_count,
ae7d49d8 3072 "bo is already pinned with incorrect alignment:"
75e9e915
DV
3073 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3074 " obj->map_and_fenceable=%d\n",
05394f39 3075 obj->gtt_offset, alignment,
75e9e915 3076 map_and_fenceable,
05394f39 3077 obj->map_and_fenceable);
ac0c6b5a
CW
3078 ret = i915_gem_object_unbind(obj);
3079 if (ret)
3080 return ret;
3081 }
3082 }
3083
05394f39 3084 if (obj->gtt_space == NULL) {
a00b10c3 3085 ret = i915_gem_object_bind_to_gtt(obj, alignment,
75e9e915 3086 map_and_fenceable);
9731129c 3087 if (ret)
673a394b 3088 return ret;
22c344e9 3089 }
76446cac 3090
74898d7e
DV
3091 if (!obj->has_global_gtt_mapping && map_and_fenceable)
3092 i915_gem_gtt_bind_object(obj, obj->cache_level);
3093
1b50247a 3094 obj->pin_count++;
6299f992 3095 obj->pin_mappable |= map_and_fenceable;
673a394b
EA
3096
3097 return 0;
3098}
3099
3100void
05394f39 3101i915_gem_object_unpin(struct drm_i915_gem_object *obj)
673a394b 3102{
05394f39
CW
3103 BUG_ON(obj->pin_count == 0);
3104 BUG_ON(obj->gtt_space == NULL);
673a394b 3105
1b50247a 3106 if (--obj->pin_count == 0)
6299f992 3107 obj->pin_mappable = false;
673a394b
EA
3108}
3109
3110int
3111i915_gem_pin_ioctl(struct drm_device *dev, void *data,
05394f39 3112 struct drm_file *file)
673a394b
EA
3113{
3114 struct drm_i915_gem_pin *args = data;
05394f39 3115 struct drm_i915_gem_object *obj;
673a394b
EA
3116 int ret;
3117
1d7cfea1
CW
3118 ret = i915_mutex_lock_interruptible(dev);
3119 if (ret)
3120 return ret;
673a394b 3121
05394f39 3122 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3123 if (&obj->base == NULL) {
1d7cfea1
CW
3124 ret = -ENOENT;
3125 goto unlock;
673a394b 3126 }
673a394b 3127
05394f39 3128 if (obj->madv != I915_MADV_WILLNEED) {
bb6baf76 3129 DRM_ERROR("Attempting to pin a purgeable buffer\n");
1d7cfea1
CW
3130 ret = -EINVAL;
3131 goto out;
3ef94daa
CW
3132 }
3133
05394f39 3134 if (obj->pin_filp != NULL && obj->pin_filp != file) {
79e53945
JB
3135 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3136 args->handle);
1d7cfea1
CW
3137 ret = -EINVAL;
3138 goto out;
79e53945
JB
3139 }
3140
05394f39
CW
3141 obj->user_pin_count++;
3142 obj->pin_filp = file;
3143 if (obj->user_pin_count == 1) {
75e9e915 3144 ret = i915_gem_object_pin(obj, args->alignment, true);
1d7cfea1
CW
3145 if (ret)
3146 goto out;
673a394b
EA
3147 }
3148
3149 /* XXX - flush the CPU caches for pinned objects
3150 * as the X server doesn't manage domains yet
3151 */
e47c68e9 3152 i915_gem_object_flush_cpu_write_domain(obj);
05394f39 3153 args->offset = obj->gtt_offset;
1d7cfea1 3154out:
05394f39 3155 drm_gem_object_unreference(&obj->base);
1d7cfea1 3156unlock:
673a394b 3157 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3158 return ret;
673a394b
EA
3159}
3160
3161int
3162i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
05394f39 3163 struct drm_file *file)
673a394b
EA
3164{
3165 struct drm_i915_gem_pin *args = data;
05394f39 3166 struct drm_i915_gem_object *obj;
76c1dec1 3167 int ret;
673a394b 3168
1d7cfea1
CW
3169 ret = i915_mutex_lock_interruptible(dev);
3170 if (ret)
3171 return ret;
673a394b 3172
05394f39 3173 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3174 if (&obj->base == NULL) {
1d7cfea1
CW
3175 ret = -ENOENT;
3176 goto unlock;
673a394b 3177 }
76c1dec1 3178
05394f39 3179 if (obj->pin_filp != file) {
79e53945
JB
3180 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3181 args->handle);
1d7cfea1
CW
3182 ret = -EINVAL;
3183 goto out;
79e53945 3184 }
05394f39
CW
3185 obj->user_pin_count--;
3186 if (obj->user_pin_count == 0) {
3187 obj->pin_filp = NULL;
79e53945
JB
3188 i915_gem_object_unpin(obj);
3189 }
673a394b 3190
1d7cfea1 3191out:
05394f39 3192 drm_gem_object_unreference(&obj->base);
1d7cfea1 3193unlock:
673a394b 3194 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3195 return ret;
673a394b
EA
3196}
3197
3198int
3199i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 3200 struct drm_file *file)
673a394b
EA
3201{
3202 struct drm_i915_gem_busy *args = data;
05394f39 3203 struct drm_i915_gem_object *obj;
30dbf0c0
CW
3204 int ret;
3205
76c1dec1 3206 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 3207 if (ret)
76c1dec1 3208 return ret;
673a394b 3209
05394f39 3210 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3211 if (&obj->base == NULL) {
1d7cfea1
CW
3212 ret = -ENOENT;
3213 goto unlock;
673a394b 3214 }
d1b851fc 3215
0be555b6
CW
3216 /* Count all active objects as busy, even if they are currently not used
3217 * by the gpu. Users of this interface expect objects to eventually
3218 * become non-busy without any further actions, therefore emit any
3219 * necessary flushes here.
c4de0a5d 3220 */
05394f39 3221 args->busy = obj->active;
0be555b6
CW
3222 if (args->busy) {
3223 /* Unconditionally flush objects, even when the gpu still uses this
3224 * object. Userspace calling this function indicates that it wants to
3225 * use this buffer rather sooner than later, so issuing the required
3226 * flush earlier is beneficial.
3227 */
1a1c6976 3228 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
db53a302 3229 ret = i915_gem_flush_ring(obj->ring,
88241785 3230 0, obj->base.write_domain);
1a1c6976
CW
3231 } else if (obj->ring->outstanding_lazy_request ==
3232 obj->last_rendering_seqno) {
3233 struct drm_i915_gem_request *request;
3234
7a194876
CW
3235 /* This ring is not being cleared by active usage,
3236 * so emit a request to do so.
3237 */
1a1c6976 3238 request = kzalloc(sizeof(*request), GFP_KERNEL);
457eafce 3239 if (request) {
0206e353 3240 ret = i915_add_request(obj->ring, NULL, request);
457eafce
RM
3241 if (ret)
3242 kfree(request);
3243 } else
7a194876
CW
3244 ret = -ENOMEM;
3245 }
0be555b6
CW
3246
3247 /* Update the active list for the hardware's current position.
3248 * Otherwise this only updates on a delayed timer or when irqs
3249 * are actually unmasked, and our working set ends up being
3250 * larger than required.
3251 */
db53a302 3252 i915_gem_retire_requests_ring(obj->ring);
0be555b6 3253
05394f39 3254 args->busy = obj->active;
0be555b6 3255 }
673a394b 3256
05394f39 3257 drm_gem_object_unreference(&obj->base);
1d7cfea1 3258unlock:
673a394b 3259 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3260 return ret;
673a394b
EA
3261}
3262
3263int
3264i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3265 struct drm_file *file_priv)
3266{
0206e353 3267 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
3268}
3269
3ef94daa
CW
3270int
3271i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3272 struct drm_file *file_priv)
3273{
3274 struct drm_i915_gem_madvise *args = data;
05394f39 3275 struct drm_i915_gem_object *obj;
76c1dec1 3276 int ret;
3ef94daa
CW
3277
3278 switch (args->madv) {
3279 case I915_MADV_DONTNEED:
3280 case I915_MADV_WILLNEED:
3281 break;
3282 default:
3283 return -EINVAL;
3284 }
3285
1d7cfea1
CW
3286 ret = i915_mutex_lock_interruptible(dev);
3287 if (ret)
3288 return ret;
3289
05394f39 3290 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
c8725226 3291 if (&obj->base == NULL) {
1d7cfea1
CW
3292 ret = -ENOENT;
3293 goto unlock;
3ef94daa 3294 }
3ef94daa 3295
05394f39 3296 if (obj->pin_count) {
1d7cfea1
CW
3297 ret = -EINVAL;
3298 goto out;
3ef94daa
CW
3299 }
3300
05394f39
CW
3301 if (obj->madv != __I915_MADV_PURGED)
3302 obj->madv = args->madv;
3ef94daa 3303
2d7ef395 3304 /* if the object is no longer bound, discard its backing storage */
05394f39
CW
3305 if (i915_gem_object_is_purgeable(obj) &&
3306 obj->gtt_space == NULL)
2d7ef395
CW
3307 i915_gem_object_truncate(obj);
3308
05394f39 3309 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 3310
1d7cfea1 3311out:
05394f39 3312 drm_gem_object_unreference(&obj->base);
1d7cfea1 3313unlock:
3ef94daa 3314 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3315 return ret;
3ef94daa
CW
3316}
3317
05394f39
CW
3318struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3319 size_t size)
ac52bc56 3320{
73aa808f 3321 struct drm_i915_private *dev_priv = dev->dev_private;
c397b908 3322 struct drm_i915_gem_object *obj;
5949eac4 3323 struct address_space *mapping;
ac52bc56 3324
c397b908
DV
3325 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3326 if (obj == NULL)
3327 return NULL;
673a394b 3328
c397b908
DV
3329 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3330 kfree(obj);
3331 return NULL;
3332 }
673a394b 3333
5949eac4
HD
3334 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3335 mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
3336
73aa808f
CW
3337 i915_gem_info_add_obj(dev_priv, size);
3338
c397b908
DV
3339 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3340 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 3341
3d29b842
ED
3342 if (HAS_LLC(dev)) {
3343 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
3344 * cache) for about a 10% performance improvement
3345 * compared to uncached. Graphics requests other than
3346 * display scanout are coherent with the CPU in
3347 * accessing this cache. This means in this mode we
3348 * don't need to clflush on the CPU side, and on the
3349 * GPU side we only need to flush internal caches to
3350 * get data visible to the CPU.
3351 *
3352 * However, we maintain the display planes as UC, and so
3353 * need to rebind when first used as such.
3354 */
3355 obj->cache_level = I915_CACHE_LLC;
3356 } else
3357 obj->cache_level = I915_CACHE_NONE;
3358
62b8b215 3359 obj->base.driver_private = NULL;
c397b908 3360 obj->fence_reg = I915_FENCE_REG_NONE;
69dc4987 3361 INIT_LIST_HEAD(&obj->mm_list);
93a37f20 3362 INIT_LIST_HEAD(&obj->gtt_list);
69dc4987 3363 INIT_LIST_HEAD(&obj->ring_list);
432e58ed 3364 INIT_LIST_HEAD(&obj->exec_list);
c397b908 3365 INIT_LIST_HEAD(&obj->gpu_write_list);
c397b908 3366 obj->madv = I915_MADV_WILLNEED;
75e9e915
DV
3367 /* Avoid an unnecessary call to unbind on the first bind. */
3368 obj->map_and_fenceable = true;
de151cf6 3369
05394f39 3370 return obj;
c397b908
DV
3371}
3372
3373int i915_gem_init_object(struct drm_gem_object *obj)
3374{
3375 BUG();
de151cf6 3376
673a394b
EA
3377 return 0;
3378}
3379
05394f39 3380static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
673a394b 3381{
05394f39 3382 struct drm_device *dev = obj->base.dev;
be72615b 3383 drm_i915_private_t *dev_priv = dev->dev_private;
be72615b 3384 int ret;
673a394b 3385
be72615b
CW
3386 ret = i915_gem_object_unbind(obj);
3387 if (ret == -ERESTARTSYS) {
05394f39 3388 list_move(&obj->mm_list,
be72615b
CW
3389 &dev_priv->mm.deferred_free_list);
3390 return;
3391 }
673a394b 3392
26e12f89
CW
3393 trace_i915_gem_object_destroy(obj);
3394
05394f39 3395 if (obj->base.map_list.map)
b464e9a2 3396 drm_gem_free_mmap_offset(&obj->base);
de151cf6 3397
05394f39
CW
3398 drm_gem_object_release(&obj->base);
3399 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 3400
05394f39
CW
3401 kfree(obj->bit_17);
3402 kfree(obj);
673a394b
EA
3403}
3404
05394f39 3405void i915_gem_free_object(struct drm_gem_object *gem_obj)
be72615b 3406{
05394f39
CW
3407 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3408 struct drm_device *dev = obj->base.dev;
be72615b 3409
05394f39 3410 if (obj->phys_obj)
be72615b
CW
3411 i915_gem_detach_phys_object(dev, obj);
3412
1b50247a 3413 obj->pin_count = 0;
be72615b
CW
3414 i915_gem_free_object_tail(obj);
3415}
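/*
 * Freeing can race with signal delivery: if the unbind in the tail
 * helper above returns -ERESTARTSYS, the object is parked on
 * mm.deferred_free_list and released later, once the unbind can run
 * to completion.
 */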
3416
29105ccc
CW
3417int
3418i915_gem_idle(struct drm_device *dev)
3419{
3420 drm_i915_private_t *dev_priv = dev->dev_private;
3421 int ret;
28dfe52a 3422
29105ccc 3423 mutex_lock(&dev->struct_mutex);
1c5d22f7 3424
87acb0a5 3425 if (dev_priv->mm.suspended) {
29105ccc
CW
3426 mutex_unlock(&dev->struct_mutex);
3427 return 0;
28dfe52a
EA
3428 }
3429
b93f9cf1 3430 ret = i915_gpu_idle(dev, true);
6dbe2772
KP
3431 if (ret) {
3432 mutex_unlock(&dev->struct_mutex);
673a394b 3433 return ret;
6dbe2772 3434 }
673a394b 3435
29105ccc 3436 /* Under UMS, be paranoid and evict. */
a39d7efc
CW
3437 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3438 i915_gem_evict_everything(dev, false);
29105ccc 3439
312817a3
CW
3440 i915_gem_reset_fences(dev);
3441
29105ccc
CW
3442 /* Hack! Don't let anybody do execbuf while we don't control the chip.
 3443	 * We need to replace this with a semaphore or something,
 3444	 * and stop conflating it with mm.suspended.
3445 */
3446 dev_priv->mm.suspended = 1;
bc0c7f14 3447 del_timer_sync(&dev_priv->hangcheck_timer);
29105ccc
CW
3448
3449 i915_kernel_lost_context(dev);
6dbe2772 3450 i915_gem_cleanup_ringbuffer(dev);
29105ccc 3451
6dbe2772
KP
3452 mutex_unlock(&dev->struct_mutex);
3453
29105ccc
CW
3454 /* Cancel the retire work handler, which should be idle now. */
3455 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3456
673a394b
EA
3457 return 0;
3458}
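/*
 * The ordering in i915_gem_idle() matters: the GPU is idled (and,
 * under UMS, evicted) first, fences are reset, mm.suspended is raised
 * to fence off further execbuf, and only then are the rings torn down
 * and the retire handler cancelled, so no new work can slip in
 * between those steps.
 */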
3459
f691e2f4
DV
3460void i915_gem_init_swizzling(struct drm_device *dev)
3461{
3462 drm_i915_private_t *dev_priv = dev->dev_private;
3463
11782b02 3464 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
3465 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3466 return;
3467
3468 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3469 DISP_TILE_SURFACE_SWIZZLING);
3470
11782b02
DV
3471 if (IS_GEN5(dev))
3472 return;
3473
f691e2f4
DV
3474 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3475 if (IS_GEN6(dev))
6b26c86d 3476 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
f691e2f4 3477 else
6b26c86d 3478 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
f691e2f4 3479}
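/*
 * ARB_MODE above (and GFX_MODE in the PPGTT setup below) are "masked"
 * registers: the high 16 bits of a write select which of the low 16
 * bits are actually updated, which is what _MASKED_BIT_ENABLE()
 * encodes.  This allows flipping a single bit without a
 * read-modify-write cycle.
 */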
e21af88d
DV
3480
3481void i915_gem_init_ppgtt(struct drm_device *dev)
3482{
3483 drm_i915_private_t *dev_priv = dev->dev_private;
3484 uint32_t pd_offset;
3485 struct intel_ring_buffer *ring;
55a254ac
DV
3486 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
3487 uint32_t __iomem *pd_addr;
3488 uint32_t pd_entry;
e21af88d
DV
3489 int i;
3490
3491 if (!dev_priv->mm.aliasing_ppgtt)
3492 return;
3493
55a254ac
DV
3494
3495 pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
3496 for (i = 0; i < ppgtt->num_pd_entries; i++) {
3497 dma_addr_t pt_addr;
3498
3499 if (dev_priv->mm.gtt->needs_dmar)
3500 pt_addr = ppgtt->pt_dma_addr[i];
3501 else
3502 pt_addr = page_to_phys(ppgtt->pt_pages[i]);
3503
3504 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
3505 pd_entry |= GEN6_PDE_VALID;
3506
3507 writel(pd_entry, pd_addr + i);
3508 }
3509 readl(pd_addr);
3510
3511 pd_offset = ppgtt->pd_offset;
e21af88d
DV
 3512	pd_offset /= 64; /* in cachelines */
3513 pd_offset <<= 16;
3514
3515 if (INTEL_INFO(dev)->gen == 6) {
48ecfa10
DV
3516 uint32_t ecochk, gab_ctl, ecobits;
3517
3518 ecobits = I915_READ(GAC_ECO_BITS);
3519 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
be901a5a
DV
3520
3521 gab_ctl = I915_READ(GAB_CTL);
3522 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
3523
3524 ecochk = I915_READ(GAM_ECOCHK);
e21af88d
DV
3525 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
3526 ECOCHK_PPGTT_CACHE64B);
6b26c86d 3527 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
e21af88d
DV
3528 } else if (INTEL_INFO(dev)->gen >= 7) {
3529 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
3530 /* GFX_MODE is per-ring on gen7+ */
3531 }
3532
3533 for (i = 0; i < I915_NUM_RINGS; i++) {
3534 ring = &dev_priv->ring[i];
3535
3536 if (INTEL_INFO(dev)->gen >= 7)
3537 I915_WRITE(RING_MODE_GEN7(ring),
6b26c86d 3538 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
e21af88d
DV
3539
3540 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
3541 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
3542 }
3543}
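/*
 * The PP_DIR_BASE encoding used above, spelled out: the page
 * directory offset is expressed in 64-byte cachelines and placed in
 * the upper half of the register,
 *
 *	pd_offset = (ppgtt->pd_offset / 64) << 16;
 *
 * while each page-directory entry written into the global GTT is just
 * the page table's DMA (or physical) address run through
 * GEN6_PDE_ADDR_ENCODE() with GEN6_PDE_VALID set.
 */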
3544
8187a2b7 3545int
f691e2f4 3546i915_gem_init_hw(struct drm_device *dev)
8187a2b7
ZN
3547{
3548 drm_i915_private_t *dev_priv = dev->dev_private;
3549 int ret;
68f95ba9 3550
f691e2f4
DV
3551 i915_gem_init_swizzling(dev);
3552
5c1143bb 3553 ret = intel_init_render_ring_buffer(dev);
68f95ba9 3554 if (ret)
b6913e4b 3555 return ret;
68f95ba9
CW
3556
3557 if (HAS_BSD(dev)) {
5c1143bb 3558 ret = intel_init_bsd_ring_buffer(dev);
68f95ba9
CW
3559 if (ret)
3560 goto cleanup_render_ring;
d1b851fc 3561 }
68f95ba9 3562
549f7365
CW
3563 if (HAS_BLT(dev)) {
3564 ret = intel_init_blt_ring_buffer(dev);
3565 if (ret)
3566 goto cleanup_bsd_ring;
3567 }
3568
6f392d54
CW
3569 dev_priv->next_seqno = 1;
3570
e21af88d
DV
3571 i915_gem_init_ppgtt(dev);
3572
68f95ba9
CW
3573 return 0;
3574
549f7365 3575cleanup_bsd_ring:
1ec14ad3 3576 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
68f95ba9 3577cleanup_render_ring:
1ec14ad3 3578 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
8187a2b7
ZN
3579 return ret;
3580}
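/*
 * Ring bring-up order above is render first, then BSD and BLT where
 * the hardware has them; the cleanup labels unwind in the reverse
 * order before propagating an error, and the aliasing PPGTT is only
 * programmed once all rings are up.
 */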
3581
3582void
3583i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3584{
3585 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 3586 int i;
8187a2b7 3587
1ec14ad3
CW
3588 for (i = 0; i < I915_NUM_RINGS; i++)
3589 intel_cleanup_ring_buffer(&dev_priv->ring[i]);
8187a2b7
ZN
3590}
3591
673a394b
EA
3592int
3593i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3594 struct drm_file *file_priv)
3595{
3596 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 3597 int ret, i;
673a394b 3598
79e53945
JB
3599 if (drm_core_check_feature(dev, DRIVER_MODESET))
3600 return 0;
3601
ba1234d1 3602 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 3603 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 3604 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
3605 }
3606
673a394b 3607 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
3608 dev_priv->mm.suspended = 0;
3609
f691e2f4 3610 ret = i915_gem_init_hw(dev);
d816f6ac
WF
3611 if (ret != 0) {
3612 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 3613 return ret;
d816f6ac 3614 }
9bb2d6f9 3615
69dc4987 3616 BUG_ON(!list_empty(&dev_priv->mm.active_list));
673a394b
EA
3617 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3618 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
1ec14ad3
CW
3619 for (i = 0; i < I915_NUM_RINGS; i++) {
3620 BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3621 BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3622 }
673a394b 3623 mutex_unlock(&dev->struct_mutex);
dbb19d30 3624
5f35308b
CW
3625 ret = drm_irq_install(dev);
3626 if (ret)
3627 goto cleanup_ringbuffer;
dbb19d30 3628
673a394b 3629 return 0;
5f35308b
CW
3630
3631cleanup_ringbuffer:
3632 mutex_lock(&dev->struct_mutex);
3633 i915_gem_cleanup_ringbuffer(dev);
3634 dev_priv->mm.suspended = 1;
3635 mutex_unlock(&dev->struct_mutex);
3636
3637 return ret;
673a394b
EA
3638}
3639
3640int
3641i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3642 struct drm_file *file_priv)
3643{
79e53945
JB
3644 if (drm_core_check_feature(dev, DRIVER_MODESET))
3645 return 0;
3646
dbb19d30 3647 drm_irq_uninstall(dev);
e6890f6f 3648 return i915_gem_idle(dev);
673a394b
EA
3649}
3650
3651void
3652i915_gem_lastclose(struct drm_device *dev)
3653{
3654 int ret;
673a394b 3655
e806b495
EA
3656 if (drm_core_check_feature(dev, DRIVER_MODESET))
3657 return;
3658
6dbe2772
KP
3659 ret = i915_gem_idle(dev);
3660 if (ret)
3661 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
3662}
3663
64193406
CW
3664static void
3665init_ring_lists(struct intel_ring_buffer *ring)
3666{
3667 INIT_LIST_HEAD(&ring->active_list);
3668 INIT_LIST_HEAD(&ring->request_list);
3669 INIT_LIST_HEAD(&ring->gpu_write_list);
3670}
3671
673a394b
EA
3672void
3673i915_gem_load(struct drm_device *dev)
3674{
b5aa8a0f 3675 int i;
673a394b
EA
3676 drm_i915_private_t *dev_priv = dev->dev_private;
3677
69dc4987 3678 INIT_LIST_HEAD(&dev_priv->mm.active_list);
673a394b
EA
3679 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3680 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
a09ba7fa 3681 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
be72615b 3682 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
93a37f20 3683 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
1ec14ad3
CW
3684 for (i = 0; i < I915_NUM_RINGS; i++)
3685 init_ring_lists(&dev_priv->ring[i]);
4b9de737 3686 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 3687 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
673a394b
EA
3688 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3689 i915_gem_retire_work_handler);
30dbf0c0 3690 init_completion(&dev_priv->error_completion);
31169714 3691
94400120
DA
3692 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3693 if (IS_GEN3(dev)) {
3694 u32 tmp = I915_READ(MI_ARB_STATE);
3695 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
3696 /* arb state is a masked write, so set bit + bit in mask */
3697 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3698 I915_WRITE(MI_ARB_STATE, tmp);
3699 }
3700 }
3701
72bfa19c
CW
3702 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3703
de151cf6 3704 /* Old X drivers will take 0-2 for front, back, depth buffers */
b397c836
EA
3705 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3706 dev_priv->fence_reg_start = 3;
de151cf6 3707
a6c45cf0 3708 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
3709 dev_priv->num_fence_regs = 16;
3710 else
3711 dev_priv->num_fence_regs = 8;
3712
b5aa8a0f 3713 /* Initialize fence registers to zero */
ada726c7 3714 i915_gem_reset_fences(dev);
10ed13e4 3715
673a394b 3716 i915_gem_detect_bit_6_swizzle(dev);
6b95a207 3717 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 3718
ce453d81
CW
3719 dev_priv->mm.interruptible = true;
3720
17250b71
CW
3721 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3722 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3723 register_shrinker(&dev_priv->mm.inactive_shrinker);
673a394b 3724}
71acb5eb
DA
3725
3726/*
 3727 * Create a physically contiguous memory object to back a GEM object,
 3728 * e.g. for cursor and overlay registers.
3729 */
995b6762
CW
3730static int i915_gem_init_phys_object(struct drm_device *dev,
3731 int id, int size, int align)
71acb5eb
DA
3732{
3733 drm_i915_private_t *dev_priv = dev->dev_private;
3734 struct drm_i915_gem_phys_object *phys_obj;
3735 int ret;
3736
3737 if (dev_priv->mm.phys_objs[id - 1] || !size)
3738 return 0;
3739
9a298b2a 3740 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
3741 if (!phys_obj)
3742 return -ENOMEM;
3743
3744 phys_obj->id = id;
3745
6eeefaf3 3746 phys_obj->handle = drm_pci_alloc(dev, size, align);
71acb5eb
DA
3747 if (!phys_obj->handle) {
3748 ret = -ENOMEM;
3749 goto kfree_obj;
3750 }
3751#ifdef CONFIG_X86
3752 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3753#endif
3754
3755 dev_priv->mm.phys_objs[id - 1] = phys_obj;
3756
3757 return 0;
3758kfree_obj:
9a298b2a 3759 kfree(phys_obj);
71acb5eb
DA
3760 return ret;
3761}
3762
995b6762 3763static void i915_gem_free_phys_object(struct drm_device *dev, int id)
71acb5eb
DA
3764{
3765 drm_i915_private_t *dev_priv = dev->dev_private;
3766 struct drm_i915_gem_phys_object *phys_obj;
3767
3768 if (!dev_priv->mm.phys_objs[id - 1])
3769 return;
3770
3771 phys_obj = dev_priv->mm.phys_objs[id - 1];
3772 if (phys_obj->cur_obj) {
3773 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3774 }
3775
3776#ifdef CONFIG_X86
3777 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3778#endif
3779 drm_pci_free(dev, phys_obj->handle);
3780 kfree(phys_obj);
3781 dev_priv->mm.phys_objs[id - 1] = NULL;
3782}
3783
3784void i915_gem_free_all_phys_object(struct drm_device *dev)
3785{
3786 int i;
3787
260883c8 3788 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
3789 i915_gem_free_phys_object(dev, i);
3790}
3791
3792void i915_gem_detach_phys_object(struct drm_device *dev,
05394f39 3793 struct drm_i915_gem_object *obj)
71acb5eb 3794{
05394f39 3795 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
e5281ccd 3796 char *vaddr;
71acb5eb 3797 int i;
71acb5eb
DA
3798 int page_count;
3799
05394f39 3800 if (!obj->phys_obj)
71acb5eb 3801 return;
05394f39 3802 vaddr = obj->phys_obj->handle->vaddr;
71acb5eb 3803
05394f39 3804 page_count = obj->base.size / PAGE_SIZE;
71acb5eb 3805 for (i = 0; i < page_count; i++) {
5949eac4 3806 struct page *page = shmem_read_mapping_page(mapping, i);
e5281ccd
CW
3807 if (!IS_ERR(page)) {
3808 char *dst = kmap_atomic(page);
3809 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
3810 kunmap_atomic(dst);
3811
3812 drm_clflush_pages(&page, 1);
3813
3814 set_page_dirty(page);
3815 mark_page_accessed(page);
3816 page_cache_release(page);
3817 }
71acb5eb 3818 }
40ce6575 3819 intel_gtt_chipset_flush();
d78b47b9 3820
05394f39
CW
3821 obj->phys_obj->cur_obj = NULL;
3822 obj->phys_obj = NULL;
71acb5eb
DA
3823}
3824
3825int
3826i915_gem_attach_phys_object(struct drm_device *dev,
05394f39 3827 struct drm_i915_gem_object *obj,
6eeefaf3
CW
3828 int id,
3829 int align)
71acb5eb 3830{
05394f39 3831 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
71acb5eb 3832 drm_i915_private_t *dev_priv = dev->dev_private;
71acb5eb
DA
3833 int ret = 0;
3834 int page_count;
3835 int i;
3836
3837 if (id > I915_MAX_PHYS_OBJECT)
3838 return -EINVAL;
3839
05394f39
CW
3840 if (obj->phys_obj) {
3841 if (obj->phys_obj->id == id)
71acb5eb
DA
3842 return 0;
3843 i915_gem_detach_phys_object(dev, obj);
3844 }
3845
71acb5eb
DA
3846 /* create a new object */
3847 if (!dev_priv->mm.phys_objs[id - 1]) {
3848 ret = i915_gem_init_phys_object(dev, id,
05394f39 3849 obj->base.size, align);
71acb5eb 3850 if (ret) {
05394f39
CW
3851 DRM_ERROR("failed to init phys object %d size: %zu\n",
3852 id, obj->base.size);
e5281ccd 3853 return ret;
71acb5eb
DA
3854 }
3855 }
3856
3857 /* bind to the object */
05394f39
CW
3858 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3859 obj->phys_obj->cur_obj = obj;
71acb5eb 3860
05394f39 3861 page_count = obj->base.size / PAGE_SIZE;
71acb5eb
DA
3862
3863 for (i = 0; i < page_count; i++) {
e5281ccd
CW
3864 struct page *page;
3865 char *dst, *src;
3866
5949eac4 3867 page = shmem_read_mapping_page(mapping, i);
e5281ccd
CW
3868 if (IS_ERR(page))
3869 return PTR_ERR(page);
71acb5eb 3870
ff75b9bc 3871 src = kmap_atomic(page);
05394f39 3872 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
71acb5eb 3873 memcpy(dst, src, PAGE_SIZE);
3e4d3af5 3874 kunmap_atomic(src);
71acb5eb 3875
e5281ccd
CW
3876 mark_page_accessed(page);
3877 page_cache_release(page);
3878 }
d78b47b9 3879
71acb5eb 3880 return 0;
71acb5eb
DA
3881}
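/*
 * Attach copies the object's current contents out of its shmem pages
 * into the contiguous phys buffer; the matching detach above copies
 * the data back into shmem, clflushes the pages and marks them dirty,
 * so the shmem store always ends up holding the latest contents.
 */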
3882
3883static int
05394f39
CW
3884i915_gem_phys_pwrite(struct drm_device *dev,
3885 struct drm_i915_gem_object *obj,
71acb5eb
DA
3886 struct drm_i915_gem_pwrite *args,
3887 struct drm_file *file_priv)
3888{
05394f39 3889 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
b47b30cc 3890 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
71acb5eb 3891
b47b30cc
CW
3892 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
3893 unsigned long unwritten;
3894
3895 /* The physical object once assigned is fixed for the lifetime
3896 * of the obj, so we can safely drop the lock and continue
3897 * to access vaddr.
3898 */
3899 mutex_unlock(&dev->struct_mutex);
3900 unwritten = copy_from_user(vaddr, user_data, args->size);
3901 mutex_lock(&dev->struct_mutex);
3902 if (unwritten)
3903 return -EFAULT;
3904 }
71acb5eb 3905
40ce6575 3906 intel_gtt_chipset_flush();
71acb5eb
DA
3907 return 0;
3908}
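/*
 * The pwrite path above first tries a non-blocking, uncached copy;
 * if that faults (e.g. the user pages are not resident) it drops
 * struct_mutex and retries with a plain copy_from_user() that may
 * sleep, which is safe because the phys backing store cannot move
 * for the lifetime of the object.
 */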
b962442e 3909
f787a5f5 3910void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 3911{
f787a5f5 3912 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e
EA
3913
3914 /* Clean up our request list when the client is going away, so that
3915 * later retire_requests won't dereference our soon-to-be-gone
3916 * file_priv.
3917 */
1c25595f 3918 spin_lock(&file_priv->mm.lock);
f787a5f5
CW
3919 while (!list_empty(&file_priv->mm.request_list)) {
3920 struct drm_i915_gem_request *request;
3921
3922 request = list_first_entry(&file_priv->mm.request_list,
3923 struct drm_i915_gem_request,
3924 client_list);
3925 list_del(&request->client_list);
3926 request->file_priv = NULL;
3927 }
1c25595f 3928 spin_unlock(&file_priv->mm.lock);
b962442e 3929}
31169714 3930
1637ef41
CW
3931static int
3932i915_gpu_is_active(struct drm_device *dev)
3933{
3934 drm_i915_private_t *dev_priv = dev->dev_private;
3935 int lists_empty;
3936
1637ef41 3937 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
17250b71 3938 list_empty(&dev_priv->mm.active_list);
1637ef41
CW
3939
3940 return !lists_empty;
3941}
3942
31169714 3943static int
1495f230 3944i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
31169714 3945{
17250b71
CW
3946 struct drm_i915_private *dev_priv =
3947 container_of(shrinker,
3948 struct drm_i915_private,
3949 mm.inactive_shrinker);
3950 struct drm_device *dev = dev_priv->dev;
3951 struct drm_i915_gem_object *obj, *next;
1495f230 3952 int nr_to_scan = sc->nr_to_scan;
17250b71
CW
3953 int cnt;
3954
3955 if (!mutex_trylock(&dev->struct_mutex))
bbe2e11a 3956 return 0;
31169714
CW
3957
 3958	/* "Fast path": just report how many objects we could free */
3959 if (nr_to_scan == 0) {
17250b71
CW
3960 cnt = 0;
3961 list_for_each_entry(obj,
3962 &dev_priv->mm.inactive_list,
3963 mm_list)
3964 cnt++;
3965 mutex_unlock(&dev->struct_mutex);
3966 return cnt / 100 * sysctl_vfs_cache_pressure;
31169714
CW
3967 }
3968
1637ef41 3969rescan:
31169714 3970 /* first scan for clean buffers */
17250b71 3971 i915_gem_retire_requests(dev);
31169714 3972
17250b71
CW
3973 list_for_each_entry_safe(obj, next,
3974 &dev_priv->mm.inactive_list,
3975 mm_list) {
3976 if (i915_gem_object_is_purgeable(obj)) {
2021746e
CW
3977 if (i915_gem_object_unbind(obj) == 0 &&
3978 --nr_to_scan == 0)
17250b71 3979 break;
31169714 3980 }
31169714
CW
3981 }
3982
3983 /* second pass, evict/count anything still on the inactive list */
17250b71
CW
3984 cnt = 0;
3985 list_for_each_entry_safe(obj, next,
3986 &dev_priv->mm.inactive_list,
3987 mm_list) {
2021746e
CW
3988 if (nr_to_scan &&
3989 i915_gem_object_unbind(obj) == 0)
17250b71 3990 nr_to_scan--;
2021746e 3991 else
17250b71
CW
3992 cnt++;
3993 }
3994
3995 if (nr_to_scan && i915_gpu_is_active(dev)) {
1637ef41
CW
3996 /*
3997 * We are desperate for pages, so as a last resort, wait
3998 * for the GPU to finish and discard whatever we can.
 3999	 * This dramatically reduces the number of OOM-killer
 4000	 * events seen while running the GPU aggressively.
4001 */
b93f9cf1 4002 if (i915_gpu_idle(dev, true) == 0)
1637ef41
CW
4003 goto rescan;
4004 }
17250b71
CW
4005 mutex_unlock(&dev->struct_mutex);
4006 return cnt / 100 * sysctl_vfs_cache_pressure;
31169714 4007}
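/*
 * Shrinker contract as used above: a call with sc->nr_to_scan == 0
 * only asks how many objects could be freed, while a non-zero count
 * asks us to actually unbind that many.  Either way the return value
 * is the remaining inactive-object count scaled by
 * sysctl_vfs_cache_pressure / 100, and 0 is returned when
 * struct_mutex cannot be taken without blocking.
 */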