/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
							   bool write);
static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
								   uint64_t offset,
								   uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable);
static void i915_gem_clear_fence_reg(struct drm_device *dev,
				     struct drm_i915_fence_reg *reg);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
}

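/*
 * i915_mutex_lock_interruptible - take struct_mutex for an ioctl, bailing
 * out early if a signal arrives or if a pending GPU reset has to be waited
 * upon first (see i915_gem_wait_for_error() above).
 */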
int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
}

void i915_gem_do_init(struct drm_device *dev,
		      unsigned long start,
		      unsigned long mappable_end,
		      unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);

	dev_priv->mm.gtt_start = start;
	dev_priv->mm.gtt_mappable_end = mappable_end;
	dev_priv->mm.gtt_end = end;
	dev_priv->mm.gtt_total = end - start;
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;

	/* Take over this portion of the GTT */
	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	mutex_lock(&dev->struct_mutex);
	i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		kfree(obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

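/*
 * The "dumb" create ioctl only has to derive a pitch and size from the
 * requested geometry: pitch is the row size in bytes rounded up to 64.
 * For example (illustrative only), a 1366x768, 32bpp request gives
 * pitch = ALIGN(1366 * 4, 64) = 5504 and size = 5504 * 768 bytes, which
 * i915_gem_create() then rounds up to a whole number of pages.
 */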
int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

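/*
 * For reference, a minimal (hypothetical) userspace sketch of object
 * creation via the ioctl above; the handle returned in create.handle is
 * what the pread/pwrite/mmap ioctls below operate on:
 *
 *	struct drm_i915_gem_create create = { .size = 2 * 4096 };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		use(create.handle);
 */
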
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

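/*
 * Worked example of the swizzle above (illustrative): "gpu_offset ^ 64"
 * flips address bit 6, so on a page whose physical bit 17 is set, data for
 * offsets 0x00..0x3f is read from/written to 0x40..0x7f and vice versa.
 * The loop advances in at most 64-byte (cacheline) steps so that each chunk
 * stays within a single swizzled half before the offset is re-XORed.
 */
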
/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_to_user_inatomic(user_data,
					      vaddr + page_offset,
					      page_length);
		kunmap_atomic(vaddr);

		mark_page_accessed(page);
		page_cache_release(page);
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

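/*
 * Note on the helper above: like __copy_from_user(), a non-zero return
 * means the copy faulted before completing; the shmem pwrite slow path
 * below treats any non-zero result as -EFAULT.
 */
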
/**
 * This is the fallback shmem pread path, which pins the user pages with
 * get_user_pages and copies into them via kmap, so we can copy out of the
 * object's backing pages while holding the struct mutex and not take page
 * faults on the user address.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev,
			  struct drm_i915_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;

	while (remain > 0) {
		struct page *page;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(page,
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					page,
					shmem_page_offset,
					page_length);
		}

		mark_page_accessed(page);
		page_cache_release(page);

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

out:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		mark_page_accessed(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

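/*
 * Minimal (hypothetical) userspace sketch of the pread ioctl handled above,
 * for reference only:
 *
 *	char buf[4096];
 *	struct drm_i915_gem_pread pread = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = sizeof(buf),
 *		.data_ptr = (uintptr_t)buf,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */
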
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

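/*
 * The two helpers above differ in how they cope with faults:
 * fast_user_write() copies through an atomic WC mapping and simply reports
 * how many bytes were left uncopied, while slow_kernel_write() is handed an
 * already-pinned user page, so its kmap + memcpy_toio cannot fault at all.
 */
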
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	mutex_unlock(&dev->struct_mutex);
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin_pages;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin_pages;

	offset = obj->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset_in_page(offset);
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = offset_in_page(data_ptr);

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	offset = args->offset;
	obj->dirty = 1;

	while (remain > 0) {
		struct page *page;
		char *vaddr;
		int ret;

		/* Operation in this page
		 *
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page))
			return PTR_ERR(page);

		vaddr = kmap_atomic(page);
		ret = __copy_from_user_inatomic(vaddr + page_offset,
						user_data,
						page_length);
		kunmap_atomic(vaddr);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			return -EFAULT;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	return 0;
}

/**
 * This is the fallback shmem pwrite path, which drops struct_mutex and
 * copies the data into the kmap'd backing pages with copy_from_user
 * (or __copy_from_user_swizzled for bit-17 swizzled pages).
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev,
			   struct drm_i915_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	offset = args->offset;
	obj->dirty = 1;

	mutex_unlock(&dev->struct_mutex);

	while (remain > 0) {
		struct page *page;
		char *vaddr;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		vaddr = kmap(page);
		if (page_do_bit17_swizzling)
			ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
							user_data,
							page_length);
		else
			ret = __copy_from_user(vaddr + shmem_page_offset,
					       user_data,
					       page_length);
		kunmap(page);

		set_page_dirty(page);
		mark_page_accessed(page);
		page_cache_release(page);

		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	mutex_lock(&dev->struct_mutex);
	/* Fixup: Kill any reinstated backing storage pages */
	if (obj->madv == __I915_MADV_PURGED)
		i915_gem_object_truncate(obj);
	/* and flush dirty cachelines in case the object isn't in the cpu write
	 * domain anymore. */
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		i915_gem_clflush_object(obj);
		intel_gtt_chipset_flush();
	}

	return ret;
}

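/*
 * pwrite path selection, as implemented below: objects with a physical
 * backing (obj->phys_obj) go through i915_gem_phys_pwrite(); bound objects
 * that are no longer in the CPU write domain are written through the GTT
 * (fast path, then the slow path on a fault); everything else falls back
 * to the shmem paths above.
 */
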
/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->gtt_space &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_object_pin(obj, 0, true);
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			goto out_unpin;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);

		if (ret != -EFAULT)
			goto out;
		/* Fall through to the shmfs paths because the gtt paths might
		 * fail with non-page-backed user pointers (e.g. gtt mappings
		 * when moving data between textures). */
	}

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret)
		goto out;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

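/*
 * Typical (illustrative) use of the set_domain ioctl from userspace before
 * CPU access through a GTT mapping:
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_GTT,
 *		.write_domain = I915_GEM_DOMAIN_GTT,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 */
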
/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Now bind it into the GTT if needed */
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);
		if (ret)
			goto unlock;
	}
	if (!obj->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
		if (ret)
			goto unlock;

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
		ret = i915_gem_object_get_fence(obj, NULL);
	if (ret)
		goto unlock;

	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	obj->fault_mappable = true;

	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
		set_need_resched();
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

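/*
 * Note on the error handling above: -EIO/-EAGAIN deliberately fall through
 * to VM_FAULT_NOPAGE after poking the scheduler with set_need_resched(), so
 * the fault is simply retried once the error handler has had a chance to
 * run; only -ENOMEM and unexpected errors are reported to the VM as
 * OOM/SIGBUS.
 */
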
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

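/*
 * Example of the sizing above (illustrative): a 1.5MiB tiled object on a
 * gen3 part starts from the 1MiB minimum fence size and doubles until it
 * covers the object, i.e. it occupies a 2MiB fence region in the GTT; on
 * gen4+ (or untiled objects) the object size is used unchanged.
 */
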
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-two tiled object size.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	if (!obj->base.map_list.map) {
		ret = drm_gem_create_mmap_offset(&obj->base);
		if (ret)
			goto out;
	}

	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

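/*
 * Sketch (hypothetical) of how userspace consumes the fake offset returned
 * by the ioctl above; the subsequent mmap() is what routes faults into
 * i915_gem_fault():
 *
 *	struct drm_i915_gem_mmap_gtt mg = { .handle = handle };
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mg.offset);
 */
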
static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
			      gfp_t gfpmask)
{
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

	/* Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)
		return -ENOMEM;

	inode = obj->base.filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	gfpmask |= mapping_gfp_mask(mapping);

	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
		if (IS_ERR(page))
			goto err_pages;

		obj->pages[i] = page;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	while (i--)
		page_cache_release(obj->pages[i]);

	drm_free_large(obj->pages);
	obj->pages = NULL;
	return PTR_ERR(page);
}

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
	int i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj->dirty)
			set_page_dirty(obj->pages[i]);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);

		page_cache_release(obj->pages[i]);
	}
	obj->dirty = 0;

	drm_free_large(obj->pages);
	obj->pages = NULL;
}

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring,
			       u32 seqno)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring == NULL);
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);

	obj->last_rendering_seqno = seqno;
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
	}
}

static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);

	i915_gem_object_move_off_active(obj);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);
	obj->ring = NULL;

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->active = 0;
	obj->pending_gpu_write = false;
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	struct inode *inode;

	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*.
	 */
	inode = obj->base.filp->f_path.dentry->d_inode;
	shmem_truncate_range(inode, 0, (loff_t)-1);

	obj->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
{
	return obj->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
			       uint32_t flush_domains)
{
	struct drm_i915_gem_object *obj, *next;

	list_for_each_entry_safe(obj, next,
				 &ring->gpu_write_list,
				 gpu_write_list) {
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;

			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
			i915_gem_object_move_to_active(obj, ring,
						       i915_gem_next_request_seqno(ring));

			trace_i915_gem_object_change_domain(obj,
							    obj->base.read_domains,
							    old_write_domain);
		}
	}
}

int
i915_add_request(struct intel_ring_buffer *ring,
		 struct drm_file *file,
		 struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t seqno;
	int was_empty;
	int ret;

	BUG_ON(request == NULL);

	ret = ring->add_request(ring, &seqno);
	if (ret)
		return ret;

	trace_i915_gem_request_add(ring, seqno);

	request->seqno = seqno;
	request->ring = ring;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

		spin_lock(&file_priv->mm.lock);
		request->file_priv = file_priv;
		list_add_tail(&request->client_list,
			      &file_priv->mm.request_list);
		spin_unlock(&file_priv->mm.lock);
	}

	ring->outstanding_lazy_request = false;

	if (!dev_priv->mm.suspended) {
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->hangcheck_timer,
				  jiffies +
				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
		}
		if (was_empty)
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
	}
	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
{
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);

		list_del(&request->list);
		i915_gem_request_remove_from_client(request);
		kfree(request);
	}

	while (!list_empty(&ring->active_list)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}
}

static void i915_gem_reset_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
		struct drm_i915_gem_object *obj = reg->obj;

		if (!obj)
			continue;

		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);

		reg->obj->fence_reg = I915_FENCE_REG_NONE;
		reg->obj->fenced_gpu_access = false;
		reg->obj->last_fenced_seqno = 0;
		reg->obj->last_fenced_ring = NULL;
		i915_gem_clear_fence_reg(dev, reg);
	}
}

void i915_gem_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int i;

	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj = list_first_entry(&dev_priv->mm.flushing_list,
				       struct drm_i915_gem_object,
				       mm_list);

		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
	list_for_each_entry(obj,
			    &dev_priv->mm.inactive_list,
			    mm_list)
	{
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}

	/* The fence registers are invalidated so clear them out */
	i915_gem_reset_fences(dev);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
static void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
	uint32_t seqno;
	int i;

	if (list_empty(&ring->request_list))
		return;

	WARN_ON(i915_verify_lists(ring->dev));

	seqno = ring->get_seqno(ring);

	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1ec14ad3
CW
1826 if (seqno >= ring->sync_seqno[i])
1827 ring->sync_seqno[i] = 0;
1828
852835f3 1829 while (!list_empty(&ring->request_list)) {
673a394b 1830 struct drm_i915_gem_request *request;
673a394b 1831
852835f3 1832 request = list_first_entry(&ring->request_list,
673a394b
EA
1833 struct drm_i915_gem_request,
1834 list);
673a394b 1835
dfaae392 1836 if (!i915_seqno_passed(seqno, request->seqno))
b84d5f0c
CW
1837 break;
1838
db53a302 1839 trace_i915_gem_request_retire(ring, request->seqno);
b84d5f0c
CW
1840
1841 list_del(&request->list);
f787a5f5 1842 i915_gem_request_remove_from_client(request);
b84d5f0c
CW
1843 kfree(request);
1844 }
673a394b 1845
b84d5f0c
CW
1846 /* Move any buffers on the active list that are no longer referenced
1847 * by the ringbuffer to the flushing/inactive lists as appropriate.
1848 */
1849 while (!list_empty(&ring->active_list)) {
05394f39 1850 struct drm_i915_gem_object *obj;
b84d5f0c 1851
0206e353 1852 obj = list_first_entry(&ring->active_list,
05394f39
CW
1853 struct drm_i915_gem_object,
1854 ring_list);
673a394b 1855
05394f39 1856 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
673a394b 1857 break;
b84d5f0c 1858
05394f39 1859 if (obj->base.write_domain != 0)
b84d5f0c
CW
1860 i915_gem_object_move_to_flushing(obj);
1861 else
1862 i915_gem_object_move_to_inactive(obj);
673a394b 1863 }
9d34e5db 1864
db53a302
CW
1865 if (unlikely(ring->trace_irq_seqno &&
1866 i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1ec14ad3 1867 ring->irq_put(ring);
db53a302 1868 ring->trace_irq_seqno = 0;
9d34e5db 1869 }
23bc5982 1870
db53a302 1871 WARN_ON(i915_verify_lists(ring->dev));
673a394b
EA
1872}
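/*
 * [Editor's aside - not part of i915_gem.c] The retirement loop above leans on
 * i915_seqno_passed(), which in this era of the driver is a wraparound-safe
 * comparison of 32-bit sequence numbers. A minimal standalone sketch of that
 * idea (assuming two seqnos never drift more than 2^31 apart); the helper name
 * below is hypothetical:
 */
#include <stdbool.h>
#include <stdint.h>

/* True if seq1 is at or after seq2, even across a u32 wraparound. */
static bool seqno_passed_sketch(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

/* e.g. seqno_passed_sketch(0x00000002, 0xfffffffeu) is true: 2 comes after the wrap. */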
1873
b09a1fec
CW
1874void
1875i915_gem_retire_requests(struct drm_device *dev)
1876{
1877 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 1878 int i;
b09a1fec 1879
be72615b 1880 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
05394f39 1881 struct drm_i915_gem_object *obj, *next;
be72615b
CW
1882
1883 /* We must be careful that during unbind() we do not
1884 * accidentally infinitely recurse into retire requests.
1885 * Currently:
1886 * retire -> free -> unbind -> wait -> retire_ring
1887 */
05394f39 1888 list_for_each_entry_safe(obj, next,
be72615b 1889 &dev_priv->mm.deferred_free_list,
69dc4987 1890 mm_list)
05394f39 1891 i915_gem_free_object_tail(obj);
be72615b
CW
1892 }
1893
1ec14ad3 1894 for (i = 0; i < I915_NUM_RINGS; i++)
db53a302 1895 i915_gem_retire_requests_ring(&dev_priv->ring[i]);
b09a1fec
CW
1896}
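/*
 * [Editor's aside - not part of i915_gem.c] The deferred_free_list drained
 * above exists to break the recursion cycle noted in the comment
 * (retire -> free -> unbind -> wait -> retire). A minimal sketch of the
 * pattern using the kernel list API; the names, and the omission of locking
 * (struct_mutex in the real driver), are assumptions of the sketch:
 */
#include <linux/list.h>

struct deferred_item_sketch {
	struct list_head link;
	/* ...whatever must be released at a safe point... */
};

static LIST_HEAD(deferred_free_sketch);

/* Called from a context where releasing immediately could recurse. */
static void defer_free_sketch(struct deferred_item_sketch *item)
{
	list_add_tail(&item->link, &deferred_free_sketch);
}

/* Called later from a safe, non-recursive point (here: the retire path). */
static void drain_deferred_free_sketch(void)
{
	struct deferred_item_sketch *item, *next;

	list_for_each_entry_safe(item, next, &deferred_free_sketch, link) {
		list_del(&item->link);
		/* actually tear the object down here */
	}
}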
1897
75ef9da2 1898static void
673a394b
EA
1899i915_gem_retire_work_handler(struct work_struct *work)
1900{
1901 drm_i915_private_t *dev_priv;
1902 struct drm_device *dev;
0a58705b
CW
1903 bool idle;
1904 int i;
673a394b
EA
1905
1906 dev_priv = container_of(work, drm_i915_private_t,
1907 mm.retire_work.work);
1908 dev = dev_priv->dev;
1909
891b48cf
CW
1910 /* Come back later if the device is busy... */
1911 if (!mutex_trylock(&dev->struct_mutex)) {
1912 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1913 return;
1914 }
1915
b09a1fec 1916 i915_gem_retire_requests(dev);
d1b851fc 1917
0a58705b
CW
1918 /* Send a periodic flush down the ring so we don't hold onto GEM
1919 * objects indefinitely.
1920 */
1921 idle = true;
1922 for (i = 0; i < I915_NUM_RINGS; i++) {
1923 struct intel_ring_buffer *ring = &dev_priv->ring[i];
1924
1925 if (!list_empty(&ring->gpu_write_list)) {
1926 struct drm_i915_gem_request *request;
1927 int ret;
1928
db53a302
CW
1929 ret = i915_gem_flush_ring(ring,
1930 0, I915_GEM_GPU_DOMAINS);
0a58705b
CW
1931 request = kzalloc(sizeof(*request), GFP_KERNEL);
1932 if (ret || request == NULL ||
db53a302 1933 i915_add_request(ring, NULL, request))
0a58705b
CW
1934 kfree(request);
1935 }
1936
1937 idle &= list_empty(&ring->request_list);
1938 }
1939
1940 if (!dev_priv->mm.suspended && !idle)
9c9fe1f8 1941 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
0a58705b 1942
673a394b
EA
1943 mutex_unlock(&dev->struct_mutex);
1944}
1945
db53a302
CW
1946/**
1947 * Waits for a sequence number to be signaled, and cleans up the
1948 * request and object lists appropriately for that event.
1949 */
5a5a0c64 1950int
db53a302 1951i915_wait_request(struct intel_ring_buffer *ring,
b93f9cf1
BW
1952 uint32_t seqno,
1953 bool do_retire)
673a394b 1954{
db53a302 1955 drm_i915_private_t *dev_priv = ring->dev->dev_private;
802c7eb6 1956 u32 ier;
673a394b
EA
1957 int ret = 0;
1958
1959 BUG_ON(seqno == 0);
1960
d9bc7e9f
CW
1961 if (atomic_read(&dev_priv->mm.wedged)) {
1962 struct completion *x = &dev_priv->error_completion;
1963 bool recovery_complete;
1964 unsigned long flags;
1965
1966 /* Give the error handler a chance to run. */
1967 spin_lock_irqsave(&x->wait.lock, flags);
1968 recovery_complete = x->done > 0;
1969 spin_unlock_irqrestore(&x->wait.lock, flags);
1970
1971 return recovery_complete ? -EIO : -EAGAIN;
1972 }
30dbf0c0 1973
5d97eb69 1974 if (seqno == ring->outstanding_lazy_request) {
3cce469c
CW
1975 struct drm_i915_gem_request *request;
1976
1977 request = kzalloc(sizeof(*request), GFP_KERNEL);
1978 if (request == NULL)
e35a41de 1979 return -ENOMEM;
3cce469c 1980
db53a302 1981 ret = i915_add_request(ring, NULL, request);
3cce469c
CW
1982 if (ret) {
1983 kfree(request);
1984 return ret;
1985 }
1986
1987 seqno = request->seqno;
e35a41de 1988 }
ffed1d09 1989
78501eac 1990 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
db53a302 1991 if (HAS_PCH_SPLIT(ring->dev))
036a4a7d
ZW
1992 ier = I915_READ(DEIER) | I915_READ(GTIER);
1993 else
1994 ier = I915_READ(IER);
802c7eb6
JB
1995 if (!ier) {
1996 DRM_ERROR("something (likely vbetool) disabled "
1997 "interrupts, re-enabling\n");
f01c22fd
CW
1998 ring->dev->driver->irq_preinstall(ring->dev);
1999 ring->dev->driver->irq_postinstall(ring->dev);
802c7eb6
JB
2000 }
2001
db53a302 2002 trace_i915_gem_request_wait_begin(ring, seqno);
1c5d22f7 2003
b2223497 2004 ring->waiting_seqno = seqno;
b13c2b96 2005 if (ring->irq_get(ring)) {
ce453d81 2006 if (dev_priv->mm.interruptible)
b13c2b96
CW
2007 ret = wait_event_interruptible(ring->irq_queue,
2008 i915_seqno_passed(ring->get_seqno(ring), seqno)
2009 || atomic_read(&dev_priv->mm.wedged));
2010 else
2011 wait_event(ring->irq_queue,
2012 i915_seqno_passed(ring->get_seqno(ring), seqno)
2013 || atomic_read(&dev_priv->mm.wedged));
2014
2015 ring->irq_put(ring);
e959b5db
EA
2016 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
2017 seqno) ||
2018 atomic_read(&dev_priv->mm.wedged), 3000))
b5ba177d 2019 ret = -EBUSY;
b2223497 2020 ring->waiting_seqno = 0;
1c5d22f7 2021
db53a302 2022 trace_i915_gem_request_wait_end(ring, seqno);
673a394b 2023 }
ba1234d1 2024 if (atomic_read(&dev_priv->mm.wedged))
30dbf0c0 2025 ret = -EAGAIN;
673a394b
EA
2026
2027 if (ret && ret != -ERESTARTSYS)
8bff917c 2028 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
78501eac 2029 __func__, ret, seqno, ring->get_seqno(ring),
8bff917c 2030 dev_priv->next_seqno);
673a394b
EA
2031
2032 /* Directly dispatch request retiring. While we have the work queue
2033 * to handle this, the waiter on a request often wants an associated
2034 * buffer to have made it to the inactive list, and we would need
2035 * a separate wait queue to handle that.
2036 */
b93f9cf1 2037 if (ret == 0 && do_retire)
db53a302 2038 i915_gem_retire_requests_ring(ring);
673a394b
EA
2039
2040 return ret;
2041}
2042
673a394b
EA
2043/**
2044 * Ensures that all rendering to the object has completed and the object is
2045 * safe to unbind from the GTT or access from the CPU.
2046 */
54cf91dc 2047int
ce453d81 2048i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
673a394b 2049{
673a394b
EA
2050 int ret;
2051
e47c68e9
EA
2052 /* This function only exists to support waiting for existing rendering,
2053 * not for emitting required flushes.
673a394b 2054 */
05394f39 2055 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
673a394b
EA
2056
2057 /* If there is rendering queued on the buffer being evicted, wait for
2058 * it.
2059 */
05394f39 2060 if (obj->active) {
b93f9cf1
BW
2061 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno,
2062 true);
2cf34d7b 2063 if (ret)
673a394b
EA
2064 return ret;
2065 }
2066
2067 return 0;
2068}
2069
b5ffc9bc
CW
2070static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2071{
2072 u32 old_write_domain, old_read_domains;
2073
b5ffc9bc
CW
 2074 /* Act as a barrier for all accesses through the GTT */
2075 mb();
2076
2077 /* Force a pagefault for domain tracking on next user access */
2078 i915_gem_release_mmap(obj);
2079
b97c3d9c
KP
2080 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2081 return;
2082
b5ffc9bc
CW
2083 old_read_domains = obj->base.read_domains;
2084 old_write_domain = obj->base.write_domain;
2085
2086 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2087 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2088
2089 trace_i915_gem_object_change_domain(obj,
2090 old_read_domains,
2091 old_write_domain);
2092}
2093
673a394b
EA
2094/**
2095 * Unbinds an object from the GTT aperture.
2096 */
0f973f27 2097int
05394f39 2098i915_gem_object_unbind(struct drm_i915_gem_object *obj)
673a394b 2099{
673a394b
EA
2100 int ret = 0;
2101
05394f39 2102 if (obj->gtt_space == NULL)
673a394b
EA
2103 return 0;
2104
05394f39 2105 if (obj->pin_count != 0) {
673a394b
EA
2106 DRM_ERROR("Attempting to unbind pinned buffer\n");
2107 return -EINVAL;
2108 }
2109
a8198eea
CW
2110 ret = i915_gem_object_finish_gpu(obj);
2111 if (ret == -ERESTARTSYS)
2112 return ret;
2113 /* Continue on if we fail due to EIO, the GPU is hung so we
2114 * should be safe and we need to cleanup or else we might
2115 * cause memory corruption through use-after-free.
2116 */
2117
b5ffc9bc 2118 i915_gem_object_finish_gtt(obj);
5323fd04 2119
673a394b
EA
2120 /* Move the object to the CPU domain to ensure that
2121 * any possible CPU writes while it's not in the GTT
a8198eea 2122 * are flushed when we go to remap it.
673a394b 2123 */
a8198eea
CW
2124 if (ret == 0)
2125 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
8dc1775d 2126 if (ret == -ERESTARTSYS)
673a394b 2127 return ret;
812ed492 2128 if (ret) {
a8198eea
CW
2129 /* In the event of a disaster, abandon all caches and
2130 * hope for the best.
2131 */
812ed492 2132 i915_gem_clflush_object(obj);
05394f39 2133 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
812ed492 2134 }
673a394b 2135
96b47b65 2136 /* release the fence reg _after_ flushing */
d9e86c0e
CW
2137 ret = i915_gem_object_put_fence(obj);
2138 if (ret == -ERESTARTSYS)
2139 return ret;
96b47b65 2140
db53a302
CW
2141 trace_i915_gem_object_unbind(obj);
2142
7c2e6fdf 2143 i915_gem_gtt_unbind_object(obj);
e5281ccd 2144 i915_gem_object_put_pages_gtt(obj);
673a394b 2145
6299f992 2146 list_del_init(&obj->gtt_list);
05394f39 2147 list_del_init(&obj->mm_list);
75e9e915 2148 /* Avoid an unnecessary call to unbind on rebind. */
05394f39 2149 obj->map_and_fenceable = true;
673a394b 2150
05394f39
CW
2151 drm_mm_put_block(obj->gtt_space);
2152 obj->gtt_space = NULL;
2153 obj->gtt_offset = 0;
673a394b 2154
05394f39 2155 if (i915_gem_object_is_purgeable(obj))
963b4836
CW
2156 i915_gem_object_truncate(obj);
2157
8dc1775d 2158 return ret;
673a394b
EA
2159}
2160
88241785 2161int
db53a302 2162i915_gem_flush_ring(struct intel_ring_buffer *ring,
54cf91dc
CW
2163 uint32_t invalidate_domains,
2164 uint32_t flush_domains)
2165{
88241785
CW
2166 int ret;
2167
36d527de
CW
2168 if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2169 return 0;
2170
db53a302
CW
2171 trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
2172
88241785
CW
2173 ret = ring->flush(ring, invalidate_domains, flush_domains);
2174 if (ret)
2175 return ret;
2176
36d527de
CW
2177 if (flush_domains & I915_GEM_GPU_DOMAINS)
2178 i915_gem_process_flushing_list(ring, flush_domains);
2179
88241785 2180 return 0;
54cf91dc
CW
2181}
2182
b93f9cf1 2183static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)
a56ba56c 2184{
88241785
CW
2185 int ret;
2186
395b70be 2187 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
64193406
CW
2188 return 0;
2189
88241785 2190 if (!list_empty(&ring->gpu_write_list)) {
db53a302 2191 ret = i915_gem_flush_ring(ring,
0ac74c6b 2192 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
88241785
CW
2193 if (ret)
2194 return ret;
2195 }
2196
b93f9cf1
BW
2197 return i915_wait_request(ring, i915_gem_next_request_seqno(ring),
2198 do_retire);
a56ba56c
CW
2199}
2200
b93f9cf1 2201int i915_gpu_idle(struct drm_device *dev, bool do_retire)
4df2faf4
DV
2202{
2203 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 2204 int ret, i;
4df2faf4 2205
4df2faf4 2206 /* Flush everything onto the inactive list. */
1ec14ad3 2207 for (i = 0; i < I915_NUM_RINGS; i++) {
b93f9cf1 2208 ret = i915_ring_idle(&dev_priv->ring[i], do_retire);
1ec14ad3
CW
2209 if (ret)
2210 return ret;
2211 }
4df2faf4 2212
8a1a49f9 2213 return 0;
4df2faf4
DV
2214}
2215
c6642782
DV
2216static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2217 struct intel_ring_buffer *pipelined)
4e901fdc 2218{
05394f39 2219 struct drm_device *dev = obj->base.dev;
4e901fdc 2220 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
2221 u32 size = obj->gtt_space->size;
2222 int regnum = obj->fence_reg;
4e901fdc
EA
2223 uint64_t val;
2224
05394f39 2225 val = (uint64_t)((obj->gtt_offset + size - 4096) &
c6642782 2226 0xfffff000) << 32;
05394f39
CW
2227 val |= obj->gtt_offset & 0xfffff000;
2228 val |= (uint64_t)((obj->stride / 128) - 1) <<
4e901fdc
EA
2229 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2230
05394f39 2231 if (obj->tiling_mode == I915_TILING_Y)
4e901fdc
EA
2232 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2233 val |= I965_FENCE_REG_VALID;
2234
c6642782
DV
2235 if (pipelined) {
2236 int ret = intel_ring_begin(pipelined, 6);
2237 if (ret)
2238 return ret;
2239
2240 intel_ring_emit(pipelined, MI_NOOP);
2241 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2242 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2243 intel_ring_emit(pipelined, (u32)val);
2244 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2245 intel_ring_emit(pipelined, (u32)(val >> 32));
2246 intel_ring_advance(pipelined);
2247 } else
2248 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2249
2250 return 0;
4e901fdc
EA
2251}
2252
c6642782
DV
2253static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2254 struct intel_ring_buffer *pipelined)
de151cf6 2255{
05394f39 2256 struct drm_device *dev = obj->base.dev;
de151cf6 2257 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
2258 u32 size = obj->gtt_space->size;
2259 int regnum = obj->fence_reg;
de151cf6
JB
2260 uint64_t val;
2261
05394f39 2262 val = (uint64_t)((obj->gtt_offset + size - 4096) &
de151cf6 2263 0xfffff000) << 32;
05394f39
CW
2264 val |= obj->gtt_offset & 0xfffff000;
2265 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2266 if (obj->tiling_mode == I915_TILING_Y)
de151cf6
JB
2267 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2268 val |= I965_FENCE_REG_VALID;
2269
c6642782
DV
2270 if (pipelined) {
2271 int ret = intel_ring_begin(pipelined, 6);
2272 if (ret)
2273 return ret;
2274
2275 intel_ring_emit(pipelined, MI_NOOP);
2276 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2277 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2278 intel_ring_emit(pipelined, (u32)val);
2279 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2280 intel_ring_emit(pipelined, (u32)(val >> 32));
2281 intel_ring_advance(pipelined);
2282 } else
2283 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2284
2285 return 0;
de151cf6
JB
2286}
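/*
 * [Editor's aside - not part of i915_gem.c] The i965-class fence value built
 * above is just a packed 64-bit descriptor: end and start of the fenced GTT
 * range in the upper/lower dwords, the pitch expressed in 128-byte tile
 * widths, a Y-tiling bit and a valid bit. A standalone sketch of that packing;
 * the shift and valid-bit constants are passed in rather than assumed, so no
 * particular register layout is being asserted here:
 */
#include <stdint.h>

static uint64_t fence_pack_sketch(uint32_t gtt_offset, uint32_t size,
				  uint32_t stride, int is_y_tiled,
				  unsigned int pitch_shift,
				  unsigned int tiling_y_shift,
				  uint64_t valid_bit)
{
	uint64_t val;

	/* end of the object (its last page), page-aligned, in bits 63:32 */
	val = (uint64_t)((gtt_offset + size - 4096) & 0xfffff000) << 32;
	/* start of the object, page-aligned, in bits 31:0 */
	val |= gtt_offset & 0xfffff000;
	/* pitch in 128-byte tile widths, minus one */
	val |= (uint64_t)(stride / 128 - 1) << pitch_shift;
	if (is_y_tiled)
		val |= (uint64_t)1 << tiling_y_shift;
	return val | valid_bit;
}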
2287
c6642782
DV
2288static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2289 struct intel_ring_buffer *pipelined)
de151cf6 2290{
05394f39 2291 struct drm_device *dev = obj->base.dev;
de151cf6 2292 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39 2293 u32 size = obj->gtt_space->size;
c6642782 2294 u32 fence_reg, val, pitch_val;
0f973f27 2295 int tile_width;
de151cf6 2296
c6642782
DV
2297 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2298 (size & -size) != size ||
2299 (obj->gtt_offset & (size - 1)),
2300 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2301 obj->gtt_offset, obj->map_and_fenceable, size))
2302 return -EINVAL;
de151cf6 2303
c6642782 2304 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
0f973f27 2305 tile_width = 128;
de151cf6 2306 else
0f973f27
JB
2307 tile_width = 512;
2308
2309 /* Note: pitch better be a power of two tile widths */
05394f39 2310 pitch_val = obj->stride / tile_width;
0f973f27 2311 pitch_val = ffs(pitch_val) - 1;
de151cf6 2312
05394f39
CW
2313 val = obj->gtt_offset;
2314 if (obj->tiling_mode == I915_TILING_Y)
de151cf6 2315 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
a00b10c3 2316 val |= I915_FENCE_SIZE_BITS(size);
de151cf6
JB
2317 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2318 val |= I830_FENCE_REG_VALID;
2319
05394f39 2320 fence_reg = obj->fence_reg;
a00b10c3
CW
2321 if (fence_reg < 8)
2322 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
dc529a4f 2323 else
a00b10c3 2324 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
c6642782
DV
2325
2326 if (pipelined) {
2327 int ret = intel_ring_begin(pipelined, 4);
2328 if (ret)
2329 return ret;
2330
2331 intel_ring_emit(pipelined, MI_NOOP);
2332 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2333 intel_ring_emit(pipelined, fence_reg);
2334 intel_ring_emit(pipelined, val);
2335 intel_ring_advance(pipelined);
2336 } else
2337 I915_WRITE(fence_reg, val);
2338
2339 return 0;
de151cf6
JB
2340}
2341
c6642782
DV
2342static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2343 struct intel_ring_buffer *pipelined)
de151cf6 2344{
05394f39 2345 struct drm_device *dev = obj->base.dev;
de151cf6 2346 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
2347 u32 size = obj->gtt_space->size;
2348 int regnum = obj->fence_reg;
de151cf6
JB
2349 uint32_t val;
2350 uint32_t pitch_val;
2351
c6642782
DV
2352 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2353 (size & -size) != size ||
2354 (obj->gtt_offset & (size - 1)),
2355 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2356 obj->gtt_offset, size))
2357 return -EINVAL;
de151cf6 2358
05394f39 2359 pitch_val = obj->stride / 128;
e76a16de 2360 pitch_val = ffs(pitch_val) - 1;
e76a16de 2361
05394f39
CW
2362 val = obj->gtt_offset;
2363 if (obj->tiling_mode == I915_TILING_Y)
de151cf6 2364 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
c6642782 2365 val |= I830_FENCE_SIZE_BITS(size);
de151cf6
JB
2366 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2367 val |= I830_FENCE_REG_VALID;
2368
c6642782
DV
2369 if (pipelined) {
2370 int ret = intel_ring_begin(pipelined, 4);
2371 if (ret)
2372 return ret;
2373
2374 intel_ring_emit(pipelined, MI_NOOP);
2375 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2376 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2377 intel_ring_emit(pipelined, val);
2378 intel_ring_advance(pipelined);
2379 } else
2380 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2381
2382 return 0;
de151cf6
JB
2383}
2384
d9e86c0e
CW
2385static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
2386{
2387 return i915_seqno_passed(ring->get_seqno(ring), seqno);
2388}
2389
2390static int
2391i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
ce453d81 2392 struct intel_ring_buffer *pipelined)
d9e86c0e
CW
2393{
2394 int ret;
2395
2396 if (obj->fenced_gpu_access) {
88241785 2397 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
db53a302 2398 ret = i915_gem_flush_ring(obj->last_fenced_ring,
88241785
CW
2399 0, obj->base.write_domain);
2400 if (ret)
2401 return ret;
2402 }
d9e86c0e
CW
2403
2404 obj->fenced_gpu_access = false;
2405 }
2406
2407 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
2408 if (!ring_passed_seqno(obj->last_fenced_ring,
2409 obj->last_fenced_seqno)) {
db53a302 2410 ret = i915_wait_request(obj->last_fenced_ring,
b93f9cf1
BW
2411 obj->last_fenced_seqno,
2412 true);
d9e86c0e
CW
2413 if (ret)
2414 return ret;
2415 }
2416
2417 obj->last_fenced_seqno = 0;
2418 obj->last_fenced_ring = NULL;
2419 }
2420
63256ec5
CW
2421 /* Ensure that all CPU reads are completed before installing a fence
2422 * and all writes before removing the fence.
2423 */
2424 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2425 mb();
2426
d9e86c0e
CW
2427 return 0;
2428}
2429
2430int
2431i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2432{
2433 int ret;
2434
2435 if (obj->tiling_mode)
2436 i915_gem_release_mmap(obj);
2437
ce453d81 2438 ret = i915_gem_object_flush_fence(obj, NULL);
d9e86c0e
CW
2439 if (ret)
2440 return ret;
2441
2442 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2443 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1690e1eb
CW
2444
2445 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);
d9e86c0e
CW
2446 i915_gem_clear_fence_reg(obj->base.dev,
2447 &dev_priv->fence_regs[obj->fence_reg]);
2448
2449 obj->fence_reg = I915_FENCE_REG_NONE;
2450 }
2451
2452 return 0;
2453}
2454
2455static struct drm_i915_fence_reg *
2456i915_find_fence_reg(struct drm_device *dev,
2457 struct intel_ring_buffer *pipelined)
ae3db24a 2458{
ae3db24a 2459 struct drm_i915_private *dev_priv = dev->dev_private;
d9e86c0e
CW
2460 struct drm_i915_fence_reg *reg, *first, *avail;
2461 int i;
ae3db24a
DV
2462
2463 /* First try to find a free reg */
d9e86c0e 2464 avail = NULL;
ae3db24a
DV
2465 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2466 reg = &dev_priv->fence_regs[i];
2467 if (!reg->obj)
d9e86c0e 2468 return reg;
ae3db24a 2469
1690e1eb 2470 if (!reg->pin_count)
d9e86c0e 2471 avail = reg;
ae3db24a
DV
2472 }
2473
d9e86c0e
CW
2474 if (avail == NULL)
2475 return NULL;
ae3db24a
DV
2476
2477 /* None available, try to steal one or wait for a user to finish */
d9e86c0e
CW
2478 avail = first = NULL;
2479 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
1690e1eb 2480 if (reg->pin_count)
ae3db24a
DV
2481 continue;
2482
d9e86c0e
CW
2483 if (first == NULL)
2484 first = reg;
2485
2486 if (!pipelined ||
2487 !reg->obj->last_fenced_ring ||
2488 reg->obj->last_fenced_ring == pipelined) {
2489 avail = reg;
2490 break;
2491 }
ae3db24a
DV
2492 }
2493
d9e86c0e
CW
2494 if (avail == NULL)
2495 avail = first;
ae3db24a 2496
a00b10c3 2497 return avail;
ae3db24a
DV
2498}
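/*
 * [Editor's aside - not part of i915_gem.c] i915_find_fence_reg() above is a
 * two-pass allocator: hand out a free register if one exists, otherwise steal
 * the least-recently-used one that is not pinned (preferring one already tied
 * to the target ring). A standalone sketch of that policy; the driver keeps an
 * LRU list while this sketch uses timestamps, and every name here is
 * hypothetical:
 */
#include <stddef.h>
#include <stdbool.h>

struct slot_sketch {
	bool in_use;
	bool pinned;
	unsigned long last_used;	/* smaller == older */
};

/* Returns an index to (re)use, or -1 if every occupied slot is pinned. */
static int pick_slot_sketch(const struct slot_sketch *slots, size_t n)
{
	int victim = -1;
	size_t i;

	for (i = 0; i < n; i++)		/* pass 1: a free slot wins outright */
		if (!slots[i].in_use)
			return (int)i;

	for (i = 0; i < n; i++) {	/* pass 2: oldest unpinned slot */
		if (slots[i].pinned)
			continue;
		if (victim < 0 || slots[i].last_used < slots[victim].last_used)
			victim = (int)i;
	}
	return victim;
}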
2499
de151cf6 2500/**
d9e86c0e 2501 * i915_gem_object_get_fence - set up a fence reg for an object
de151cf6 2502 * @obj: object to map through a fence reg
d9e86c0e
CW
2503 * @pipelined: ring on which to queue the change, or NULL for CPU access
 2504 * Any waits taken here follow dev_priv->mm.interruptible.
de151cf6
JB
2505 *
2506 * When mapping objects through the GTT, userspace wants to be able to write
2507 * to them without having to worry about swizzling if the object is tiled.
2508 *
2509 * This function walks the fence regs looking for a free one for @obj,
2510 * stealing one if it can't find any.
2511 *
2512 * It then sets up the reg based on the object's properties: address, pitch
2513 * and tiling format.
2514 */
8c4b8c3f 2515int
d9e86c0e 2516i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
ce453d81 2517 struct intel_ring_buffer *pipelined)
de151cf6 2518{
05394f39 2519 struct drm_device *dev = obj->base.dev;
79e53945 2520 struct drm_i915_private *dev_priv = dev->dev_private;
d9e86c0e 2521 struct drm_i915_fence_reg *reg;
ae3db24a 2522 int ret;
de151cf6 2523
6bda10d1
CW
2524 /* XXX disable pipelining. There are bugs. Shocking. */
2525 pipelined = NULL;
2526
d9e86c0e 2527 /* Just update our place in the LRU if our fence is getting reused. */
05394f39
CW
2528 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2529 reg = &dev_priv->fence_regs[obj->fence_reg];
007cc8ac 2530 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
d9e86c0e 2531
29c5a587
CW
2532 if (obj->tiling_changed) {
2533 ret = i915_gem_object_flush_fence(obj, pipelined);
2534 if (ret)
2535 return ret;
2536
2537 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
2538 pipelined = NULL;
2539
2540 if (pipelined) {
2541 reg->setup_seqno =
2542 i915_gem_next_request_seqno(pipelined);
2543 obj->last_fenced_seqno = reg->setup_seqno;
2544 obj->last_fenced_ring = pipelined;
2545 }
2546
2547 goto update;
2548 }
d9e86c0e
CW
2549
2550 if (!pipelined) {
2551 if (reg->setup_seqno) {
2552 if (!ring_passed_seqno(obj->last_fenced_ring,
2553 reg->setup_seqno)) {
db53a302 2554 ret = i915_wait_request(obj->last_fenced_ring,
b93f9cf1
BW
2555 reg->setup_seqno,
2556 true);
d9e86c0e
CW
2557 if (ret)
2558 return ret;
2559 }
2560
2561 reg->setup_seqno = 0;
2562 }
2563 } else if (obj->last_fenced_ring &&
2564 obj->last_fenced_ring != pipelined) {
ce453d81 2565 ret = i915_gem_object_flush_fence(obj, pipelined);
d9e86c0e
CW
2566 if (ret)
2567 return ret;
d9e86c0e
CW
2568 }
2569
a09ba7fa
EA
2570 return 0;
2571 }
2572
d9e86c0e
CW
2573 reg = i915_find_fence_reg(dev, pipelined);
2574 if (reg == NULL)
39965b37 2575 return -EDEADLK;
de151cf6 2576
ce453d81 2577 ret = i915_gem_object_flush_fence(obj, pipelined);
d9e86c0e 2578 if (ret)
ae3db24a 2579 return ret;
de151cf6 2580
d9e86c0e
CW
2581 if (reg->obj) {
2582 struct drm_i915_gem_object *old = reg->obj;
2583
2584 drm_gem_object_reference(&old->base);
2585
2586 if (old->tiling_mode)
2587 i915_gem_release_mmap(old);
2588
ce453d81 2589 ret = i915_gem_object_flush_fence(old, pipelined);
d9e86c0e
CW
2590 if (ret) {
2591 drm_gem_object_unreference(&old->base);
2592 return ret;
2593 }
2594
2595 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
2596 pipelined = NULL;
2597
2598 old->fence_reg = I915_FENCE_REG_NONE;
2599 old->last_fenced_ring = pipelined;
2600 old->last_fenced_seqno =
db53a302 2601 pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
d9e86c0e
CW
2602
2603 drm_gem_object_unreference(&old->base);
2604 } else if (obj->last_fenced_seqno == 0)
2605 pipelined = NULL;
a09ba7fa 2606
de151cf6 2607 reg->obj = obj;
d9e86c0e
CW
2608 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2609 obj->fence_reg = reg - dev_priv->fence_regs;
2610 obj->last_fenced_ring = pipelined;
de151cf6 2611
d9e86c0e 2612 reg->setup_seqno =
db53a302 2613 pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
d9e86c0e
CW
2614 obj->last_fenced_seqno = reg->setup_seqno;
2615
2616update:
2617 obj->tiling_changed = false;
e259befd 2618 switch (INTEL_INFO(dev)->gen) {
25aebfc3 2619 case 7:
e259befd 2620 case 6:
c6642782 2621 ret = sandybridge_write_fence_reg(obj, pipelined);
e259befd
CW
2622 break;
2623 case 5:
2624 case 4:
c6642782 2625 ret = i965_write_fence_reg(obj, pipelined);
e259befd
CW
2626 break;
2627 case 3:
c6642782 2628 ret = i915_write_fence_reg(obj, pipelined);
e259befd
CW
2629 break;
2630 case 2:
c6642782 2631 ret = i830_write_fence_reg(obj, pipelined);
e259befd
CW
2632 break;
2633 }
d9ddcb96 2634
c6642782 2635 return ret;
de151cf6
JB
2636}
2637
2638/**
2639 * i915_gem_clear_fence_reg - clear out fence register info
2640 * @obj: object to clear
2641 *
2642 * Zeroes out the fence register itself and clears out the associated
05394f39 2643 * data structures in dev_priv and obj.
de151cf6
JB
2644 */
2645static void
d9e86c0e
CW
2646i915_gem_clear_fence_reg(struct drm_device *dev,
2647 struct drm_i915_fence_reg *reg)
de151cf6 2648{
79e53945 2649 drm_i915_private_t *dev_priv = dev->dev_private;
d9e86c0e 2650 uint32_t fence_reg = reg - dev_priv->fence_regs;
de151cf6 2651
e259befd 2652 switch (INTEL_INFO(dev)->gen) {
25aebfc3 2653 case 7:
e259befd 2654 case 6:
d9e86c0e 2655 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
e259befd
CW
2656 break;
2657 case 5:
2658 case 4:
d9e86c0e 2659 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
e259befd
CW
2660 break;
2661 case 3:
d9e86c0e
CW
2662 if (fence_reg >= 8)
2663 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
dc529a4f 2664 else
e259befd 2665 case 2:
d9e86c0e 2666 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
dc529a4f
EA
2667
2668 I915_WRITE(fence_reg, 0);
e259befd 2669 break;
dc529a4f 2670 }
de151cf6 2671
007cc8ac 2672 list_del_init(&reg->lru_list);
d9e86c0e
CW
2673 reg->obj = NULL;
2674 reg->setup_seqno = 0;
1690e1eb 2675 reg->pin_count = 0;
52dc7d32
CW
2676}
2677
673a394b
EA
2678/**
2679 * Finds free space in the GTT aperture and binds the object there.
2680 */
2681static int
05394f39 2682i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
920afa77 2683 unsigned alignment,
75e9e915 2684 bool map_and_fenceable)
673a394b 2685{
05394f39 2686 struct drm_device *dev = obj->base.dev;
673a394b 2687 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 2688 struct drm_mm_node *free_space;
a00b10c3 2689 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
5e783301 2690 u32 size, fence_size, fence_alignment, unfenced_alignment;
75e9e915 2691 bool mappable, fenceable;
07f73f69 2692 int ret;
673a394b 2693
05394f39 2694 if (obj->madv != I915_MADV_WILLNEED) {
3ef94daa
CW
2695 DRM_ERROR("Attempting to bind a purgeable object\n");
2696 return -EINVAL;
2697 }
2698
e28f8711
CW
2699 fence_size = i915_gem_get_gtt_size(dev,
2700 obj->base.size,
2701 obj->tiling_mode);
2702 fence_alignment = i915_gem_get_gtt_alignment(dev,
2703 obj->base.size,
2704 obj->tiling_mode);
2705 unfenced_alignment =
2706 i915_gem_get_unfenced_gtt_alignment(dev,
2707 obj->base.size,
2708 obj->tiling_mode);
a00b10c3 2709
673a394b 2710 if (alignment == 0)
5e783301
DV
2711 alignment = map_and_fenceable ? fence_alignment :
2712 unfenced_alignment;
75e9e915 2713 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
673a394b
EA
2714 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2715 return -EINVAL;
2716 }
2717
05394f39 2718 size = map_and_fenceable ? fence_size : obj->base.size;
a00b10c3 2719
654fc607
CW
2720 /* If the object is bigger than the entire aperture, reject it early
2721 * before evicting everything in a vain attempt to find space.
2722 */
05394f39 2723 if (obj->base.size >
75e9e915 2724 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
654fc607
CW
2725 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2726 return -E2BIG;
2727 }
2728
673a394b 2729 search_free:
75e9e915 2730 if (map_and_fenceable)
920afa77
DV
2731 free_space =
2732 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
a00b10c3 2733 size, alignment, 0,
920afa77
DV
2734 dev_priv->mm.gtt_mappable_end,
2735 0);
2736 else
2737 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
a00b10c3 2738 size, alignment, 0);
920afa77
DV
2739
2740 if (free_space != NULL) {
75e9e915 2741 if (map_and_fenceable)
05394f39 2742 obj->gtt_space =
920afa77 2743 drm_mm_get_block_range_generic(free_space,
a00b10c3 2744 size, alignment, 0,
920afa77
DV
2745 dev_priv->mm.gtt_mappable_end,
2746 0);
2747 else
05394f39 2748 obj->gtt_space =
a00b10c3 2749 drm_mm_get_block(free_space, size, alignment);
920afa77 2750 }
05394f39 2751 if (obj->gtt_space == NULL) {
673a394b
EA
2752 /* If the gtt is empty and we're still having trouble
2753 * fitting our object in, we're out of memory.
2754 */
75e9e915
DV
2755 ret = i915_gem_evict_something(dev, size, alignment,
2756 map_and_fenceable);
9731129c 2757 if (ret)
673a394b 2758 return ret;
9731129c 2759
673a394b
EA
2760 goto search_free;
2761 }
2762
e5281ccd 2763 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
673a394b 2764 if (ret) {
05394f39
CW
2765 drm_mm_put_block(obj->gtt_space);
2766 obj->gtt_space = NULL;
07f73f69
CW
2767
2768 if (ret == -ENOMEM) {
809b6334
CW
2769 /* first try to reclaim some memory by clearing the GTT */
2770 ret = i915_gem_evict_everything(dev, false);
07f73f69 2771 if (ret) {
07f73f69 2772 /* now try to shrink everyone else */
4bdadb97
CW
2773 if (gfpmask) {
2774 gfpmask = 0;
2775 goto search_free;
07f73f69
CW
2776 }
2777
809b6334 2778 return -ENOMEM;
07f73f69
CW
2779 }
2780
2781 goto search_free;
2782 }
2783
673a394b
EA
2784 return ret;
2785 }
2786
7c2e6fdf
DV
2787 ret = i915_gem_gtt_bind_object(obj);
2788 if (ret) {
e5281ccd 2789 i915_gem_object_put_pages_gtt(obj);
05394f39
CW
2790 drm_mm_put_block(obj->gtt_space);
2791 obj->gtt_space = NULL;
07f73f69 2792
809b6334 2793 if (i915_gem_evict_everything(dev, false))
07f73f69 2794 return ret;
07f73f69
CW
2795
2796 goto search_free;
673a394b 2797 }
673a394b 2798
6299f992 2799 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
05394f39 2800 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
bf1a1092 2801
673a394b
EA
2802 /* Assert that the object is not currently in any GPU domain. As it
2803 * wasn't in the GTT, there shouldn't be any way it could have been in
2804 * a GPU cache
2805 */
05394f39
CW
2806 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2807 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
673a394b 2808
6299f992 2809 obj->gtt_offset = obj->gtt_space->start;
1c5d22f7 2810
75e9e915 2811 fenceable =
05394f39 2812 obj->gtt_space->size == fence_size &&
0206e353 2813 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
a00b10c3 2814
75e9e915 2815 mappable =
05394f39 2816 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
a00b10c3 2817
05394f39 2818 obj->map_and_fenceable = mappable && fenceable;
75e9e915 2819
db53a302 2820 trace_i915_gem_object_bind(obj, map_and_fenceable);
673a394b
EA
2821 return 0;
2822}
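/*
 * [Editor's aside - not part of i915_gem.c] The bind path above is organised
 * as a "search for space, evict on failure, retry the search" loop (the
 * search_free label). Stripped of the GTT and shmem details, the control flow
 * reduces to the sketch below; both callbacks are hypothetical stand-ins for
 * the drm_mm search and the eviction code:
 */
#include <stdbool.h>

static int bind_with_retry_sketch(unsigned long size, unsigned long align,
				  bool (*try_allocate)(unsigned long, unsigned long),
				  int (*evict_something)(unsigned long, unsigned long))
{
	int ret;

	for (;;) {
		if (try_allocate(size, align))
			return 0;	/* found a hole large enough, done */

		/* No space: make room, then run the search again. */
		ret = evict_something(size, align);
		if (ret)
			return ret;	/* nothing left to evict, give up */
	}
}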
2823
2824void
05394f39 2825i915_gem_clflush_object(struct drm_i915_gem_object *obj)
673a394b 2826{
673a394b
EA
2827 /* If we don't have a page list set up, then we're not pinned
2828 * to GPU, and we can ignore the cache flush because it'll happen
2829 * again at bind time.
2830 */
05394f39 2831 if (obj->pages == NULL)
673a394b
EA
2832 return;
2833
9c23f7fc
CW
2834 /* If the GPU is snooping the contents of the CPU cache,
2835 * we do not need to manually clear the CPU cache lines. However,
2836 * the caches are only snooped when the render cache is
2837 * flushed/invalidated. As we always have to emit invalidations
2838 * and flushes when moving into and out of the RENDER domain, correct
2839 * snooping behaviour occurs naturally as the result of our domain
2840 * tracking.
2841 */
2842 if (obj->cache_level != I915_CACHE_NONE)
2843 return;
2844
1c5d22f7 2845 trace_i915_gem_object_clflush(obj);
cfa16a0d 2846
05394f39 2847 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
673a394b
EA
2848}
2849
e47c68e9 2850/** Flushes any GPU write domain for the object if it's dirty. */
88241785 2851static int
3619df03 2852i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2853{
05394f39 2854 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
88241785 2855 return 0;
e47c68e9
EA
2856
2857 /* Queue the GPU write cache flushing we need. */
db53a302 2858 return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
e47c68e9
EA
2859}
2860
2861/** Flushes the GTT write domain for the object if it's dirty. */
2862static void
05394f39 2863i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2864{
1c5d22f7
CW
2865 uint32_t old_write_domain;
2866
05394f39 2867 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
2868 return;
2869
63256ec5 2870 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
2871 * to it immediately go to main memory as far as we know, so there's
2872 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
2873 *
2874 * However, we do have to enforce the order so that all writes through
2875 * the GTT land before any writes to the device, such as updates to
2876 * the GATT itself.
e47c68e9 2877 */
63256ec5
CW
2878 wmb();
2879
05394f39
CW
2880 old_write_domain = obj->base.write_domain;
2881 obj->base.write_domain = 0;
1c5d22f7
CW
2882
2883 trace_i915_gem_object_change_domain(obj,
05394f39 2884 obj->base.read_domains,
1c5d22f7 2885 old_write_domain);
e47c68e9
EA
2886}
2887
2888/** Flushes the CPU write domain for the object if it's dirty. */
2889static void
05394f39 2890i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2891{
1c5d22f7 2892 uint32_t old_write_domain;
e47c68e9 2893
05394f39 2894 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
2895 return;
2896
2897 i915_gem_clflush_object(obj);
40ce6575 2898 intel_gtt_chipset_flush();
05394f39
CW
2899 old_write_domain = obj->base.write_domain;
2900 obj->base.write_domain = 0;
1c5d22f7
CW
2901
2902 trace_i915_gem_object_change_domain(obj,
05394f39 2903 obj->base.read_domains,
1c5d22f7 2904 old_write_domain);
e47c68e9
EA
2905}
2906
2ef7eeaa
EA
2907/**
2908 * Moves a single object to the GTT read, and possibly write domain.
2909 *
2910 * This function returns when the move is complete, including waiting on
2911 * flushes to occur.
2912 */
79e53945 2913int
2021746e 2914i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 2915{
1c5d22f7 2916 uint32_t old_write_domain, old_read_domains;
e47c68e9 2917 int ret;
2ef7eeaa 2918
02354392 2919 /* Not valid to be called on unbound objects. */
05394f39 2920 if (obj->gtt_space == NULL)
02354392
EA
2921 return -EINVAL;
2922
8d7e3de1
CW
2923 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2924 return 0;
2925
88241785
CW
2926 ret = i915_gem_object_flush_gpu_write_domain(obj);
2927 if (ret)
2928 return ret;
2929
87ca9c8a 2930 if (obj->pending_gpu_write || write) {
ce453d81 2931 ret = i915_gem_object_wait_rendering(obj);
87ca9c8a
CW
2932 if (ret)
2933 return ret;
2934 }
2dafb1e0 2935
7213342d 2936 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 2937
05394f39
CW
2938 old_write_domain = obj->base.write_domain;
2939 old_read_domains = obj->base.read_domains;
1c5d22f7 2940
e47c68e9
EA
2941 /* It should now be out of any other write domains, and we can update
2942 * the domain values for our changes.
2943 */
05394f39
CW
2944 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2945 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 2946 if (write) {
05394f39
CW
2947 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2948 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2949 obj->dirty = 1;
2ef7eeaa
EA
2950 }
2951
1c5d22f7
CW
2952 trace_i915_gem_object_change_domain(obj,
2953 old_read_domains,
2954 old_write_domain);
2955
e47c68e9
EA
2956 return 0;
2957}
2958
e4ffd173
CW
2959int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2960 enum i915_cache_level cache_level)
2961{
2962 int ret;
2963
2964 if (obj->cache_level == cache_level)
2965 return 0;
2966
2967 if (obj->pin_count) {
2968 DRM_DEBUG("can not change the cache level of pinned objects\n");
2969 return -EBUSY;
2970 }
2971
2972 if (obj->gtt_space) {
2973 ret = i915_gem_object_finish_gpu(obj);
2974 if (ret)
2975 return ret;
2976
2977 i915_gem_object_finish_gtt(obj);
2978
2979 /* Before SandyBridge, you could not use tiling or fence
2980 * registers with snooped memory, so relinquish any fences
2981 * currently pointing to our region in the aperture.
2982 */
2983 if (INTEL_INFO(obj->base.dev)->gen < 6) {
2984 ret = i915_gem_object_put_fence(obj);
2985 if (ret)
2986 return ret;
2987 }
2988
2989 i915_gem_gtt_rebind_object(obj, cache_level);
2990 }
2991
2992 if (cache_level == I915_CACHE_NONE) {
2993 u32 old_read_domains, old_write_domain;
2994
2995 /* If we're coming from LLC cached, then we haven't
2996 * actually been tracking whether the data is in the
2997 * CPU cache or not, since we only allow one bit set
2998 * in obj->write_domain and have been skipping the clflushes.
2999 * Just set it to the CPU cache for now.
3000 */
3001 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
3002 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
3003
3004 old_read_domains = obj->base.read_domains;
3005 old_write_domain = obj->base.write_domain;
3006
3007 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3008 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3009
3010 trace_i915_gem_object_change_domain(obj,
3011 old_read_domains,
3012 old_write_domain);
3013 }
3014
3015 obj->cache_level = cache_level;
3016 return 0;
3017}
3018
b9241ea3 3019/*
2da3b9b9
CW
3020 * Prepare buffer for display plane (scanout, cursors, etc).
3021 * Can be called from an uninterruptible phase (modesetting) and allows
3022 * any flushes to be pipelined (for pageflips).
3023 *
3024 * For the display plane, we want to be in the GTT but out of any write
3025 * domains. So in many ways this looks like set_to_gtt_domain() apart from the
3026 * ability to pipeline the waits, pinning and any additional subtleties
3027 * that may differentiate the display plane from ordinary buffers.
b9241ea3
ZW
3028 */
3029int
2da3b9b9
CW
3030i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3031 u32 alignment,
919926ae 3032 struct intel_ring_buffer *pipelined)
b9241ea3 3033{
2da3b9b9 3034 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
3035 int ret;
3036
88241785
CW
3037 ret = i915_gem_object_flush_gpu_write_domain(obj);
3038 if (ret)
3039 return ret;
3040
0be73284 3041 if (pipelined != obj->ring) {
ce453d81 3042 ret = i915_gem_object_wait_rendering(obj);
f0b69efc 3043 if (ret == -ERESTARTSYS)
b9241ea3
ZW
3044 return ret;
3045 }
3046
a7ef0640
EA
3047 /* The display engine is not coherent with the LLC cache on gen6. As
3048 * a result, we make sure that the pinning that is about to occur is
 3049 * done with uncached PTEs. This is the lowest common denominator for all
3050 * chipsets.
3051 *
3052 * However for gen6+, we could do better by using the GFDT bit instead
3053 * of uncaching, which would allow us to flush all the LLC-cached data
3054 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3055 */
3056 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
3057 if (ret)
3058 return ret;
3059
2da3b9b9
CW
3060 /* As the user may map the buffer once pinned in the display plane
3061 * (e.g. libkms for the bootup splash), we have to ensure that we
3062 * always use map_and_fenceable for all scanout buffers.
3063 */
3064 ret = i915_gem_object_pin(obj, alignment, true);
3065 if (ret)
3066 return ret;
3067
b118c1e3
CW
3068 i915_gem_object_flush_cpu_write_domain(obj);
3069
2da3b9b9 3070 old_write_domain = obj->base.write_domain;
05394f39 3071 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
3072
3073 /* It should now be out of any other write domains, and we can update
3074 * the domain values for our changes.
3075 */
3076 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
05394f39 3077 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
3078
3079 trace_i915_gem_object_change_domain(obj,
3080 old_read_domains,
2da3b9b9 3081 old_write_domain);
b9241ea3
ZW
3082
3083 return 0;
3084}
3085
85345517 3086int
a8198eea 3087i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
85345517 3088{
88241785
CW
3089 int ret;
3090
a8198eea 3091 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
85345517
CW
3092 return 0;
3093
88241785 3094 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
db53a302 3095 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
88241785
CW
3096 if (ret)
3097 return ret;
3098 }
85345517 3099
a8198eea
CW
3100 /* Ensure that we invalidate the GPU's caches and TLBs. */
3101 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
3102
ce453d81 3103 return i915_gem_object_wait_rendering(obj);
85345517
CW
3104}
3105
e47c68e9
EA
3106/**
3107 * Moves a single object to the CPU read, and possibly write domain.
3108 *
3109 * This function returns when the move is complete, including waiting on
3110 * flushes to occur.
3111 */
3112static int
919926ae 3113i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 3114{
1c5d22f7 3115 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
3116 int ret;
3117
8d7e3de1
CW
3118 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3119 return 0;
3120
88241785
CW
3121 ret = i915_gem_object_flush_gpu_write_domain(obj);
3122 if (ret)
3123 return ret;
3124
ce453d81 3125 ret = i915_gem_object_wait_rendering(obj);
de18a29e 3126 if (ret)
e47c68e9 3127 return ret;
2ef7eeaa 3128
e47c68e9 3129 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 3130
e47c68e9
EA
3131 /* If we have a partially-valid cache of the object in the CPU,
3132 * finish invalidating it and free the per-page flags.
2ef7eeaa 3133 */
e47c68e9 3134 i915_gem_object_set_to_full_cpu_read_domain(obj);
2ef7eeaa 3135
05394f39
CW
3136 old_write_domain = obj->base.write_domain;
3137 old_read_domains = obj->base.read_domains;
1c5d22f7 3138
e47c68e9 3139 /* Flush the CPU cache if it's still invalid. */
05394f39 3140 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 3141 i915_gem_clflush_object(obj);
2ef7eeaa 3142
05394f39 3143 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
3144 }
3145
3146 /* It should now be out of any other write domains, and we can update
3147 * the domain values for our changes.
3148 */
05394f39 3149 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3150
3151 /* If we're writing through the CPU, then the GPU read domains will
3152 * need to be invalidated at next use.
3153 */
3154 if (write) {
05394f39
CW
3155 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3156 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3157 }
2ef7eeaa 3158
1c5d22f7
CW
3159 trace_i915_gem_object_change_domain(obj,
3160 old_read_domains,
3161 old_write_domain);
3162
2ef7eeaa
EA
3163 return 0;
3164}
3165
673a394b 3166/**
e47c68e9 3167 * Moves the object from a partially CPU read to a full one.
673a394b 3168 *
e47c68e9
EA
3169 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3170 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
673a394b 3171 */
e47c68e9 3172static void
05394f39 3173i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
673a394b 3174{
05394f39 3175 if (!obj->page_cpu_valid)
e47c68e9
EA
3176 return;
3177
3178 /* If we're partially in the CPU read domain, finish moving it in.
3179 */
05394f39 3180 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
e47c68e9
EA
3181 int i;
3182
05394f39
CW
3183 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3184 if (obj->page_cpu_valid[i])
e47c68e9 3185 continue;
05394f39 3186 drm_clflush_pages(obj->pages + i, 1);
e47c68e9 3187 }
e47c68e9
EA
3188 }
3189
3190 /* Free the page_cpu_valid mappings which are now stale, whether
3191 * or not we've got I915_GEM_DOMAIN_CPU.
3192 */
05394f39
CW
3193 kfree(obj->page_cpu_valid);
3194 obj->page_cpu_valid = NULL;
e47c68e9
EA
3195}
3196
3197/**
3198 * Set the CPU read domain on a range of the object.
3199 *
3200 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3201 * not entirely valid. The page_cpu_valid member of the object flags which
3202 * pages have been flushed, and will be respected by
3203 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3204 * of the whole object.
3205 *
3206 * This function returns when the move is complete, including waiting on
3207 * flushes to occur.
3208 */
3209static int
05394f39 3210i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
e47c68e9
EA
3211 uint64_t offset, uint64_t size)
3212{
1c5d22f7 3213 uint32_t old_read_domains;
e47c68e9 3214 int i, ret;
673a394b 3215
05394f39 3216 if (offset == 0 && size == obj->base.size)
e47c68e9 3217 return i915_gem_object_set_to_cpu_domain(obj, 0);
673a394b 3218
88241785
CW
3219 ret = i915_gem_object_flush_gpu_write_domain(obj);
3220 if (ret)
3221 return ret;
3222
ce453d81 3223 ret = i915_gem_object_wait_rendering(obj);
de18a29e 3224 if (ret)
6a47baa6 3225 return ret;
de18a29e 3226
e47c68e9
EA
3227 i915_gem_object_flush_gtt_write_domain(obj);
3228
3229 /* If we're already fully in the CPU read domain, we're done. */
05394f39
CW
3230 if (obj->page_cpu_valid == NULL &&
3231 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
e47c68e9 3232 return 0;
673a394b 3233
e47c68e9
EA
3234 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3235 * newly adding I915_GEM_DOMAIN_CPU
3236 */
05394f39
CW
3237 if (obj->page_cpu_valid == NULL) {
3238 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3239 GFP_KERNEL);
3240 if (obj->page_cpu_valid == NULL)
e47c68e9 3241 return -ENOMEM;
05394f39
CW
3242 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3243 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
673a394b
EA
3244
3245 /* Flush the cache on any pages that are still invalid from the CPU's
3246 * perspective.
3247 */
e47c68e9
EA
3248 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3249 i++) {
05394f39 3250 if (obj->page_cpu_valid[i])
673a394b
EA
3251 continue;
3252
05394f39 3253 drm_clflush_pages(obj->pages + i, 1);
673a394b 3254
05394f39 3255 obj->page_cpu_valid[i] = 1;
673a394b
EA
3256 }
3257
e47c68e9
EA
3258 /* It should now be out of any other write domains, and we can update
3259 * the domain values for our changes.
3260 */
05394f39 3261 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9 3262
05394f39
CW
3263 old_read_domains = obj->base.read_domains;
3264 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
e47c68e9 3265
1c5d22f7
CW
3266 trace_i915_gem_object_change_domain(obj,
3267 old_read_domains,
05394f39 3268 obj->base.write_domain);
1c5d22f7 3269
673a394b
EA
3270 return 0;
3271}
3272
673a394b
EA
3273/* Throttle our rendering by waiting until the ring has completed our requests
3274 * emitted over 20 msec ago.
3275 *
b962442e
EA
3276 * Note that if we were to use the current jiffies each time around the loop,
3277 * we wouldn't escape the function with any frames outstanding if the time to
3278 * render a frame was over 20ms.
3279 *
673a394b
EA
3280 * This should get us reasonable parallelism between CPU and GPU but also
3281 * relatively low latency when blocking on a particular request to finish.
3282 */
40a5f0de 3283static int
f787a5f5 3284i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3285{
f787a5f5
CW
3286 struct drm_i915_private *dev_priv = dev->dev_private;
3287 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e 3288 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
f787a5f5
CW
3289 struct drm_i915_gem_request *request;
3290 struct intel_ring_buffer *ring = NULL;
3291 u32 seqno = 0;
3292 int ret;
93533c29 3293
e110e8d6
CW
3294 if (atomic_read(&dev_priv->mm.wedged))
3295 return -EIO;
3296
1c25595f 3297 spin_lock(&file_priv->mm.lock);
f787a5f5 3298 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3299 if (time_after_eq(request->emitted_jiffies, recent_enough))
3300 break;
40a5f0de 3301
f787a5f5
CW
3302 ring = request->ring;
3303 seqno = request->seqno;
b962442e 3304 }
1c25595f 3305 spin_unlock(&file_priv->mm.lock);
40a5f0de 3306
f787a5f5
CW
3307 if (seqno == 0)
3308 return 0;
2bc43b5c 3309
f787a5f5 3310 ret = 0;
78501eac 3311 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
f787a5f5
CW
3312 /* And wait for the seqno passing without holding any locks and
3313 * causing extra latency for others. This is safe as the irq
3314 * generation is designed to be run atomically and so is
3315 * lockless.
3316 */
b13c2b96
CW
3317 if (ring->irq_get(ring)) {
3318 ret = wait_event_interruptible(ring->irq_queue,
3319 i915_seqno_passed(ring->get_seqno(ring), seqno)
3320 || atomic_read(&dev_priv->mm.wedged));
3321 ring->irq_put(ring);
40a5f0de 3322
b13c2b96
CW
3323 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3324 ret = -EIO;
e959b5db
EA
3325 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
3326 seqno) ||
7ea29b13
EA
3327 atomic_read(&dev_priv->mm.wedged), 3000)) {
3328 ret = -EBUSY;
b13c2b96 3329 }
40a5f0de
EA
3330 }
3331
f787a5f5
CW
3332 if (ret == 0)
3333 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
40a5f0de
EA
3334
3335 return ret;
3336}
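/*
 * [Editor's aside - not part of i915_gem.c] The throttle above boils down to
 * "compute a fixed 20 ms cut-off once, then wait on the newest request that is
 * older than that cut-off". Computing the cut-off a single time is what lets
 * the caller escape even when each frame takes longer than 20 ms to render.
 * A standalone sketch with hypothetical names and millisecond timestamps in
 * place of jiffies:
 */
#include <stddef.h>
#include <stdint.h>

struct request_sketch {
	uint64_t emitted_ms;	/* submission time */
	uint32_t seqno;
};

/* Requests are oldest-first, as on the per-file list in the driver.
 * Returns the seqno to wait for, or 0 if nothing is older than 20 ms. */
static uint32_t throttle_target_sketch(const struct request_sketch *reqs,
				       size_t n, uint64_t now_ms)
{
	uint64_t cutoff = now_ms > 20 ? now_ms - 20 : 0;
	uint32_t seqno = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		if (reqs[i].emitted_ms >= cutoff)
			break;		/* emitted within the window: too new */
		seqno = reqs[i].seqno;
	}
	return seqno;
}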
3337
673a394b 3338int
05394f39
CW
3339i915_gem_object_pin(struct drm_i915_gem_object *obj,
3340 uint32_t alignment,
75e9e915 3341 bool map_and_fenceable)
673a394b 3342{
05394f39 3343 struct drm_device *dev = obj->base.dev;
f13d3f73 3344 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b
EA
3345 int ret;
3346
05394f39 3347 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
23bc5982 3348 WARN_ON(i915_verify_lists(dev));
ac0c6b5a 3349
05394f39
CW
3350 if (obj->gtt_space != NULL) {
3351 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3352 (map_and_fenceable && !obj->map_and_fenceable)) {
3353 WARN(obj->pin_count,
ae7d49d8 3354 "bo is already pinned with incorrect alignment:"
75e9e915
DV
3355 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3356 " obj->map_and_fenceable=%d\n",
05394f39 3357 obj->gtt_offset, alignment,
75e9e915 3358 map_and_fenceable,
05394f39 3359 obj->map_and_fenceable);
ac0c6b5a
CW
3360 ret = i915_gem_object_unbind(obj);
3361 if (ret)
3362 return ret;
3363 }
3364 }
3365
05394f39 3366 if (obj->gtt_space == NULL) {
a00b10c3 3367 ret = i915_gem_object_bind_to_gtt(obj, alignment,
75e9e915 3368 map_and_fenceable);
9731129c 3369 if (ret)
673a394b 3370 return ret;
22c344e9 3371 }
76446cac 3372
05394f39 3373 if (obj->pin_count++ == 0) {
05394f39
CW
3374 if (!obj->active)
3375 list_move_tail(&obj->mm_list,
f13d3f73 3376 &dev_priv->mm.pinned_list);
673a394b 3377 }
6299f992 3378 obj->pin_mappable |= map_and_fenceable;
673a394b 3379
23bc5982 3380 WARN_ON(i915_verify_lists(dev));
673a394b
EA
3381 return 0;
3382}
3383
3384void
05394f39 3385i915_gem_object_unpin(struct drm_i915_gem_object *obj)
673a394b 3386{
05394f39 3387 struct drm_device *dev = obj->base.dev;
673a394b 3388 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 3389
23bc5982 3390 WARN_ON(i915_verify_lists(dev));
05394f39
CW
3391 BUG_ON(obj->pin_count == 0);
3392 BUG_ON(obj->gtt_space == NULL);
673a394b 3393
05394f39
CW
3394 if (--obj->pin_count == 0) {
3395 if (!obj->active)
3396 list_move_tail(&obj->mm_list,
673a394b 3397 &dev_priv->mm.inactive_list);
6299f992 3398 obj->pin_mappable = false;
673a394b 3399 }
23bc5982 3400 WARN_ON(i915_verify_lists(dev));
673a394b
EA
3401}
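/*
 * Editor's sketch (not from this file) of how a caller inside the driver
 * would typically pair the two helpers above.  It assumes dev->struct_mutex
 * is already held and that "obj" is a valid GEM object; the function name
 * is illustrative only.
 */
static int example_with_pinned_object(struct drm_i915_gem_object *obj)
{
	int ret;

	/* Pin into the mappable, fenceable part of the GTT, 4 KiB aligned. */
	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		return ret;

	/* ... obj->gtt_offset is stable while the pin count is elevated ... */

	i915_gem_object_unpin(obj);
	return 0;
}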
3402
3403int
3404i915_gem_pin_ioctl(struct drm_device *dev, void *data,
05394f39 3405 struct drm_file *file)
673a394b
EA
3406{
3407 struct drm_i915_gem_pin *args = data;
05394f39 3408 struct drm_i915_gem_object *obj;
673a394b
EA
3409 int ret;
3410
1d7cfea1
CW
3411 ret = i915_mutex_lock_interruptible(dev);
3412 if (ret)
3413 return ret;
673a394b 3414
05394f39 3415 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3416 if (&obj->base == NULL) {
1d7cfea1
CW
3417 ret = -ENOENT;
3418 goto unlock;
673a394b 3419 }
673a394b 3420
05394f39 3421 if (obj->madv != I915_MADV_WILLNEED) {
bb6baf76 3422 DRM_ERROR("Attempting to pin a purgeable buffer\n");
1d7cfea1
CW
3423 ret = -EINVAL;
3424 goto out;
3ef94daa
CW
3425 }
3426
05394f39 3427 if (obj->pin_filp != NULL && obj->pin_filp != file) {
79e53945
JB
3428 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3429 args->handle);
1d7cfea1
CW
3430 ret = -EINVAL;
3431 goto out;
79e53945
JB
3432 }
3433
05394f39
CW
3434 obj->user_pin_count++;
3435 obj->pin_filp = file;
3436 if (obj->user_pin_count == 1) {
75e9e915 3437 ret = i915_gem_object_pin(obj, args->alignment, true);
1d7cfea1
CW
3438 if (ret)
3439 goto out;
673a394b
EA
3440 }
3441
3442 /* XXX - flush the CPU caches for pinned objects
3443 * as the X server doesn't manage domains yet
3444 */
e47c68e9 3445 i915_gem_object_flush_cpu_write_domain(obj);
05394f39 3446 args->offset = obj->gtt_offset;
1d7cfea1 3447out:
05394f39 3448 drm_gem_object_unreference(&obj->base);
1d7cfea1 3449unlock:
673a394b 3450 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3451 return ret;
673a394b
EA
3452}
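/*
 * Editor's sketch of the userspace side of the pin/unpin ioctls above,
 * using libdrm's drmIoctl() wrapper.  The fd and GEM handle are assumed to
 * come from elsewhere, and the pin ioctl is historically a privileged
 * (DRM master/root) operation.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int pin_then_unpin(int fd, uint32_t handle)
{
	struct drm_i915_gem_pin pin;

	memset(&pin, 0, sizeof(pin));
	pin.handle = handle;
	pin.alignment = 4096;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin))
		return -1;

	/* pin.offset now holds the object's GTT offset. */

	/* The unpin ioctl reuses the same argument struct. */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_UNPIN, &pin);
}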
3453
3454int
3455i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
05394f39 3456 struct drm_file *file)
673a394b
EA
3457{
3458 struct drm_i915_gem_pin *args = data;
05394f39 3459 struct drm_i915_gem_object *obj;
76c1dec1 3460 int ret;
673a394b 3461
1d7cfea1
CW
3462 ret = i915_mutex_lock_interruptible(dev);
3463 if (ret)
3464 return ret;
673a394b 3465
05394f39 3466 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3467 if (&obj->base == NULL) {
1d7cfea1
CW
3468 ret = -ENOENT;
3469 goto unlock;
673a394b 3470 }
76c1dec1 3471
05394f39 3472 if (obj->pin_filp != file) {
79e53945
JB
 3473 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
3474 args->handle);
1d7cfea1
CW
3475 ret = -EINVAL;
3476 goto out;
79e53945 3477 }
05394f39
CW
3478 obj->user_pin_count--;
3479 if (obj->user_pin_count == 0) {
3480 obj->pin_filp = NULL;
79e53945
JB
3481 i915_gem_object_unpin(obj);
3482 }
673a394b 3483
1d7cfea1 3484out:
05394f39 3485 drm_gem_object_unreference(&obj->base);
1d7cfea1 3486unlock:
673a394b 3487 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3488 return ret;
673a394b
EA
3489}
3490
3491int
3492i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 3493 struct drm_file *file)
673a394b
EA
3494{
3495 struct drm_i915_gem_busy *args = data;
05394f39 3496 struct drm_i915_gem_object *obj;
30dbf0c0
CW
3497 int ret;
3498
76c1dec1 3499 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 3500 if (ret)
76c1dec1 3501 return ret;
673a394b 3502
05394f39 3503 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
c8725226 3504 if (&obj->base == NULL) {
1d7cfea1
CW
3505 ret = -ENOENT;
3506 goto unlock;
673a394b 3507 }
d1b851fc 3508
0be555b6
CW
3509 /* Count all active objects as busy, even if they are currently not used
3510 * by the gpu. Users of this interface expect objects to eventually
3511 * become non-busy without any further actions, therefore emit any
3512 * necessary flushes here.
c4de0a5d 3513 */
05394f39 3514 args->busy = obj->active;
0be555b6
CW
3515 if (args->busy) {
3516 /* Unconditionally flush objects, even when the gpu still uses this
3517 * object. Userspace calling this function indicates that it wants to
3518 * use this buffer rather sooner than later, so issuing the required
3519 * flush earlier is beneficial.
3520 */
1a1c6976 3521 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
db53a302 3522 ret = i915_gem_flush_ring(obj->ring,
88241785 3523 0, obj->base.write_domain);
1a1c6976
CW
3524 } else if (obj->ring->outstanding_lazy_request ==
3525 obj->last_rendering_seqno) {
3526 struct drm_i915_gem_request *request;
3527
7a194876
CW
3528 /* This ring is not being cleared by active usage,
3529 * so emit a request to do so.
3530 */
1a1c6976 3531 request = kzalloc(sizeof(*request), GFP_KERNEL);
457eafce 3532 if (request) {
0206e353 3533 ret = i915_add_request(obj->ring, NULL, request);
457eafce
RM
3534 if (ret)
3535 kfree(request);
3536 } else
7a194876
CW
3537 ret = -ENOMEM;
3538 }
0be555b6
CW
3539
3540 /* Update the active list for the hardware's current position.
3541 * Otherwise this only updates on a delayed timer or when irqs
3542 * are actually unmasked, and our working set ends up being
3543 * larger than required.
3544 */
db53a302 3545 i915_gem_retire_requests_ring(obj->ring);
0be555b6 3546
05394f39 3547 args->busy = obj->active;
0be555b6 3548 }
673a394b 3549
05394f39 3550 drm_gem_object_unreference(&obj->base);
1d7cfea1 3551unlock:
673a394b 3552 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3553 return ret;
673a394b
EA
3554}
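/*
 * Editor's sketch of polling the busy ioctl from userspace; fd and handle
 * are assumed to come from elsewhere.  A non-zero "busy" field means the
 * GPU is still using the object.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int bo_is_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = handle;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return -1;	/* bad handle, or the ioctl itself failed */

	return busy.busy != 0;
}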
3555
3556int
3557i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3558 struct drm_file *file_priv)
3559{
0206e353 3560 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
3561}
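/*
 * Editor's sketch: the throttle ioctl carries no payload, so a client
 * (e.g. the X driver between frames) simply issues it and checks the
 * return value; -EIO indicates a wedged GPU.
 */
#include <xf86drm.h>
#include <i915_drm.h>

static int throttle_to_gpu(int fd)
{
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
}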
3562
3ef94daa
CW
3563int
3564i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3565 struct drm_file *file_priv)
3566{
3567 struct drm_i915_gem_madvise *args = data;
05394f39 3568 struct drm_i915_gem_object *obj;
76c1dec1 3569 int ret;
3ef94daa
CW
3570
3571 switch (args->madv) {
3572 case I915_MADV_DONTNEED:
3573 case I915_MADV_WILLNEED:
3574 break;
3575 default:
3576 return -EINVAL;
3577 }
3578
1d7cfea1
CW
3579 ret = i915_mutex_lock_interruptible(dev);
3580 if (ret)
3581 return ret;
3582
05394f39 3583 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
c8725226 3584 if (&obj->base == NULL) {
1d7cfea1
CW
3585 ret = -ENOENT;
3586 goto unlock;
3ef94daa 3587 }
3ef94daa 3588
05394f39 3589 if (obj->pin_count) {
1d7cfea1
CW
3590 ret = -EINVAL;
3591 goto out;
3ef94daa
CW
3592 }
3593
05394f39
CW
3594 if (obj->madv != __I915_MADV_PURGED)
3595 obj->madv = args->madv;
3ef94daa 3596
2d7ef395 3597 /* if the object is no longer bound, discard its backing storage */
05394f39
CW
3598 if (i915_gem_object_is_purgeable(obj) &&
3599 obj->gtt_space == NULL)
2d7ef395
CW
3600 i915_gem_object_truncate(obj);
3601
05394f39 3602 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 3603
1d7cfea1 3604out:
05394f39 3605 drm_gem_object_unreference(&obj->base);
1d7cfea1 3606unlock:
3ef94daa 3607 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3608 return ret;
3ef94daa
CW
3609}
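/*
 * Editor's sketch of how a userspace buffer cache would use the madvise
 * ioctl above: mark idle buffers DONTNEED, and check "retained" when
 * marking them WILLNEED again before reuse.  fd and handle are assumed to
 * come from elsewhere.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns 1 if the backing pages survived, 0 if they were purged,
 * -1 if the ioctl failed. */
static int bo_madvise(int fd, uint32_t handle, uint32_t state)
{
	struct drm_i915_gem_madvise madv;

	memset(&madv, 0, sizeof(madv));
	madv.handle = handle;
	madv.madv = state;	/* I915_MADV_WILLNEED or I915_MADV_DONTNEED */

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv))
		return -1;

	return madv.retained ? 1 : 0;
}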
3610
05394f39
CW
3611struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3612 size_t size)
ac52bc56 3613{
73aa808f 3614 struct drm_i915_private *dev_priv = dev->dev_private;
c397b908 3615 struct drm_i915_gem_object *obj;
5949eac4 3616 struct address_space *mapping;
ac52bc56 3617
c397b908
DV
3618 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3619 if (obj == NULL)
3620 return NULL;
673a394b 3621
c397b908
DV
3622 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3623 kfree(obj);
3624 return NULL;
3625 }
673a394b 3626
5949eac4
HD
3627 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3628 mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
3629
73aa808f
CW
3630 i915_gem_info_add_obj(dev_priv, size);
3631
c397b908
DV
3632 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3633 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 3634
3d29b842
ED
3635 if (HAS_LLC(dev)) {
3636 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
3637 * cache) for about a 10% performance improvement
3638 * compared to uncached. Graphics requests other than
3639 * display scanout are coherent with the CPU in
3640 * accessing this cache. This means in this mode we
3641 * don't need to clflush on the CPU side, and on the
3642 * GPU side we only need to flush internal caches to
3643 * get data visible to the CPU.
3644 *
3645 * However, we maintain the display planes as UC, and so
3646 * need to rebind when first used as such.
3647 */
3648 obj->cache_level = I915_CACHE_LLC;
3649 } else
3650 obj->cache_level = I915_CACHE_NONE;
3651
62b8b215 3652 obj->base.driver_private = NULL;
c397b908 3653 obj->fence_reg = I915_FENCE_REG_NONE;
69dc4987 3654 INIT_LIST_HEAD(&obj->mm_list);
93a37f20 3655 INIT_LIST_HEAD(&obj->gtt_list);
69dc4987 3656 INIT_LIST_HEAD(&obj->ring_list);
432e58ed 3657 INIT_LIST_HEAD(&obj->exec_list);
c397b908 3658 INIT_LIST_HEAD(&obj->gpu_write_list);
c397b908 3659 obj->madv = I915_MADV_WILLNEED;
75e9e915
DV
3660 /* Avoid an unnecessary call to unbind on the first bind. */
3661 obj->map_and_fenceable = true;
de151cf6 3662
05394f39 3663 return obj;
c397b908
DV
3664}
3665
3666int i915_gem_init_object(struct drm_gem_object *obj)
3667{
3668 BUG();
de151cf6 3669
673a394b
EA
3670 return 0;
3671}
3672
05394f39 3673static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
673a394b 3674{
05394f39 3675 struct drm_device *dev = obj->base.dev;
be72615b 3676 drm_i915_private_t *dev_priv = dev->dev_private;
be72615b 3677 int ret;
673a394b 3678
be72615b
CW
3679 ret = i915_gem_object_unbind(obj);
3680 if (ret == -ERESTARTSYS) {
05394f39 3681 list_move(&obj->mm_list,
be72615b
CW
3682 &dev_priv->mm.deferred_free_list);
3683 return;
3684 }
673a394b 3685
26e12f89
CW
3686 trace_i915_gem_object_destroy(obj);
3687
05394f39 3688 if (obj->base.map_list.map)
b464e9a2 3689 drm_gem_free_mmap_offset(&obj->base);
de151cf6 3690
05394f39
CW
3691 drm_gem_object_release(&obj->base);
3692 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 3693
05394f39
CW
3694 kfree(obj->page_cpu_valid);
3695 kfree(obj->bit_17);
3696 kfree(obj);
673a394b
EA
3697}
3698
05394f39 3699void i915_gem_free_object(struct drm_gem_object *gem_obj)
be72615b 3700{
05394f39
CW
3701 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3702 struct drm_device *dev = obj->base.dev;
be72615b 3703
05394f39 3704 while (obj->pin_count > 0)
be72615b
CW
3705 i915_gem_object_unpin(obj);
3706
05394f39 3707 if (obj->phys_obj)
be72615b
CW
3708 i915_gem_detach_phys_object(dev, obj);
3709
3710 i915_gem_free_object_tail(obj);
3711}
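/*
 * Editor's sketch (illustrative only) of the object lifetime the functions
 * above implement: dropping the last reference with
 * drm_gem_object_unreference() ends up in i915_gem_free_object(), which
 * unpins, detaches any phys object and then frees the object (or defers it
 * to the deferred_free_list if unbind returns -ERESTARTSYS).  Assumes
 * dev->struct_mutex is held, as elsewhere in this file.
 */
static void example_object_lifetime(struct drm_device *dev)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL)
		return;

	/* ... bind, pin, render with, unpin ... */

	drm_gem_object_unreference(&obj->base);	/* last ref: frees the object */
}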
3712
29105ccc
CW
3713int
3714i915_gem_idle(struct drm_device *dev)
3715{
3716 drm_i915_private_t *dev_priv = dev->dev_private;
3717 int ret;
28dfe52a 3718
29105ccc 3719 mutex_lock(&dev->struct_mutex);
1c5d22f7 3720
87acb0a5 3721 if (dev_priv->mm.suspended) {
29105ccc
CW
3722 mutex_unlock(&dev->struct_mutex);
3723 return 0;
28dfe52a
EA
3724 }
3725
b93f9cf1 3726 ret = i915_gpu_idle(dev, true);
6dbe2772
KP
3727 if (ret) {
3728 mutex_unlock(&dev->struct_mutex);
673a394b 3729 return ret;
6dbe2772 3730 }
673a394b 3731
29105ccc
CW
3732 /* Under UMS, be paranoid and evict. */
3733 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
5eac3ab4 3734 ret = i915_gem_evict_inactive(dev, false);
29105ccc
CW
3735 if (ret) {
3736 mutex_unlock(&dev->struct_mutex);
3737 return ret;
3738 }
3739 }
3740
312817a3
CW
3741 i915_gem_reset_fences(dev);
3742
29105ccc
CW
3743 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3744 * We need to replace this with a semaphore, or something.
3745 * And not confound mm.suspended!
3746 */
3747 dev_priv->mm.suspended = 1;
bc0c7f14 3748 del_timer_sync(&dev_priv->hangcheck_timer);
29105ccc
CW
3749
3750 i915_kernel_lost_context(dev);
6dbe2772 3751 i915_gem_cleanup_ringbuffer(dev);
29105ccc 3752
6dbe2772
KP
3753 mutex_unlock(&dev->struct_mutex);
3754
29105ccc
CW
3755 /* Cancel the retire work handler, which should be idle now. */
3756 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3757
673a394b
EA
3758 return 0;
3759}
3760
8187a2b7
ZN
3761int
3762i915_gem_init_ringbuffer(struct drm_device *dev)
3763{
3764 drm_i915_private_t *dev_priv = dev->dev_private;
3765 int ret;
68f95ba9 3766
5c1143bb 3767 ret = intel_init_render_ring_buffer(dev);
68f95ba9 3768 if (ret)
b6913e4b 3769 return ret;
68f95ba9
CW
3770
3771 if (HAS_BSD(dev)) {
5c1143bb 3772 ret = intel_init_bsd_ring_buffer(dev);
68f95ba9
CW
3773 if (ret)
3774 goto cleanup_render_ring;
d1b851fc 3775 }
68f95ba9 3776
549f7365
CW
3777 if (HAS_BLT(dev)) {
3778 ret = intel_init_blt_ring_buffer(dev);
3779 if (ret)
3780 goto cleanup_bsd_ring;
3781 }
3782
6f392d54
CW
3783 dev_priv->next_seqno = 1;
3784
68f95ba9
CW
3785 return 0;
3786
549f7365 3787cleanup_bsd_ring:
1ec14ad3 3788 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
68f95ba9 3789cleanup_render_ring:
1ec14ad3 3790 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
8187a2b7
ZN
3791 return ret;
3792}
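/*
 * Editor's note: the cleanup_bsd_ring/cleanup_render_ring labels above use
 * the usual kernel "unwind in reverse order" idiom.  A minimal, generic
 * sketch with purely hypothetical init_a/init_b/teardown_a helpers:
 */
static int init_a(void) { return 0; }
static int init_b(void) { return -1; }
static void teardown_a(void) { }

static int example_two_stage_init(void)
{
	int ret;

	ret = init_a();
	if (ret)
		return ret;

	ret = init_b();
	if (ret)
		goto cleanup_a;		/* undo only what already succeeded */

	return 0;

cleanup_a:
	teardown_a();
	return ret;
}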
3793
3794void
3795i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3796{
3797 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 3798 int i;
8187a2b7 3799
1ec14ad3
CW
3800 for (i = 0; i < I915_NUM_RINGS; i++)
3801 intel_cleanup_ring_buffer(&dev_priv->ring[i]);
8187a2b7
ZN
3802}
3803
673a394b
EA
3804int
3805i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3806 struct drm_file *file_priv)
3807{
3808 drm_i915_private_t *dev_priv = dev->dev_private;
1ec14ad3 3809 int ret, i;
673a394b 3810
79e53945
JB
3811 if (drm_core_check_feature(dev, DRIVER_MODESET))
3812 return 0;
3813
ba1234d1 3814 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 3815 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 3816 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
3817 }
3818
673a394b 3819 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
3820 dev_priv->mm.suspended = 0;
3821
3822 ret = i915_gem_init_ringbuffer(dev);
d816f6ac
WF
3823 if (ret != 0) {
3824 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 3825 return ret;
d816f6ac 3826 }
9bb2d6f9 3827
69dc4987 3828 BUG_ON(!list_empty(&dev_priv->mm.active_list));
673a394b
EA
3829 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3830 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
1ec14ad3
CW
3831 for (i = 0; i < I915_NUM_RINGS; i++) {
3832 BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3833 BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3834 }
673a394b 3835 mutex_unlock(&dev->struct_mutex);
dbb19d30 3836
5f35308b
CW
3837 ret = drm_irq_install(dev);
3838 if (ret)
3839 goto cleanup_ringbuffer;
dbb19d30 3840
673a394b 3841 return 0;
5f35308b
CW
3842
3843cleanup_ringbuffer:
3844 mutex_lock(&dev->struct_mutex);
3845 i915_gem_cleanup_ringbuffer(dev);
3846 dev_priv->mm.suspended = 1;
3847 mutex_unlock(&dev->struct_mutex);
3848
3849 return ret;
673a394b
EA
3850}
3851
3852int
3853i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3854 struct drm_file *file_priv)
3855{
79e53945
JB
3856 if (drm_core_check_feature(dev, DRIVER_MODESET))
3857 return 0;
3858
dbb19d30 3859 drm_irq_uninstall(dev);
e6890f6f 3860 return i915_gem_idle(dev);
673a394b
EA
3861}
3862
3863void
3864i915_gem_lastclose(struct drm_device *dev)
3865{
3866 int ret;
673a394b 3867
e806b495
EA
3868 if (drm_core_check_feature(dev, DRIVER_MODESET))
3869 return;
3870
6dbe2772
KP
3871 ret = i915_gem_idle(dev);
3872 if (ret)
3873 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
3874}
3875
64193406
CW
3876static void
3877init_ring_lists(struct intel_ring_buffer *ring)
3878{
3879 INIT_LIST_HEAD(&ring->active_list);
3880 INIT_LIST_HEAD(&ring->request_list);
3881 INIT_LIST_HEAD(&ring->gpu_write_list);
3882}
3883
673a394b
EA
3884void
3885i915_gem_load(struct drm_device *dev)
3886{
b5aa8a0f 3887 int i;
673a394b
EA
3888 drm_i915_private_t *dev_priv = dev->dev_private;
3889
69dc4987 3890 INIT_LIST_HEAD(&dev_priv->mm.active_list);
673a394b
EA
3891 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3892 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
f13d3f73 3893 INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
a09ba7fa 3894 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
be72615b 3895 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
93a37f20 3896 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
1ec14ad3
CW
3897 for (i = 0; i < I915_NUM_RINGS; i++)
3898 init_ring_lists(&dev_priv->ring[i]);
4b9de737 3899 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 3900 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
673a394b
EA
3901 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3902 i915_gem_retire_work_handler);
30dbf0c0 3903 init_completion(&dev_priv->error_completion);
31169714 3904
94400120
DA
3905 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3906 if (IS_GEN3(dev)) {
3907 u32 tmp = I915_READ(MI_ARB_STATE);
3908 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
3909 /* arb state is a masked write, so set bit + bit in mask */
3910 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3911 I915_WRITE(MI_ARB_STATE, tmp);
3912 }
3913 }
3914
72bfa19c
CW
3915 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3916
de151cf6 3917 /* Old X drivers will take 0-2 for front, back, depth buffers */
b397c836
EA
3918 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3919 dev_priv->fence_reg_start = 3;
de151cf6 3920
a6c45cf0 3921 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
3922 dev_priv->num_fence_regs = 16;
3923 else
3924 dev_priv->num_fence_regs = 8;
3925
b5aa8a0f 3926 /* Initialize fence registers to zero */
10ed13e4
EA
3927 for (i = 0; i < dev_priv->num_fence_regs; i++) {
3928 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
b5aa8a0f 3929 }
10ed13e4 3930
673a394b 3931 i915_gem_detect_bit_6_swizzle(dev);
6b95a207 3932 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 3933
ce453d81
CW
3934 dev_priv->mm.interruptible = true;
3935
17250b71
CW
3936 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3937 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3938 register_shrinker(&dev_priv->mm.inactive_shrinker);
673a394b 3939}
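/*
 * Editor's sketch of the "masked write" idiom used for MI_ARB_STATE in
 * i915_gem_load() above: the upper half of such registers selects which of
 * the lower bits the write is allowed to change, so enabling a bit means
 * writing the bit together with its mask bit.  The helper name is
 * illustrative; MI_ARB_MASK_SHIFT comes from the driver's register headers.
 */
static u32 example_masked_bit_enable(u32 bit)
{
	return bit | (bit << MI_ARB_MASK_SHIFT);
}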
71acb5eb
DA
3940
3941/*
3942 * Create a physically contiguous memory object for this object
3943 * e.g. for cursor + overlay regs
3944 */
995b6762
CW
3945static int i915_gem_init_phys_object(struct drm_device *dev,
3946 int id, int size, int align)
71acb5eb
DA
3947{
3948 drm_i915_private_t *dev_priv = dev->dev_private;
3949 struct drm_i915_gem_phys_object *phys_obj;
3950 int ret;
3951
3952 if (dev_priv->mm.phys_objs[id - 1] || !size)
3953 return 0;
3954
9a298b2a 3955 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
3956 if (!phys_obj)
3957 return -ENOMEM;
3958
3959 phys_obj->id = id;
3960
6eeefaf3 3961 phys_obj->handle = drm_pci_alloc(dev, size, align);
71acb5eb
DA
3962 if (!phys_obj->handle) {
3963 ret = -ENOMEM;
3964 goto kfree_obj;
3965 }
3966#ifdef CONFIG_X86
3967 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3968#endif
3969
3970 dev_priv->mm.phys_objs[id - 1] = phys_obj;
3971
3972 return 0;
3973kfree_obj:
9a298b2a 3974 kfree(phys_obj);
71acb5eb
DA
3975 return ret;
3976}
3977
995b6762 3978static void i915_gem_free_phys_object(struct drm_device *dev, int id)
71acb5eb
DA
3979{
3980 drm_i915_private_t *dev_priv = dev->dev_private;
3981 struct drm_i915_gem_phys_object *phys_obj;
3982
3983 if (!dev_priv->mm.phys_objs[id - 1])
3984 return;
3985
3986 phys_obj = dev_priv->mm.phys_objs[id - 1];
3987 if (phys_obj->cur_obj) {
3988 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3989 }
3990
3991#ifdef CONFIG_X86
3992 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3993#endif
3994 drm_pci_free(dev, phys_obj->handle);
3995 kfree(phys_obj);
3996 dev_priv->mm.phys_objs[id - 1] = NULL;
3997}
3998
3999void i915_gem_free_all_phys_object(struct drm_device *dev)
4000{
4001 int i;
4002
260883c8 4003 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4004 i915_gem_free_phys_object(dev, i);
4005}
4006
4007void i915_gem_detach_phys_object(struct drm_device *dev,
05394f39 4008 struct drm_i915_gem_object *obj)
71acb5eb 4009{
05394f39 4010 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
e5281ccd 4011 char *vaddr;
71acb5eb 4012 int i;
71acb5eb
DA
4013 int page_count;
4014
05394f39 4015 if (!obj->phys_obj)
71acb5eb 4016 return;
05394f39 4017 vaddr = obj->phys_obj->handle->vaddr;
71acb5eb 4018
05394f39 4019 page_count = obj->base.size / PAGE_SIZE;
71acb5eb 4020 for (i = 0; i < page_count; i++) {
5949eac4 4021 struct page *page = shmem_read_mapping_page(mapping, i);
e5281ccd
CW
4022 if (!IS_ERR(page)) {
4023 char *dst = kmap_atomic(page);
4024 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4025 kunmap_atomic(dst);
4026
4027 drm_clflush_pages(&page, 1);
4028
4029 set_page_dirty(page);
4030 mark_page_accessed(page);
4031 page_cache_release(page);
4032 }
71acb5eb 4033 }
40ce6575 4034 intel_gtt_chipset_flush();
d78b47b9 4035
05394f39
CW
4036 obj->phys_obj->cur_obj = NULL;
4037 obj->phys_obj = NULL;
71acb5eb
DA
4038}
4039
4040int
4041i915_gem_attach_phys_object(struct drm_device *dev,
05394f39 4042 struct drm_i915_gem_object *obj,
6eeefaf3
CW
4043 int id,
4044 int align)
71acb5eb 4045{
05394f39 4046 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
71acb5eb 4047 drm_i915_private_t *dev_priv = dev->dev_private;
71acb5eb
DA
4048 int ret = 0;
4049 int page_count;
4050 int i;
4051
4052 if (id > I915_MAX_PHYS_OBJECT)
4053 return -EINVAL;
4054
05394f39
CW
4055 if (obj->phys_obj) {
4056 if (obj->phys_obj->id == id)
71acb5eb
DA
4057 return 0;
4058 i915_gem_detach_phys_object(dev, obj);
4059 }
4060
71acb5eb
DA
4061 /* create a new object */
4062 if (!dev_priv->mm.phys_objs[id - 1]) {
4063 ret = i915_gem_init_phys_object(dev, id,
05394f39 4064 obj->base.size, align);
71acb5eb 4065 if (ret) {
05394f39
CW
4066 DRM_ERROR("failed to init phys object %d size: %zu\n",
4067 id, obj->base.size);
e5281ccd 4068 return ret;
71acb5eb
DA
4069 }
4070 }
4071
4072 /* bind to the object */
05394f39
CW
4073 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4074 obj->phys_obj->cur_obj = obj;
71acb5eb 4075
05394f39 4076 page_count = obj->base.size / PAGE_SIZE;
71acb5eb
DA
4077
4078 for (i = 0; i < page_count; i++) {
e5281ccd
CW
4079 struct page *page;
4080 char *dst, *src;
4081
5949eac4 4082 page = shmem_read_mapping_page(mapping, i);
e5281ccd
CW
4083 if (IS_ERR(page))
4084 return PTR_ERR(page);
71acb5eb 4085
ff75b9bc 4086 src = kmap_atomic(page);
05394f39 4087 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
71acb5eb 4088 memcpy(dst, src, PAGE_SIZE);
3e4d3af5 4089 kunmap_atomic(src);
71acb5eb 4090
e5281ccd
CW
4091 mark_page_accessed(page);
4092 page_cache_release(page);
4093 }
d78b47b9 4094
71acb5eb 4095 return 0;
71acb5eb
DA
4096}
4097
4098static int
05394f39
CW
4099i915_gem_phys_pwrite(struct drm_device *dev,
4100 struct drm_i915_gem_object *obj,
71acb5eb
DA
4101 struct drm_i915_gem_pwrite *args,
4102 struct drm_file *file_priv)
4103{
05394f39 4104 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
b47b30cc 4105 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
71acb5eb 4106
b47b30cc
CW
4107 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4108 unsigned long unwritten;
4109
4110 /* The physical object once assigned is fixed for the lifetime
4111 * of the obj, so we can safely drop the lock and continue
4112 * to access vaddr.
4113 */
4114 mutex_unlock(&dev->struct_mutex);
4115 unwritten = copy_from_user(vaddr, user_data, args->size);
4116 mutex_lock(&dev->struct_mutex);
4117 if (unwritten)
4118 return -EFAULT;
4119 }
71acb5eb 4120
40ce6575 4121 intel_gtt_chipset_flush();
71acb5eb
DA
4122 return 0;
4123}
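/*
 * Editor's note: a minimal sketch of the fast-path/slow-path copy pattern
 * used by i915_gem_phys_pwrite() above, assuming (as the in-function
 * comment explains) that the destination stays valid even while the lock
 * is dropped.  Names are illustrative.
 */
#include <linux/mutex.h>
#include <linux/uaccess.h>

static int copy_with_fallback(struct mutex *lock, void *dst,
			      const void __user *src, size_t len)
{
	/* Optimistic attempt while still holding the lock. */
	if (__copy_from_user_inatomic_nocache(dst, src, len) == 0)
		return 0;

	/* Fall back to a full copy that may fault and sleep: drop the lock. */
	mutex_unlock(lock);
	if (copy_from_user(dst, src, len)) {
		mutex_lock(lock);
		return -EFAULT;
	}
	mutex_lock(lock);

	return 0;
}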
b962442e 4124
f787a5f5 4125void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 4126{
f787a5f5 4127 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e
EA
4128
4129 /* Clean up our request list when the client is going away, so that
4130 * later retire_requests won't dereference our soon-to-be-gone
4131 * file_priv.
4132 */
1c25595f 4133 spin_lock(&file_priv->mm.lock);
f787a5f5
CW
4134 while (!list_empty(&file_priv->mm.request_list)) {
4135 struct drm_i915_gem_request *request;
4136
4137 request = list_first_entry(&file_priv->mm.request_list,
4138 struct drm_i915_gem_request,
4139 client_list);
4140 list_del(&request->client_list);
4141 request->file_priv = NULL;
4142 }
1c25595f 4143 spin_unlock(&file_priv->mm.lock);
b962442e 4144}
31169714 4145
1637ef41
CW
4146static int
4147i915_gpu_is_active(struct drm_device *dev)
4148{
4149 drm_i915_private_t *dev_priv = dev->dev_private;
4150 int lists_empty;
4151
1637ef41 4152 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
17250b71 4153 list_empty(&dev_priv->mm.active_list);
1637ef41
CW
4154
4155 return !lists_empty;
4156}
4157
31169714 4158static int
1495f230 4159i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
31169714 4160{
17250b71
CW
4161 struct drm_i915_private *dev_priv =
4162 container_of(shrinker,
4163 struct drm_i915_private,
4164 mm.inactive_shrinker);
4165 struct drm_device *dev = dev_priv->dev;
4166 struct drm_i915_gem_object *obj, *next;
1495f230 4167 int nr_to_scan = sc->nr_to_scan;
17250b71
CW
4168 int cnt;
4169
4170 if (!mutex_trylock(&dev->struct_mutex))
bbe2e11a 4171 return 0;
31169714
CW
4172
4173 /* "fast-path" to count number of available objects */
4174 if (nr_to_scan == 0) {
17250b71
CW
4175 cnt = 0;
4176 list_for_each_entry(obj,
4177 &dev_priv->mm.inactive_list,
4178 mm_list)
4179 cnt++;
4180 mutex_unlock(&dev->struct_mutex);
4181 return cnt / 100 * sysctl_vfs_cache_pressure;
31169714
CW
4182 }
4183
1637ef41 4184rescan:
31169714 4185 /* first scan for clean buffers */
17250b71 4186 i915_gem_retire_requests(dev);
31169714 4187
17250b71
CW
4188 list_for_each_entry_safe(obj, next,
4189 &dev_priv->mm.inactive_list,
4190 mm_list) {
4191 if (i915_gem_object_is_purgeable(obj)) {
2021746e
CW
4192 if (i915_gem_object_unbind(obj) == 0 &&
4193 --nr_to_scan == 0)
17250b71 4194 break;
31169714 4195 }
31169714
CW
4196 }
4197
4198 /* second pass, evict/count anything still on the inactive list */
17250b71
CW
4199 cnt = 0;
4200 list_for_each_entry_safe(obj, next,
4201 &dev_priv->mm.inactive_list,
4202 mm_list) {
2021746e
CW
4203 if (nr_to_scan &&
4204 i915_gem_object_unbind(obj) == 0)
17250b71 4205 nr_to_scan--;
2021746e 4206 else
17250b71
CW
4207 cnt++;
4208 }
4209
4210 if (nr_to_scan && i915_gpu_is_active(dev)) {
1637ef41
CW
4211 /*
4212 * We are desperate for pages, so as a last resort, wait
4213 * for the GPU to finish and discard whatever we can.
4214 * This has a dramatic impact to reduce the number of
4215 * OOM-killer events whilst running the GPU aggressively.
4216 */
b93f9cf1 4217 if (i915_gpu_idle(dev, true) == 0)
1637ef41
CW
4218 goto rescan;
4219 }
17250b71
CW
4220 mutex_unlock(&dev->struct_mutex);
4221 return cnt / 100 * sysctl_vfs_cache_pressure;
31169714 4222}
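/*
 * Editor's sketch of the old shrinker contract the callback above
 * implements: nr_to_scan == 0 only asks "how much could you free?", any
 * other value asks the callback to try to release that many objects and to
 * report what remains.  Kernel context is assumed (declarations come from
 * the headers this file already includes); the counter is a stand-in for a
 * real count.
 */
static int example_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	static int freeable = 100;	/* illustrative only */

	if (sc->nr_to_scan == 0)
		return freeable / 100 * sysctl_vfs_cache_pressure;

	/* ... try to drop up to sc->nr_to_scan objects, updating freeable ... */

	return freeable / 100 * sysctl_vfs_cache_pressure;
}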