/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <linux/dma_remapping.h>
#include <linux/reservation.h>
#include <linux/uaccess.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem_dmabuf.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */

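/*
 * Minimum GTT offset enforced for objects flagged with
 * __EXEC_OBJECT_NEEDS_BIAS (see eb_get_batch() and eb_vma_misplaced()
 * below): keeping such batches away from the very start of the GTT guards
 * against the negative relocation deltas described in eb_get_batch().
 */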
#define BATCH_OFFSET_BIAS (256*1024)

struct i915_execbuffer_params {
        struct drm_device *dev;
        struct drm_file *file;
        struct i915_vma *batch;
        u32 dispatch_flags;
        u32 args_batch_start_offset;
        struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
        struct drm_i915_gem_request *request;
};

struct eb_vmas {
        struct list_head vmas;
        int and;
        union {
                struct i915_vma *lut[0];
                struct hlist_head buckets[0];
        };
};

static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
        struct eb_vmas *eb = NULL;

        if (args->flags & I915_EXEC_HANDLE_LUT) {
                unsigned size = args->buffer_count;
                size *= sizeof(struct i915_vma *);
                size += sizeof(struct eb_vmas);
                eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
        }

        if (eb == NULL) {
                unsigned size = args->buffer_count;
                unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
                BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
                while (count > 2*size)
                        count >>= 1;
                eb = kzalloc(count*sizeof(struct hlist_head) +
                             sizeof(struct eb_vmas),
                             GFP_TEMPORARY);
                if (eb == NULL)
                        return eb;

                eb->and = count - 1;
        } else
                eb->and = -args->buffer_count;

        INIT_LIST_HEAD(&eb->vmas);
        return eb;
}
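/*
 * eb->and records which lookup scheme eb_create() picked: a negative value
 * (-args->buffer_count) means the flat lut[] is indexed directly by the
 * execbuffer index (I915_EXEC_HANDLE_LUT), while a non-negative value is the
 * power-of-two mask for the hash buckets used by eb_get_vma().
 */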

static void
eb_reset(struct eb_vmas *eb)
{
        if (eb->and >= 0)
                memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static struct i915_vma *
eb_get_batch(struct eb_vmas *eb)
{
        struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);

        /*
         * SNA is doing fancy tricks with compressing batch buffers, which leads
         * to negative relocation deltas. Usually that works out ok since the
         * relocate address is still positive, except when the batch is placed
         * very low in the GTT. Ensure this doesn't happen.
         *
         * Note that actual hangs have only been observed on gen7, but for
         * paranoia do it everywhere.
         */
        if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
                vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

        return vma;
}

static int
eb_lookup_vmas(struct eb_vmas *eb,
               struct drm_i915_gem_exec_object2 *exec,
               const struct drm_i915_gem_execbuffer2 *args,
               struct i915_address_space *vm,
               struct drm_file *file)
{
        struct drm_i915_gem_object *obj;
        struct list_head objects;
        int i, ret;

        INIT_LIST_HEAD(&objects);
        spin_lock(&file->table_lock);
        /* Grab a reference to the object and release the lock so we can lookup
         * or create the VMA without using GFP_ATOMIC */
        for (i = 0; i < args->buffer_count; i++) {
                obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
                if (obj == NULL) {
                        spin_unlock(&file->table_lock);
                        DRM_DEBUG("Invalid object handle %d at index %d\n",
                                  exec[i].handle, i);
                        ret = -ENOENT;
                        goto err;
                }

                if (!list_empty(&obj->obj_exec_link)) {
                        spin_unlock(&file->table_lock);
                        DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
                                  obj, exec[i].handle, i);
                        ret = -EINVAL;
                        goto err;
                }

                i915_gem_object_get(obj);
                list_add_tail(&obj->obj_exec_link, &objects);
        }
        spin_unlock(&file->table_lock);

        i = 0;
        while (!list_empty(&objects)) {
                struct i915_vma *vma;

                obj = list_first_entry(&objects,
                                       struct drm_i915_gem_object,
                                       obj_exec_link);

                /*
                 * NOTE: We can leak any vmas created here when something fails
                 * later on. But that's no issue since vma_unbind can deal with
                 * vmas which are not actually bound. And since only
                 * lookup_or_create exists as an interface to get at the vma
                 * from the (obj, vm) we don't run the risk of creating
                 * duplicated vmas for the same vm.
                 */
                vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL);
                if (unlikely(IS_ERR(vma))) {
                        DRM_DEBUG("Failed to lookup VMA\n");
                        ret = PTR_ERR(vma);
                        goto err;
                }

                /* Transfer ownership from the objects list to the vmas list. */
                list_add_tail(&vma->exec_list, &eb->vmas);
                list_del_init(&obj->obj_exec_link);

                vma->exec_entry = &exec[i];
                if (eb->and < 0) {
                        eb->lut[i] = vma;
                } else {
                        uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
                        vma->exec_handle = handle;
                        hlist_add_head(&vma->exec_node,
                                       &eb->buckets[handle & eb->and]);
                }
                ++i;
        }

        return 0;


err:
        while (!list_empty(&objects)) {
                obj = list_first_entry(&objects,
                                       struct drm_i915_gem_object,
                                       obj_exec_link);
                list_del_init(&obj->obj_exec_link);
                i915_gem_object_put(obj);
        }
        /*
         * Objects already transferred to the vmas list will be unreferenced by
         * eb_destroy.
         */

        return ret;
}

static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
        if (eb->and < 0) {
                if (handle >= -eb->and)
                        return NULL;
                return eb->lut[handle];
        } else {
                struct hlist_head *head;
                struct i915_vma *vma;

                head = &eb->buckets[handle & eb->and];
                hlist_for_each_entry(vma, head, exec_node) {
                        if (vma->exec_handle == handle)
                                return vma;
                }
                return NULL;
        }
}

static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
        struct drm_i915_gem_exec_object2 *entry;
        struct drm_i915_gem_object *obj = vma->obj;

        if (!drm_mm_node_allocated(&vma->node))
                return;

        entry = vma->exec_entry;

        if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
                i915_gem_object_unpin_fence(obj);

        if (entry->flags & __EXEC_OBJECT_HAS_PIN)
                __i915_vma_unpin(vma);

        entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static void eb_destroy(struct eb_vmas *eb)
{
        while (!list_empty(&eb->vmas)) {
                struct i915_vma *vma;

                vma = list_first_entry(&eb->vmas,
                                       struct i915_vma,
                                       exec_list);
                list_del_init(&vma->exec_list);
                i915_gem_execbuffer_unreserve_vma(vma);
                i915_vma_put(vma);
        }
        kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
        return (HAS_LLC(obj->base.dev) ||
                obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
                obj->cache_level != I915_CACHE_NONE);
}

/* Used to convert any address to canonical form.
 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 * addresses to be in a canonical form:
 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 * canonical form [63:48] == [47]."
 */
#define GEN8_HIGH_ADDRESS_BIT 47
static inline uint64_t gen8_canonical_addr(uint64_t address)
{
        return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
}
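/*
 * Example: with GEN8_HIGH_ADDRESS_BIT == 47, an address with bit 47 set,
 * e.g. 0x0000800000000000, canonicalises to 0xffff800000000000, while any
 * address with bit 47 clear is returned unchanged. gen8_noncanonical_addr()
 * below simply masks those copied upper bits off again.
 */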

static inline uint64_t gen8_noncanonical_addr(uint64_t address)
{
        return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
}

static inline uint64_t
relocation_target(struct drm_i915_gem_relocation_entry *reloc,
                  uint64_t target_offset)
{
        return gen8_canonical_addr((int)reloc->delta + target_offset);
}

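/*
 * The reloc_cache keeps the most recently used atomic mapping (kmap_atomic
 * for CPU relocations, io_mapping_map_atomic_wc for GTT relocations) alive
 * between consecutive relocation entries, so entries that land in the same
 * page do not pay for a fresh map/unmap each time. reloc_cache_fini() must
 * be called once the caller is finished to drop the final mapping.
 */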
struct reloc_cache {
        void *vaddr;
        unsigned int page;
        enum { KMAP, IOMAP } type;
};

static void reloc_cache_init(struct reloc_cache *cache)
{
        cache->page = -1;
        cache->vaddr = NULL;
}

static void reloc_cache_fini(struct reloc_cache *cache)
{
        if (!cache->vaddr)
                return;

        switch (cache->type) {
        case KMAP:
                kunmap_atomic(cache->vaddr);
                break;

        case IOMAP:
                io_mapping_unmap_atomic(cache->vaddr);
                break;
        }
}

static void *reloc_kmap(struct drm_i915_gem_object *obj,
                        struct reloc_cache *cache,
                        int page)
{
        if (cache->page == page)
                return cache->vaddr;

        if (cache->vaddr)
                kunmap_atomic(cache->vaddr);

        cache->page = page;
        cache->vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
        cache->type = KMAP;

        return cache->vaddr;
}

static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
                   struct drm_i915_gem_relocation_entry *reloc,
                   struct reloc_cache *cache,
                   uint64_t target_offset)
{
        struct drm_device *dev = obj->base.dev;
        uint32_t page_offset = offset_in_page(reloc->offset);
        uint64_t delta = relocation_target(reloc, target_offset);
        char *vaddr;
        int ret;

        ret = i915_gem_object_set_to_cpu_domain(obj, true);
        if (ret)
                return ret;

        vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
        *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);

        if (INTEL_GEN(dev) >= 8) {
                page_offset += sizeof(uint32_t);
                if (page_offset == PAGE_SIZE) {
                        vaddr = reloc_kmap(obj, cache, cache->page + 1);
                        page_offset = 0;
                }
                *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
        }

        return 0;
}

static void *reloc_iomap(struct drm_i915_private *i915,
                         struct reloc_cache *cache,
                         uint64_t offset)
{
        if (cache->page == offset >> PAGE_SHIFT)
                return cache->vaddr;

        if (cache->vaddr)
                io_mapping_unmap_atomic(cache->vaddr);

        cache->page = offset >> PAGE_SHIFT;
        cache->vaddr =
                io_mapping_map_atomic_wc(i915->ggtt.mappable,
                                         offset & PAGE_MASK);
        cache->type = IOMAP;

        return cache->vaddr;
}

static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
                   struct drm_i915_gem_relocation_entry *reloc,
                   struct reloc_cache *cache,
                   uint64_t target_offset)
{
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_vma *vma;
        uint64_t delta = relocation_target(reloc, target_offset);
        uint64_t offset;
        void __iomem *reloc_page;
        int ret;

        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto unpin;

        ret = i915_gem_object_put_fence(obj);
        if (ret)
                goto unpin;

        /* Map the page containing the relocation we're going to perform. */
        offset = vma->node.start + reloc->offset;
        reloc_page = reloc_iomap(dev_priv, cache, offset);
        iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));

        if (INTEL_GEN(dev_priv) >= 8) {
                offset += sizeof(uint32_t);
                if (offset_in_page(offset) == 0)
                        reloc_page = reloc_iomap(dev_priv, cache, offset);
                iowrite32(upper_32_bits(delta),
                          reloc_page + offset_in_page(offset));
        }

unpin:
        __i915_vma_unpin(vma);
        return ret;
}

static void
clflush_write32(void *addr, uint32_t value)
{
        /* This is not a fast path, so KISS. */
        drm_clflush_virt_range(addr, sizeof(uint32_t));
        *(uint32_t *)addr = value;
        drm_clflush_virt_range(addr, sizeof(uint32_t));
}

static int
relocate_entry_clflush(struct drm_i915_gem_object *obj,
                       struct drm_i915_gem_relocation_entry *reloc,
                       struct reloc_cache *cache,
                       uint64_t target_offset)
{
        struct drm_device *dev = obj->base.dev;
        uint32_t page_offset = offset_in_page(reloc->offset);
        uint64_t delta = relocation_target(reloc, target_offset);
        char *vaddr;
        int ret;

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                return ret;

        vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
        clflush_write32(vaddr + page_offset, lower_32_bits(delta));

        if (INTEL_GEN(dev) >= 8) {
                page_offset += sizeof(uint32_t);
                if (page_offset == PAGE_SIZE) {
                        vaddr = reloc_kmap(obj, cache, cache->page + 1);
                        page_offset = 0;
                }
                clflush_write32(vaddr + page_offset, upper_32_bits(delta));
        }

        return 0;
}

static bool object_is_idle(struct drm_i915_gem_object *obj)
{
        unsigned long active = i915_gem_object_get_active(obj);
        int idx;

        for_each_active(active, idx) {
                if (!i915_gem_active_is_idle(&obj->last_read[idx],
                                             &obj->base.dev->struct_mutex))
                        return false;
        }

        return true;
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                                   struct eb_vmas *eb,
                                   struct drm_i915_gem_relocation_entry *reloc,
                                   struct reloc_cache *cache)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_gem_object *target_obj;
        struct drm_i915_gem_object *target_i915_obj;
        struct i915_vma *target_vma;
        uint64_t target_offset;
        int ret;

        /* we already hold a reference to all valid objects */
        target_vma = eb_get_vma(eb, reloc->target_handle);
        if (unlikely(target_vma == NULL))
                return -ENOENT;
        target_i915_obj = target_vma->obj;
        target_obj = &target_vma->obj->base;

        target_offset = gen8_canonical_addr(target_vma->node.start);

        /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
         * pipe_control writes because the gpu doesn't properly redirect them
         * through the ppgtt for non_secure batchbuffers. */
        if (unlikely(IS_GEN6(dev) &&
                     reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
                ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
                                    PIN_GLOBAL);
                if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
                        return ret;
        }

        /* Validate that the target is in a valid r/w GPU domain */
        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
                DRM_DEBUG("reloc with multiple write domains: "
                          "obj %p target %d offset %d "
                          "read %08x write %08x",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          reloc->read_domains,
                          reloc->write_domain);
                return -EINVAL;
        }
        if (unlikely((reloc->write_domain | reloc->read_domains)
                     & ~I915_GEM_GPU_DOMAINS)) {
                DRM_DEBUG("reloc with read/write non-GPU domains: "
                          "obj %p target %d offset %d "
                          "read %08x write %08x",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          reloc->read_domains,
                          reloc->write_domain);
                return -EINVAL;
        }

        target_obj->pending_read_domains |= reloc->read_domains;
        target_obj->pending_write_domain |= reloc->write_domain;

        /* If the relocation already has the right value in it, no
         * more work needs to be done.
         */
        if (target_offset == reloc->presumed_offset)
                return 0;

        /* Check that the relocation address is valid... */
        if (unlikely(reloc->offset >
                     obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
                DRM_DEBUG("Relocation beyond object bounds: "
                          "obj %p target %d offset %d size %d.\n",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          (int) obj->base.size);
                return -EINVAL;
        }
        if (unlikely(reloc->offset & 3)) {
                DRM_DEBUG("Relocation not 4-byte aligned: "
                          "obj %p target %d offset %d.\n",
                          obj, reloc->target_handle,
                          (int) reloc->offset);
                return -EINVAL;
        }

        /* We can't wait for rendering with pagefaults disabled */
        if (pagefault_disabled() && !object_is_idle(obj))
                return -EFAULT;

        if (use_cpu_reloc(obj))
                ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
        else if (obj->map_and_fenceable)
                ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
        else if (static_cpu_has(X86_FEATURE_CLFLUSH))
                ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
        else {
                WARN_ONCE(1, "Impossible case in relocation handling\n");
                ret = -ENODEV;
        }

        if (ret)
                return ret;

        /* and update the user's relocation entry */
        reloc->presumed_offset = target_offset;

        return 0;
}

static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
                                 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
        struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
        struct drm_i915_gem_relocation_entry __user *user_relocs;
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        struct reloc_cache cache;
        int remain, ret = 0;

        user_relocs = u64_to_user_ptr(entry->relocs_ptr);
        reloc_cache_init(&cache);

        remain = entry->relocation_count;
        while (remain) {
                struct drm_i915_gem_relocation_entry *r = stack_reloc;
                int count = remain;
                if (count > ARRAY_SIZE(stack_reloc))
                        count = ARRAY_SIZE(stack_reloc);
                remain -= count;

                if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) {
                        ret = -EFAULT;
                        goto out;
                }

                do {
                        u64 offset = r->presumed_offset;

                        ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
                        if (ret)
                                goto out;

                        if (r->presumed_offset != offset &&
                            __put_user(r->presumed_offset,
                                       &user_relocs->presumed_offset)) {
                                ret = -EFAULT;
                                goto out;
                        }

                        user_relocs++;
                        r++;
                } while (--count);
        }

out:
        reloc_cache_fini(&cache);
        return ret;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
                                      struct eb_vmas *eb,
                                      struct drm_i915_gem_relocation_entry *relocs)
{
        const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        struct reloc_cache cache;
        int i, ret = 0;

        reloc_cache_init(&cache);
        for (i = 0; i < entry->relocation_count; i++) {
                ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
                if (ret)
                        break;
        }
        reloc_cache_fini(&cache);

        return ret;
}

static int
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
{
        struct i915_vma *vma;
        int ret = 0;

        /* This is the fast path and we cannot handle a pagefault whilst
         * holding the struct mutex lest the user pass in the relocations
         * contained within a mmapped bo, for in such a case the page
         * fault handler would call i915_gem_fault() and we would try to
         * acquire the struct mutex again. Obviously this is bad and so
         * lockdep complains vehemently.
         */
        pagefault_disable();
        list_for_each_entry(vma, &eb->vmas, exec_list) {
                ret = i915_gem_execbuffer_relocate_vma(vma, eb);
                if (ret)
                        break;
        }
        pagefault_enable();

        return ret;
}

static bool only_mappable_for_reloc(unsigned int flags)
{
        return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
                __EXEC_OBJECT_NEEDS_MAP;
}

static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
                                struct intel_engine_cs *engine,
                                bool *need_reloc)
{
        struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        uint64_t flags;
        int ret;

        flags = PIN_USER;
        if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
                flags |= PIN_GLOBAL;

        if (!drm_mm_node_allocated(&vma->node)) {
                /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
                 * limit address to the first 4GBs for unflagged objects.
                 */
                if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
                        flags |= PIN_ZONE_4G;
                if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
                        flags |= PIN_GLOBAL | PIN_MAPPABLE;
                if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
                        flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
                if (entry->flags & EXEC_OBJECT_PINNED)
                        flags |= entry->offset | PIN_OFFSET_FIXED;
                if ((flags & PIN_MAPPABLE) == 0)
                        flags |= PIN_HIGH;
        }

        ret = i915_vma_pin(vma,
                           entry->pad_to_size,
                           entry->alignment,
                           flags);
        if ((ret == -ENOSPC || ret == -E2BIG) &&
            only_mappable_for_reloc(entry->flags))
                ret = i915_vma_pin(vma,
                                   entry->pad_to_size,
                                   entry->alignment,
                                   flags & ~PIN_MAPPABLE);
        if (ret)
                return ret;

        entry->flags |= __EXEC_OBJECT_HAS_PIN;

        if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
                ret = i915_gem_object_get_fence(obj);
                if (ret)
                        return ret;

                if (i915_gem_object_pin_fence(obj))
                        entry->flags |= __EXEC_OBJECT_HAS_FENCE;
        }

        if (entry->offset != vma->node.start) {
                entry->offset = vma->node.start;
                *need_reloc = true;
        }

        if (entry->flags & EXEC_OBJECT_WRITE) {
                obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
                obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
        }

        return 0;
}

static bool
need_reloc_mappable(struct i915_vma *vma)
{
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;

        if (entry->relocation_count == 0)
                return false;

        if (!i915_vma_is_ggtt(vma))
                return false;

        /* See also use_cpu_reloc() */
        if (HAS_LLC(vma->obj->base.dev))
                return false;

        if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
                return false;

        return true;
}

static bool
eb_vma_misplaced(struct i915_vma *vma)
{
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
        struct drm_i915_gem_object *obj = vma->obj;

        WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
                !i915_vma_is_ggtt(vma));

        if (entry->alignment &&
            vma->node.start & (entry->alignment - 1))
                return true;

        if (vma->node.size < entry->pad_to_size)
                return true;

        if (entry->flags & EXEC_OBJECT_PINNED &&
            vma->node.start != entry->offset)
                return true;

        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
            vma->node.start < BATCH_OFFSET_BIAS)
                return true;

        /* avoid costly ping-pong once a batch bo ended up non-mappable */
        if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
                return !only_mappable_for_reloc(entry->flags);

        if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
            (vma->node.start + vma->node.size - 1) >> 32)
                return true;

        return false;
}

static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
                            struct list_head *vmas,
                            struct i915_gem_context *ctx,
                            bool *need_relocs)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        struct i915_address_space *vm;
        struct list_head ordered_vmas;
        struct list_head pinned_vmas;
        bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
        int retry;

        vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

        INIT_LIST_HEAD(&ordered_vmas);
        INIT_LIST_HEAD(&pinned_vmas);
        while (!list_empty(vmas)) {
                struct drm_i915_gem_exec_object2 *entry;
                bool need_fence, need_mappable;

                vma = list_first_entry(vmas, struct i915_vma, exec_list);
                obj = vma->obj;
                entry = vma->exec_entry;

                if (ctx->flags & CONTEXT_NO_ZEROMAP)
                        entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

                if (!has_fenced_gpu_access)
                        entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
                need_fence =
                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                        i915_gem_object_is_tiled(obj);
                need_mappable = need_fence || need_reloc_mappable(vma);

                if (entry->flags & EXEC_OBJECT_PINNED)
                        list_move_tail(&vma->exec_list, &pinned_vmas);
                else if (need_mappable) {
                        entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
                        list_move(&vma->exec_list, &ordered_vmas);
                } else
                        list_move_tail(&vma->exec_list, &ordered_vmas);

                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
                obj->base.pending_write_domain = 0;
        }
        list_splice(&ordered_vmas, vmas);
        list_splice(&pinned_vmas, vmas);

        /* Attempt to pin all of the buffers into the GTT.
         * This is done in 3 phases:
         *
         * 1a. Unbind all objects that do not match the GTT constraints for
         *     the execbuffer (fenceable, mappable, alignment etc).
         * 1b. Increment pin count for already bound objects.
         * 2.  Bind new objects.
         * 3.  Decrement pin count.
         *
         * This avoids unnecessary unbinding of later objects in order to make
         * room for the earlier objects *unless* we need to defragment.
         */
        retry = 0;
        do {
                int ret = 0;

                /* Unbind any ill-fitting objects or pin. */
                list_for_each_entry(vma, vmas, exec_list) {
                        if (!drm_mm_node_allocated(&vma->node))
                                continue;

                        if (eb_vma_misplaced(vma))
                                ret = i915_vma_unbind(vma);
                        else
                                ret = i915_gem_execbuffer_reserve_vma(vma,
                                                                      engine,
                                                                      need_relocs);
                        if (ret)
                                goto err;
                }

                /* Bind fresh objects */
                list_for_each_entry(vma, vmas, exec_list) {
                        if (drm_mm_node_allocated(&vma->node))
                                continue;

                        ret = i915_gem_execbuffer_reserve_vma(vma, engine,
                                                              need_relocs);
                        if (ret)
                                goto err;
                }

err:
                if (ret != -ENOSPC || retry++)
                        return ret;

                /* Decrement pin count for bound objects */
                list_for_each_entry(vma, vmas, exec_list)
                        i915_gem_execbuffer_unreserve_vma(vma);

                ret = i915_gem_evict_vm(vm, true);
                if (ret)
                        return ret;
        } while (1);
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
                                  struct drm_i915_gem_execbuffer2 *args,
                                  struct drm_file *file,
                                  struct intel_engine_cs *engine,
                                  struct eb_vmas *eb,
                                  struct drm_i915_gem_exec_object2 *exec,
                                  struct i915_gem_context *ctx)
{
        struct drm_i915_gem_relocation_entry *reloc;
        struct i915_address_space *vm;
        struct i915_vma *vma;
        bool need_relocs;
        int *reloc_offset;
        int i, total, ret;
        unsigned count = args->buffer_count;

        vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

        /* We may process another execbuffer during the unlock... */
        while (!list_empty(&eb->vmas)) {
                vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
                list_del_init(&vma->exec_list);
                i915_gem_execbuffer_unreserve_vma(vma);
                i915_vma_put(vma);
        }

        mutex_unlock(&dev->struct_mutex);

        total = 0;
        for (i = 0; i < count; i++)
                total += exec[i].relocation_count;

        reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
        reloc = drm_malloc_ab(total, sizeof(*reloc));
        if (reloc == NULL || reloc_offset == NULL) {
                drm_free_large(reloc);
                drm_free_large(reloc_offset);
                mutex_lock(&dev->struct_mutex);
                return -ENOMEM;
        }

        total = 0;
        for (i = 0; i < count; i++) {
                struct drm_i915_gem_relocation_entry __user *user_relocs;
                u64 invalid_offset = (u64)-1;
                int j;

                user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);

                if (copy_from_user(reloc+total, user_relocs,
                                   exec[i].relocation_count * sizeof(*reloc))) {
                        ret = -EFAULT;
                        mutex_lock(&dev->struct_mutex);
                        goto err;
                }

                /* As we do not update the known relocation offsets after
                 * relocating (due to the complexities in lock handling),
                 * we need to mark them as invalid now so that we force the
                 * relocation processing next time. Just in case the target
                 * object is evicted and then rebound into its old
                 * presumed_offset before the next execbuffer - if that
                 * happened we would make the mistake of assuming that the
                 * relocations were valid.
                 */
                for (j = 0; j < exec[i].relocation_count; j++) {
                        if (__copy_to_user(&user_relocs[j].presumed_offset,
                                           &invalid_offset,
                                           sizeof(invalid_offset))) {
                                ret = -EFAULT;
                                mutex_lock(&dev->struct_mutex);
                                goto err;
                        }
                }

                reloc_offset[i] = total;
                total += exec[i].relocation_count;
        }

        ret = i915_mutex_lock_interruptible(dev);
        if (ret) {
                mutex_lock(&dev->struct_mutex);
                goto err;
        }

        /* reacquire the objects */
        eb_reset(eb);
        ret = eb_lookup_vmas(eb, exec, args, vm, file);
        if (ret)
                goto err;

        need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
        ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
                                          &need_relocs);
        if (ret)
                goto err;

        list_for_each_entry(vma, &eb->vmas, exec_list) {
                int offset = vma->exec_entry - exec;
                ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
                                                            reloc + reloc_offset[offset]);
                if (ret)
                        goto err;
        }

        /* Leave the user relocations as are, this is the painfully slow path,
         * and we want to avoid the complication of dropping the lock whilst
         * having buffers reserved in the aperture and so causing spurious
         * ENOSPC for random operations.
         */

err:
        drm_free_large(reloc);
        drm_free_large(reloc_offset);
        return ret;
}

static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
{
        unsigned int mask;

        mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
        mask <<= I915_BO_ACTIVE_SHIFT;

        return mask;
}
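/*
 * eb_other_engines() builds a mask, in obj->flags coordinates, covering the
 * active bits of every engine except the one this request is submitted to;
 * i915_gem_execbuffer_move_to_gpu() below uses it to decide which objects
 * still need an explicit i915_gem_object_sync() before the batch may run.
 */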

static int
i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
                                struct list_head *vmas)
{
        const unsigned int other_rings = eb_other_engines(req);
        struct i915_vma *vma;
        int ret;

        list_for_each_entry(vma, vmas, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;

                if (obj->flags & other_rings) {
                        ret = i915_gem_object_sync(obj, req);
                        if (ret)
                                return ret;
                }

                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
                        i915_gem_clflush_object(obj, false);
        }

        /* Unconditionally flush any chipset caches (for streaming writes). */
        i915_gem_chipset_flush(req->engine->i915);

        /* Unconditionally invalidate GPU caches and TLBs. */
        return req->engine->emit_flush(req, EMIT_INVALIDATE);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
        if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
                return false;

        /* Kernel clipping was a DRI1 misfeature */
        if (exec->num_cliprects || exec->cliprects_ptr)
                return false;

        if (exec->DR4 == 0xffffffff) {
                DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
                exec->DR4 = 0;
        }
        if (exec->DR1 || exec->DR4)
                return false;

        if ((exec->batch_start_offset | exec->batch_len) & 0x7)
                return false;

        return true;
}

static int
validate_exec_list(struct drm_device *dev,
                   struct drm_i915_gem_exec_object2 *exec,
                   int count)
{
        unsigned relocs_total = 0;
        unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
        unsigned invalid_flags;
        int i;

        /* INTERNAL flags must not overlap with external ones */
        BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);

        invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
        if (USES_FULL_PPGTT(dev))
                invalid_flags |= EXEC_OBJECT_NEEDS_GTT;

        for (i = 0; i < count; i++) {
                char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
                int length; /* limited by fault_in_pages_readable() */

                if (exec[i].flags & invalid_flags)
                        return -EINVAL;

                /* Offset can be used as input (EXEC_OBJECT_PINNED), reject
                 * any non-page-aligned or non-canonical addresses.
                 */
                if (exec[i].flags & EXEC_OBJECT_PINNED) {
                        if (exec[i].offset !=
                            gen8_canonical_addr(exec[i].offset & PAGE_MASK))
                                return -EINVAL;

                        /* From drm_mm perspective address space is continuous,
                         * so from this point we're always using non-canonical
                         * form internally.
                         */
                        exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
                }

                if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
                        return -EINVAL;

                /* pad_to_size was once a reserved field, so sanitize it */
                if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
                        if (offset_in_page(exec[i].pad_to_size))
                                return -EINVAL;
                } else {
                        exec[i].pad_to_size = 0;
                }

                /* First check for malicious input causing overflow in
                 * the worst case where we need to allocate the entire
                 * relocation tree as a single array.
                 */
                if (exec[i].relocation_count > relocs_max - relocs_total)
                        return -EINVAL;
                relocs_total += exec[i].relocation_count;

                length = exec[i].relocation_count *
                        sizeof(struct drm_i915_gem_relocation_entry);
                /*
                 * We must check that the entire relocation array is safe
                 * to read, but since we may need to update the presumed
                 * offsets during execution, check for full write access.
                 */
                if (!access_ok(VERIFY_WRITE, ptr, length))
                        return -EFAULT;

                if (likely(!i915.prefault_disable)) {
                        if (fault_in_multipages_readable(ptr, length))
                                return -EFAULT;
                }
        }

        return 0;
}

static struct i915_gem_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
                          struct intel_engine_cs *engine, const u32 ctx_id)
{
        struct i915_gem_context *ctx = NULL;
        struct i915_ctx_hang_stats *hs;

        if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
                return ERR_PTR(-EINVAL);

        ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
        if (IS_ERR(ctx))
                return ctx;

        hs = &ctx->hang_stats;
        if (hs->banned) {
                DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
                return ERR_PTR(-EIO);
        }

        return ctx;
}

void i915_vma_move_to_active(struct i915_vma *vma,
                             struct drm_i915_gem_request *req,
                             unsigned int flags)
{
        struct drm_i915_gem_object *obj = vma->obj;
        const unsigned int idx = req->engine->id;

        GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

        obj->dirty = 1; /* be paranoid */

        /* Add a reference if we're newly entering the active list.
         * The order in which we add operations to the retirement queue is
         * vital here: mark_active adds to the start of the callback list,
         * such that subsequent callbacks are called first. Therefore we
         * add the active reference first and queue for it to be dropped
         * *last*.
         */
        if (!i915_gem_object_is_active(obj))
                i915_gem_object_get(obj);
        i915_gem_object_set_active(obj, idx);
        i915_gem_active_set(&obj->last_read[idx], req);

        if (flags & EXEC_OBJECT_WRITE) {
                i915_gem_active_set(&obj->last_write, req);

                intel_fb_obj_invalidate(obj, ORIGIN_CS);

                /* update for the implicit flush after a batch */
                obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
        }

        if (flags & EXEC_OBJECT_NEEDS_FENCE) {
                i915_gem_active_set(&obj->last_fence, req);
                if (flags & __EXEC_OBJECT_HAS_FENCE) {
                        struct drm_i915_private *dev_priv = req->i915;

                        list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
                                       &dev_priv->mm.fence_list);
                }
        }

        i915_vma_set_active(vma, idx);
        i915_gem_active_set(&vma->last_read[idx], req);
        list_move_tail(&vma->vm_link, &vma->vm->active_list);
}

static void eb_export_fence(struct drm_i915_gem_object *obj,
                            struct drm_i915_gem_request *req,
                            unsigned int flags)
{
        struct reservation_object *resv;

        resv = i915_gem_object_get_dmabuf_resv(obj);
        if (!resv)
                return;

        /* Ignore errors from failing to allocate the new fence, we can't
         * handle an error right now. Worst case should be missed
         * synchronisation leading to rendering corruption.
         */
        ww_mutex_lock(&resv->lock, NULL);
        if (flags & EXEC_OBJECT_WRITE)
                reservation_object_add_excl_fence(resv, &req->fence);
        else if (reservation_object_reserve_shared(resv) == 0)
                reservation_object_add_shared_fence(resv, &req->fence);
        ww_mutex_unlock(&resv->lock);
}
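/*
 * Exporting the request fence into the dma-buf's reservation object lets
 * other users of a shared buffer wait for this batch: an exclusive fence is
 * installed for writes, a shared fence for reads (assuming the shared-slot
 * reservation succeeds, as noted in the comment above).
 */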

static void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
                                   struct drm_i915_gem_request *req)
{
        struct i915_vma *vma;

        list_for_each_entry(vma, vmas, exec_list) {
                struct drm_i915_gem_object *obj = vma->obj;
                u32 old_read = obj->base.read_domains;
                u32 old_write = obj->base.write_domain;

                obj->base.write_domain = obj->base.pending_write_domain;
                if (obj->base.write_domain)
                        vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
                else
                        obj->base.pending_read_domains |= obj->base.read_domains;
                obj->base.read_domains = obj->base.pending_read_domains;

                i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
                eb_export_fence(obj, req, vma->exec_entry->flags);
                trace_i915_gem_object_change_domain(obj, old_read, old_write);
        }
}

static int
i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
{
        struct intel_ring *ring = req->ring;
        int ret, i;

        if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
                DRM_DEBUG("sol reset is gen7/rcs only\n");
                return -EINVAL;
        }

        ret = intel_ring_begin(req, 4 * 3);
        if (ret)
                return ret;

        for (i = 0; i < 4; i++) {
                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
                intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
                intel_ring_emit(ring, 0);
        }

        intel_ring_advance(ring);

        return 0;
}

static struct i915_vma *
i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
                          struct drm_i915_gem_exec_object2 *shadow_exec_entry,
                          struct drm_i915_gem_object *batch_obj,
                          struct eb_vmas *eb,
                          u32 batch_start_offset,
                          u32 batch_len,
                          bool is_master)
{
        struct drm_i915_gem_object *shadow_batch_obj;
        struct i915_vma *vma;
        int ret;

        shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
                                                   PAGE_ALIGN(batch_len));
        if (IS_ERR(shadow_batch_obj))
                return ERR_CAST(shadow_batch_obj);

        ret = intel_engine_cmd_parser(engine,
                                      batch_obj,
                                      shadow_batch_obj,
                                      batch_start_offset,
                                      batch_len,
                                      is_master);
        if (ret) {
                if (ret == -EACCES) /* unhandled chained batch */
                        vma = NULL;
                else
                        vma = ERR_PTR(ret);
                goto out;
        }

        vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
        if (IS_ERR(vma))
                goto out;

        memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));

        vma->exec_entry = shadow_exec_entry;
        vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
        i915_gem_object_get(shadow_batch_obj);
        list_add_tail(&vma->exec_list, &eb->vmas);

out:
        i915_gem_object_unpin_pages(shadow_batch_obj);
        return vma;
}

static int
execbuf_submit(struct i915_execbuffer_params *params,
               struct drm_i915_gem_execbuffer2 *args,
               struct list_head *vmas)
{
        struct drm_i915_private *dev_priv = params->request->i915;
        u64 exec_start, exec_len;
        int instp_mode;
        u32 instp_mask;
        int ret;

        ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
        if (ret)
                return ret;

        ret = i915_switch_context(params->request);
        if (ret)
                return ret;

        instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
        instp_mask = I915_EXEC_CONSTANTS_MASK;
        switch (instp_mode) {
        case I915_EXEC_CONSTANTS_REL_GENERAL:
        case I915_EXEC_CONSTANTS_ABSOLUTE:
        case I915_EXEC_CONSTANTS_REL_SURFACE:
                if (instp_mode != 0 && params->engine->id != RCS) {
                        DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
                        return -EINVAL;
                }

                if (instp_mode != dev_priv->relative_constants_mode) {
                        if (INTEL_INFO(dev_priv)->gen < 4) {
                                DRM_DEBUG("no rel constants on pre-gen4\n");
                                return -EINVAL;
                        }

                        if (INTEL_INFO(dev_priv)->gen > 5 &&
                            instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
                                DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
                                return -EINVAL;
                        }

                        /* The HW changed the meaning on this bit on gen6 */
                        if (INTEL_INFO(dev_priv)->gen >= 6)
                                instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
                }
                break;
        default:
                DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
                return -EINVAL;
        }

        if (params->engine->id == RCS &&
            instp_mode != dev_priv->relative_constants_mode) {
                struct intel_ring *ring = params->request->ring;

                ret = intel_ring_begin(params->request, 4);
                if (ret)
                        return ret;

                intel_ring_emit(ring, MI_NOOP);
                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
                intel_ring_emit_reg(ring, INSTPM);
                intel_ring_emit(ring, instp_mask << 16 | instp_mode);
                intel_ring_advance(ring);

                dev_priv->relative_constants_mode = instp_mode;
        }

        if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
                ret = i915_reset_gen7_sol_offsets(params->request);
                if (ret)
                        return ret;
        }

        exec_len = args->batch_len;
        exec_start = params->batch->node.start +
                     params->args_batch_start_offset;

        if (exec_len == 0)
                exec_len = params->batch->size;

        ret = params->engine->emit_bb_start(params->request,
                                            exec_start, exec_len,
                                            params->dispatch_flags);
        if (ret)
                return ret;

        trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);

        i915_gem_execbuffer_move_to_active(vmas, params->request);

        return 0;
}

/**
 * Find one BSD ring to dispatch the corresponding BSD command.
 * The engine index is returned.
 */
static unsigned int
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
                         struct drm_file *file)
{
        struct drm_i915_file_private *file_priv = file->driver_priv;

        /* Check whether the file_priv has already selected one ring. */
        if ((int)file_priv->bsd_engine < 0) {
                /* If not, use the ping-pong mechanism to select one. */
                mutex_lock(&dev_priv->drm.struct_mutex);
                file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index;
                dev_priv->mm.bsd_engine_dispatch_index ^= 1;
                mutex_unlock(&dev_priv->drm.struct_mutex);
        }

        return file_priv->bsd_engine;
}

#define I915_USER_RINGS (4)

static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
        [I915_EXEC_DEFAULT] = RCS,
        [I915_EXEC_RENDER] = RCS,
        [I915_EXEC_BLT] = BCS,
        [I915_EXEC_BSD] = VCS,
        [I915_EXEC_VEBOX] = VECS
};

static struct intel_engine_cs *
eb_select_engine(struct drm_i915_private *dev_priv,
                 struct drm_file *file,
                 struct drm_i915_gem_execbuffer2 *args)
{
        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
        struct intel_engine_cs *engine;

        if (user_ring_id > I915_USER_RINGS) {
                DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
                return NULL;
        }

        if ((user_ring_id != I915_EXEC_BSD) &&
            ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
                DRM_DEBUG("execbuf with non bsd ring but with invalid "
                          "bsd dispatch flags: %d\n", (int)(args->flags));
                return NULL;
        }

        if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
                unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;

                if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
                        bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
                } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
                           bsd_idx <= I915_EXEC_BSD_RING2) {
                        bsd_idx >>= I915_EXEC_BSD_SHIFT;
                        bsd_idx--;
                } else {
                        DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
                                  bsd_idx);
                        return NULL;
                }

                engine = &dev_priv->engine[_VCS(bsd_idx)];
        } else {
                engine = &dev_priv->engine[user_ring_map[user_ring_id]];
        }

        if (!intel_engine_initialized(engine)) {
                DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
                return NULL;
        }

        return engine;
}
1561
54cf91dc
CW
1562static int
1563i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1564 struct drm_file *file,
1565 struct drm_i915_gem_execbuffer2 *args,
41bde553 1566 struct drm_i915_gem_exec_object2 *exec)
54cf91dc 1567{
72e96d64
JL
1568 struct drm_i915_private *dev_priv = to_i915(dev);
1569 struct i915_ggtt *ggtt = &dev_priv->ggtt;
27173f1f 1570 struct eb_vmas *eb;
78a42377 1571 struct drm_i915_gem_exec_object2 shadow_exec_entry;
e2f80391 1572 struct intel_engine_cs *engine;
e2efd130 1573 struct i915_gem_context *ctx;
41bde553 1574 struct i915_address_space *vm;
5f19e2bf
JH
1575 struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1576 struct i915_execbuffer_params *params = &params_master;
d299cce7 1577 const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
8e004efc 1578 u32 dispatch_flags;
78382593 1579 int ret;
ed5982e6 1580 bool need_relocs;
54cf91dc 1581
ed5982e6 1582 if (!i915_gem_check_execbuffer(args))
432e58ed 1583 return -EINVAL;
432e58ed 1584
ad19f10b 1585 ret = validate_exec_list(dev, exec, args->buffer_count);
54cf91dc
CW
1586 if (ret)
1587 return ret;
1588
8e004efc 1589 dispatch_flags = 0;
d7d4eedd 1590 if (args->flags & I915_EXEC_SECURE) {
b3ac9f25 1591 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
d7d4eedd
CW
1592 return -EPERM;
1593
8e004efc 1594 dispatch_flags |= I915_DISPATCH_SECURE;
d7d4eedd 1595 }
b45305fc 1596 if (args->flags & I915_EXEC_IS_PINNED)
8e004efc 1597 dispatch_flags |= I915_DISPATCH_PINNED;
d7d4eedd 1598
f8ca0c07
DG
1599 engine = eb_select_engine(dev_priv, file, args);
1600 if (!engine)
1601 return -EINVAL;
54cf91dc
CW
1602
1603 if (args->buffer_count < 1) {
ff240199 1604 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
54cf91dc
CW
1605 return -EINVAL;
1606 }
54cf91dc 1607
a9ed33ca
AJ
1608 if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1609 if (!HAS_RESOURCE_STREAMER(dev)) {
1610 DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1611 return -EINVAL;
1612 }
e2f80391 1613 if (engine->id != RCS) {
a9ed33ca 1614 DRM_DEBUG("RS is not available on %s\n",
e2f80391 1615 engine->name);
a9ed33ca
AJ
1616 return -EINVAL;
1617 }
1618
1619 dispatch_flags |= I915_DISPATCH_RS;
1620 }
1621
67d97da3
CW
1622 /* Take a local wakeref for preparing to dispatch the execbuf as
1623 * we expect to access the hardware fairly frequently in the
1624 * process. Upon first dispatch, we acquire another prolonged
1625 * wakeref that we hold until the GPU has been idle for at least
1626 * 100ms.
1627 */
f65c9168
PZ
1628 intel_runtime_pm_get(dev_priv);
1629
54cf91dc
CW
1630 ret = i915_mutex_lock_interruptible(dev);
1631 if (ret)
1632 goto pre_mutex_err;
1633
e2f80391 1634 ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
72ad5c45 1635 if (IS_ERR(ctx)) {
d299cce7 1636 mutex_unlock(&dev->struct_mutex);
41bde553 1637 ret = PTR_ERR(ctx);
d299cce7 1638 goto pre_mutex_err;
935f38d6 1639 }
41bde553 1640
9a6feaf0 1641 i915_gem_context_get(ctx);
41bde553 1642
ae6c4806
DV
1643 if (ctx->ppgtt)
1644 vm = &ctx->ppgtt->base;
1645 else
72e96d64 1646 vm = &ggtt->base;
d299cce7 1647
5f19e2bf
JH
1648 memset(&params_master, 0x00, sizeof(params_master));
1649
17601cbc 1650 eb = eb_create(args);
67731b87 1651 if (eb == NULL) {
9a6feaf0 1652 i915_gem_context_put(ctx);
67731b87
CW
1653 mutex_unlock(&dev->struct_mutex);
1654 ret = -ENOMEM;
1655 goto pre_mutex_err;
1656 }
1657
54cf91dc 1658 /* Look up object handles */
27173f1f 1659 ret = eb_lookup_vmas(eb, exec, args, vm, file);
3b96eff4
CW
1660 if (ret)
1661 goto err;
54cf91dc 1662
6fe4f140 1663 /* take note of the batch buffer before we might reorder the lists */
59bfa124 1664 params->batch = eb_get_batch(eb);
6fe4f140 1665
54cf91dc 1666 /* Move the objects en-masse into the GTT, evicting if necessary. */
ed5982e6 1667 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
e2f80391
TU
1668 ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1669 &need_relocs);
54cf91dc
CW
1670 if (ret)
1671 goto err;
1672
1673 /* The objects are in their final locations, apply the relocations. */
ed5982e6 1674 if (need_relocs)
17601cbc 1675 ret = i915_gem_execbuffer_relocate(eb);
54cf91dc
CW
1676 if (ret) {
1677 if (ret == -EFAULT) {
e2f80391
TU
1678 ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
1679 engine,
b1b38278 1680 eb, exec, ctx);
54cf91dc
CW
1681 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1682 }
1683 if (ret)
1684 goto err;
1685 }
1686
1687 /* Set the pending read domains for the batch buffer to COMMAND */
59bfa124 1688 if (params->batch->obj->base.pending_write_domain) {
ff240199 1689 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
54cf91dc
CW
1690 ret = -EINVAL;
1691 goto err;
1692 }
54cf91dc 1693
5f19e2bf 1694 params->args_batch_start_offset = args->batch_start_offset;
33a051a5 1695 if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
59bfa124
CW
1696 struct i915_vma *vma;
1697
1698 vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
1699 params->batch->obj,
1700 eb,
1701 args->batch_start_offset,
1702 args->batch_len,
1703 drm_is_current_master(file));
1704 if (IS_ERR(vma)) {
1705 ret = PTR_ERR(vma);
78a42377
BV
1706 goto err;
1707 }
17cabf57 1708
59bfa124 1709 if (vma) {
c7c7372e
RP
1710 /*
1711 * Batch parsed and accepted:
1712 *
1713 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1714 * bit from MI_BATCH_BUFFER_START commands issued in
1715 * the dispatch_execbuffer implementations. We
1716 * specifically don't want that set on batches the
1717 * command parser has accepted.
1718 */
1719 dispatch_flags |= I915_DISPATCH_SECURE;
5f19e2bf 1720 params->args_batch_start_offset = 0;
59bfa124 1721 params->batch = vma;
c7c7372e 1722 }
351e3db2
BV
1723 }
1724
59bfa124 1725 params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
78a42377 1726
d7d4eedd
CW
1727 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1728 * batch" bit. Hence we need to pin secure batches into the global gtt.
28cf5415 1729 * hsw should have this fixed, but bdw mucks it up again. */
8e004efc 1730 if (dispatch_flags & I915_DISPATCH_SECURE) {
59bfa124 1731 struct drm_i915_gem_object *obj = params->batch->obj;
058d88c4 1732 struct i915_vma *vma;
59bfa124 1733
da51a1e7
DV
1734 /*
1735 * So on first glance it looks freaky that we pin the batch here
1736 * outside of the reservation loop. But:
1737 * - The batch is already pinned into the relevant ppgtt, so we
1738 * already have the backing storage fully allocated.
1739 * - No other BO uses the global gtt (well contexts, but meh),
fd0753cf 1740 * so we don't really have issues with multiple objects not
da51a1e7
DV
1741 * fitting due to fragmentation.
1742 * So this is actually safe.
1743 */
058d88c4
CW
1744 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1745 if (IS_ERR(vma)) {
1746 ret = PTR_ERR(vma);
da51a1e7 1747 goto err;
058d88c4 1748 }
d7d4eedd 1749
058d88c4 1750 params->batch = vma;
59bfa124 1751 }
d7d4eedd 1752
0c8dac88 1753 /* Allocate a request for this batch buffer nice and early. */
8e637178
CW
1754 params->request = i915_gem_request_alloc(engine, ctx);
1755 if (IS_ERR(params->request)) {
1756 ret = PTR_ERR(params->request);
0c8dac88 1757 goto err_batch_unpin;
26827088 1758 }
0c8dac88 1759
17f298cf
CW
1760 /* Whilst this request exists, batch_obj will be on the
1761 * active_list, and so will hold the active reference. Only when this
 1762 * request is retired will the batch_obj be moved onto the
1763 * inactive_list and lose its active reference. Hence we do not need
1764 * to explicitly hold another reference here.
1765 */
058d88c4 1766 params->request->batch = params->batch;
17f298cf 1767
8e637178 1768 ret = i915_gem_request_add_to_client(params->request, file);
fcfa423c 1769 if (ret)
aa9b7810 1770 goto err_request;
fcfa423c 1771
5f19e2bf
JH
1772 /*
1773 * Save assorted stuff away to pass through to *_submission().
 1774 * NB: This data should be 'persistent' and not local as it will be
 1775 * kept around beyond the duration of the IOCTL once the GPU
1776 * scheduler arrives.
1777 */
1778 params->dev = dev;
1779 params->file = file;
4a570db5 1780 params->engine = engine;
5f19e2bf 1781 params->dispatch_flags = dispatch_flags;
5f19e2bf
JH
1782 params->ctx = ctx;
1783
5b043f4e 1784 ret = execbuf_submit(params, args, &eb->vmas);
aa9b7810 1785err_request:
17f298cf 1786 __i915_add_request(params->request, ret == 0);
54cf91dc 1787
0c8dac88 1788err_batch_unpin:
da51a1e7
DV
1789 /*
1790 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
 1791 * batch vma for correctness. To make this less ugly and less fragile, it
1792 * needs to be adjusted to also track the ggtt batch vma properly as
1793 * active.
1794 */
8e004efc 1795 if (dispatch_flags & I915_DISPATCH_SECURE)
59bfa124 1796 i915_vma_unpin(params->batch);
54cf91dc 1797err:
41bde553 1798 /* the request owns the ref now */
9a6feaf0 1799 i915_gem_context_put(ctx);
67731b87 1800 eb_destroy(eb);
54cf91dc
CW
1801
1802 mutex_unlock(&dev->struct_mutex);
1803
1804pre_mutex_err:
f65c9168
PZ
 1805 /* intel_gpu_busy should also get a ref, so it is released only when
 1806 * the device is really idle. */
1807 intel_runtime_pm_put(dev_priv);
54cf91dc
CW
1808 return ret;
1809}
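
A note on the cleanup structure above: i915_gem_do_execbuffer() acquires its resources in a fixed order (runtime-PM wakeref, struct_mutex, context reference, batch pin, request) and releases them through the chain of labels at the bottom, with the success path falling through the same labels as the error paths. The fragment below is a minimal standalone sketch of that goto-unwind pattern; every helper name in it is a placeholder, not driver API.

/*
 * Standalone sketch (not driver code) of the unwind ordering used by
 * i915_gem_do_execbuffer(): resources are released in reverse order of
 * acquisition, and success falls through the same cleanup labels as
 * failure.
 */
#include <stdio.h>

static void get_wakeref(void)  { puts("get wakeref"); }  /* stands in for intel_runtime_pm_get() */
static void put_wakeref(void)  { puts("put wakeref"); }  /* stands in for intel_runtime_pm_put() */
static int  lock_dev(void)     { puts("lock");   return 0; }
static void unlock_dev(void)   { puts("unlock"); }
static int  pin_batch(void)    { puts("pin batch");   return 0; }
static void unpin_batch(void)  { puts("unpin batch"); }
static int  submit_batch(void) { puts("submit");      return 0; }

static int execbuf_sketch(void)
{
	int ret;

	get_wakeref();

	ret = lock_dev();
	if (ret)
		goto pre_mutex_err;

	ret = pin_batch();
	if (ret)
		goto err;

	ret = submit_batch();
	/* From here on, success and failure share the unwind path. */

	unpin_batch();
err:
	unlock_dev();
pre_mutex_err:
	put_wakeref();
	return ret;
}

int main(void)
{
	return execbuf_sketch() ? 1 : 0;
}
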
1810
1811/*
1812 * Legacy execbuffer just creates an exec2 list from the original exec object
1813 * list array and passes it to the real function.
1814 */
1815int
1816i915_gem_execbuffer(struct drm_device *dev, void *data,
1817 struct drm_file *file)
1818{
1819 struct drm_i915_gem_execbuffer *args = data;
1820 struct drm_i915_gem_execbuffer2 exec2;
1821 struct drm_i915_gem_exec_object *exec_list = NULL;
1822 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1823 int ret, i;
1824
54cf91dc 1825 if (args->buffer_count < 1) {
ff240199 1826 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
54cf91dc
CW
1827 return -EINVAL;
1828 }
1829
1830 /* Copy in the exec list from userland */
1831 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1832 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1833 if (exec_list == NULL || exec2_list == NULL) {
ff240199 1834 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
54cf91dc
CW
1835 args->buffer_count);
1836 drm_free_large(exec_list);
1837 drm_free_large(exec2_list);
1838 return -ENOMEM;
1839 }
1840 ret = copy_from_user(exec_list,
3ed605bc 1841 u64_to_user_ptr(args->buffers_ptr),
54cf91dc
CW
1842 sizeof(*exec_list) * args->buffer_count);
1843 if (ret != 0) {
ff240199 1844 DRM_DEBUG("copy %d exec entries failed %d\n",
54cf91dc
CW
1845 args->buffer_count, ret);
1846 drm_free_large(exec_list);
1847 drm_free_large(exec2_list);
1848 return -EFAULT;
1849 }
1850
1851 for (i = 0; i < args->buffer_count; i++) {
1852 exec2_list[i].handle = exec_list[i].handle;
1853 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1854 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1855 exec2_list[i].alignment = exec_list[i].alignment;
1856 exec2_list[i].offset = exec_list[i].offset;
1857 if (INTEL_INFO(dev)->gen < 4)
1858 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1859 else
1860 exec2_list[i].flags = 0;
1861 }
1862
1863 exec2.buffers_ptr = args->buffers_ptr;
1864 exec2.buffer_count = args->buffer_count;
1865 exec2.batch_start_offset = args->batch_start_offset;
1866 exec2.batch_len = args->batch_len;
1867 exec2.DR1 = args->DR1;
1868 exec2.DR4 = args->DR4;
1869 exec2.num_cliprects = args->num_cliprects;
1870 exec2.cliprects_ptr = args->cliprects_ptr;
1871 exec2.flags = I915_EXEC_RENDER;
6e0a69db 1872 i915_execbuffer2_set_context_id(exec2, 0);
54cf91dc 1873
41bde553 1874 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
54cf91dc 1875 if (!ret) {
9aab8bff 1876 struct drm_i915_gem_exec_object __user *user_exec_list =
3ed605bc 1877 u64_to_user_ptr(args->buffers_ptr);
9aab8bff 1878
54cf91dc 1879 /* Copy the new buffer offsets back to the user's exec list. */
9aab8bff 1880 for (i = 0; i < args->buffer_count; i++) {
934acce3
MW
1881 exec2_list[i].offset =
1882 gen8_canonical_addr(exec2_list[i].offset);
9aab8bff
CW
1883 ret = __copy_to_user(&user_exec_list[i].offset,
1884 &exec2_list[i].offset,
1885 sizeof(user_exec_list[i].offset));
1886 if (ret) {
1887 ret = -EFAULT;
1888 DRM_DEBUG("failed to copy %d exec entries "
1889 "back to user (%d)\n",
1890 args->buffer_count, ret);
1891 break;
1892 }
54cf91dc
CW
1893 }
1894 }
1895
1896 drm_free_large(exec_list);
1897 drm_free_large(exec2_list);
1898 return ret;
1899}
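
Both ioctl entry points pass every returned offset through gen8_canonical_addr() before copying it back to userspace: on gen8+ the GPU uses 48-bit addresses, and the upper 16 bits must be a sign extension of bit 47 (the "canonical" form). The standalone fragment below only illustrates that sign extension; canonical_addr_sketch() is a stand-in for the kernel helper (which uses sign_extend64(address, 47)), and it assumes the compiler performs an arithmetic right shift on signed values, as typical two's-complement targets do.

/* Illustrative-only: replicate bit 47 of a 48-bit GPU address into bits 48..63. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t canonical_addr_sketch(uint64_t address)
{
	/* Shift bit 47 up to the sign bit, then arithmetic-shift it back down. */
	return (uint64_t)((int64_t)(address << 16) >> 16);
}

int main(void)
{
	uint64_t low  = 0x0000000000401000ULL;	/* bit 47 clear: value unchanged  */
	uint64_t high = 0x0000800000001000ULL;	/* bit 47 set: upper bits filled  */

	printf("0x%016" PRIx64 " -> 0x%016" PRIx64 "\n", low,  canonical_addr_sketch(low));
	printf("0x%016" PRIx64 " -> 0x%016" PRIx64 "\n", high, canonical_addr_sketch(high));
	return 0;
}
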
1900
1901int
1902i915_gem_execbuffer2(struct drm_device *dev, void *data,
1903 struct drm_file *file)
1904{
1905 struct drm_i915_gem_execbuffer2 *args = data;
1906 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1907 int ret;
1908
ed8cd3b2
XW
1909 if (args->buffer_count < 1 ||
1910 args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
ff240199 1911 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
54cf91dc
CW
1912 return -EINVAL;
1913 }
1914
9cb34664
DV
1915 if (args->rsvd2 != 0) {
1916 DRM_DEBUG("dirty rvsd2 field\n");
1917 return -EINVAL;
1918 }
1919
f2a85e19
CW
1920 exec2_list = drm_malloc_gfp(args->buffer_count,
1921 sizeof(*exec2_list),
1922 GFP_TEMPORARY);
54cf91dc 1923 if (exec2_list == NULL) {
ff240199 1924 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
54cf91dc
CW
1925 args->buffer_count);
1926 return -ENOMEM;
1927 }
1928 ret = copy_from_user(exec2_list,
3ed605bc 1929 u64_to_user_ptr(args->buffers_ptr),
54cf91dc
CW
1930 sizeof(*exec2_list) * args->buffer_count);
1931 if (ret != 0) {
ff240199 1932 DRM_DEBUG("copy %d exec entries failed %d\n",
54cf91dc
CW
1933 args->buffer_count, ret);
1934 drm_free_large(exec2_list);
1935 return -EFAULT;
1936 }
1937
41bde553 1938 ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
54cf91dc
CW
1939 if (!ret) {
1940 /* Copy the new buffer offsets back to the user's exec list. */
d593d992 1941 struct drm_i915_gem_exec_object2 __user *user_exec_list =
3ed605bc 1942 u64_to_user_ptr(args->buffers_ptr);
9aab8bff
CW
1943 int i;
1944
1945 for (i = 0; i < args->buffer_count; i++) {
934acce3
MW
1946 exec2_list[i].offset =
1947 gen8_canonical_addr(exec2_list[i].offset);
9aab8bff
CW
1948 ret = __copy_to_user(&user_exec_list[i].offset,
1949 &exec2_list[i].offset,
1950 sizeof(user_exec_list[i].offset));
1951 if (ret) {
1952 ret = -EFAULT;
1953 DRM_DEBUG("failed to copy %d exec entries "
1954 "back to user\n",
1955 args->buffer_count);
1956 break;
1957 }
54cf91dc
CW
1958 }
1959 }
1960
1961 drm_free_large(exec2_list);
1962 return ret;
1963}
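
For context, the function above is normally reached from userspace through DRM_IOCTL_I915_GEM_EXECBUFFER2. The fragment below is a heavily simplified userspace sketch, assuming libdrm uapi headers, an open /dev/dri/card0 node with sufficient DRM privileges, and with almost all error handling elided: it creates one GEM buffer containing a single MI_BATCH_BUFFER_END, then submits it on the render ring with the default context. It is illustrative only; real userspace (Mesa, libdrm) additionally manages relocations, write domains, busy/wait tracking and context creation, none of which appear here.

/* build (roughly): gcc exec2_demo.c $(pkg-config --cflags --libs libdrm) */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <drm/i915_drm.h>

#define MI_BATCH_BUFFER_END (0xA << 23)

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* device node path is an assumption */
	if (fd < 0)
		return 1;

	/* Create a 4 KiB GEM object and write a terminating batch command into it. */
	struct drm_i915_gem_create create = { .size = 4096 };
	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);

	uint32_t batch[2] = { MI_BATCH_BUFFER_END, 0 /* MI_NOOP padding */ };
	struct drm_i915_gem_pwrite pwrite = {
		.handle   = create.handle,
		.offset   = 0,
		.size     = sizeof(batch),
		.data_ptr = (uintptr_t)batch,
	};
	ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);

	/* One exec object: the batch itself.  No relocations. */
	struct drm_i915_gem_exec_object2 obj = {
		.handle = create.handle,
	};

	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr  = (uintptr_t)&obj,
		.buffer_count = 1,
		.batch_len    = sizeof(batch),
		.flags        = I915_EXEC_RENDER,
	};
	i915_execbuffer2_set_context_id(execbuf, 0);	/* default context */

	int ret = ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);

	close(fd);
	return ret ? 1 : 0;
}
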