/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */

#include <linux/log2.h>

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "i915_trace.h"
#include "intel_reset.h"
#include "intel_workarounds.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

unsigned int intel_ring_update_space(struct intel_ring *ring)
{
	unsigned int space;

	space = __intel_ring_space(ring->head, ring->emit, ring->size);

	ring->space = space;
	return space;
}

static int
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	num_store_dw = 0;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;
	if (mode & EMIT_FLUSH)
		num_store_dw = 4;

	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_scratch_offset(rq->i915);
		*cs++ = 0;
	}
	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;

	intel_ring_advance(rq, cs);

	return 0;
}

static int
gen4_render_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that is a delay following invalidation
	 * that is required to reset the caches as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
	u32 *cs;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

static int
gen6_render_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = gen6_emit_post_sync_nonzero_flush(rq);
	if (ret)
		return ret;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;

	/* Finally we can flush and with it emit the breadcrumb */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

static int
gen7_render_ring_cs_stall_wa(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

static int
gen7_render_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_GLOBAL_GTT_IVB |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = rq->timeline->hwsp_offset;
	*cs++ = rq->fence.seqno;

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_STORE_DATA_INDEX |
		 PIPE_CONTROL_GLOBAL_GTT_IVB);
	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

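/*
 * Workaround variant for the gen7 non-render (xcs) rings: the seqno store
 * below is repeated GEN7_XCS_WA (32) times and followed by an empty
 * MI_FLUSH_DW before the MI_USER_INTERRUPT is emitted.
 */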
#define GEN7_XCS_WA 32
static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	int i;

	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);

	for (i = 0; i < GEN7_XCS_WA; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_FLUSH_DW;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}
#undef GEN7_XCS_WA

static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Keep the render interrupt unmasked as this papers over
	 * lost interrupts following a reset.
	 */
	if (engine->class == RENDER_CLASS) {
		if (INTEL_GEN(engine->i915) >= 6)
			mask &= ~BIT(0);
		else
			mask &= ~I915_USER_INTERRUPT;
	}

	intel_engine_set_hwsp_writemask(engine, mask);
}

static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 addr;

	addr = lower_32_bits(phys);
	if (INTEL_GEN(dev_priv) >= 4)
		addr |= (phys >> 28) & 0xf0;

	I915_WRITE(HWS_PGA, addr);
}

static struct page *status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
	set_hwstam(engine, ~0u);
}

static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
	struct drm_i915_private *dev_priv = engine->i915;
	i915_reg_t hwsp;

	/*
	 * The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN(dev_priv, 7)) {
		switch (engine->id) {
		/*
		 * No more rings exist on Gen7. Default case is only to shut up
		 * gcc switch check warning.
		 */
		default:
			GEM_BUG_ON(engine->id);
			/* fallthrough */
		case RCS0:
			hwsp = RENDER_HWS_PGA_GEN7;
			break;
		case BCS0:
			hwsp = BLT_HWS_PGA_GEN7;
			break;
		case VCS0:
			hwsp = BSD_HWS_PGA_GEN7;
			break;
		case VECS0:
			hwsp = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN(dev_priv, 6)) {
		hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		hwsp = RING_HWS_PGA(engine->mmio_base);
	}

	I915_WRITE(hwsp, offset);
	POSTING_READ(hwsp);
}

static void flush_cs_tlb(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (!IS_GEN_RANGE(dev_priv, 6, 7))
		return;

	/* ring should be idle before issuing a sync flush*/
	WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	ENGINE_WRITE(engine, RING_INSTPM,
		     _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					INSTPM_SYNC_FLUSH));
	if (intel_wait_for_register(engine->uncore,
				    RING_INSTPM(engine->mmio_base),
				    INSTPM_SYNC_FLUSH, 0,
				    1000))
		DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
			  engine->name);
}

static void ring_setup_status_page(struct intel_engine_cs *engine)
{
	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
	set_hwstam(engine, ~0u);

	flush_cs_tlb(engine);
}

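/*
 * On gen3+, request the engine to stop (STOP_RING) and wait for it to
 * report idle, then clear HEAD, TAIL and CTL. Returns true if the ring
 * head reads back as zero, i.e. the ring is stopped and empty.
 */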
static bool stop_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) > 2) {
		ENGINE_WRITE(engine,
			     RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
		if (intel_wait_for_register(engine->uncore,
					    RING_MI_MODE(engine->mmio_base),
					    MODE_IDLE,
					    MODE_IDLE,
					    1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n",
				  engine->name);

			/*
			 * Sometimes we observe that the idle flag is not
			 * set even though the ring is empty. So double
			 * check before giving up.
			 */
			if (ENGINE_READ(engine, RING_HEAD) !=
			    ENGINE_READ(engine, RING_TAIL))
				return false;
		}
	}

	ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));

	ENGINE_WRITE(engine, RING_HEAD, 0);
	ENGINE_WRITE(engine, RING_TAIL, 0);

	/* The ring must be empty before it is disabled */
	ENGINE_WRITE(engine, RING_CTL, 0);

	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
}

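/*
 * Bring a legacy ringbuffer engine back to life after init or reset:
 * stop the ring, reprogram the status page, restore RING_START/HEAD/TAIL,
 * re-enable RING_CTL, and finally kick the tail if work is already queued.
 */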
static int xcs_resume(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring = engine->buffer;
	int ret = 0;

	GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
		  engine->name, ring->head, ring->tail);

	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_DRIVER("%s head not reset to zero "
				 "ctl %08x head %08x tail %08x start %08x\n",
				 engine->name,
				 ENGINE_READ(engine, RING_CTL),
				 ENGINE_READ(engine, RING_HEAD),
				 ENGINE_READ(engine, RING_TAIL),
				 ENGINE_READ(engine, RING_START));

		if (!stop_ring(engine)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  engine->name,
				  ENGINE_READ(engine, RING_CTL),
				  ENGINE_READ(engine, RING_HEAD),
				  ENGINE_READ(engine, RING_TAIL),
				  ENGINE_READ(engine, RING_START));
			ret = -EIO;
			goto out;
		}
	}

	if (HWS_NEEDS_PHYSICAL(dev_priv))
		ring_setup_phys_status_page(engine);
	else
		ring_setup_status_page(engine);

	intel_engine_reset_breadcrumbs(engine);

	/* Enforce ordering by reading HEAD register back */
	ENGINE_READ(engine, RING_HEAD);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (ENGINE_READ(engine, RING_HEAD))
		DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n",
				 engine->name, ENGINE_READ(engine, RING_HEAD));

	/* Check that the ring offsets point within the ring! */
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
	intel_ring_update_space(ring);

	/* First wake the ring up to an empty/idle ring */
	ENGINE_WRITE(engine, RING_HEAD, ring->head);
	ENGINE_WRITE(engine, RING_TAIL, ring->head);
	ENGINE_POSTING_READ(engine, RING_TAIL);

	ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (intel_wait_for_register(engine->uncore,
				    RING_CTL(engine->mmio_base),
				    RING_VALID, RING_VALID,
				    50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
			  engine->name,
			  ENGINE_READ(engine, RING_CTL),
			  ENGINE_READ(engine, RING_CTL) & RING_VALID,
			  ENGINE_READ(engine, RING_HEAD), ring->head,
			  ENGINE_READ(engine, RING_TAIL), ring->tail,
			  ENGINE_READ(engine, RING_START),
			  i915_ggtt_offset(ring->vma));
		ret = -EIO;
		goto out;
	}

	if (INTEL_GEN(dev_priv) > 2)
		ENGINE_WRITE(engine,
			     RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));

	/* Now awake, let it get started */
	if (ring->tail != ring->head) {
		ENGINE_WRITE(engine, RING_TAIL, ring->tail);
		ENGINE_POSTING_READ(engine, RING_TAIL);
	}

	/* Papering over lost _interrupts_ immediately following the restart */
	intel_engine_queue_breadcrumbs(engine);
out:
	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);

	return ret;
}

static void reset_prepare(struct intel_engine_cs *engine)
{
	intel_engine_stop_cs(engine);
}

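/*
 * After a reset, find the first incomplete request (if any), let
 * i915_reset_request() decide how to treat it, and rewind the ring head
 * to that request (or to the current tail if nothing is outstanding) so
 * that innocent queued requests can be replayed.
 */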
static void reset_ring(struct intel_engine_cs *engine, bool stalled)
{
	struct i915_timeline *tl = &engine->timeline;
	struct i915_request *pos, *rq;
	unsigned long flags;
	u32 head;

	rq = NULL;
	spin_lock_irqsave(&tl->lock, flags);
	list_for_each_entry(pos, &tl->requests, link) {
		if (!i915_request_completed(pos)) {
			rq = pos;
			break;
		}
	}

	/*
	 * The guilty request will get skipped on a hung engine.
	 *
	 * Users of client default contexts do not rely on logical
	 * state preserved between batches so it is safe to execute
	 * queued requests following the hang. Non default contexts
	 * rely on preserved state, so skipping a batch loses the
	 * evolution of the state and it needs to be considered corrupted.
	 * Executing more queued batches on top of corrupted state is
	 * risky. But we take the risk by trying to advance through
	 * the queued requests in order to make the client behaviour
	 * more predictable around resets, by not throwing away random
	 * amount of batches it has prepared for execution. Sophisticated
	 * clients can use gem_reset_stats_ioctl and dma fence status
	 * (exported via sync_file info ioctl on explicit fences) to observe
	 * when it loses the context state and should rebuild accordingly.
	 *
	 * The context ban, and ultimately the client ban, mechanism are safety
	 * valves if client submission ends up resulting in nothing more than
	 * subsequent hangs.
	 */

	if (rq) {
		/*
		 * Try to restore the logical GPU state to match the
		 * continuation of the request queue. If we skip the
		 * context/PD restore, then the next request may try to execute
		 * assuming that its context is valid and loaded on the GPU and
		 * so may try to access invalid memory, prompting repeated GPU
		 * hangs.
		 *
		 * If the request was guilty, we still restore the logical
		 * state in case the next request requires it (e.g. the
		 * aliasing ppgtt), but skip over the hung batch.
		 *
		 * If the request was innocent, we try to replay the request
		 * with the restored context.
		 */
		i915_reset_request(rq, stalled);

		GEM_BUG_ON(rq->ring != engine->buffer);
		head = rq->head;
	} else {
		head = engine->buffer->tail;
	}
	engine->buffer->head = intel_ring_wrap(engine->buffer, head);

	spin_unlock_irqrestore(&tl->lock, flags);
}

static void reset_finish(struct intel_engine_cs *engine)
{
}

static int intel_rcs_ctx_init(struct i915_request *rq)
{
	int ret;

	ret = intel_engine_emit_ctx_wa(rq);
	if (ret != 0)
		return ret;

	ret = i915_gem_render_state_emit(rq);
	if (ret)
		return ret;

	return 0;
}

static int rcs_resume(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/*
	 * Disable CONSTANT_BUFFER before it is loaded from the context
	 * image. For as it is loaded, it is executed and the stored
	 * address may no longer be valid, leading to a GPU hang.
	 *
	 * This imposes the requirement that userspace reload their
	 * CONSTANT_BUFFER on every batch, fortunately a requirement
	 * they are already accustomed to from before contexts were
	 * enabled.
	 */
	if (IS_GEN(dev_priv, 4))
		I915_WRITE(ECOSKPD,
			   _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (IS_GEN_RANGE(dev_priv, 4, 6))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
	 */
	if (IS_GEN_RANGE(dev_priv, 6, 7))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (IS_GEN(dev_priv, 6))
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN(dev_priv, 7))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (IS_GEN(dev_priv, 6)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (IS_GEN_RANGE(dev_priv, 6, 7))
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	return xcs_resume(engine);
}

static void cancel_requests(struct intel_engine_cs *engine)
{
	struct i915_request *request;
	unsigned long flags;

	spin_lock_irqsave(&engine->timeline.lock, flags);

	/* Mark all submitted requests as skipped. */
	list_for_each_entry(request, &engine->timeline.requests, link) {
		if (!i915_request_signaled(request))
			dma_fence_set_error(&request->fence, -EIO);

		i915_request_mark_complete(request);
	}

	/* Remaining _unready_ requests will be nop'ed when submitted */

	spin_unlock_irqrestore(&engine->timeline.lock, flags);
}

static void i9xx_submit_request(struct i915_request *request)
{
	i915_request_submit(request);

	ENGINE_WRITE(request->engine, RING_TAIL,
		     intel_ring_set_tail(request->ring, request->tail));
}

static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	*cs++ = MI_STORE_DWORD_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_STORE_DWORD_INDEX;
	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

#define GEN5_WA_STORES 8 /* must be at least 1! */
static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	int i;

	GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	*cs++ = MI_STORE_DWORD_INDEX;
	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);

	BUILD_BUG_ON(GEN5_WA_STORES < 1);
	for (i = 0; i < GEN5_WA_STORES; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}
#undef GEN5_WA_STORES

static void
gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
i9xx_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

static void
i9xx_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

static void
i8xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
	POSTING_READ16(RING_IMR(engine->mmio_base));
}

static void
i8xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
}

static int
bsd_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);
	return 0;
}

static void
gen6_irq_enable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR,
		     ~(engine->irq_enable_mask | engine->irq_keep_mask));

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
gen6_irq_disable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
	gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
hsw_vebox_irq_enable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask);
}

static void
hsw_vebox_irq_disable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~0);
	gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask);
}

static int
i965_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 length,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
		I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs, cs_offset = i915_scratch_offset(rq->i915);

	GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/* Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(rq, cs);

	return 0;
}

static int
i915_emit_bb_start(struct i915_request *rq,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(rq, cs);

	return 0;
}

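/*
 * Pin the ring buffer (and its timeline) into the GGTT and map it so the
 * CPU can write commands into it: stolen-memory rings go through the
 * mappable aperture (iomap), other objects get a regular kernel mapping.
 */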
int intel_ring_pin(struct intel_ring *ring)
{
	struct i915_vma *vma = ring->vma;
	enum i915_map_type map = i915_coherent_map_type(vma->vm->i915);
	unsigned int flags;
	void *addr;
	int ret;

	GEM_BUG_ON(ring->vaddr);

	ret = i915_timeline_pin(ring->timeline);
	if (ret)
		return ret;

	flags = PIN_GLOBAL;

	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);

	if (vma->obj->stolen)
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	ret = i915_vma_pin(vma, 0, 0, flags);
	if (unlikely(ret))
		goto unpin_timeline;

	if (i915_vma_is_map_and_fenceable(vma))
		addr = (void __force *)i915_vma_pin_iomap(vma);
	else
		addr = i915_gem_object_pin_map(vma->obj, map);
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto unpin_ring;
	}

	vma->obj->pin_global++;

	ring->vaddr = addr;
	return 0;

unpin_ring:
	i915_vma_unpin(vma);
unpin_timeline:
	i915_timeline_unpin(ring->timeline);
	return ret;
}

void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	ring->tail = tail;
	ring->head = tail;
	ring->emit = tail;
	intel_ring_update_space(ring);
}

void intel_ring_unpin(struct intel_ring *ring)
{
	GEM_BUG_ON(!ring->vma);
	GEM_BUG_ON(!ring->vaddr);

	/* Discard any unused bytes beyond that submitted to hw. */
	intel_ring_reset(ring, ring->tail);

	if (i915_vma_is_map_and_fenceable(ring->vma))
		i915_vma_unpin_iomap(ring->vma);
	else
		i915_gem_object_unpin_map(ring->vma->obj);
	ring->vaddr = NULL;

	ring->vma->obj->pin_global--;
	i915_vma_unpin(ring->vma);

	i915_timeline_unpin(ring->timeline);
}

static struct i915_vma *
intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
{
	struct i915_address_space *vm = &dev_priv->ggtt.vm;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_stolen(dev_priv, size);
	if (!obj)
		obj = i915_gem_object_create_internal(dev_priv, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
	 * if supported by the platform's GGTT.
	 */
	if (vm->has_read_only)
		i915_gem_object_set_readonly(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return vma;
}

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size)
{
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
	GEM_BUG_ON(timeline == &engine->timeline);
	lockdep_assert_held(&engine->i915->drm.struct_mutex);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	kref_init(&ring->ref);
	INIT_LIST_HEAD(&ring->request_list);
	ring->timeline = i915_timeline_get(timeline);

	ring->size = size;
	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;

	intel_ring_update_space(ring);

	vma = intel_ring_create_vma(engine->i915, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}

void intel_ring_free(struct kref *ref)
{
	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
	struct drm_i915_gem_object *obj = ring->vma->obj;

	i915_vma_close(ring->vma);
	__i915_gem_object_release_unless_active(obj);

	i915_timeline_put(ring->timeline);
	kfree(ring);
}

static void __ring_context_fini(struct intel_context *ce)
{
	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
	i915_gem_object_put(ce->state->obj);
}

static void ring_context_destroy(struct kref *ref)
{
	struct intel_context *ce = container_of(ref, typeof(*ce), ref);

	GEM_BUG_ON(intel_context_is_pinned(ce));

	if (ce->state)
		__ring_context_fini(ce);

	intel_context_free(ce);
}

static int __context_pin_ppgtt(struct i915_gem_context *ctx)
{
	struct i915_hw_ppgtt *ppgtt;
	int err = 0;

	ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
	if (ppgtt)
		err = gen6_ppgtt_pin(ppgtt);

	return err;
}

static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
{
	struct i915_hw_ppgtt *ppgtt;

	ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
	if (ppgtt)
		gen6_ppgtt_unpin(ppgtt);
}

static int __context_pin(struct intel_context *ce)
{
	struct i915_vma *vma;
	int err;

	vma = ce->state;
	if (!vma)
		return 0;

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		return err;

	/*
	 * And mark is as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	vma->obj->pin_global++;
	vma->obj->mm.dirty = true;

	return 0;
}

static void __context_unpin(struct intel_context *ce)
{
	struct i915_vma *vma;

	vma = ce->state;
	if (!vma)
		return;

	vma->obj->pin_global--;
	i915_vma_unpin(vma);
}

static void ring_context_unpin(struct intel_context *ce)
{
	__context_unpin_ppgtt(ce->gem_context);
	__context_unpin(ce);
}

static struct i915_vma *
alloc_context_vma(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create(i915, engine->context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(i915))
		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);

	if (engine->default_state) {
		void *defaults, *vaddr;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_obj;
		}

		defaults = i915_gem_object_pin_map(engine->default_state,
						   I915_MAP_WB);
		if (IS_ERR(defaults)) {
			err = PTR_ERR(defaults);
			goto err_map;
		}

		memcpy(vaddr, defaults, engine->context_size);
		i915_gem_object_unpin_map(engine->default_state);

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);
	}

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	return vma;

err_map:
	i915_gem_object_unpin_map(obj);
err_obj:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int ring_context_pin(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	int err;

	/* One ringbuffer to rule them all */
	GEM_BUG_ON(!engine->buffer);
	ce->ring = engine->buffer;

	if (!ce->state && engine->context_size) {
		struct i915_vma *vma;

		vma = alloc_context_vma(engine);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		ce->state = vma;
	}

	err = __context_pin(ce);
	if (err)
		return err;

	err = __context_pin_ppgtt(ce->gem_context);
	if (err)
		goto err_unpin;

	return 0;

err_unpin:
	__context_unpin(ce);
	return err;
}

static void ring_context_reset(struct intel_context *ce)
{
	intel_ring_reset(ce->ring, 0);
}

static const struct intel_context_ops ring_context_ops = {
	.pin = ring_context_pin,
	.unpin = ring_context_unpin,

	.enter = intel_context_enter_engine,
	.exit = intel_context_exit_engine,

	.reset = ring_context_reset,
	.destroy = ring_context_destroy,
};

void intel_engine_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);

	intel_ring_unpin(engine->buffer);
	intel_ring_put(engine->buffer);

	if (engine->cleanup)
		engine->cleanup(engine);

	intel_engine_cleanup_common(engine);

	dev_priv->engine[engine->id] = NULL;
	kfree(engine);
}

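/*
 * Point the engine at a new set of page directories by writing
 * PP_DIR_DCLV and PP_DIR_BASE from the ring (load_pd_dir), and read
 * PP_DIR_BASE back into the scratch page to stall until the load has
 * landed (flush_pd_dir).
 */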
static int load_pd_dir(struct i915_request *rq,
		       const struct i915_hw_ppgtt *ppgtt)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
	*cs++ = PP_DIR_DCLV_2G;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = ppgtt->pd.base.ggtt_offset << 10;

	intel_ring_advance(rq, cs);

	return 0;
}

static int flush_pd_dir(struct i915_request *rq)
{
	const struct intel_engine_cs * const engine = rq->engine;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Stall until the page table load is complete */
	*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
	*cs++ = i915_scratch_offset(rq->i915);
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);
	return 0;
}

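/*
 * Emit an MI_SET_CONTEXT for the request's context, bracketed by the
 * per-gen workarounds: MI_ARB on/off and PSMI sleep-message disable on
 * gen7, MI_SUSPEND_FLUSH on gen5, and an optional dummy switch to the
 * kernel context when a forced restore is required.
 */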
e61e0f51 1598static inline int mi_set_context(struct i915_request *rq, u32 flags)
8911a31c
CW
1599{
1600 struct drm_i915_private *i915 = rq->i915;
1601 struct intel_engine_cs *engine = rq->engine;
1602 enum intel_engine_id id;
8a68d464
CW
1603 const int num_engines =
1604 IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
1fc719d1 1605 bool force_restore = false;
8911a31c
CW
1606 int len;
1607 u32 *cs;
1608
1609 flags |= MI_MM_SPACE_GTT;
1610 if (IS_HASWELL(i915))
1611 /* These flags are for resource streamer on HSW+ */
1612 flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
1613 else
1215d28e 1614 /* We need to save the extended state for powersaving modes */
8911a31c
CW
1615 flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
1616
1617 len = 4;
cf819eff 1618 if (IS_GEN(i915, 7))
8a68d464 1619 len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
1215d28e
CW
1620 else if (IS_GEN(i915, 5))
1621 len += 2;
1fc719d1
CW
1622 if (flags & MI_FORCE_RESTORE) {
1623 GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
1624 flags &= ~MI_FORCE_RESTORE;
1625 force_restore = true;
1626 len += 2;
1627 }
8911a31c
CW
1628
1629 cs = intel_ring_begin(rq, len);
1630 if (IS_ERR(cs))
1631 return PTR_ERR(cs);
1632
1633 /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
cf819eff 1634 if (IS_GEN(i915, 7)) {
8911a31c 1635 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
8a68d464 1636 if (num_engines) {
8911a31c
CW
1637 struct intel_engine_cs *signaller;
1638
8a68d464 1639 *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
8911a31c
CW
1640 for_each_engine(signaller, i915, id) {
1641 if (signaller == engine)
1642 continue;
1643
1644 *cs++ = i915_mmio_reg_offset(
1645 RING_PSMI_CTL(signaller->mmio_base));
1646 *cs++ = _MASKED_BIT_ENABLE(
1647 GEN6_PSMI_SLEEP_MSG_DISABLE);
1648 }
1649 }
1215d28e
CW
1650 } else if (IS_GEN(i915, 5)) {
1651 /*
1652 * This w/a is only listed for pre-production ilk a/b steppings,
1653 * but is also mentioned for programming the powerctx. To be
1654 * safe, just apply the workaround; we do not use SyncFlush so
1655 * this should never take effect and so be a no-op!
1656 */
1657 *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
8911a31c
CW
1658 }
1659
1fc719d1
CW
1660 if (force_restore) {
1661 /*
1662 * The HW doesn't handle being told to restore the current
1663 * context very well. Quite often it likes goes to go off and
1664 * sulk, especially when it is meant to be reloading PP_DIR.
1665 * A very simple fix to force the reload is to simply switch
1666 * away from the current context and back again.
1667 *
1668 * Note that the kernel_context will contain random state
1669 * following the INHIBIT_RESTORE. We accept this since we
1670 * never use the kernel_context state; it is merely a
1671 * placeholder we use to flush other contexts.
1672 */
1673 *cs++ = MI_SET_CONTEXT;
9dbfea98 1674 *cs++ = i915_ggtt_offset(engine->kernel_context->state) |
1fc719d1
CW
1675 MI_MM_SPACE_GTT |
1676 MI_RESTORE_INHIBIT;
1677 }
1678
8911a31c
CW
1679 *cs++ = MI_NOOP;
1680 *cs++ = MI_SET_CONTEXT;
1fc44d9b 1681 *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
8911a31c
CW
1682 /*
1683 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
1684 * WaMiSetContext_Hang:snb,ivb,vlv
1685 */
1686 *cs++ = MI_NOOP;
1687
cf819eff 1688 if (IS_GEN(i915, 7)) {
8a68d464 1689 if (num_engines) {
8911a31c
CW
1690 struct intel_engine_cs *signaller;
1691 i915_reg_t last_reg = {}; /* keep gcc quiet */
1692
8a68d464 1693 *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
8911a31c
CW
1694 for_each_engine(signaller, i915, id) {
1695 if (signaller == engine)
1696 continue;
1697
1698 last_reg = RING_PSMI_CTL(signaller->mmio_base);
1699 *cs++ = i915_mmio_reg_offset(last_reg);
1700 *cs++ = _MASKED_BIT_DISABLE(
1701 GEN6_PSMI_SLEEP_MSG_DISABLE);
1702 }
1703
1704 /* Insert a delay before the next switch! */
1705 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1706 *cs++ = i915_mmio_reg_offset(last_reg);
51797499 1707 *cs++ = i915_scratch_offset(rq->i915);
8911a31c
CW
1708 *cs++ = MI_NOOP;
1709 }
1710 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1215d28e
CW
1711 } else if (IS_GEN(i915, 5)) {
1712 *cs++ = MI_SUSPEND_FLUSH;
8911a31c
CW
1713 }
1714
1715 intel_ring_advance(rq, cs);
1716
1717 return 0;
1718}
1719
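/*
 * Replay the saved L3 remapping information for @slice, if any, with a
 * single MI_LOAD_REGISTER_IMM over the GEN7_L3LOG registers. Called
 * when switching to a context that still has slices marked for
 * remapping.
 */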
e61e0f51 1720static int remap_l3(struct i915_request *rq, int slice)
8911a31c
CW
1721{
1722 u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
1723 int i;
1724
1725 if (!remap_info)
1726 return 0;
1727
1728 cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
1729 if (IS_ERR(cs))
1730 return PTR_ERR(cs);
1731
1732 /*
1733 * Note: We do not worry about the concurrent register cacheline hang
1734 * here because no other code should access these registers other than
1735 * at initialization time.
1736 */
1737 *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
1738 for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
1739 *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
1740 *cs++ = remap_info[i];
1741 }
1742 *cs++ = MI_NOOP;
1743 intel_ring_advance(rq, cs);
1744
1745 return 0;
1746}
1747
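/*
 * Perform the legacy context switch for @rq: reload the ppgtt page
 * directory (repeatedly on Baytrail's blitter, which needs extra
 * convincing), emit MI_SET_CONTEXT if the context carries render
 * state, then stall and flush until the PD load has landed before
 * finally replaying any outstanding L3 slice remapping. On failure
 * the pd_dirty_engines bit is put back so that the reload is retried
 * on the next request.
 */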
e61e0f51 1748static int switch_context(struct i915_request *rq)
8911a31c
CW
1749{
1750 struct intel_engine_cs *engine = rq->engine;
b3ee09a4
CW
1751 struct i915_gem_context *ctx = rq->gem_context;
1752 struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
1753 unsigned int unwind_mm = 0;
8911a31c
CW
1754 u32 hw_flags = 0;
1755 int ret, i;
1756
8911a31c
CW
1757 GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
1758
b3ee09a4 1759 if (ppgtt) {
e2a13d1b
CW
1760 int loops;
1761
1762 /*
1763	 * Baytrail takes a little more convincing that it really needs
1764	 * to reload the PD between contexts. Simply stalling for longer
1765	 * is not enough: adding more stalls after the load_pd_dir (i.e.
1766	 * wrapping flush_pd_dir in a long loop) is not as effective
1767 * as reloading the PD umpteen times. 32 is derived from
1768 * experimentation (gem_exec_parallel/fds) and has no good
1769 * explanation.
1770 */
1771 loops = 1;
8a68d464 1772 if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
e2a13d1b
CW
1773 loops = 32;
1774
1775 do {
1776 ret = load_pd_dir(rq, ppgtt);
1777 if (ret)
1778 goto err;
1779 } while (--loops);
8911a31c 1780
8a68d464
CW
1781 if (ppgtt->pd_dirty_engines & engine->mask) {
1782 unwind_mm = engine->mask;
1783 ppgtt->pd_dirty_engines &= ~unwind_mm;
b3ee09a4
CW
1784 hw_flags = MI_FORCE_RESTORE;
1785 }
8911a31c
CW
1786 }
1787
b3ee09a4 1788 if (rq->hw_context->state) {
8a68d464 1789 GEM_BUG_ON(engine->id != RCS0);
8911a31c
CW
1790
1791 /*
1792	 * The kernel context(s) are treated as pure scratch and are not
1793 * expected to retain any state (as we sacrifice it during
1794 * suspend and on resume it may be corrupted). This is ok,
1795 * as nothing actually executes using the kernel context; it
1796 * is purely used for flushing user contexts.
1797 */
b3ee09a4 1798 if (i915_gem_context_is_kernel(ctx))
8911a31c
CW
1799 hw_flags = MI_RESTORE_INHIBIT;
1800
1801 ret = mi_set_context(rq, hw_flags);
1802 if (ret)
1803 goto err_mm;
8911a31c 1804 }
8911a31c 1805
d9d117e4 1806 if (ppgtt) {
06348d30
CW
1807 ret = engine->emit_flush(rq, EMIT_INVALIDATE);
1808 if (ret)
1809 goto err_mm;
1810
d9d117e4
CW
1811 ret = flush_pd_dir(rq);
1812 if (ret)
1813 goto err_mm;
06348d30
CW
1814
1815 /*
1816 * Not only do we need a full barrier (post-sync write) after
1817 * invalidating the TLBs, but we need to wait a little bit
1818	 * longer. Whether this merely delays us, or whether the
1819 * subsequent flush is a key part of serialising with the
1820 * post-sync op, this extra pass appears vital before a
1821 * mm switch!
1822 */
1823 ret = engine->emit_flush(rq, EMIT_INVALIDATE);
1824 if (ret)
1825 goto err_mm;
1826
1827 ret = engine->emit_flush(rq, EMIT_FLUSH);
1828 if (ret)
1829 goto err_mm;
8911a31c
CW
1830 }
1831
b3ee09a4 1832 if (ctx->remap_slice) {
8911a31c 1833 for (i = 0; i < MAX_L3_SLICES; i++) {
b3ee09a4 1834 if (!(ctx->remap_slice & BIT(i)))
8911a31c
CW
1835 continue;
1836
1837 ret = remap_l3(rq, i);
1838 if (ret)
b3ee09a4 1839 goto err_mm;
8911a31c
CW
1840 }
1841
b3ee09a4 1842 ctx->remap_slice = 0;
8911a31c
CW
1843 }
1844
1845 return 0;
1846
8911a31c 1847err_mm:
b3ee09a4 1848 if (unwind_mm)
8a68d464 1849 ppgtt->pd_dirty_engines |= unwind_mm;
8911a31c
CW
1850err:
1851 return ret;
1852}
1853
e61e0f51 1854static int ring_request_alloc(struct i915_request *request)
9d773091 1855{
fd138212 1856 int ret;
6310346e 1857
08819549 1858 GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
85474441 1859 GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
e8a9c58f 1860
5f5800a7
CW
1861 /*
1862 * Flush enough space to reduce the likelihood of waiting after
6310346e
CW
1863 * we start building the request - in which case we will just
1864 * have to repeat work.
1865 */
a0442461 1866 request->reserved_space += LEGACY_REQUEST_SIZE;
6310346e 1867
928f8f42
CW
1868 /* Unconditionally invalidate GPU caches and TLBs. */
1869 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
fd138212
CW
1870 if (ret)
1871 return ret;
6310346e 1872
928f8f42 1873 ret = switch_context(request);
3fef5cda
CW
1874 if (ret)
1875 return ret;
1876
a0442461 1877 request->reserved_space -= LEGACY_REQUEST_SIZE;
6310346e 1878 return 0;
9d773091
CW
1879}
1880
fd138212 1881static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
987046ad 1882{
e61e0f51 1883 struct i915_request *target;
e95433c7
CW
1884 long timeout;
1885
95aebcb2 1886 if (intel_ring_update_space(ring) >= bytes)
987046ad
CW
1887 return 0;
1888
36620032 1889 GEM_BUG_ON(list_empty(&ring->request_list));
675d9ad7 1890 list_for_each_entry(target, &ring->request_list, ring_link) {
987046ad 1891 /* Would completion of this request free enough space? */
605d5b32
CW
1892 if (bytes <= __intel_ring_space(target->postfix,
1893 ring->emit, ring->size))
987046ad 1894 break;
79bbcc29 1895 }
29b1b415 1896
675d9ad7 1897 if (WARN_ON(&target->ring_link == &ring->request_list))
987046ad
CW
1898 return -ENOSPC;
1899
e61e0f51 1900 timeout = i915_request_wait(target,
e95433c7
CW
1901 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
1902 MAX_SCHEDULE_TIMEOUT);
1903 if (timeout < 0)
1904 return timeout;
7da844c5 1905
e61e0f51 1906 i915_request_retire_upto(target);
7da844c5
CW
1907
1908 intel_ring_update_space(ring);
1909 GEM_BUG_ON(ring->space < bytes);
1910 return 0;
29b1b415
JH
1911}
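/*
 * Illustrative sketch (not part of the driver): how free space in a
 * power-of-two ring is derived from the head/tail offsets. A guard of
 * one cacheline is kept free so that a completely full ring can be
 * distinguished from a completely empty one; the 64-byte guard below
 * is an assumption for the example, the driver itself uses
 * __intel_ring_space() for this calculation.
 */
static inline unsigned int example_ring_space(unsigned int head,
					      unsigned int tail,
					      unsigned int size)
{
	/* size must be a power of two for the mask to handle wrapping */
	return (head - tail - 64) & (size - 1);
}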
1912
e61e0f51 1913u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
cbcc80df 1914{
e61e0f51 1915 struct intel_ring *ring = rq->ring;
5e5655c3
CW
1916 const unsigned int remain_usable = ring->effective_size - ring->emit;
1917 const unsigned int bytes = num_dwords * sizeof(u32);
1918 unsigned int need_wrap = 0;
1919 unsigned int total_bytes;
73dec95e 1920 u32 *cs;
29b1b415 1921
6492ca79
CW
1922 /* Packets must be qword aligned. */
1923 GEM_BUG_ON(num_dwords & 1);
1924
e61e0f51 1925 total_bytes = bytes + rq->reserved_space;
5e5655c3 1926 GEM_BUG_ON(total_bytes > ring->effective_size);
29b1b415 1927
5e5655c3
CW
1928 if (unlikely(total_bytes > remain_usable)) {
1929 const int remain_actual = ring->size - ring->emit;
1930
1931 if (bytes > remain_usable) {
1932 /*
1933	 * Not enough space for the basic request, so we need to
1934 * flush out the remainder and then wait for
1935 * base + reserved.
1936 */
1937 total_bytes += remain_actual;
1938 need_wrap = remain_actual | 1;
1939 } else {
1940 /*
1941 * The base request will fit but the reserved space
1942 * falls off the end. So we don't need an immediate
1943 * wrap and only need to effectively wait for the
1944	 * reserved size from the start of the ringbuffer.
1945 */
e61e0f51 1946 total_bytes = rq->reserved_space + remain_actual;
5e5655c3 1947 }
cbcc80df
MK
1948 }
1949
5e5655c3 1950 if (unlikely(total_bytes > ring->space)) {
fd138212
CW
1951 int ret;
1952
1953 /*
1954 * Space is reserved in the ringbuffer for finalising the
1955 * request, as that cannot be allowed to fail. During request
1956 * finalisation, reserved_space is set to 0 to stop the
1957 * overallocation and the assumption is that then we never need
1958 * to wait (which has the risk of failing with EINTR).
1959 *
e61e0f51 1960 * See also i915_request_alloc() and i915_request_add().
fd138212 1961 */
e61e0f51 1962 GEM_BUG_ON(!rq->reserved_space);
fd138212
CW
1963
1964 ret = wait_for_space(ring, total_bytes);
cbcc80df 1965 if (unlikely(ret))
73dec95e 1966 return ERR_PTR(ret);
cbcc80df
MK
1967 }
1968
987046ad 1969 if (unlikely(need_wrap)) {
5e5655c3
CW
1970 need_wrap &= ~1;
1971 GEM_BUG_ON(need_wrap > ring->space);
1972 GEM_BUG_ON(ring->emit + need_wrap > ring->size);
46b86332 1973 GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
78501eac 1974
987046ad 1975 /* Fill the tail with MI_NOOP */
46b86332 1976 memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
5e5655c3 1977 ring->space -= need_wrap;
46b86332 1978 ring->emit = 0;
987046ad 1979 }
304d695c 1980
e6ba9992 1981 GEM_BUG_ON(ring->emit > ring->size - bytes);
605d5b32 1982 GEM_BUG_ON(ring->space < bytes);
e6ba9992 1983 cs = ring->vaddr + ring->emit;
46b86332 1984 GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
e6ba9992 1985 ring->emit += bytes;
1dae2dfb 1986 ring->space -= bytes;
73dec95e
TU
1987
1988 return cs;
8187a2b7 1989}
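/*
 * Illustrative sketch (not part of the driver): the emission pattern
 * used throughout this file. Callers reserve an even number of dwords,
 * bail out if the ring (or a wrap) cannot be serviced, write their
 * packet and then advance the ring tail.
 */
static inline int example_emit_two_noops(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}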
78501eac 1990
753b1ad4 1991/* Align the ring tail to a cacheline boundary */
e61e0f51 1992int intel_ring_cacheline_align(struct i915_request *rq)
753b1ad4 1993{
1f177a13
CW
1994 int num_dwords;
1995 void *cs;
753b1ad4 1996
1f177a13 1997 num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
753b1ad4
VS
1998 if (num_dwords == 0)
1999 return 0;
2000
1f177a13
CW
2001 num_dwords = CACHELINE_DWORDS - num_dwords;
2002 GEM_BUG_ON(num_dwords & 1);
2003
e61e0f51 2004 cs = intel_ring_begin(rq, num_dwords);
73dec95e
TU
2005 if (IS_ERR(cs))
2006 return PTR_ERR(cs);
753b1ad4 2007
1f177a13 2008 memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
e61e0f51 2009 intel_ring_advance(rq, cs);
753b1ad4 2010
1f177a13 2011 GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
753b1ad4
VS
2012 return 0;
2013}
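/*
 * Worked example (illustrative only): with 64-byte cachelines there
 * are 16 dwords per cacheline, so an emit offset of 0x48 has
 * (0x48 & 63) / 4 = 2 dwords of the current cacheline in use and
 * 16 - 2 = 14 MI_NOOPs are emitted to reach the next boundary.
 */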
2014
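/*
 * On gen6 the BSD tail pointer must not be updated while the ring is
 * asleep: under forcewake we disable the ring's IDLE messaging (so the
 * GT treats it as busy and brings it out of rc6), clear the context
 * id, wait for the ring to report awake, write the tail, and only then
 * re-enable the IDLE messages and release forcewake.
 */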
e61e0f51 2015static void gen6_bsd_submit_request(struct i915_request *request)
881f47b6 2016{
baba6e57 2017 struct intel_uncore *uncore = request->engine->uncore;
881f47b6 2018
d2d551c0 2019 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
76f8421f 2020
881f47b6 2021 /* Every tail move must follow the sequence below */
12f55818
CW
2022
2023 /* Disable notification that the ring is IDLE. The GT
2024 * will then assume that it is busy and bring it out of rc6.
2025 */
d2d551c0
DCS
2026 intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
2027 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
12f55818
CW
2028
2029 /* Clear the context id. Here be magic! */
d2d551c0 2030 intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
0206e353 2031
12f55818 2032 /* Wait for the ring not to be idle, i.e. for it to wake up. */
d2d551c0 2033 if (__intel_wait_for_register_fw(uncore,
02b312d0
CW
2034 GEN6_BSD_SLEEP_PSMI_CONTROL,
2035 GEN6_BSD_SLEEP_INDICATOR,
2036 0,
2037 1000, 0, NULL))
12f55818 2038 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 2039
12f55818 2040 /* Now that the ring is fully powered up, update the tail */
b0411e7d 2041 i9xx_submit_request(request);
12f55818
CW
2042
2043 /* Let the ring send IDLE messages to the GT again,
2044 * and so let it sleep to conserve power when idle.
2045 */
d2d551c0
DCS
2046 intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
2047 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
76f8421f 2048
d2d551c0 2049 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
881f47b6
XH
2050}
2051
06348d30 2052static int mi_flush_dw(struct i915_request *rq, u32 flags)
881f47b6 2053{
73dec95e 2054 u32 cmd, *cs;
b72f3acb 2055
e61e0f51 2056 cs = intel_ring_begin(rq, 4);
73dec95e
TU
2057 if (IS_ERR(cs))
2058 return PTR_ERR(cs);
b72f3acb 2059
71a77e07 2060 cmd = MI_FLUSH_DW;
f0a1fb10 2061
70b73f9a
CW
2062 /*
2063 * We always require a command barrier so that subsequent
f0a1fb10
CW
2064 * commands, such as breadcrumb interrupts, are strictly ordered
2065 * wrt the contents of the write cache being flushed to memory
2066 * (and thus being coherent from the CPU).
2067 */
2068 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2069
9a289771 2070 /*
70b73f9a 2071 * Bspec vol 1c.3 - blitter engine command streamer:
9a289771
JB
2072 * "If ENABLED, all TLBs will be invalidated once the flush
2073 * operation is complete. This bit is only valid when the
2074 * Post-Sync Operation field is a value of 1h or 3h."
2075 */
70b73f9a 2076 cmd |= flags;
f0a1fb10 2077
73dec95e
TU
2078 *cs++ = cmd;
2079 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
79e6770c 2080 *cs++ = 0;
73dec95e 2081 *cs++ = MI_NOOP;
70b73f9a 2082
e61e0f51 2083 intel_ring_advance(rq, cs);
70b73f9a 2084
1c7a0623
BW
2085 return 0;
2086}
2087
70b73f9a
CW
2088static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
2089{
06348d30 2090 return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
70b73f9a
CW
2091}
2092
2093static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
2094{
2095 return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
2096}
2097
d7d4eedd 2098static int
e61e0f51 2099hsw_emit_bb_start(struct i915_request *rq,
803688ba
CW
2100 u64 offset, u32 len,
2101 unsigned int dispatch_flags)
d7d4eedd 2102{
73dec95e 2103 u32 *cs;
d7d4eedd 2104
e61e0f51 2105 cs = intel_ring_begin(rq, 2);
73dec95e
TU
2106 if (IS_ERR(cs))
2107 return PTR_ERR(cs);
d7d4eedd 2108
73dec95e 2109 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
08e3e21a 2110 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
d7d4eedd 2111 /* bit0-7 is the length on GEN6+ */
73dec95e 2112 *cs++ = offset;
e61e0f51 2113 intel_ring_advance(rq, cs);
d7d4eedd
CW
2114
2115 return 0;
2116}
2117
881f47b6 2118static int
e61e0f51 2119gen6_emit_bb_start(struct i915_request *rq,
803688ba
CW
2120 u64 offset, u32 len,
2121 unsigned int dispatch_flags)
881f47b6 2122{
73dec95e 2123 u32 *cs;
ab6f8e32 2124
e61e0f51 2125 cs = intel_ring_begin(rq, 2);
73dec95e
TU
2126 if (IS_ERR(cs))
2127 return PTR_ERR(cs);
e1f99ce6 2128
73dec95e
TU
2129 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
2130 0 : MI_BATCH_NON_SECURE_I965);
0206e353 2131 /* bit0-7 is the length on GEN6+ */
73dec95e 2132 *cs++ = offset;
e61e0f51 2133 intel_ring_advance(rq, cs);
ab6f8e32 2134
0206e353 2135 return 0;
881f47b6
XH
2136}
2137
549f7365
CW
2138/* Blitter support (SandyBridge+) */
2139
e61e0f51 2140static int gen6_ring_flush(struct i915_request *rq, u32 mode)
8d19215b 2141{
70b73f9a 2142 return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
8d19215b
ZN
2143}
2144
ff44ad51
CW
2145static void i9xx_set_default_submission(struct intel_engine_cs *engine)
2146{
2147 engine->submit_request = i9xx_submit_request;
27a5f61b 2148 engine->cancel_requests = cancel_requests;
aba5e278
CW
2149
2150 engine->park = NULL;
2151 engine->unpark = NULL;
ff44ad51
CW
2152}
2153
2154static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
2155{
aba5e278 2156 i9xx_set_default_submission(engine);
ff44ad51
CW
2157 engine->submit_request = gen6_bsd_submit_request;
2158}
2159
11334c6a
CW
2160static void setup_irq(struct intel_engine_cs *engine)
2161{
2162 struct drm_i915_private *i915 = engine->i915;
2163
2164 if (INTEL_GEN(i915) >= 6) {
2165 engine->irq_enable = gen6_irq_enable;
2166 engine->irq_disable = gen6_irq_disable;
2167 } else if (INTEL_GEN(i915) >= 5) {
2168 engine->irq_enable = gen5_irq_enable;
2169 engine->irq_disable = gen5_irq_disable;
2170 } else if (INTEL_GEN(i915) >= 3) {
2171 engine->irq_enable = i9xx_irq_enable;
2172 engine->irq_disable = i9xx_irq_disable;
2173 } else {
2174 engine->irq_enable = i8xx_irq_enable;
2175 engine->irq_disable = i8xx_irq_disable;
2176 }
2177}
2178
2179static void setup_common(struct intel_engine_cs *engine)
06a2fe22 2180{
11334c6a
CW
2181 struct drm_i915_private *i915 = engine->i915;
2182
79e6770c 2183 /* gen8+ are only supported with execlists */
11334c6a 2184 GEM_BUG_ON(INTEL_GEN(i915) >= 8);
79e6770c 2185
11334c6a 2186 setup_irq(engine);
618e4ca7 2187
79ffac85 2188 engine->resume = xcs_resume;
5adfb772
CW
2189 engine->reset.prepare = reset_prepare;
2190 engine->reset.reset = reset_ring;
2191 engine->reset.finish = reset_finish;
7445a2a4 2192
4dc84b77 2193 engine->cops = &ring_context_ops;
f73e7399
CW
2194 engine->request_alloc = ring_request_alloc;
2195
85474441
CW
2196 /*
2197 * Using a global execution timeline; the previous final breadcrumb is
2198	 * equivalent to our next initial breadcrumb so we can elide
2199 * engine->emit_init_breadcrumb().
2200 */
2201 engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
11334c6a 2202 if (IS_GEN(i915, 5))
85474441 2203 engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
ff44ad51
CW
2204
2205 engine->set_default_submission = i9xx_set_default_submission;
6f7bef75 2206
11334c6a 2207 if (INTEL_GEN(i915) >= 6)
803688ba 2208 engine->emit_bb_start = gen6_emit_bb_start;
11334c6a 2209 else if (INTEL_GEN(i915) >= 4)
803688ba 2210 engine->emit_bb_start = i965_emit_bb_start;
11334c6a 2211 else if (IS_I830(i915) || IS_I845G(i915))
803688ba 2212 engine->emit_bb_start = i830_emit_bb_start;
6f7bef75 2213 else
803688ba 2214 engine->emit_bb_start = i915_emit_bb_start;
06a2fe22
TU
2215}
2216
11334c6a 2217static void setup_rcs(struct intel_engine_cs *engine)
5c1143bb 2218{
11334c6a 2219 struct drm_i915_private *i915 = engine->i915;
06a2fe22 2220
11334c6a 2221 if (HAS_L3_DPF(i915))
61ff75ac 2222 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
f8973c21 2223
fa6f071d
DCS
2224 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2225
11334c6a 2226 if (INTEL_GEN(i915) >= 7) {
e2f80391 2227 engine->init_context = intel_rcs_ctx_init;
c7fe7d25 2228 engine->emit_flush = gen7_render_ring_flush;
85474441 2229 engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
11334c6a 2230 } else if (IS_GEN(i915, 6)) {
caa5915b
CW
2231 engine->init_context = intel_rcs_ctx_init;
2232 engine->emit_flush = gen6_render_ring_flush;
85474441 2233 engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
11334c6a 2234 } else if (IS_GEN(i915, 5)) {
c7fe7d25 2235 engine->emit_flush = gen4_render_ring_flush;
59465b5f 2236 } else {
11334c6a 2237 if (INTEL_GEN(i915) < 4)
c7fe7d25 2238 engine->emit_flush = gen2_render_ring_flush;
46f0f8d1 2239 else
c7fe7d25 2240 engine->emit_flush = gen4_render_ring_flush;
e2f80391 2241 engine->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2242 }
707d9cf9 2243
11334c6a 2244 if (IS_HASWELL(i915))
803688ba 2245 engine->emit_bb_start = hsw_emit_bb_start;
6f7bef75 2246
79ffac85 2247 engine->resume = rcs_resume;
5c1143bb
XH
2248}
2249
11334c6a 2250static void setup_vcs(struct intel_engine_cs *engine)
5c1143bb 2251{
11334c6a 2252 struct drm_i915_private *i915 = engine->i915;
06a2fe22 2253
11334c6a 2254 if (INTEL_GEN(i915) >= 6) {
0fd2c201 2255 /* gen6 bsd needs a special wa for tail updates */
11334c6a 2256 if (IS_GEN(i915, 6))
ff44ad51 2257 engine->set_default_submission = gen6_bsd_set_default_submission;
c7fe7d25 2258 engine->emit_flush = gen6_bsd_ring_flush;
79e6770c 2259 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
caa5915b 2260
11334c6a 2261 if (IS_GEN(i915, 6))
85474441 2262 engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
9fa4973e 2263 else
85474441 2264 engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
58fa3835 2265 } else {
c7fe7d25 2266 engine->emit_flush = bsd_ring_flush;
11334c6a 2267 if (IS_GEN(i915, 5))
e2f80391 2268 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
8d228911 2269 else
e2f80391 2270 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
58fa3835 2271 }
5c1143bb 2272}
549f7365 2273
11334c6a 2274static void setup_bcs(struct intel_engine_cs *engine)
549f7365 2275{
11334c6a 2276 struct drm_i915_private *i915 = engine->i915;
06a2fe22 2277
c7fe7d25 2278 engine->emit_flush = gen6_ring_flush;
79e6770c 2279 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
549f7365 2280
11334c6a 2281 if (IS_GEN(i915, 6))
85474441 2282 engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
9fa4973e 2283 else
85474441 2284 engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
549f7365 2285}
a7b9761d 2286
11334c6a 2287static void setup_vecs(struct intel_engine_cs *engine)
9a8a2213 2288{
11334c6a 2289 struct drm_i915_private *i915 = engine->i915;
caa5915b 2290
11334c6a 2291 GEM_BUG_ON(INTEL_GEN(i915) < 7);
06a2fe22 2292
c7fe7d25 2293 engine->emit_flush = gen6_ring_flush;
79e6770c
CW
2294 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2295 engine->irq_enable = hsw_vebox_irq_enable;
2296 engine->irq_disable = hsw_vebox_irq_disable;
9a8a2213 2297
85474441 2298 engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
11334c6a
CW
2299}
2300
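/*
 * Bring-up for the legacy ringbuffer backend is split in two phases:
 * intel_ring_submission_setup() wires up the common and per-class
 * vfuncs, while intel_ring_submission_init() later creates, pins and
 * attaches the ring and its timeline.
 */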
2301int intel_ring_submission_setup(struct intel_engine_cs *engine)
2302{
2303 setup_common(engine);
2304
2305 switch (engine->class) {
2306 case RENDER_CLASS:
2307 setup_rcs(engine);
2308 break;
2309 case VIDEO_DECODE_CLASS:
2310 setup_vcs(engine);
2311 break;
2312 case COPY_ENGINE_CLASS:
2313 setup_bcs(engine);
2314 break;
2315 case VIDEO_ENHANCEMENT_CLASS:
2316 setup_vecs(engine);
2317 break;
2318 default:
2319 MISSING_CASE(engine->class);
2320 return -ENODEV;
2321 }
2322
2323 return 0;
2324}
2325
2326int intel_ring_submission_init(struct intel_engine_cs *engine)
2327{
2328 struct i915_timeline *timeline;
2329 struct intel_ring *ring;
2330 int err;
2331
2332 timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
2333 if (IS_ERR(timeline)) {
2334 err = PTR_ERR(timeline);
2335 goto err;
2336 }
2337 GEM_BUG_ON(timeline->has_initial_breadcrumb);
2338
2339 ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
2340 i915_timeline_put(timeline);
2341 if (IS_ERR(ring)) {
2342 err = PTR_ERR(ring);
2343 goto err;
2344 }
2345
2346 err = intel_ring_pin(ring);
2347 if (err)
2348 goto err_ring;
caa5915b 2349
11334c6a
CW
2350 GEM_BUG_ON(engine->buffer);
2351 engine->buffer = ring;
2352
2353 err = intel_engine_init_common(engine);
2354 if (err)
2355 goto err_unpin;
2356
2357 GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
2358
2359 return 0;
2360
2361err_unpin:
2362 intel_ring_unpin(ring);
2363err_ring:
2364 intel_ring_put(ring);
2365err:
2366 intel_engine_cleanup_common(engine);
2367 return err;
9a8a2213 2368}