]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - drivers/gpu/drm/i915/gt/intel_ringbuffer.c
drm/i915: Extract GT powermanagement interrupt handling
[mirror_ubuntu-focal-kernel.git] / drivers / gpu / drm / i915 / gt / intel_ringbuffer.c
CommitLineData
62fdfeaf
EA
1/*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 * Zou Nan hai <nanhai.zou@intel.com>
26 * Xiang Hai hao<haihao.xiang@intel.com>
27 *
28 */
29
a4d8a0fe 30#include <linux/log2.h>
7c2fa7fa 31
760285e7 32#include <drm/i915_drm.h>
7c2fa7fa 33
10be98a7
CW
34#include "gem/i915_gem_context.h"
35
7c2fa7fa 36#include "i915_drv.h"
62fdfeaf 37#include "i915_trace.h"
10be98a7 38#include "intel_context.h"
2006058e 39#include "intel_gt.h"
d762043f 40#include "intel_gt_pm_irq.h"
112ed2d3 41#include "intel_reset.h"
7d3c425f 42#include "intel_workarounds.h"
62fdfeaf 43
a0442461
CW
44/* Rough estimate of the typical request size, performing a flush,
45 * set-context and then emitting the batch.
46 */
47#define LEGACY_REQUEST_SIZE 200
48
95aebcb2 49unsigned int intel_ring_update_space(struct intel_ring *ring)
ebd0fd4b 50{
95aebcb2
CW
51 unsigned int space;
52
53 space = __intel_ring_space(ring->head, ring->emit, ring->size);
54
55 ring->space = space;
56 return space;
ebd0fd4b
DG
57}
58
b72f3acb 59static int
e61e0f51 60gen2_render_ring_flush(struct i915_request *rq, u32 mode)
46f0f8d1 61{
a889580c 62 unsigned int num_store_dw;
73dec95e 63 u32 cmd, *cs;
46f0f8d1
CW
64
65 cmd = MI_FLUSH;
a889580c 66 num_store_dw = 0;
7c9cf4e3 67 if (mode & EMIT_INVALIDATE)
46f0f8d1 68 cmd |= MI_READ_FLUSH;
a889580c
CW
69 if (mode & EMIT_FLUSH)
70 num_store_dw = 4;
46f0f8d1 71
a889580c 72 cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
73dec95e
TU
73 if (IS_ERR(cs))
74 return PTR_ERR(cs);
46f0f8d1 75
73dec95e 76 *cs++ = cmd;
a889580c
CW
77 while (num_store_dw--) {
78 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
46c5847e
LL
79 *cs++ = intel_gt_scratch_offset(rq->engine->gt,
80 INTEL_GT_SCRATCH_FIELD_DEFAULT);
a889580c
CW
81 *cs++ = 0;
82 }
83 *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
84
e61e0f51 85 intel_ring_advance(rq, cs);
46f0f8d1
CW
86
87 return 0;
88}
89
90static int
e61e0f51 91gen4_render_ring_flush(struct i915_request *rq, u32 mode)
62fdfeaf 92{
73dec95e 93 u32 cmd, *cs;
55f99bf2 94 int i;
6f392d54 95
36d527de
CW
96 /*
97 * read/write caches:
98 *
99 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
100 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
101 * also flushed at 2d versus 3d pipeline switches.
102 *
103 * read-only caches:
104 *
105 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
106 * MI_READ_FLUSH is set, and is always flushed on 965.
107 *
108 * I915_GEM_DOMAIN_COMMAND may not exist?
109 *
110 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
111 * invalidated when MI_EXE_FLUSH is set.
112 *
113 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
114 * invalidated with every MI_FLUSH.
115 *
116 * TLBs:
117 *
118 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
119 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
120 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
121 * are flushed at any MI_FLUSH.
122 */
123
b5321f30 124 cmd = MI_FLUSH;
7c9cf4e3 125 if (mode & EMIT_INVALIDATE) {
36d527de 126 cmd |= MI_EXE_FLUSH;
cf819eff 127 if (IS_G4X(rq->i915) || IS_GEN(rq->i915, 5))
b5321f30
CW
128 cmd |= MI_INVALIDATE_ISP;
129 }
70eac33e 130
55f99bf2
CW
131 i = 2;
132 if (mode & EMIT_INVALIDATE)
133 i += 20;
134
135 cs = intel_ring_begin(rq, i);
73dec95e
TU
136 if (IS_ERR(cs))
137 return PTR_ERR(cs);
b72f3acb 138
73dec95e 139 *cs++ = cmd;
55f99bf2
CW
140
141 /*
142 * A random delay to let the CS invalidate take effect? Without this
143 * delay, the GPU relocation path fails as the CS does not see
144 * the updated contents. Just as important, if we apply the flushes
145 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
146 * write and before the invalidate on the next batch), the relocations
147 * still fail. This implies that is a delay following invalidation
148 * that is required to reset the caches as opposed to a delay to
149 * ensure the memory is written.
150 */
151 if (mode & EMIT_INVALIDATE) {
152 *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
46c5847e
LL
153 *cs++ = intel_gt_scratch_offset(rq->engine->gt,
154 INTEL_GT_SCRATCH_FIELD_DEFAULT) |
db56f974 155 PIPE_CONTROL_GLOBAL_GTT;
55f99bf2
CW
156 *cs++ = 0;
157 *cs++ = 0;
158
159 for (i = 0; i < 12; i++)
160 *cs++ = MI_FLUSH;
161
162 *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
46c5847e
LL
163 *cs++ = intel_gt_scratch_offset(rq->engine->gt,
164 INTEL_GT_SCRATCH_FIELD_DEFAULT) |
db56f974 165 PIPE_CONTROL_GLOBAL_GTT;
55f99bf2
CW
166 *cs++ = 0;
167 *cs++ = 0;
168 }
169
170 *cs++ = cmd;
171
e61e0f51 172 intel_ring_advance(rq, cs);
b72f3acb
CW
173
174 return 0;
8187a2b7
ZN
175}
176
179f4025 177/*
8d315287
JB
178 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
179 * implementing two workarounds on gen6. From section 1.4.7.1
180 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
181 *
182 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
183 * produced by non-pipelined state commands), software needs to first
184 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
185 * 0.
186 *
187 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
188 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
189 *
190 * And the workaround for these two requires this workaround first:
191 *
192 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
193 * BEFORE the pipe-control with a post-sync op and no write-cache
194 * flushes.
195 *
196 * And this last workaround is tricky because of the requirements on
197 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
198 * volume 2 part 1:
199 *
200 * "1 of the following must also be set:
201 * - Render Target Cache Flush Enable ([12] of DW1)
202 * - Depth Cache Flush Enable ([0] of DW1)
203 * - Stall at Pixel Scoreboard ([1] of DW1)
204 * - Depth Stall ([13] of DW1)
205 * - Post-Sync Operation ([13] of DW1)
206 * - Notify Enable ([8] of DW1)"
207 *
208 * The cache flushes require the workaround flush that triggered this
209 * one, so we can't use it. Depth stall would trigger the same.
210 * Post-sync nonzero is what triggered this second workaround, so we
211 * can't use that one either. Notify enable is IRQs, which aren't
212 * really our business. That leaves only stall at scoreboard.
213 */
214static int
caa5915b 215gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
8d315287 216{
db56f974 217 u32 scratch_addr =
46c5847e
LL
218 intel_gt_scratch_offset(rq->engine->gt,
219 INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
73dec95e
TU
220 u32 *cs;
221
e61e0f51 222 cs = intel_ring_begin(rq, 6);
73dec95e
TU
223 if (IS_ERR(cs))
224 return PTR_ERR(cs);
225
226 *cs++ = GFX_OP_PIPE_CONTROL(5);
227 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
228 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
229 *cs++ = 0; /* low dword */
230 *cs++ = 0; /* high dword */
231 *cs++ = MI_NOOP;
e61e0f51 232 intel_ring_advance(rq, cs);
73dec95e 233
e61e0f51 234 cs = intel_ring_begin(rq, 6);
73dec95e
TU
235 if (IS_ERR(cs))
236 return PTR_ERR(cs);
237
238 *cs++ = GFX_OP_PIPE_CONTROL(5);
239 *cs++ = PIPE_CONTROL_QW_WRITE;
240 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
241 *cs++ = 0;
242 *cs++ = 0;
243 *cs++ = MI_NOOP;
e61e0f51 244 intel_ring_advance(rq, cs);
8d315287
JB
245
246 return 0;
247}
248
249static int
e61e0f51 250gen6_render_ring_flush(struct i915_request *rq, u32 mode)
8d315287 251{
db56f974 252 u32 scratch_addr =
46c5847e
LL
253 intel_gt_scratch_offset(rq->engine->gt,
254 INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
73dec95e 255 u32 *cs, flags = 0;
8d315287
JB
256 int ret;
257
b3111509 258 /* Force SNB workarounds for PIPE_CONTROL flushes */
caa5915b 259 ret = gen6_emit_post_sync_nonzero_flush(rq);
b3111509
PZ
260 if (ret)
261 return ret;
262
8d315287
JB
263 /* Just flush everything. Experiments have shown that reducing the
264 * number of bits based on the write domains has little performance
265 * impact.
266 */
7c9cf4e3 267 if (mode & EMIT_FLUSH) {
7d54a904
CW
268 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
269 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
270 /*
271 * Ensure that any following seqno writes only happen
272 * when the render cache is indeed flushed.
273 */
97f209bc 274 flags |= PIPE_CONTROL_CS_STALL;
7d54a904 275 }
7c9cf4e3 276 if (mode & EMIT_INVALIDATE) {
7d54a904
CW
277 flags |= PIPE_CONTROL_TLB_INVALIDATE;
278 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
279 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
280 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
281 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
282 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
283 /*
284 * TLB invalidate requires a post-sync write.
285 */
3ac78313 286 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
7d54a904 287 }
8d315287 288
e61e0f51 289 cs = intel_ring_begin(rq, 4);
73dec95e
TU
290 if (IS_ERR(cs))
291 return PTR_ERR(cs);
8d315287 292
73dec95e
TU
293 *cs++ = GFX_OP_PIPE_CONTROL(4);
294 *cs++ = flags;
295 *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
296 *cs++ = 0;
e61e0f51 297 intel_ring_advance(rq, cs);
8d315287
JB
298
299 return 0;
300}
301
e1a73a54 302static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
caa5915b
CW
303{
304 /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
305 *cs++ = GFX_OP_PIPE_CONTROL(4);
306 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
307 *cs++ = 0;
308 *cs++ = 0;
309
310 *cs++ = GFX_OP_PIPE_CONTROL(4);
311 *cs++ = PIPE_CONTROL_QW_WRITE;
46c5847e
LL
312 *cs++ = intel_gt_scratch_offset(rq->engine->gt,
313 INTEL_GT_SCRATCH_FIELD_DEFAULT) |
db56f974 314 PIPE_CONTROL_GLOBAL_GTT;
caa5915b
CW
315 *cs++ = 0;
316
317 /* Finally we can flush and with it emit the breadcrumb */
318 *cs++ = GFX_OP_PIPE_CONTROL(4);
319 *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
320 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
321 PIPE_CONTROL_DC_FLUSH_ENABLE |
322 PIPE_CONTROL_QW_WRITE |
323 PIPE_CONTROL_CS_STALL);
5013eb8c
CW
324 *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
325 *cs++ = rq->fence.seqno;
326
caa5915b
CW
327 *cs++ = MI_USER_INTERRUPT;
328 *cs++ = MI_NOOP;
329
330 rq->tail = intel_ring_offset(rq, cs);
331 assert_ring_tail_valid(rq->ring, rq->tail);
e1a73a54
CW
332
333 return cs;
caa5915b 334}
caa5915b 335
f3987631 336static int
e61e0f51 337gen7_render_ring_cs_stall_wa(struct i915_request *rq)
f3987631 338{
73dec95e 339 u32 *cs;
f3987631 340
e61e0f51 341 cs = intel_ring_begin(rq, 4);
73dec95e
TU
342 if (IS_ERR(cs))
343 return PTR_ERR(cs);
f3987631 344
73dec95e
TU
345 *cs++ = GFX_OP_PIPE_CONTROL(4);
346 *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
347 *cs++ = 0;
348 *cs++ = 0;
e61e0f51 349 intel_ring_advance(rq, cs);
f3987631
PZ
350
351 return 0;
352}
353
4772eaeb 354static int
e61e0f51 355gen7_render_ring_flush(struct i915_request *rq, u32 mode)
4772eaeb 356{
db56f974 357 u32 scratch_addr =
46c5847e
LL
358 intel_gt_scratch_offset(rq->engine->gt,
359 INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
73dec95e 360 u32 *cs, flags = 0;
4772eaeb 361
f3987631
PZ
362 /*
363 * Ensure that any following seqno writes only happen when the render
364 * cache is indeed flushed.
365 *
366 * Workaround: 4th PIPE_CONTROL command (except the ones with only
367 * read-cache invalidate bits set) must have the CS_STALL bit set. We
368 * don't try to be clever and just set it unconditionally.
369 */
370 flags |= PIPE_CONTROL_CS_STALL;
371
4772eaeb
PZ
372 /* Just flush everything. Experiments have shown that reducing the
373 * number of bits based on the write domains has little performance
374 * impact.
375 */
7c9cf4e3 376 if (mode & EMIT_FLUSH) {
4772eaeb
PZ
377 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
378 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
965fd602 379 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
40a24488 380 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4772eaeb 381 }
7c9cf4e3 382 if (mode & EMIT_INVALIDATE) {
4772eaeb
PZ
383 flags |= PIPE_CONTROL_TLB_INVALIDATE;
384 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
385 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
386 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
387 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
388 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
148b83d0 389 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
4772eaeb
PZ
390 /*
391 * TLB invalidate requires a post-sync write.
392 */
393 flags |= PIPE_CONTROL_QW_WRITE;
b9e1faa7 394 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
f3987631 395
add284a3
CW
396 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
397
f3987631
PZ
398 /* Workaround: we must issue a pipe_control with CS-stall bit
399 * set before a pipe_control command that has the state cache
400 * invalidate bit set. */
e61e0f51 401 gen7_render_ring_cs_stall_wa(rq);
4772eaeb
PZ
402 }
403
e61e0f51 404 cs = intel_ring_begin(rq, 4);
73dec95e
TU
405 if (IS_ERR(cs))
406 return PTR_ERR(cs);
4772eaeb 407
73dec95e
TU
408 *cs++ = GFX_OP_PIPE_CONTROL(4);
409 *cs++ = flags;
410 *cs++ = scratch_addr;
411 *cs++ = 0;
e61e0f51 412 intel_ring_advance(rq, cs);
4772eaeb
PZ
413
414 return 0;
415}
416
e1a73a54 417static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
caa5915b
CW
418{
419 *cs++ = GFX_OP_PIPE_CONTROL(4);
420 *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
421 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
422 PIPE_CONTROL_DC_FLUSH_ENABLE |
423 PIPE_CONTROL_FLUSH_ENABLE |
424 PIPE_CONTROL_QW_WRITE |
425 PIPE_CONTROL_GLOBAL_GTT_IVB |
426 PIPE_CONTROL_CS_STALL);
5013eb8c
CW
427 *cs++ = rq->timeline->hwsp_offset;
428 *cs++ = rq->fence.seqno;
429
caa5915b
CW
430 *cs++ = MI_USER_INTERRUPT;
431 *cs++ = MI_NOOP;
432
433 rq->tail = intel_ring_offset(rq, cs);
434 assert_ring_tail_valid(rq->ring, rq->tail);
e1a73a54
CW
435
436 return cs;
caa5915b 437}
caa5915b 438
e1a73a54 439static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
caa5915b 440{
5013eb8c
CW
441 GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
442 GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
443
444 *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
445 *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
446 *cs++ = rq->fence.seqno;
447
caa5915b
CW
448 *cs++ = MI_USER_INTERRUPT;
449
450 rq->tail = intel_ring_offset(rq, cs);
451 assert_ring_tail_valid(rq->ring, rq->tail);
e1a73a54
CW
452
453 return cs;
caa5915b 454}
caa5915b 455
1212bd82 456#define GEN7_XCS_WA 32
e1a73a54 457static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
1212bd82
CW
458{
459 int i;
460
5013eb8c
CW
461 GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
462 GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
463
464 *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
465 *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
466 *cs++ = rq->fence.seqno;
467
1212bd82
CW
468 for (i = 0; i < GEN7_XCS_WA; i++) {
469 *cs++ = MI_STORE_DWORD_INDEX;
5013eb8c
CW
470 *cs++ = I915_GEM_HWS_SEQNO_ADDR;
471 *cs++ = rq->fence.seqno;
1212bd82
CW
472 }
473
474 *cs++ = MI_FLUSH_DW;
475 *cs++ = 0;
476 *cs++ = 0;
477
478 *cs++ = MI_USER_INTERRUPT;
519a0194 479 *cs++ = MI_NOOP;
1212bd82
CW
480
481 rq->tail = intel_ring_offset(rq, cs);
482 assert_ring_tail_valid(rq->ring, rq->tail);
e1a73a54
CW
483
484 return cs;
1212bd82 485}
1212bd82
CW
486#undef GEN7_XCS_WA
487
060f2322
CW
488static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
489{
490 /*
491 * Keep the render interrupt unmasked as this papers over
492 * lost interrupts following a reset.
493 */
494 if (engine->class == RENDER_CLASS) {
495 if (INTEL_GEN(engine->i915) >= 6)
496 mask &= ~BIT(0);
497 else
498 mask &= ~I915_USER_INTERRUPT;
499 }
500
501 intel_engine_set_hwsp_writemask(engine, mask);
502}
503
504static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
035dc1e0 505{
c033666a 506 struct drm_i915_private *dev_priv = engine->i915;
035dc1e0
DV
507 u32 addr;
508
d6acae36 509 addr = lower_32_bits(phys);
c033666a 510 if (INTEL_GEN(dev_priv) >= 4)
d6acae36
CW
511 addr |= (phys >> 28) & 0xf0;
512
035dc1e0
DV
513 I915_WRITE(HWS_PGA, addr);
514}
515
0ca88ba0 516static struct page *status_page(struct intel_engine_cs *engine)
060f2322 517{
0ca88ba0 518 struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
060f2322 519
0ca88ba0
CW
520 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
521 return sg_page(obj->mm.pages->sgl);
522}
523
524static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
525{
526 set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
060f2322
CW
527 set_hwstam(engine, ~0u);
528}
529
530static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
af75f269 531{
c033666a 532 struct drm_i915_private *dev_priv = engine->i915;
060f2322 533 i915_reg_t hwsp;
af75f269 534
060f2322
CW
535 /*
536 * The ring status page addresses are no longer next to the rest of
af75f269
DL
537 * the ring registers as of gen7.
538 */
cf819eff 539 if (IS_GEN(dev_priv, 7)) {
0bc40be8 540 switch (engine->id) {
a2d3d265
MT
541 /*
542 * No more rings exist on Gen7. Default case is only to shut up
543 * gcc switch check warning.
544 */
545 default:
546 GEM_BUG_ON(engine->id);
8a68d464
CW
547 /* fallthrough */
548 case RCS0:
060f2322 549 hwsp = RENDER_HWS_PGA_GEN7;
af75f269 550 break;
8a68d464 551 case BCS0:
060f2322 552 hwsp = BLT_HWS_PGA_GEN7;
af75f269 553 break;
8a68d464 554 case VCS0:
060f2322 555 hwsp = BSD_HWS_PGA_GEN7;
af75f269 556 break;
8a68d464 557 case VECS0:
060f2322 558 hwsp = VEBOX_HWS_PGA_GEN7;
af75f269
DL
559 break;
560 }
cf819eff 561 } else if (IS_GEN(dev_priv, 6)) {
060f2322 562 hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
af75f269 563 } else {
060f2322 564 hwsp = RING_HWS_PGA(engine->mmio_base);
a4a71701 565 }
c5498089 566
060f2322
CW
567 I915_WRITE(hwsp, offset);
568 POSTING_READ(hwsp);
569}
af75f269 570
060f2322
CW
571static void flush_cs_tlb(struct intel_engine_cs *engine)
572{
573 struct drm_i915_private *dev_priv = engine->i915;
060f2322
CW
574
575 if (!IS_GEN_RANGE(dev_priv, 6, 7))
576 return;
577
578 /* ring should be idle before issuing a sync flush*/
baba6e57
DCS
579 WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
580
581 ENGINE_WRITE(engine, RING_INSTPM,
582 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
583 INSTPM_SYNC_FLUSH));
584 if (intel_wait_for_register(engine->uncore,
585 RING_INSTPM(engine->mmio_base),
586 INSTPM_SYNC_FLUSH, 0,
060f2322
CW
587 1000))
588 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
589 engine->name);
590}
af75f269 591
060f2322
CW
592static void ring_setup_status_page(struct intel_engine_cs *engine)
593{
0ca88ba0 594 set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
060f2322 595 set_hwstam(engine, ~0u);
af75f269 596
060f2322 597 flush_cs_tlb(engine);
af75f269
DL
598}
599
0bc40be8 600static bool stop_ring(struct intel_engine_cs *engine)
8187a2b7 601{
c033666a 602 struct drm_i915_private *dev_priv = engine->i915;
8187a2b7 603
21a2c58a 604 if (INTEL_GEN(dev_priv) > 2) {
baba6e57
DCS
605 ENGINE_WRITE(engine,
606 RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
607 if (intel_wait_for_register(engine->uncore,
3d808eb1
CW
608 RING_MI_MODE(engine->mmio_base),
609 MODE_IDLE,
610 MODE_IDLE,
611 1000)) {
0bc40be8
TU
612 DRM_ERROR("%s : timed out trying to stop ring\n",
613 engine->name);
baba6e57
DCS
614
615 /*
616 * Sometimes we observe that the idle flag is not
9bec9b13
CW
617 * set even though the ring is empty. So double
618 * check before giving up.
619 */
baba6e57
DCS
620 if (ENGINE_READ(engine, RING_HEAD) !=
621 ENGINE_READ(engine, RING_TAIL))
9bec9b13 622 return false;
9991ae78
CW
623 }
624 }
b7884eb4 625
baba6e57 626 ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));
11caf551 627
baba6e57
DCS
628 ENGINE_WRITE(engine, RING_HEAD, 0);
629 ENGINE_WRITE(engine, RING_TAIL, 0);
8187a2b7 630
11caf551 631 /* The ring must be empty before it is disabled */
baba6e57 632 ENGINE_WRITE(engine, RING_CTL, 0);
11caf551 633
baba6e57 634 return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
9991ae78 635}
8187a2b7 636
79ffac85 637static int xcs_resume(struct intel_engine_cs *engine)
9991ae78 638{
c033666a 639 struct drm_i915_private *dev_priv = engine->i915;
75d0a7f3 640 struct intel_ring *ring = engine->legacy.ring;
9991ae78
CW
641 int ret = 0;
642
79ffac85
CW
643 GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
644 engine->name, ring->head, ring->tail);
645
baba6e57 646 intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
9991ae78 647
31483107 648 /* WaClearRingBufHeadRegAtInit:ctg,elk */
0bc40be8 649 if (!stop_ring(engine)) {
9991ae78 650 /* G45 ring initialization often fails to reset head to zero */
8177e112
CW
651 DRM_DEBUG_DRIVER("%s head not reset to zero "
652 "ctl %08x head %08x tail %08x start %08x\n",
653 engine->name,
baba6e57
DCS
654 ENGINE_READ(engine, RING_CTL),
655 ENGINE_READ(engine, RING_HEAD),
656 ENGINE_READ(engine, RING_TAIL),
657 ENGINE_READ(engine, RING_START));
8187a2b7 658
0bc40be8 659 if (!stop_ring(engine)) {
6fd0d56e
CW
660 DRM_ERROR("failed to set %s head to zero "
661 "ctl %08x head %08x tail %08x start %08x\n",
0bc40be8 662 engine->name,
baba6e57
DCS
663 ENGINE_READ(engine, RING_CTL),
664 ENGINE_READ(engine, RING_HEAD),
665 ENGINE_READ(engine, RING_TAIL),
666 ENGINE_READ(engine, RING_START));
9991ae78
CW
667 ret = -EIO;
668 goto out;
6fd0d56e 669 }
8187a2b7
ZN
670 }
671
3177659a 672 if (HWS_NEEDS_PHYSICAL(dev_priv))
0bc40be8 673 ring_setup_phys_status_page(engine);
3177659a 674 else
060f2322 675 ring_setup_status_page(engine);
9991ae78 676
ad07dfcd 677 intel_engine_reset_breadcrumbs(engine);
821ed7df 678
ece4a17d 679 /* Enforce ordering by reading HEAD register back */
31483107 680 ENGINE_POSTING_READ(engine, RING_HEAD);
ece4a17d 681
31483107
CW
682 /*
683 * Initialize the ring. This must happen _after_ we've cleared the ring
0d8957c8
DV
684 * registers with the above sequence (the readback of the HEAD registers
685 * also enforces ordering), otherwise the hw might lose the new ring
31483107
CW
686 * register values.
687 */
baba6e57 688 ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));
95468892 689
41d37680
CW
690 /* Check that the ring offsets point within the ring! */
691 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
692 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
821ed7df 693 intel_ring_update_space(ring);
b7f21899
CW
694
695 /* First wake the ring up to an empty/idle ring */
baba6e57
DCS
696 ENGINE_WRITE(engine, RING_HEAD, ring->head);
697 ENGINE_WRITE(engine, RING_TAIL, ring->head);
698 ENGINE_POSTING_READ(engine, RING_TAIL);
95468892 699
baba6e57 700 ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);
8187a2b7 701
8187a2b7 702 /* If the head is still not zero, the ring is dead */
baba6e57 703 if (intel_wait_for_register(engine->uncore,
97a04e0d 704 RING_CTL(engine->mmio_base),
f42bb651
CW
705 RING_VALID, RING_VALID,
706 50)) {
e74cfed5 707 DRM_ERROR("%s initialization failed "
821ed7df 708 "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
0bc40be8 709 engine->name,
baba6e57
DCS
710 ENGINE_READ(engine, RING_CTL),
711 ENGINE_READ(engine, RING_CTL) & RING_VALID,
712 ENGINE_READ(engine, RING_HEAD), ring->head,
713 ENGINE_READ(engine, RING_TAIL), ring->tail,
714 ENGINE_READ(engine, RING_START),
bde13ebd 715 i915_ggtt_offset(ring->vma));
b7884eb4
DV
716 ret = -EIO;
717 goto out;
8187a2b7
ZN
718 }
719
7836cd02 720 if (INTEL_GEN(dev_priv) > 2)
baba6e57
DCS
721 ENGINE_WRITE(engine,
722 RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
7836cd02 723
b7f21899
CW
724 /* Now awake, let it get started */
725 if (ring->tail != ring->head) {
baba6e57
DCS
726 ENGINE_WRITE(engine, RING_TAIL, ring->tail);
727 ENGINE_POSTING_READ(engine, RING_TAIL);
b7f21899
CW
728 }
729
d6fee0de 730 /* Papering over lost _interrupts_ immediately following the restart */
52c0fdb2 731 intel_engine_queue_breadcrumbs(engine);
b7884eb4 732out:
baba6e57 733 intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
b7884eb4
DV
734
735 return ret;
8187a2b7
ZN
736}
737
eb8d0f5a 738static void reset_prepare(struct intel_engine_cs *engine)
821ed7df 739{
c30d5dc6
CW
740 struct intel_uncore *uncore = engine->uncore;
741 const u32 base = engine->mmio_base;
742
743 /*
744 * We stop engines, otherwise we might get failed reset and a
745 * dead gpu (on elk). Also as modern gpu as kbl can suffer
746 * from system hang if batchbuffer is progressing when
747 * the reset is issued, regardless of READY_TO_RESET ack.
748 * Thus assume it is best to stop engines on all gens
749 * where we have a gpu reset.
750 *
751 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
752 *
753 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
754 *
755 * FIXME: Wa for more modern gens needs to be validated
756 */
757 GEM_TRACE("%s\n", engine->name);
758
759 if (intel_engine_stop_cs(engine))
760 GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
761
762 intel_uncore_write_fw(uncore,
763 RING_HEAD(base),
764 intel_uncore_read_fw(uncore, RING_TAIL(base)));
765 intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
766
767 intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
768 intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
769 intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
770
771 /* The ring must be empty before it is disabled */
772 intel_uncore_write_fw(uncore, RING_CTL(base), 0);
773
774 /* Check acts as a post */
775 if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
776 GEM_TRACE("%s: ring head [%x] not parked\n",
777 engine->name,
778 intel_uncore_read_fw(uncore, RING_HEAD(base)));
5adfb772
CW
779}
780
eb8d0f5a 781static void reset_ring(struct intel_engine_cs *engine, bool stalled)
5adfb772 782{
eb8d0f5a
CW
783 struct i915_request *pos, *rq;
784 unsigned long flags;
b3ee09a4 785 u32 head;
5adfb772 786
eb8d0f5a 787 rq = NULL;
422d7df4
CW
788 spin_lock_irqsave(&engine->active.lock, flags);
789 list_for_each_entry(pos, &engine->active.requests, sched.link) {
5013eb8c 790 if (!i915_request_completed(pos)) {
eb8d0f5a
CW
791 rq = pos;
792 break;
793 }
b3ee09a4 794 }
67e64564
CW
795
796 /*
eb8d0f5a 797 * The guilty request will get skipped on a hung engine.
c0dcb203 798 *
eb8d0f5a
CW
799 * Users of client default contexts do not rely on logical
800 * state preserved between batches so it is safe to execute
801 * queued requests following the hang. Non default contexts
802 * rely on preserved state, so skipping a batch loses the
803 * evolution of the state and it needs to be considered corrupted.
804 * Executing more queued batches on top of corrupted state is
805 * risky. But we take the risk by trying to advance through
806 * the queued requests in order to make the client behaviour
807 * more predictable around resets, by not throwing away random
808 * amount of batches it has prepared for execution. Sophisticated
809 * clients can use gem_reset_stats_ioctl and dma fence status
810 * (exported via sync_file info ioctl on explicit fences) to observe
811 * when it loses the context state and should rebuild accordingly.
c0dcb203 812 *
eb8d0f5a
CW
813 * The context ban, and ultimately the client ban, mechanism are safety
814 * valves if client submission ends up resulting in nothing more than
815 * subsequent hangs.
c0dcb203 816 */
eb8d0f5a 817
b3ee09a4 818 if (rq) {
eb8d0f5a
CW
819 /*
820 * Try to restore the logical GPU state to match the
821 * continuation of the request queue. If we skip the
822 * context/PD restore, then the next request may try to execute
823 * assuming that its context is valid and loaded on the GPU and
824 * so may try to access invalid memory, prompting repeated GPU
825 * hangs.
826 *
827 * If the request was guilty, we still restore the logical
828 * state in case the next request requires it (e.g. the
829 * aliasing ppgtt), but skip over the hung batch.
830 *
831 * If the request was innocent, we try to replay the request
832 * with the restored context.
833 */
cb823ed9 834 __i915_request_reset(rq, stalled);
eb8d0f5a 835
75d0a7f3 836 GEM_BUG_ON(rq->ring != engine->legacy.ring);
eb8d0f5a
CW
837 head = rq->head;
838 } else {
75d0a7f3 839 head = engine->legacy.ring->tail;
c0dcb203 840 }
75d0a7f3 841 engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
eb8d0f5a 842
422d7df4 843 spin_unlock_irqrestore(&engine->active.lock, flags);
821ed7df
CW
844}
845
5adfb772
CW
846static void reset_finish(struct intel_engine_cs *engine)
847{
848}
849
79ffac85 850static int rcs_resume(struct intel_engine_cs *engine)
8187a2b7 851{
c033666a 852 struct drm_i915_private *dev_priv = engine->i915;
a69ffdbf 853
9ce9bdb0
CW
854 /*
855 * Disable CONSTANT_BUFFER before it is loaded from the context
856 * image. For as it is loaded, it is executed and the stored
857 * address may no longer be valid, leading to a GPU hang.
858 *
859 * This imposes the requirement that userspace reload their
860 * CONSTANT_BUFFER on every batch, fortunately a requirement
861 * they are already accustomed to from before contexts were
862 * enabled.
863 */
864 if (IS_GEN(dev_priv, 4))
865 I915_WRITE(ECOSKPD,
866 _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
867
61a563a2 868 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
00690008 869 if (IS_GEN_RANGE(dev_priv, 4, 6))
6b26c86d 870 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1c8c38c5
CW
871
872 /* We need to disable the AsyncFlip performance optimisations in order
873 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
874 * programmed to '1' on all products.
8693a824 875 *
2441f877 876 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1c8c38c5 877 */
00690008 878 if (IS_GEN_RANGE(dev_priv, 6, 7))
1c8c38c5
CW
879 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
880
f05bb0c7 881 /* Required for the hardware to program scanline values for waiting */
01fa0302 882 /* WaEnableFlushTlbInvalidationMode:snb */
cf819eff 883 if (IS_GEN(dev_priv, 6))
f05bb0c7 884 I915_WRITE(GFX_MODE,
aa83e30d 885 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
f05bb0c7 886
01fa0302 887 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
cf819eff 888 if (IS_GEN(dev_priv, 7))
1c8c38c5 889 I915_WRITE(GFX_MODE_GEN7,
01fa0302 890 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1c8c38c5 891 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
78501eac 892
cf819eff 893 if (IS_GEN(dev_priv, 6)) {
3a69ddd6
KG
894 /* From the Sandybridge PRM, volume 1 part 3, page 24:
895 * "If this bit is set, STCunit will have LRA as replacement
896 * policy. [...] This bit must be reset. LRA replacement
897 * policy is not supported."
898 */
899 I915_WRITE(CACHE_MODE_0,
5e13a0c5 900 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
84f9f938
BW
901 }
902
00690008 903 if (IS_GEN_RANGE(dev_priv, 6, 7))
6b26c86d 904 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
84f9f938 905
79ffac85 906 return xcs_resume(engine);
8187a2b7
ZN
907}
908
27a5f61b
CW
909static void cancel_requests(struct intel_engine_cs *engine)
910{
e61e0f51 911 struct i915_request *request;
27a5f61b
CW
912 unsigned long flags;
913
422d7df4 914 spin_lock_irqsave(&engine->active.lock, flags);
27a5f61b
CW
915
916 /* Mark all submitted requests as skipped. */
422d7df4 917 list_for_each_entry(request, &engine->active.requests, sched.link) {
5013eb8c
CW
918 if (!i915_request_signaled(request))
919 dma_fence_set_error(&request->fence, -EIO);
3800960a 920
5013eb8c 921 i915_request_mark_complete(request);
27a5f61b 922 }
3800960a 923
27a5f61b
CW
924 /* Remaining _unready_ requests will be nop'ed when submitted */
925
422d7df4 926 spin_unlock_irqrestore(&engine->active.lock, flags);
27a5f61b
CW
927}
928
e61e0f51 929static void i9xx_submit_request(struct i915_request *request)
b0411e7d 930{
e61e0f51 931 i915_request_submit(request);
d55ac5bf 932
baba6e57
DCS
933 ENGINE_WRITE(request->engine, RING_TAIL,
934 intel_ring_set_tail(request->ring, request->tail));
b0411e7d
CW
935}
936
e1a73a54 937static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
1ec14ad3 938{
5013eb8c
CW
939 GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
940 GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
941
caa5915b
CW
942 *cs++ = MI_FLUSH;
943
5013eb8c
CW
944 *cs++ = MI_STORE_DWORD_INDEX;
945 *cs++ = I915_GEM_HWS_SEQNO_ADDR;
946 *cs++ = rq->fence.seqno;
947
73dec95e 948 *cs++ = MI_USER_INTERRUPT;
519a0194 949 *cs++ = MI_NOOP;
1ec14ad3 950
e61e0f51
CW
951 rq->tail = intel_ring_offset(rq, cs);
952 assert_ring_tail_valid(rq->ring, rq->tail);
e1a73a54
CW
953
954 return cs;
1ec14ad3 955}
98f29e8d 956
835051d3 957#define GEN5_WA_STORES 8 /* must be at least 1! */
e1a73a54 958static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
c6df541c 959{
835051d3
CW
960 int i;
961
5013eb8c
CW
962 GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
963 GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
964
835051d3
CW
965 *cs++ = MI_FLUSH;
966
967 BUILD_BUG_ON(GEN5_WA_STORES < 1);
968 for (i = 0; i < GEN5_WA_STORES; i++) {
969 *cs++ = MI_STORE_DWORD_INDEX;
b300fde8
CW
970 *cs++ = I915_GEM_HWS_SEQNO_ADDR;
971 *cs++ = rq->fence.seqno;
835051d3
CW
972 }
973
974 *cs++ = MI_USER_INTERRUPT;
975
976 rq->tail = intel_ring_offset(rq, cs);
977 assert_ring_tail_valid(rq->ring, rq->tail);
e1a73a54
CW
978
979 return cs;
c6df541c 980}
835051d3 981#undef GEN5_WA_STORES
c6df541c 982
31bb59cc
CW
983static void
984gen5_irq_enable(struct intel_engine_cs *engine)
e48d8634 985{
31bb59cc 986 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
e48d8634
DV
987}
988
989static void
31bb59cc 990gen5_irq_disable(struct intel_engine_cs *engine)
e48d8634 991{
31bb59cc 992 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
e48d8634
DV
993}
994
31bb59cc
CW
995static void
996i9xx_irq_enable(struct intel_engine_cs *engine)
62fdfeaf 997{
baba6e57 998 engine->i915->irq_mask &= ~engine->irq_enable_mask;
9d9523d8
PZ
999 intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
1000 intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
62fdfeaf
EA
1001}
1002
8187a2b7 1003static void
31bb59cc 1004i9xx_irq_disable(struct intel_engine_cs *engine)
62fdfeaf 1005{
baba6e57 1006 engine->i915->irq_mask |= engine->irq_enable_mask;
9d9523d8 1007 intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
62fdfeaf
EA
1008}
1009
31bb59cc
CW
1010static void
1011i8xx_irq_enable(struct intel_engine_cs *engine)
c2798b19 1012{
e44d62d1 1013 struct drm_i915_private *i915 = engine->i915;
c2798b19 1014
e44d62d1
TU
1015 i915->irq_mask &= ~engine->irq_enable_mask;
1016 intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
1017 ENGINE_POSTING_READ16(engine, RING_IMR);
c2798b19
CW
1018}
1019
1020static void
31bb59cc 1021i8xx_irq_disable(struct intel_engine_cs *engine)
c2798b19 1022{
4f5fd91f 1023 struct drm_i915_private *i915 = engine->i915;
c2798b19 1024
4f5fd91f
TU
1025 i915->irq_mask |= engine->irq_enable_mask;
1026 intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
c2798b19
CW
1027}
1028
b72f3acb 1029static int
e61e0f51 1030bsd_ring_flush(struct i915_request *rq, u32 mode)
d1b851fc 1031{
73dec95e 1032 u32 *cs;
b72f3acb 1033
e61e0f51 1034 cs = intel_ring_begin(rq, 2);
73dec95e
TU
1035 if (IS_ERR(cs))
1036 return PTR_ERR(cs);
b72f3acb 1037
73dec95e
TU
1038 *cs++ = MI_FLUSH;
1039 *cs++ = MI_NOOP;
e61e0f51 1040 intel_ring_advance(rq, cs);
b72f3acb 1041 return 0;
d1b851fc
ZN
1042}
1043
31bb59cc
CW
1044static void
1045gen6_irq_enable(struct intel_engine_cs *engine)
0f46832f 1046{
baba6e57
DCS
1047 ENGINE_WRITE(engine, RING_IMR,
1048 ~(engine->irq_enable_mask | engine->irq_keep_mask));
476af9c2
CW
1049
1050 /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
baba6e57 1051 ENGINE_POSTING_READ(engine, RING_IMR);
476af9c2 1052
baba6e57 1053 gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
0f46832f
CW
1054}
1055
1056static void
31bb59cc 1057gen6_irq_disable(struct intel_engine_cs *engine)
0f46832f 1058{
baba6e57
DCS
1059 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
1060 gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
d1b851fc
ZN
1061}
1062
31bb59cc
CW
1063static void
1064hsw_vebox_irq_enable(struct intel_engine_cs *engine)
a19d2933 1065{
baba6e57 1066 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);
e4fc69f2
CW
1067
1068 /* Flush/delay to ensure the RING_IMR is active before the GT IMR */
baba6e57 1069 ENGINE_POSTING_READ(engine, RING_IMR);
e4fc69f2 1070
d762043f 1071 gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
a19d2933
BW
1072}
1073
1074static void
31bb59cc 1075hsw_vebox_irq_disable(struct intel_engine_cs *engine)
a19d2933 1076{
baba6e57 1077 ENGINE_WRITE(engine, RING_IMR, ~0);
d762043f 1078 gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
a19d2933
BW
1079}
1080
d1b851fc 1081static int
e61e0f51 1082i965_emit_bb_start(struct i915_request *rq,
803688ba
CW
1083 u64 offset, u32 length,
1084 unsigned int dispatch_flags)
d1b851fc 1085{
73dec95e 1086 u32 *cs;
78501eac 1087
e61e0f51 1088 cs = intel_ring_begin(rq, 2);
73dec95e
TU
1089 if (IS_ERR(cs))
1090 return PTR_ERR(cs);
e1f99ce6 1091
73dec95e
TU
1092 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
1093 I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
1094 *cs++ = offset;
e61e0f51 1095 intel_ring_advance(rq, cs);
78501eac 1096
d1b851fc
ZN
1097 return 0;
1098}
1099
b45305fc 1100/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
51797499 1101#define I830_BATCH_LIMIT SZ_256K
c4d69da1
CW
1102#define I830_TLB_ENTRIES (2)
1103#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
8187a2b7 1104static int
e61e0f51 1105i830_emit_bb_start(struct i915_request *rq,
803688ba
CW
1106 u64 offset, u32 len,
1107 unsigned int dispatch_flags)
62fdfeaf 1108{
46c5847e
LL
1109 u32 *cs, cs_offset =
1110 intel_gt_scratch_offset(rq->engine->gt,
1111 INTEL_GT_SCRATCH_FIELD_DEFAULT);
51797499 1112
db56f974 1113 GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
62fdfeaf 1114
e61e0f51 1115 cs = intel_ring_begin(rq, 6);
73dec95e
TU
1116 if (IS_ERR(cs))
1117 return PTR_ERR(cs);
62fdfeaf 1118
c4d69da1 1119 /* Evict the invalid PTE TLBs */
73dec95e
TU
1120 *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
1121 *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
1122 *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
1123 *cs++ = cs_offset;
1124 *cs++ = 0xdeadbeef;
1125 *cs++ = MI_NOOP;
e61e0f51 1126 intel_ring_advance(rq, cs);
b45305fc 1127
8e004efc 1128 if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
b45305fc
DV
1129 if (len > I830_BATCH_LIMIT)
1130 return -ENOSPC;
1131
e61e0f51 1132 cs = intel_ring_begin(rq, 6 + 2);
73dec95e
TU
1133 if (IS_ERR(cs))
1134 return PTR_ERR(cs);
c4d69da1
CW
1135
1136 /* Blit the batch (which has now all relocs applied) to the
1137 * stable batch scratch bo area (so that the CS never
1138 * stumbles over its tlb invalidation bug) ...
1139 */
05f219d7 1140 *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
73dec95e
TU
1141 *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
1142 *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
1143 *cs++ = cs_offset;
1144 *cs++ = 4096;
1145 *cs++ = offset;
1146
1147 *cs++ = MI_FLUSH;
1148 *cs++ = MI_NOOP;
e61e0f51 1149 intel_ring_advance(rq, cs);
b45305fc
DV
1150
1151 /* ... and execute it. */
c4d69da1 1152 offset = cs_offset;
b45305fc 1153 }
e1f99ce6 1154
e61e0f51 1155 cs = intel_ring_begin(rq, 2);
73dec95e
TU
1156 if (IS_ERR(cs))
1157 return PTR_ERR(cs);
c4d69da1 1158
73dec95e
TU
1159 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1160 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
1161 MI_BATCH_NON_SECURE);
e61e0f51 1162 intel_ring_advance(rq, cs);
c4d69da1 1163
fb3256da
DV
1164 return 0;
1165}
1166
1167static int
e61e0f51 1168i915_emit_bb_start(struct i915_request *rq,
803688ba
CW
1169 u64 offset, u32 len,
1170 unsigned int dispatch_flags)
fb3256da 1171{
73dec95e 1172 u32 *cs;
fb3256da 1173
e61e0f51 1174 cs = intel_ring_begin(rq, 2);
73dec95e
TU
1175 if (IS_ERR(cs))
1176 return PTR_ERR(cs);
fb3256da 1177
73dec95e
TU
1178 *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1179 *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
1180 MI_BATCH_NON_SECURE);
e61e0f51 1181 intel_ring_advance(rq, cs);
62fdfeaf 1182
62fdfeaf
EA
1183 return 0;
1184}
1185
5503cb0d 1186int intel_ring_pin(struct intel_ring *ring)
7ba717cf 1187{
57e88531 1188 struct i915_vma *vma = ring->vma;
d822bb18 1189 unsigned int flags;
8305216f 1190 void *addr;
7ba717cf
TD
1191 int ret;
1192
09c5ab38
CW
1193 if (atomic_fetch_inc(&ring->pin_count))
1194 return 0;
7ba717cf 1195
d3ef1af6 1196 flags = PIN_GLOBAL;
496bcce3
JB
1197
1198 /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
1199 flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
1200
9d80841e 1201 if (vma->obj->stolen)
57e88531 1202 flags |= PIN_MAPPABLE;
2edd4e69
CW
1203 else
1204 flags |= PIN_HIGH;
def0c5f6 1205
7a859c65 1206 ret = i915_vma_pin(vma, 0, 0, flags);
57e88531 1207 if (unlikely(ret))
75d0a7f3 1208 goto err_unpin;
def0c5f6 1209
9d80841e 1210 if (i915_vma_is_map_and_fenceable(vma))
57e88531
CW
1211 addr = (void __force *)i915_vma_pin_iomap(vma);
1212 else
09c5ab38
CW
1213 addr = i915_gem_object_pin_map(vma->obj,
1214 i915_coherent_map_type(vma->vm->i915));
5013eb8c
CW
1215 if (IS_ERR(addr)) {
1216 ret = PTR_ERR(addr);
09c5ab38 1217 goto err_ring;
5013eb8c 1218 }
7ba717cf 1219
1aff1903 1220 i915_vma_make_unshrinkable(vma);
3d574a6b 1221
09c5ab38 1222 GEM_BUG_ON(ring->vaddr);
32c04f16 1223 ring->vaddr = addr;
09c5ab38 1224
7ba717cf 1225 return 0;
d2cad535 1226
09c5ab38 1227err_ring:
57e88531 1228 i915_vma_unpin(vma);
09c5ab38
CW
1229err_unpin:
1230 atomic_dec(&ring->pin_count);
5013eb8c 1231 return ret;
7ba717cf
TD
1232}
1233
e6ba9992
CW
1234void intel_ring_reset(struct intel_ring *ring, u32 tail)
1235{
41d37680
CW
1236 GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
1237
e6ba9992
CW
1238 ring->tail = tail;
1239 ring->head = tail;
1240 ring->emit = tail;
1241 intel_ring_update_space(ring);
1242}
1243
aad29fbb
CW
1244void intel_ring_unpin(struct intel_ring *ring)
1245{
1aff1903
CW
1246 struct i915_vma *vma = ring->vma;
1247
09c5ab38
CW
1248 if (!atomic_dec_and_test(&ring->pin_count))
1249 return;
aad29fbb 1250
e6ba9992
CW
1251 /* Discard any unused bytes beyond that submitted to hw. */
1252 intel_ring_reset(ring, ring->tail);
1253
1aff1903
CW
1254 i915_vma_unset_ggtt_write(vma);
1255 if (i915_vma_is_map_and_fenceable(vma))
1256 i915_vma_unpin_iomap(vma);
57e88531 1257 else
1aff1903 1258 i915_gem_object_unpin_map(vma->obj);
09c5ab38
CW
1259
1260 GEM_BUG_ON(!ring->vaddr);
aad29fbb
CW
1261 ring->vaddr = NULL;
1262
1aff1903
CW
1263 i915_vma_unpin(vma);
1264 i915_vma_make_purgeable(vma);
2919d291
OM
1265}
1266
db45fb5b 1267static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
62fdfeaf 1268{
db45fb5b
TU
1269 struct i915_address_space *vm = &ggtt->vm;
1270 struct drm_i915_private *i915 = vm->i915;
05394f39 1271 struct drm_i915_gem_object *obj;
57e88531 1272 struct i915_vma *vma;
62fdfeaf 1273
db45fb5b 1274 obj = i915_gem_object_create_stolen(i915, size);
c58b735f 1275 if (!obj)
db45fb5b 1276 obj = i915_gem_object_create_internal(i915, size);
57e88531
CW
1277 if (IS_ERR(obj))
1278 return ERR_CAST(obj);
8187a2b7 1279
250f8c81
JB
1280 /*
1281 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
1282 * if supported by the platform's GGTT.
1283 */
1284 if (vm->has_read_only)
3e977ac6 1285 i915_gem_object_set_readonly(obj);
24f3a8cf 1286
250f8c81 1287 vma = i915_vma_instance(obj, vm, NULL);
57e88531
CW
1288 if (IS_ERR(vma))
1289 goto err;
1290
1291 return vma;
e3efda49 1292
57e88531
CW
1293err:
1294 i915_gem_object_put(obj);
1295 return vma;
e3efda49
CW
1296}
1297
7e37f889 1298struct intel_ring *
75d0a7f3 1299intel_engine_create_ring(struct intel_engine_cs *engine, int size)
01101fa7 1300{
db45fb5b 1301 struct drm_i915_private *i915 = engine->i915;
7e37f889 1302 struct intel_ring *ring;
57e88531 1303 struct i915_vma *vma;
01101fa7 1304
8f942018 1305 GEM_BUG_ON(!is_power_of_2(size));
62ae14b1 1306 GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
8f942018 1307
01101fa7 1308 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
57e88531 1309 if (!ring)
01101fa7
CW
1310 return ERR_PTR(-ENOMEM);
1311
65baf0ef 1312 kref_init(&ring->ref);
675d9ad7
CW
1313 INIT_LIST_HEAD(&ring->request_list);
1314
01101fa7
CW
1315 ring->size = size;
1316 /* Workaround an erratum on the i830 which causes a hang if
1317 * the TAIL pointer points to within the last 2 cachelines
1318 * of the buffer.
1319 */
1320 ring->effective_size = size;
db45fb5b 1321 if (IS_I830(i915) || IS_I845G(i915))
01101fa7
CW
1322 ring->effective_size -= 2 * CACHELINE_BYTES;
1323
01101fa7
CW
1324 intel_ring_update_space(ring);
1325
db45fb5b 1326 vma = create_ring_vma(engine->gt->ggtt, size);
57e88531 1327 if (IS_ERR(vma)) {
01101fa7 1328 kfree(ring);
57e88531 1329 return ERR_CAST(vma);
01101fa7 1330 }
57e88531 1331 ring->vma = vma;
01101fa7
CW
1332
1333 return ring;
1334}
1335
65baf0ef 1336void intel_ring_free(struct kref *ref)
01101fa7 1337{
65baf0ef 1338 struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
f8a7fde4
CW
1339
1340 i915_vma_close(ring->vma);
c017cf6b 1341 i915_vma_put(ring->vma);
f8a7fde4 1342
01101fa7
CW
1343 kfree(ring);
1344}
1345
c4d52feb
CW
1346static void __ring_context_fini(struct intel_context *ce)
1347{
c4d52feb
CW
1348 i915_gem_object_put(ce->state->obj);
1349}
1350
4c5896dc 1351static void ring_context_destroy(struct kref *ref)
1fc44d9b 1352{
4c5896dc
CW
1353 struct intel_context *ce = container_of(ref, typeof(*ce), ref);
1354
08819549 1355 GEM_BUG_ON(intel_context_is_pinned(ce));
1fc44d9b 1356
c4d52feb
CW
1357 if (ce->state)
1358 __ring_context_fini(ce);
efe79d48 1359
df8cf31e 1360 intel_context_fini(ce);
c4d52feb 1361 intel_context_free(ce);
1fc44d9b
CW
1362}
1363
c082afac
CW
1364static struct i915_address_space *vm_alias(struct intel_context *ce)
1365{
1366 struct i915_address_space *vm;
1367
f5d974f9
CW
1368 vm = ce->vm;
1369 if (i915_is_ggtt(vm))
1370 vm = &i915_vm_to_ggtt(vm)->alias->vm;
c082afac
CW
1371
1372 return vm;
1373}
1374
1375static int __context_pin_ppgtt(struct intel_context *ce)
a2bbf714 1376{
e568ac38 1377 struct i915_address_space *vm;
a2bbf714
CW
1378 int err = 0;
1379
c082afac 1380 vm = vm_alias(ce);
e568ac38
CW
1381 if (vm)
1382 err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
a2bbf714
CW
1383
1384 return err;
1385}
1386
c082afac 1387static void __context_unpin_ppgtt(struct intel_context *ce)
a2bbf714 1388{
e568ac38 1389 struct i915_address_space *vm;
a2bbf714 1390
c082afac 1391 vm = vm_alias(ce);
e568ac38
CW
1392 if (vm)
1393 gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
a2bbf714
CW
1394}
1395
4dc84b77 1396static void ring_context_unpin(struct intel_context *ce)
d901e8e6 1397{
c082afac 1398 __context_unpin_ppgtt(ce);
e8a9c58f
CW
1399}
1400
3204c343
CW
1401static struct i915_vma *
1402alloc_context_vma(struct intel_engine_cs *engine)
1403{
1404 struct drm_i915_private *i915 = engine->i915;
1405 struct drm_i915_gem_object *obj;
1406 struct i915_vma *vma;
d2b4b979 1407 int err;
3204c343 1408
8475355f 1409 obj = i915_gem_object_create_shmem(i915, engine->context_size);
3204c343
CW
1410 if (IS_ERR(obj))
1411 return ERR_CAST(obj);
1412
a679f58d
CW
1413 /*
1414 * Try to make the context utilize L3 as well as LLC.
1415 *
1416 * On VLV we don't have L3 controls in the PTEs so we
1417 * shouldn't touch the cache level, especially as that
1418 * would make the object snooped which might have a
1419 * negative performance impact.
1420 *
1421 * Snooping is required on non-llc platforms in execlist
1422 * mode, but since all GGTT accesses use PAT entry 0 we
1423 * get snooping anyway regardless of cache_level.
1424 *
1425 * This is only applicable for Ivy Bridge devices since
1426 * later platforms don't have L3 control bits in the PTE.
1427 */
1428 if (IS_IVYBRIDGE(i915))
1429 i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
1430
d2b4b979
CW
1431 if (engine->default_state) {
1432 void *defaults, *vaddr;
1433
1434 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
1435 if (IS_ERR(vaddr)) {
1436 err = PTR_ERR(vaddr);
1437 goto err_obj;
1438 }
1439
1440 defaults = i915_gem_object_pin_map(engine->default_state,
1441 I915_MAP_WB);
1442 if (IS_ERR(defaults)) {
1443 err = PTR_ERR(defaults);
1444 goto err_map;
1445 }
1446
1447 memcpy(vaddr, defaults, engine->context_size);
d2b4b979 1448 i915_gem_object_unpin_map(engine->default_state);
d2b4b979 1449
a679f58d
CW
1450 i915_gem_object_flush_map(obj);
1451 i915_gem_object_unpin_map(obj);
3204c343
CW
1452 }
1453
ba4134a4 1454 vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
d2b4b979
CW
1455 if (IS_ERR(vma)) {
1456 err = PTR_ERR(vma);
1457 goto err_obj;
1458 }
3204c343
CW
1459
1460 return vma;
d2b4b979
CW
1461
1462err_map:
1463 i915_gem_object_unpin_map(obj);
1464err_obj:
1465 i915_gem_object_put(obj);
1466 return ERR_PTR(err);
3204c343
CW
1467}
1468
4c60b1aa 1469static int ring_context_alloc(struct intel_context *ce)
0cb26a8e 1470{
95f697eb 1471 struct intel_engine_cs *engine = ce->engine;
0cb26a8e 1472
7e3d9a59 1473 /* One ringbuffer to rule them all */
75d0a7f3
CW
1474 GEM_BUG_ON(!engine->legacy.ring);
1475 ce->ring = engine->legacy.ring;
1476 ce->timeline = intel_timeline_get(engine->legacy.timeline);
7e3d9a59 1477
4c60b1aa
CW
1478 GEM_BUG_ON(ce->state);
1479 if (engine->context_size) {
3204c343
CW
1480 struct i915_vma *vma;
1481
1482 vma = alloc_context_vma(engine);
95f697eb
CW
1483 if (IS_ERR(vma))
1484 return PTR_ERR(vma);
3204c343
CW
1485
1486 ce->state = vma;
1487 }
1488
4c60b1aa
CW
1489 return 0;
1490}
1491
1492static int ring_context_pin(struct intel_context *ce)
1493{
1494 int err;
1495
12c255b5 1496 err = intel_context_active_acquire(ce);
d901e8e6 1497 if (err)
95f697eb 1498 return err;
0cb26a8e 1499
c082afac 1500 err = __context_pin_ppgtt(ce);
a2bbf714 1501 if (err)
ce476c80 1502 goto err_active;
a2bbf714 1503
95f697eb 1504 return 0;
266a240b 1505
ce476c80
CW
1506err_active:
1507 intel_context_active_release(ce);
95f697eb 1508 return err;
0cb26a8e
CW
1509}
1510
9726920b
CW
1511static void ring_context_reset(struct intel_context *ce)
1512{
1513 intel_ring_reset(ce->ring, 0);
1514}
1515
4dc84b77 1516static const struct intel_context_ops ring_context_ops = {
4c60b1aa
CW
1517 .alloc = ring_context_alloc,
1518
95f697eb 1519 .pin = ring_context_pin,
4dc84b77 1520 .unpin = ring_context_unpin,
9726920b 1521
6eee33e8
CW
1522 .enter = intel_context_enter_engine,
1523 .exit = intel_context_exit_engine,
1524
9726920b 1525 .reset = ring_context_reset,
4dc84b77
CW
1526 .destroy = ring_context_destroy,
1527};
1528
ab53497b 1529static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
b3ee09a4
CW
1530{
1531 const struct intel_engine_cs * const engine = rq->engine;
1532 u32 *cs;
1533
1534 cs = intel_ring_begin(rq, 6);
1535 if (IS_ERR(cs))
1536 return PTR_ERR(cs);
1537
1538 *cs++ = MI_LOAD_REGISTER_IMM(1);
baba6e57 1539 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
b3ee09a4
CW
1540 *cs++ = PP_DIR_DCLV_2G;
1541
1542 *cs++ = MI_LOAD_REGISTER_IMM(1);
baba6e57 1543 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
57a7e305 1544 *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
b3ee09a4
CW
1545
1546 intel_ring_advance(rq, cs);
1547
1548 return 0;
1549}
1550
d9d117e4
CW
1551static int flush_pd_dir(struct i915_request *rq)
1552{
1553 const struct intel_engine_cs * const engine = rq->engine;
1554 u32 *cs;
1555
1556 cs = intel_ring_begin(rq, 4);
1557 if (IS_ERR(cs))
1558 return PTR_ERR(cs);
1559
1560 /* Stall until the page table load is complete */
1561 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
baba6e57 1562 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
46c5847e
LL
1563 *cs++ = intel_gt_scratch_offset(rq->engine->gt,
1564 INTEL_GT_SCRATCH_FIELD_DEFAULT);
d9d117e4
CW
1565 *cs++ = MI_NOOP;
1566
1567 intel_ring_advance(rq, cs);
1568 return 0;
1569}
1570
e61e0f51 1571static inline int mi_set_context(struct i915_request *rq, u32 flags)
8911a31c
CW
1572{
1573 struct drm_i915_private *i915 = rq->i915;
1574 struct intel_engine_cs *engine = rq->engine;
1575 enum intel_engine_id id;
8a68d464
CW
1576 const int num_engines =
1577 IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
1fc719d1 1578 bool force_restore = false;
8911a31c
CW
1579 int len;
1580 u32 *cs;
1581
1582 flags |= MI_MM_SPACE_GTT;
1583 if (IS_HASWELL(i915))
1584 /* These flags are for resource streamer on HSW+ */
1585 flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
1586 else
1215d28e 1587 /* We need to save the extended state for powersaving modes */
8911a31c
CW
1588 flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
1589
1590 len = 4;
cf819eff 1591 if (IS_GEN(i915, 7))
8a68d464 1592 len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
1215d28e
CW
1593 else if (IS_GEN(i915, 5))
1594 len += 2;
1fc719d1
CW
1595 if (flags & MI_FORCE_RESTORE) {
1596 GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
1597 flags &= ~MI_FORCE_RESTORE;
1598 force_restore = true;
1599 len += 2;
1600 }
8911a31c
CW
1601
1602 cs = intel_ring_begin(rq, len);
1603 if (IS_ERR(cs))
1604 return PTR_ERR(cs);
1605
1606 /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
cf819eff 1607 if (IS_GEN(i915, 7)) {
8911a31c 1608 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
8a68d464 1609 if (num_engines) {
8911a31c
CW
1610 struct intel_engine_cs *signaller;
1611
8a68d464 1612 *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
8911a31c
CW
1613 for_each_engine(signaller, i915, id) {
1614 if (signaller == engine)
1615 continue;
1616
1617 *cs++ = i915_mmio_reg_offset(
1618 RING_PSMI_CTL(signaller->mmio_base));
1619 *cs++ = _MASKED_BIT_ENABLE(
1620 GEN6_PSMI_SLEEP_MSG_DISABLE);
1621 }
1622 }
1215d28e
CW
1623 } else if (IS_GEN(i915, 5)) {
1624 /*
1625 * This w/a is only listed for pre-production ilk a/b steppings,
1626 * but is also mentioned for programming the powerctx. To be
1627 * safe, just apply the workaround; we do not use SyncFlush so
1628 * this should never take effect and so be a no-op!
1629 */
1630 *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
8911a31c
CW
1631 }
1632
1fc719d1
CW
1633 if (force_restore) {
1634 /*
1635 * The HW doesn't handle being told to restore the current
1636 * context very well. Quite often it likes goes to go off and
1637 * sulk, especially when it is meant to be reloading PP_DIR.
1638 * A very simple fix to force the reload is to simply switch
1639 * away from the current context and back again.
1640 *
1641 * Note that the kernel_context will contain random state
1642 * following the INHIBIT_RESTORE. We accept this since we
1643 * never use the kernel_context state; it is merely a
1644 * placeholder we use to flush other contexts.
1645 */
1646 *cs++ = MI_SET_CONTEXT;
9dbfea98 1647 *cs++ = i915_ggtt_offset(engine->kernel_context->state) |
1fc719d1
CW
1648 MI_MM_SPACE_GTT |
1649 MI_RESTORE_INHIBIT;
1650 }
1651
8911a31c
CW
1652 *cs++ = MI_NOOP;
1653 *cs++ = MI_SET_CONTEXT;
1fc44d9b 1654 *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
8911a31c
CW
1655 /*
1656 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
1657 * WaMiSetContext_Hang:snb,ivb,vlv
1658 */
1659 *cs++ = MI_NOOP;
1660
cf819eff 1661 if (IS_GEN(i915, 7)) {
8a68d464 1662 if (num_engines) {
8911a31c
CW
1663 struct intel_engine_cs *signaller;
1664 i915_reg_t last_reg = {}; /* keep gcc quiet */
1665
8a68d464 1666 *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
8911a31c
CW
1667 for_each_engine(signaller, i915, id) {
1668 if (signaller == engine)
1669 continue;
1670
1671 last_reg = RING_PSMI_CTL(signaller->mmio_base);
1672 *cs++ = i915_mmio_reg_offset(last_reg);
1673 *cs++ = _MASKED_BIT_DISABLE(
1674 GEN6_PSMI_SLEEP_MSG_DISABLE);
1675 }
1676
1677 /* Insert a delay before the next switch! */
1678 *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1679 *cs++ = i915_mmio_reg_offset(last_reg);
46c5847e
LL
1680 *cs++ = intel_gt_scratch_offset(rq->engine->gt,
1681 INTEL_GT_SCRATCH_FIELD_DEFAULT);
8911a31c
CW
1682 *cs++ = MI_NOOP;
1683 }
1684 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1215d28e
CW
1685 } else if (IS_GEN(i915, 5)) {
1686 *cs++ = MI_SUSPEND_FLUSH;
8911a31c
CW
1687 }
1688
1689 intel_ring_advance(rq, cs);
1690
1691 return 0;
1692}
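
/*
 * A sizing sketch (not a helper in this file) of the dword budget computed
 * at the top of mi_set_context(), handy for cross-checking len against the
 * emits above: the base sequence costs 4 dwords, gen7 adds the
 * MI_ARB_ON_OFF pair plus the two PSMI LRI blocks and the SRM delay, gen5
 * adds the MI_SUSPEND_FLUSH pair, and a forced restore adds one extra
 * MI_SET_CONTEXT pair. For example, gen7 with num_engines == 3 needs
 * 4 + 2 + (4 * 3 + 6) = 24 dwords.
 */
static int __maybe_unused
mi_set_context_len_sketch(int gen, int num_engines, bool force_restore)
{
	int len = 4; /* MI_NOOP, MI_SET_CONTEXT, context state, MI_NOOP */

	if (gen == 7)
		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
	else if (gen == 5)
		len += 2; /* MI_SUSPEND_FLUSH enable/disable */

	if (force_restore)
		len += 2; /* extra MI_SET_CONTEXT to the kernel context */

	return len;
}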
1693
c082afac 1694static int remap_l3_slice(struct i915_request *rq, int slice)
8911a31c
CW
1695{
1696 u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
1697 int i;
1698
1699 if (!remap_info)
1700 return 0;
1701
1702 cs = intel_ring_begin(rq, GEN7_L3LOG_SIZE/4 * 2 + 2);
1703 if (IS_ERR(cs))
1704 return PTR_ERR(cs);
1705
1706 /*
1707 * Note: We do not worry about the concurrent register cacheline hang
1708 * here because no other code should access these registers other than
1709 * at initialization time.
1710 */
1711 *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
1712 for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
1713 *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
1714 *cs++ = remap_info[i];
1715 }
1716 *cs++ = MI_NOOP;
1717 intel_ring_advance(rq, cs);
1718
1719 return 0;
1720}
1721
c082afac
CW
1722static int remap_l3(struct i915_request *rq)
1723{
1724 struct i915_gem_context *ctx = rq->gem_context;
1725 int i, err;
1726
1727 if (!ctx->remap_slice)
1728 return 0;
1729
1730 for (i = 0; i < MAX_L3_SLICES; i++) {
1731 if (!(ctx->remap_slice & BIT(i)))
1732 continue;
1733
1734 err = remap_l3_slice(rq, i);
1735 if (err)
1736 return err;
1737 }
1738
1739 ctx->remap_slice = 0;
1740 return 0;
1741}
1742
e61e0f51 1743static int switch_context(struct i915_request *rq)
8911a31c
CW
1744{
1745 struct intel_engine_cs *engine = rq->engine;
c082afac 1746 struct i915_address_space *vm = vm_alias(rq->hw_context);
b3ee09a4 1747 unsigned int unwind_mm = 0;
8911a31c 1748 u32 hw_flags = 0;
c082afac 1749 int ret;
8911a31c 1750
8911a31c
CW
1751 GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
1752
e568ac38 1753 if (vm) {
ab53497b 1754 struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
e2a13d1b
CW
1755 int loops;
1756
1757 /*
 1758		 * Baytrail takes a little more convincing that it really needs
 1759		 * to reload the PD between contexts. Simply waiting longer is
 1760		 * not enough: adding more stalls after the load_pd_dir (i.e.
1761 * adding a long loop around flush_pd_dir) is not as effective
1762 * as reloading the PD umpteen times. 32 is derived from
1763 * experimentation (gem_exec_parallel/fds) and has no good
1764 * explanation.
1765 */
1766 loops = 1;
8a68d464 1767 if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
e2a13d1b
CW
1768 loops = 32;
1769
1770 do {
1771 ret = load_pd_dir(rq, ppgtt);
1772 if (ret)
1773 goto err;
1774 } while (--loops);
8911a31c 1775
8a68d464
CW
1776 if (ppgtt->pd_dirty_engines & engine->mask) {
1777 unwind_mm = engine->mask;
1778 ppgtt->pd_dirty_engines &= ~unwind_mm;
b3ee09a4
CW
1779 hw_flags = MI_FORCE_RESTORE;
1780 }
8911a31c
CW
1781 }
1782
b3ee09a4 1783 if (rq->hw_context->state) {
8a68d464 1784 GEM_BUG_ON(engine->id != RCS0);
8911a31c
CW
1785
1786 /*
 1787		 * The kernel context is treated as pure scratch and is not
1788 * expected to retain any state (as we sacrifice it during
1789 * suspend and on resume it may be corrupted). This is ok,
1790 * as nothing actually executes using the kernel context; it
1791 * is purely used for flushing user contexts.
1792 */
c082afac 1793 if (i915_gem_context_is_kernel(rq->gem_context))
8911a31c
CW
1794 hw_flags = MI_RESTORE_INHIBIT;
1795
1796 ret = mi_set_context(rq, hw_flags);
1797 if (ret)
1798 goto err_mm;
8911a31c 1799 }
8911a31c 1800
e568ac38 1801 if (vm) {
06348d30
CW
1802 ret = engine->emit_flush(rq, EMIT_INVALIDATE);
1803 if (ret)
1804 goto err_mm;
1805
d9d117e4
CW
1806 ret = flush_pd_dir(rq);
1807 if (ret)
1808 goto err_mm;
06348d30
CW
1809
1810 /*
1811 * Not only do we need a full barrier (post-sync write) after
1812 * invalidating the TLBs, but we need to wait a little bit
1813 * longer. Whether this is merely delaying us, or the
1814 * subsequent flush is a key part of serialising with the
1815 * post-sync op, this extra pass appears vital before a
1816 * mm switch!
1817 */
1818 ret = engine->emit_flush(rq, EMIT_INVALIDATE);
1819 if (ret)
1820 goto err_mm;
1821
1822 ret = engine->emit_flush(rq, EMIT_FLUSH);
1823 if (ret)
1824 goto err_mm;
8911a31c
CW
1825 }
1826
c082afac
CW
1827 ret = remap_l3(rq);
1828 if (ret)
1829 goto err_mm;
8911a31c
CW
1830
1831 return 0;
1832
8911a31c 1833err_mm:
b3ee09a4 1834 if (unwind_mm)
e568ac38 1835 i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
8911a31c
CW
1836err:
1837 return ret;
1838}
1839
e61e0f51 1840static int ring_request_alloc(struct i915_request *request)
9d773091 1841{
fd138212 1842 int ret;
6310346e 1843
08819549 1844 GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
85474441 1845 GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
e8a9c58f 1846
5f5800a7
CW
1847 /*
1848 * Flush enough space to reduce the likelihood of waiting after
6310346e
CW
1849 * we start building the request - in which case we will just
1850 * have to repeat work.
1851 */
a0442461 1852 request->reserved_space += LEGACY_REQUEST_SIZE;
6310346e 1853
928f8f42
CW
1854 /* Unconditionally invalidate GPU caches and TLBs. */
1855 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
fd138212
CW
1856 if (ret)
1857 return ret;
6310346e 1858
928f8f42 1859 ret = switch_context(request);
3fef5cda
CW
1860 if (ret)
1861 return ret;
1862
a0442461 1863 request->reserved_space -= LEGACY_REQUEST_SIZE;
6310346e 1864 return 0;
9d773091
CW
1865}
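
/*
 * The reserved_space bump above pairs with intel_ring_begin() below: while
 * the cache invalidation and context switch are emitted, an extra
 * LEGACY_REQUEST_SIZE of headroom is treated as untouchable by
 * intel_ring_begin(), and is handed back once the request body can be
 * built. The underlying reservation for the closing breadcrumb remains
 * until the request is finalised, which is what keeps that final emission
 * from failing with -ENOSPC.
 */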
1866
fd138212 1867static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
987046ad 1868{
e61e0f51 1869 struct i915_request *target;
e95433c7
CW
1870 long timeout;
1871
95aebcb2 1872 if (intel_ring_update_space(ring) >= bytes)
987046ad
CW
1873 return 0;
1874
36620032 1875 GEM_BUG_ON(list_empty(&ring->request_list));
675d9ad7 1876 list_for_each_entry(target, &ring->request_list, ring_link) {
987046ad 1877 /* Would completion of this request free enough space? */
605d5b32
CW
1878 if (bytes <= __intel_ring_space(target->postfix,
1879 ring->emit, ring->size))
987046ad 1880 break;
79bbcc29 1881 }
29b1b415 1882
675d9ad7 1883 if (WARN_ON(&target->ring_link == &ring->request_list))
987046ad
CW
1884 return -ENOSPC;
1885
e61e0f51 1886 timeout = i915_request_wait(target,
2f530945 1887 I915_WAIT_INTERRUPTIBLE,
e95433c7
CW
1888 MAX_SCHEDULE_TIMEOUT);
1889 if (timeout < 0)
1890 return timeout;
7da844c5 1891
e61e0f51 1892 i915_request_retire_upto(target);
7da844c5
CW
1893
1894 intel_ring_update_space(ring);
1895 GEM_BUG_ON(ring->space < bytes);
1896 return 0;
29b1b415
JH
1897}
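
/*
 * The checks above lean on __intel_ring_space(): roughly, free space is
 * the gap from the emit (tail) pointer forward to the head, modulo the
 * power-of-two ring size, less a cacheline of guard so that head and tail
 * never share a cacheline. A minimal sketch of that arithmetic, under
 * those assumptions:
 */
static unsigned int __maybe_unused
ring_space_sketch(unsigned int head, unsigned int tail, unsigned int size)
{
	/* e.g. head == 0x40, tail == 0xfc0, size == SZ_4K -> 0x40 bytes */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}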
1898
e61e0f51 1899u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
cbcc80df 1900{
e61e0f51 1901 struct intel_ring *ring = rq->ring;
5e5655c3
CW
1902 const unsigned int remain_usable = ring->effective_size - ring->emit;
1903 const unsigned int bytes = num_dwords * sizeof(u32);
1904 unsigned int need_wrap = 0;
1905 unsigned int total_bytes;
73dec95e 1906 u32 *cs;
29b1b415 1907
6492ca79
CW
1908 /* Packets must be qword aligned. */
1909 GEM_BUG_ON(num_dwords & 1);
1910
e61e0f51 1911 total_bytes = bytes + rq->reserved_space;
5e5655c3 1912 GEM_BUG_ON(total_bytes > ring->effective_size);
29b1b415 1913
5e5655c3
CW
1914 if (unlikely(total_bytes > remain_usable)) {
1915 const int remain_actual = ring->size - ring->emit;
1916
1917 if (bytes > remain_usable) {
1918 /*
 1919			 * Not enough space for the basic request, so we need to
1920 * flush out the remainder and then wait for
1921 * base + reserved.
1922 */
1923 total_bytes += remain_actual;
1924 need_wrap = remain_actual | 1;
1925 } else {
1926 /*
1927 * The base request will fit but the reserved space
1928 * falls off the end. So we don't need an immediate
1929 * wrap and only need to effectively wait for the
1930 * reserved size from the start of ringbuffer.
1931 */
e61e0f51 1932 total_bytes = rq->reserved_space + remain_actual;
5e5655c3 1933 }
cbcc80df
MK
1934 }
1935
5e5655c3 1936 if (unlikely(total_bytes > ring->space)) {
fd138212
CW
1937 int ret;
1938
1939 /*
1940 * Space is reserved in the ringbuffer for finalising the
1941 * request, as that cannot be allowed to fail. During request
1942 * finalisation, reserved_space is set to 0 to stop the
1943 * overallocation and the assumption is that then we never need
1944 * to wait (which has the risk of failing with EINTR).
1945 *
e61e0f51 1946 * See also i915_request_alloc() and i915_request_add().
fd138212 1947 */
e61e0f51 1948 GEM_BUG_ON(!rq->reserved_space);
fd138212
CW
1949
1950 ret = wait_for_space(ring, total_bytes);
cbcc80df 1951 if (unlikely(ret))
73dec95e 1952 return ERR_PTR(ret);
cbcc80df
MK
1953 }
1954
987046ad 1955 if (unlikely(need_wrap)) {
5e5655c3
CW
1956 need_wrap &= ~1;
1957 GEM_BUG_ON(need_wrap > ring->space);
1958 GEM_BUG_ON(ring->emit + need_wrap > ring->size);
46b86332 1959 GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
78501eac 1960
987046ad 1961 /* Fill the tail with MI_NOOP */
46b86332 1962 memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
5e5655c3 1963 ring->space -= need_wrap;
46b86332 1964 ring->emit = 0;
987046ad 1965 }
304d695c 1966
e6ba9992 1967 GEM_BUG_ON(ring->emit > ring->size - bytes);
605d5b32 1968 GEM_BUG_ON(ring->space < bytes);
e6ba9992 1969 cs = ring->vaddr + ring->emit;
46b86332 1970 GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
e6ba9992 1971 ring->emit += bytes;
1dae2dfb 1972 ring->space -= bytes;
73dec95e
TU
1973
1974 return cs;
8187a2b7 1975}
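
/*
 * A worked example of the wrap bookkeeping above (illustrative numbers):
 * with size == 4096, effective_size == 4032, emit == 4000, bytes == 64 and
 * reserved_space == 200, remain_usable is 32 and remain_actual is 96.
 * Since bytes > remain_usable, total_bytes becomes 64 + 200 + 96 = 360 and
 * need_wrap is 96 | 1; bit 0 merely flags the pending wrap, which is safe
 * because qword alignment keeps remain_actual even. Once the space check
 * passes, those 96 tail bytes are filled with MI_NOOP and emit restarts
 * at 0.
 */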
78501eac 1976
753b1ad4 1977/* Align the ring tail to a cacheline boundary */
e61e0f51 1978int intel_ring_cacheline_align(struct i915_request *rq)
753b1ad4 1979{
1f177a13
CW
1980 int num_dwords;
1981 void *cs;
753b1ad4 1982
1f177a13 1983 num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
753b1ad4
VS
1984 if (num_dwords == 0)
1985 return 0;
1986
1f177a13
CW
1987 num_dwords = CACHELINE_DWORDS - num_dwords;
1988 GEM_BUG_ON(num_dwords & 1);
1989
e61e0f51 1990 cs = intel_ring_begin(rq, num_dwords);
73dec95e
TU
1991 if (IS_ERR(cs))
1992 return PTR_ERR(cs);
753b1ad4 1993
1f177a13 1994 memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
e61e0f51 1995 intel_ring_advance(rq, cs);
753b1ad4 1996
1f177a13 1997 GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
753b1ad4
VS
1998 return 0;
1999}
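
/*
 * For example (illustrative numbers): with CACHELINE_BYTES == 64, a tail
 * at ring->emit == 1000 sits 40 bytes (10 dwords) into a cacheline, so
 * num_dwords becomes 16 - 10 = 6 and three qwords of paired MI_NOOPs pad
 * emit up to 1024, the next cacheline boundary.
 */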
2000
e61e0f51 2001static void gen6_bsd_submit_request(struct i915_request *request)
881f47b6 2002{
baba6e57 2003 struct intel_uncore *uncore = request->engine->uncore;
881f47b6 2004
d2d551c0 2005 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
76f8421f 2006
881f47b6 2007 /* Every tail move must follow the sequence below */
12f55818
CW
2008
2009 /* Disable notification that the ring is IDLE. The GT
2010 * will then assume that it is busy and bring it out of rc6.
2011 */
d2d551c0
DCS
2012 intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
2013 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
12f55818
CW
2014
2015 /* Clear the context id. Here be magic! */
d2d551c0 2016 intel_uncore_write64_fw(uncore, GEN6_BSD_RNCID, 0x0);
0206e353 2017
12f55818 2018 /* Wait for the ring not to be idle, i.e. for it to wake up. */
d2d551c0 2019 if (__intel_wait_for_register_fw(uncore,
02b312d0
CW
2020 GEN6_BSD_SLEEP_PSMI_CONTROL,
2021 GEN6_BSD_SLEEP_INDICATOR,
2022 0,
2023 1000, 0, NULL))
12f55818 2024 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 2025
12f55818 2026 /* Now that the ring is fully powered up, update the tail */
b0411e7d 2027 i9xx_submit_request(request);
12f55818
CW
2028
2029 /* Let the ring send IDLE messages to the GT again,
2030 * and so let it sleep to conserve power when idle.
2031 */
d2d551c0
DCS
2032 intel_uncore_write_fw(uncore, GEN6_BSD_SLEEP_PSMI_CONTROL,
2033 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
76f8421f 2034
d2d551c0 2035 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
881f47b6
XH
2036}
2037
06348d30 2038static int mi_flush_dw(struct i915_request *rq, u32 flags)
881f47b6 2039{
73dec95e 2040 u32 cmd, *cs;
b72f3acb 2041
e61e0f51 2042 cs = intel_ring_begin(rq, 4);
73dec95e
TU
2043 if (IS_ERR(cs))
2044 return PTR_ERR(cs);
b72f3acb 2045
71a77e07 2046 cmd = MI_FLUSH_DW;
f0a1fb10 2047
70b73f9a
CW
2048 /*
2049 * We always require a command barrier so that subsequent
f0a1fb10
CW
2050 * commands, such as breadcrumb interrupts, are strictly ordered
2051 * wrt the contents of the write cache being flushed to memory
2052 * (and thus being coherent from the CPU).
2053 */
2054 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2055
9a289771 2056 /*
70b73f9a 2057 * Bspec vol 1c.3 - blitter engine command streamer:
9a289771
JB
2058 * "If ENABLED, all TLBs will be invalidated once the flush
2059 * operation is complete. This bit is only valid when the
2060 * Post-Sync Operation field is a value of 1h or 3h."
2061 */
70b73f9a 2062 cmd |= flags;
f0a1fb10 2063
73dec95e
TU
2064 *cs++ = cmd;
2065 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
79e6770c 2066 *cs++ = 0;
73dec95e 2067 *cs++ = MI_NOOP;
70b73f9a 2068
e61e0f51 2069 intel_ring_advance(rq, cs);
70b73f9a 2070
1c7a0623
BW
2071 return 0;
2072}
2073
70b73f9a
CW
2074static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
2075{
06348d30 2076 return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
70b73f9a
CW
2077}
2078
2079static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
2080{
2081 return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
2082}
2083
d7d4eedd 2084static int
e61e0f51 2085hsw_emit_bb_start(struct i915_request *rq,
803688ba
CW
2086 u64 offset, u32 len,
2087 unsigned int dispatch_flags)
d7d4eedd 2088{
73dec95e 2089 u32 *cs;
d7d4eedd 2090
e61e0f51 2091 cs = intel_ring_begin(rq, 2);
73dec95e
TU
2092 if (IS_ERR(cs))
2093 return PTR_ERR(cs);
d7d4eedd 2094
73dec95e 2095 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
08e3e21a 2096 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
d7d4eedd 2097 /* bit0-7 is the length on GEN6+ */
73dec95e 2098 *cs++ = offset;
e61e0f51 2099 intel_ring_advance(rq, cs);
d7d4eedd
CW
2100
2101 return 0;
2102}
2103
881f47b6 2104static int
e61e0f51 2105gen6_emit_bb_start(struct i915_request *rq,
803688ba
CW
2106 u64 offset, u32 len,
2107 unsigned int dispatch_flags)
881f47b6 2108{
73dec95e 2109 u32 *cs;
ab6f8e32 2110
e61e0f51 2111 cs = intel_ring_begin(rq, 2);
73dec95e
TU
2112 if (IS_ERR(cs))
2113 return PTR_ERR(cs);
e1f99ce6 2114
73dec95e
TU
2115 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
2116 0 : MI_BATCH_NON_SECURE_I965);
0206e353 2117 /* bit0-7 is the length on GEN6+ */
73dec95e 2118 *cs++ = offset;
e61e0f51 2119 intel_ring_advance(rq, cs);
ab6f8e32 2120
0206e353 2121 return 0;
881f47b6
XH
2122}
2123
549f7365
CW
2124/* Blitter support (SandyBridge+) */
2125
e61e0f51 2126static int gen6_ring_flush(struct i915_request *rq, u32 mode)
8d19215b 2127{
70b73f9a 2128 return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
8d19215b
ZN
2129}
2130
ff44ad51
CW
2131static void i9xx_set_default_submission(struct intel_engine_cs *engine)
2132{
2133 engine->submit_request = i9xx_submit_request;
27a5f61b 2134 engine->cancel_requests = cancel_requests;
aba5e278
CW
2135
2136 engine->park = NULL;
2137 engine->unpark = NULL;
ff44ad51
CW
2138}
2139
2140static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
2141{
aba5e278 2142 i9xx_set_default_submission(engine);
ff44ad51
CW
2143 engine->submit_request = gen6_bsd_submit_request;
2144}
2145
45b9c968
CW
2146static void ring_destroy(struct intel_engine_cs *engine)
2147{
2148 struct drm_i915_private *dev_priv = engine->i915;
2149
2150 WARN_ON(INTEL_GEN(dev_priv) > 2 &&
2151 (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
2152
09c5ab38
CW
2153 intel_engine_cleanup_common(engine);
2154
75d0a7f3
CW
2155 intel_ring_unpin(engine->legacy.ring);
2156 intel_ring_put(engine->legacy.ring);
2157
2158 intel_timeline_unpin(engine->legacy.timeline);
2159 intel_timeline_put(engine->legacy.timeline);
45b9c968 2160
45b9c968
CW
2161 kfree(engine);
2162}
2163
11334c6a
CW
2164static void setup_irq(struct intel_engine_cs *engine)
2165{
2166 struct drm_i915_private *i915 = engine->i915;
2167
2168 if (INTEL_GEN(i915) >= 6) {
2169 engine->irq_enable = gen6_irq_enable;
2170 engine->irq_disable = gen6_irq_disable;
2171 } else if (INTEL_GEN(i915) >= 5) {
2172 engine->irq_enable = gen5_irq_enable;
2173 engine->irq_disable = gen5_irq_disable;
2174 } else if (INTEL_GEN(i915) >= 3) {
2175 engine->irq_enable = i9xx_irq_enable;
2176 engine->irq_disable = i9xx_irq_disable;
2177 } else {
2178 engine->irq_enable = i8xx_irq_enable;
2179 engine->irq_disable = i8xx_irq_disable;
2180 }
2181}
2182
2183static void setup_common(struct intel_engine_cs *engine)
06a2fe22 2184{
11334c6a
CW
2185 struct drm_i915_private *i915 = engine->i915;
2186
79e6770c 2187 /* gen8+ are only supported with execlists */
11334c6a 2188 GEM_BUG_ON(INTEL_GEN(i915) >= 8);
79e6770c 2189
11334c6a 2190 setup_irq(engine);
618e4ca7 2191
45b9c968
CW
2192 engine->destroy = ring_destroy;
2193
79ffac85 2194 engine->resume = xcs_resume;
5adfb772
CW
2195 engine->reset.prepare = reset_prepare;
2196 engine->reset.reset = reset_ring;
2197 engine->reset.finish = reset_finish;
7445a2a4 2198
4dc84b77 2199 engine->cops = &ring_context_ops;
f73e7399
CW
2200 engine->request_alloc = ring_request_alloc;
2201
85474441
CW
2202 /*
2203 * Using a global execution timeline; the previous final breadcrumb is
 2204	 * equivalent to our next initial breadcrumb so we can elide
2205 * engine->emit_init_breadcrumb().
2206 */
2207 engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
11334c6a 2208 if (IS_GEN(i915, 5))
85474441 2209 engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
ff44ad51
CW
2210
2211 engine->set_default_submission = i9xx_set_default_submission;
6f7bef75 2212
11334c6a 2213 if (INTEL_GEN(i915) >= 6)
803688ba 2214 engine->emit_bb_start = gen6_emit_bb_start;
11334c6a 2215 else if (INTEL_GEN(i915) >= 4)
803688ba 2216 engine->emit_bb_start = i965_emit_bb_start;
11334c6a 2217 else if (IS_I830(i915) || IS_I845G(i915))
803688ba 2218 engine->emit_bb_start = i830_emit_bb_start;
6f7bef75 2219 else
803688ba 2220 engine->emit_bb_start = i915_emit_bb_start;
06a2fe22
TU
2221}
2222
11334c6a 2223static void setup_rcs(struct intel_engine_cs *engine)
5c1143bb 2224{
11334c6a 2225 struct drm_i915_private *i915 = engine->i915;
06a2fe22 2226
11334c6a 2227 if (HAS_L3_DPF(i915))
61ff75ac 2228 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
f8973c21 2229
fa6f071d
DCS
2230 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2231
11334c6a 2232 if (INTEL_GEN(i915) >= 7) {
c7fe7d25 2233 engine->emit_flush = gen7_render_ring_flush;
85474441 2234 engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
11334c6a 2235 } else if (IS_GEN(i915, 6)) {
caa5915b 2236 engine->emit_flush = gen6_render_ring_flush;
85474441 2237 engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
11334c6a 2238 } else if (IS_GEN(i915, 5)) {
c7fe7d25 2239 engine->emit_flush = gen4_render_ring_flush;
59465b5f 2240 } else {
11334c6a 2241 if (INTEL_GEN(i915) < 4)
c7fe7d25 2242 engine->emit_flush = gen2_render_ring_flush;
46f0f8d1 2243 else
c7fe7d25 2244 engine->emit_flush = gen4_render_ring_flush;
e2f80391 2245 engine->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2246 }
707d9cf9 2247
11334c6a 2248 if (IS_HASWELL(i915))
803688ba 2249 engine->emit_bb_start = hsw_emit_bb_start;
6f7bef75 2250
79ffac85 2251 engine->resume = rcs_resume;
5c1143bb
XH
2252}
2253
11334c6a 2254static void setup_vcs(struct intel_engine_cs *engine)
5c1143bb 2255{
11334c6a 2256 struct drm_i915_private *i915 = engine->i915;
06a2fe22 2257
11334c6a 2258 if (INTEL_GEN(i915) >= 6) {
0fd2c201 2259 /* gen6 bsd needs a special wa for tail updates */
11334c6a 2260 if (IS_GEN(i915, 6))
ff44ad51 2261 engine->set_default_submission = gen6_bsd_set_default_submission;
c7fe7d25 2262 engine->emit_flush = gen6_bsd_ring_flush;
79e6770c 2263 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
caa5915b 2264
11334c6a 2265 if (IS_GEN(i915, 6))
85474441 2266 engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
9fa4973e 2267 else
85474441 2268 engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
58fa3835 2269 } else {
c7fe7d25 2270 engine->emit_flush = bsd_ring_flush;
11334c6a 2271 if (IS_GEN(i915, 5))
e2f80391 2272 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
8d228911 2273 else
e2f80391 2274 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
58fa3835 2275 }
5c1143bb 2276}
549f7365 2277
11334c6a 2278static void setup_bcs(struct intel_engine_cs *engine)
549f7365 2279{
11334c6a 2280 struct drm_i915_private *i915 = engine->i915;
06a2fe22 2281
c7fe7d25 2282 engine->emit_flush = gen6_ring_flush;
79e6770c 2283 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
549f7365 2284
11334c6a 2285 if (IS_GEN(i915, 6))
85474441 2286 engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
9fa4973e 2287 else
85474441 2288 engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
549f7365 2289}
a7b9761d 2290
11334c6a 2291static void setup_vecs(struct intel_engine_cs *engine)
9a8a2213 2292{
11334c6a 2293 struct drm_i915_private *i915 = engine->i915;
caa5915b 2294
11334c6a 2295 GEM_BUG_ON(INTEL_GEN(i915) < 7);
06a2fe22 2296
c7fe7d25 2297 engine->emit_flush = gen6_ring_flush;
79e6770c
CW
2298 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2299 engine->irq_enable = hsw_vebox_irq_enable;
2300 engine->irq_disable = hsw_vebox_irq_disable;
9a8a2213 2301
85474441 2302 engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
11334c6a
CW
2303}
2304
2305int intel_ring_submission_setup(struct intel_engine_cs *engine)
2306{
2307 setup_common(engine);
2308
2309 switch (engine->class) {
2310 case RENDER_CLASS:
2311 setup_rcs(engine);
2312 break;
2313 case VIDEO_DECODE_CLASS:
2314 setup_vcs(engine);
2315 break;
2316 case COPY_ENGINE_CLASS:
2317 setup_bcs(engine);
2318 break;
2319 case VIDEO_ENHANCEMENT_CLASS:
2320 setup_vecs(engine);
2321 break;
2322 default:
2323 MISSING_CASE(engine->class);
2324 return -ENODEV;
2325 }
2326
2327 return 0;
2328}
2329
2330int intel_ring_submission_init(struct intel_engine_cs *engine)
2331{
f0c02c1b 2332 struct intel_timeline *timeline;
11334c6a
CW
2333 struct intel_ring *ring;
2334 int err;
2335
f0c02c1b 2336 timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
11334c6a
CW
2337 if (IS_ERR(timeline)) {
2338 err = PTR_ERR(timeline);
2339 goto err;
2340 }
2341 GEM_BUG_ON(timeline->has_initial_breadcrumb);
2342
75d0a7f3
CW
2343 err = intel_timeline_pin(timeline);
2344 if (err)
2345 goto err_timeline;
2346
2347 ring = intel_engine_create_ring(engine, SZ_16K);
11334c6a
CW
2348 if (IS_ERR(ring)) {
2349 err = PTR_ERR(ring);
75d0a7f3 2350 goto err_timeline_unpin;
11334c6a
CW
2351 }
2352
2353 err = intel_ring_pin(ring);
2354 if (err)
2355 goto err_ring;
caa5915b 2356
75d0a7f3
CW
2357 GEM_BUG_ON(engine->legacy.ring);
2358 engine->legacy.ring = ring;
2359 engine->legacy.timeline = timeline;
11334c6a
CW
2360
2361 err = intel_engine_init_common(engine);
2362 if (err)
75d0a7f3 2363 goto err_ring_unpin;
11334c6a 2364
75d0a7f3 2365 GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
11334c6a
CW
2366
2367 return 0;
2368
75d0a7f3 2369err_ring_unpin:
11334c6a
CW
2370 intel_ring_unpin(ring);
2371err_ring:
2372 intel_ring_put(ring);
75d0a7f3
CW
2373err_timeline_unpin:
2374 intel_timeline_unpin(timeline);
2375err_timeline:
2376 intel_timeline_put(timeline);
11334c6a
CW
2377err:
2378 intel_engine_cleanup_common(engine);
2379 return err;
9a8a2213 2380}
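
/*
 * A minimal bring-up sketch (a hypothetical caller for illustration; the
 * real call sites live elsewhere in the driver):
 * intel_ring_submission_setup() picks the per-class vfuncs, after which
 * intel_ring_submission_init() creates and pins the legacy timeline and
 * ring against the engine's status page.
 */
static int __maybe_unused
legacy_submission_bringup(struct intel_engine_cs *engine)
{
	int err;

	err = intel_ring_submission_setup(engine);
	if (err)
		return err;

	return intel_ring_submission_init(engine);
}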