/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */
#include <linux/log2.h>

#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"
/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200
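/*
 * This estimate is consumed by ring_request_alloc() below, which adds it
 * to request->reserved_space so that emitting the final breadcrumb can
 * never fail for lack of ring space.
 */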
static int __intel_ring_space(int head, int tail, int size)
{
	int space = head - tail;
	if (space <= 0)
		space += size;
	return space - I915_RING_FREE_SPACE;
}
void intel_ring_update_space(struct intel_ring *ring)
{
	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
	}

	ring->space = __intel_ring_space(ring->head & HEAD_ADDR,
					 ring->tail, ring->size);
}
static int
gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
static int
gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(req->i915) || IS_GEN5(req->i915))
			cmd |= MI_INVALIDATE_ISP;
	}

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs;

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
static int
gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(req);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}
static int
gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
{
	u32 *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}
static int
gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(req);
	}

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}
static int
gen8_emit_pipe_control(struct drm_i915_gem_request *req,
		       u32 flags, u32 scratch_addr)
{
	u32 *cs;

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}
static int
gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 flags = 0;
	int ret;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
		ret = gen8_emit_pipe_control(req,
					     PIPE_CONTROL_CS_STALL |
					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
					     0);
		if (ret)
			return ret;
	}

	return gen8_emit_pipe_control(req, flags, scratch_addr);
}
static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 addr;

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_GEN(dev_priv) >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);
}
static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	i915_reg_t mmio;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev_priv)) {
		switch (engine->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		/*
		 * VCS2 actually doesn't exist on Gen7. Only shut up
		 * gcc switch check warning
		 */
		case VCS2:
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev_priv)) {
		mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		/* XXX: gen8 returns to sanity */
		mmio = RING_HWS_PGA(engine->mmio_base);
	}

	I915_WRITE(mmio, engine->status_page.ggtt_offset);
	POSTING_READ(mmio);

	/*
	 * Flush the TLB for this page
	 *
	 * FIXME: These two bits have disappeared on gen8, so a question
	 * arises: do we still need this and if so how should we go about
	 * invalidating the TLB?
	 */
	if (IS_GEN(dev_priv, 6, 7)) {
		i915_reg_t reg = RING_INSTPM(engine->mmio_base);

		/* ring should be idle before issuing a sync flush */
		WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		if (intel_wait_for_register(dev_priv,
					    reg, INSTPM_SYNC_FLUSH, 0,
					    1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  engine->name);
	}
}
static bool stop_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) > 2) {
		I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
		if (intel_wait_for_register(dev_priv,
					    RING_MI_MODE(engine->mmio_base),
					    MODE_IDLE,
					    MODE_IDLE,
					    1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n",
				  engine->name);
			/* Sometimes we observe that the idle flag is not
			 * set even though the ring is empty. So double
			 * check before giving up.
			 */
			if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
				return false;
		}
	}

	I915_WRITE_CTL(engine, 0);
	I915_WRITE_HEAD(engine, 0);
	I915_WRITE_TAIL(engine, 0);

	if (INTEL_GEN(dev_priv) > 2) {
		(void)I915_READ_CTL(engine);
		I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
	}

	return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
}
static int init_ring_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring = engine->buffer;
	int ret = 0;

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      engine->name,
			      I915_READ_CTL(engine),
			      I915_READ_HEAD(engine),
			      I915_READ_TAIL(engine),
			      I915_READ_START(engine));

		if (!stop_ring(engine)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  engine->name,
				  I915_READ_CTL(engine),
				  I915_READ_HEAD(engine),
				  I915_READ_TAIL(engine),
				  I915_READ_START(engine));
			ret = -EIO;
			goto out;
		}
	}

	if (HWS_NEEDS_PHYSICAL(dev_priv))
		ring_setup_phys_status_page(engine);
	else
		intel_ring_setup_status_page(engine);

	intel_engine_reset_breadcrumbs(engine);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(engine);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(engine, i915_ggtt_offset(ring->vma));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (I915_READ_HEAD(engine))
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
			  engine->name, I915_READ_HEAD(engine));

	intel_ring_update_space(ring);
	I915_WRITE_HEAD(engine, ring->head);
	I915_WRITE_TAIL(engine, ring->tail);
	(void)I915_READ_TAIL(engine);

	I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
				       RING_VALID, RING_VALID,
				       50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
			  engine->name,
			  I915_READ_CTL(engine),
			  I915_READ_CTL(engine) & RING_VALID,
			  I915_READ_HEAD(engine), ring->head,
			  I915_READ_TAIL(engine), ring->tail,
			  I915_READ_START(engine),
			  i915_ggtt_offset(ring->vma));
		ret = -EIO;
		goto out;
	}

	intel_engine_init_hangcheck(engine);

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}
static void reset_ring_common(struct intel_engine_cs *engine,
			      struct drm_i915_gem_request *request)
{
	/* Try to restore the logical GPU state to match the continuation
	 * of the request queue. If we skip the context/PD restore, then
	 * the next request may try to execute assuming that its context
	 * is valid and loaded on the GPU and so may try to access invalid
	 * memory, prompting repeated GPU hangs.
	 *
	 * If the request was guilty, we still restore the logical state
	 * in case the next request requires it (e.g. the aliasing ppgtt),
	 * but skip over the hung batch.
	 *
	 * If the request was innocent, we try to replay the request with
	 * the restored context.
	 */
	if (request) {
		struct drm_i915_private *dev_priv = request->i915;
		struct intel_context *ce = &request->ctx->engine[engine->id];
		struct i915_hw_ppgtt *ppgtt;

		/* FIXME consider gen8 reset */

		if (ce->state) {
			I915_WRITE(CCID,
				   i915_ggtt_offset(ce->state) |
				   BIT(8) /* must be set! */ |
				   CCID_EXTENDED_STATE_SAVE |
				   CCID_EXTENDED_STATE_RESTORE |
				   CCID_EN);
		}

		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
		if (ppgtt) {
			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;

			I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
			I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);

			/* Wait for the PD reload to complete */
			if (intel_wait_for_register(dev_priv,
						    RING_PP_DIR_BASE(engine),
						    BIT(0), 0,
						    10))
				DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		/* If the rq hung, jump to its breadcrumb and skip the batch */
		if (request->fence.error == -EIO) {
			struct intel_ring *ring = request->ring;

			ring->head = request->postfix;
			ring->last_retired_head = -1;
		}
	} else {
		engine->legacy_active_context = NULL;
	}
}
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	struct i915_workarounds *w = &req->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(req, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);

	return 0;
}
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int ret;

	ret = intel_ring_workarounds_emit(req);
	if (ret != 0)
		return ret;

	ret = i915_gem_render_state_emit(req);
	if (ret)
		return ret;

	return 0;
}
static int wa_add(struct drm_i915_private *dev_priv,
		  i915_reg_t addr,
		  const u32 mask, const u32 val)
{
	const u32 idx = dev_priv->workarounds.count;

	if (WARN_ON(idx >= I915_MAX_WA_REGS))
		return -ENOSPC;

	dev_priv->workarounds.reg[idx].addr = addr;
	dev_priv->workarounds.reg[idx].value = val;
	dev_priv->workarounds.reg[idx].mask = mask;

	dev_priv->workarounds.count++;

	return 0;
}
#define WA_REG(addr, mask, val) do { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	} while (0)

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
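/*
 * Usage sketch: WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_NON_COHERENT)
 * records a masked write (mask in the upper 16 bits, value in the lower
 * 16) via wa_add(), so the hardware updates only the named bit. Since
 * WA_REG() returns from the enclosing function on error, these macros
 * can only be used inside functions that return int.
 */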
static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
				 i915_reg_t reg)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const uint32_t index = wa->hw_whitelist_count[engine->id];

	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
		return -EINVAL;

	WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
		 i915_mmio_reg_offset(reg));
	wa->hw_whitelist_count[engine->id]++;

	return 0;
}
static int gen8_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}
static int bdw_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}
static int chv_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	return 0;
}
static int gen9_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   ECOCHK_DIS_TLB);

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
				  GEN9_DG_MIRROR_FIX_ENABLE);

	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
		/*
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
		 * but we do that in per ctx batchbuffer as there is an issue
		 * with this register not getting restored on ctx restore
		 */
	}

	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl */
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaDisableMaskBasedCammingInRCC:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableHDCInvalidation:skl,bxt,kbl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/* WaOCLCoherentLineFlush:skl,bxt,kbl */
	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
				    GEN8_LQSC_FLUSH_COHERENT_LINES));

	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */
	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
	if (ret)
		return ret;

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
	if (ret)
		return ret;

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */
	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
	if (ret)
		return ret;

	return 0;
}
static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}
static int skl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/*
	 * Actual WA is to disable percontext preemption granularity control
	 * until D0 which is the default case so this is equivalent to
	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
	 */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableGafsUnitClkGating:skl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:skl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return skl_tune_iz_hashing(engine);
}
static int bxt_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaStoreMultiplePTEenable:bxt */
	/* This is a requirement according to Hardware specification */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);

	/* WaSetClckGatingDisableMedia:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
	}

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	/* WaDisableSbeCacheDispatchPortSharing:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
		WA_SET_BIT_MASKED(
			GEN7_HALF_SLICE_CHICKEN1,
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
	}

	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
	/* WaDisableLSQCROPERFforOCL:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
		if (ret)
			return ret;

		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
		if (ret)
			return ret;
	}

	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaInPlaceDecompressionHang:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	return 0;
}
static int kbl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		WA_SET_BIT(GAMT_CHKN_BIT_REG,
			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:kbl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return 0;
}
static int glk_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}
int init_workarounds_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WARN_ON(engine->id != RCS);

	dev_priv->workarounds.count = 0;
	dev_priv->workarounds.hw_whitelist_count[RCS] = 0;

	if (IS_BROADWELL(dev_priv))
		return bdw_init_workarounds(engine);

	if (IS_CHERRYVIEW(dev_priv))
		return chv_init_workarounds(engine);

	if (IS_SKYLAKE(dev_priv))
		return skl_init_workarounds(engine);

	if (IS_BROXTON(dev_priv))
		return bxt_init_workarounds(engine);

	if (IS_KABYLAKE(dev_priv))
		return kbl_init_workarounds(engine);

	if (IS_GEMINILAKE(dev_priv))
		return glk_init_workarounds(engine);

	return 0;
}
static int init_render_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret = init_ring_common(engine);
	if (ret)
		return ret;

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (IS_GEN(dev_priv, 4, 6))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
	 */
	if (IS_GEN(dev_priv, 6, 7))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (IS_GEN6(dev_priv))
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev_priv))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (IS_GEN6(dev_priv)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (IS_GEN(dev_priv, 6, 7))
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (INTEL_INFO(dev_priv)->gen >= 6)
		I915_WRITE_IMR(engine, ~engine->irq_keep_mask);

	return init_workarounds_ring(engine);
}
static void render_ring_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	i915_vma_unpin_and_release(&dev_priv->semaphore);
}
static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *waiter;
	enum intel_engine_id id;

	for_each_engine(waiter, dev_priv, id) {
		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		*cs++ = GFX_OP_PIPE_CONTROL(6);
		*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_CS_STALL;
		*cs++ = lower_32_bits(gtt_offset);
		*cs++ = upper_32_bits(gtt_offset);
		*cs++ = req->global_seqno;
		*cs++ = 0;
		*cs++ = MI_SEMAPHORE_SIGNAL |
			MI_SEMAPHORE_TARGET(waiter->hw_id);
		*cs++ = 0;
	}

	return cs;
}
static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *waiter;
	enum intel_engine_id id;

	for_each_engine(waiter, dev_priv, id) {
		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
		*cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
		*cs++ = upper_32_bits(gtt_offset);
		*cs++ = req->global_seqno;
		*cs++ = MI_SEMAPHORE_SIGNAL |
			MI_SEMAPHORE_TARGET(waiter->hw_id);
		*cs++ = 0;
	}

	return cs;
}
static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int num_rings = 0;

	for_each_engine(engine, dev_priv, id) {
		i915_reg_t mbox_reg;

		if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
			continue;

		mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
		if (i915_mmio_reg_valid(mbox_reg)) {
			*cs++ = MI_LOAD_REGISTER_IMM(1);
			*cs++ = i915_mmio_reg_offset(mbox_reg);
			*cs++ = req->global_seqno;
			num_rings++;
		}
	}
	if (num_rings & 1)
		*cs++ = MI_NOOP;

	return cs;
}
static void i9xx_submit_request(struct drm_i915_gem_request *request)
{
	struct drm_i915_private *dev_priv = request->i915;

	i915_gem_request_submit(request);

	I915_WRITE_TAIL(request->engine, request->tail);
}
static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
{
	*cs++ = MI_STORE_DWORD_INDEX;
	*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
	*cs++ = req->global_seqno;
	*cs++ = MI_USER_INTERRUPT;

	req->tail = intel_ring_offset(req, cs);
}

static const int i9xx_emit_breadcrumb_sz = 4;
/**
 * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
 *
 * @request - request to write to the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
{
	return i9xx_emit_breadcrumb(req,
				    req->engine->semaphore.signal(req, cs));
}
static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
					u32 *cs)
{
	struct intel_engine_cs *engine = req->engine;

	if (engine->semaphore.signal)
		cs = engine->semaphore.signal(req, cs);

	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = intel_hws_seqno_address(engine);
	*cs++ = 0;
	*cs++ = req->global_seqno;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	req->tail = intel_ring_offset(req, cs);
}

static const int gen8_render_emit_breadcrumb_sz = 8;
/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */

static int
gen8_ring_sync_to(struct drm_i915_gem_request *req,
		  struct drm_i915_gem_request *signal)
{
	struct drm_i915_private *dev_priv = req->i915;
	u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
	struct i915_hw_ppgtt *ppgtt;
	u32 *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = signal->global_seqno;
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	intel_ring_advance(req, cs);

	/* When the !RCS engines idle waiting upon a semaphore, they lose their
	 * pagetables and we must reload them before executing the batch.
	 * We do this on the i915_switch_context() following the wait and
	 * before the dispatch.
	 */
	ppgtt = req->ctx->ppgtt;
	if (ppgtt && req->engine->id != RCS)
		ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
	return 0;
}
static int
gen6_ring_sync_to(struct drm_i915_gem_request *req,
		  struct drm_i915_gem_request *signal)
{
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
	u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id];
	u32 *cs;

	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = dw1 | wait_mbox;
	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	*cs++ = signal->global_seqno - 1;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
static void
gen5_seqno_barrier(struct intel_engine_cs *engine)
{
	/* MI_STORE are internally buffered by the GPU and not flushed
	 * either by MI_FLUSH or SyncFlush or any other combination of
	 * MI commands.
	 *
	 * "Only the submission of the store operation is guaranteed.
	 * The write result will be complete (coherent) some time later
	 * (this is practically a finite period but there is no guaranteed
	 * latency)."
	 *
	 * Empirically, we observe that we need a delay of at least 75us to
	 * be sure that the seqno write is visible by the CPU.
	 */
	usleep_range(125, 250);
}
static void
gen6_seqno_barrier(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page.
	 *
	 * Note that this effectively stalls the read by the time it takes to
	 * do a memory transaction, which more or less ensures that the write
	 * from the GPU has sufficient time to invalidate the CPU cacheline.
	 * Alternatively we could delay the interrupt from the CS ring to give
	 * the write time to land, but that would incur a delay after every
	 * batch i.e. much more frequent than a delay when waiting for the
	 * interrupt (with the same net latency).
	 *
	 * Also note that to prevent whole machine hangs on gen7, we have to
	 * take the spinlock to guard against concurrent cacheline access.
	 */
	spin_lock_irq(&dev_priv->uncore.lock);
	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
	spin_unlock_irq(&dev_priv->uncore.lock);
}
static void
gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
}
static void
i9xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void
i9xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
}

static void
i8xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE16(IMR, dev_priv->irq_mask);
	POSTING_READ16(RING_IMR(engine->mmio_base));
}

static void
i8xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE16(IMR, dev_priv->irq_mask);
}
static int
bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);
	return 0;
}
static void
gen6_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask |
			 engine->irq_keep_mask));
	gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
}

static void
gen6_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
	gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
}
static void
hsw_vebox_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
	gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask);
}

static void
hsw_vebox_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~0);
	gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
}
static void
gen8_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask |
			 engine->irq_keep_mask));
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void
gen8_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
}
static int
i965_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 length,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
		I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
	*cs++ = offset;
	intel_ring_advance(req, cs);

	return 0;
}
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(req, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/* Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(req, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(req, cs);

	return 0;
}
static int
i915_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(req, cs);

	return 0;
}
static void cleanup_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (!dev_priv->status_page_dmah)
		return;

	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
	engine->status_page.page_addr = NULL;
}
static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;
	struct drm_i915_gem_object *obj;

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	obj = vma->obj;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_gem_object_unpin_map(obj);
	__i915_gem_object_release_unless_active(obj);
}
static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags;
	void *vaddr;
	int ret;

	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
	if (ret)
		goto err;

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/* On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	ret = i915_vma_pin(vma, 0, 4096, flags);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err_unpin;
	}

	engine->status_page.vma = vma;
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);

	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 engine->name, i915_ggtt_offset(vma));
	return 0;

err_unpin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ret;
}
static int init_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->status_page_dmah =
		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
	if (!dev_priv->status_page_dmah)
		return -ENOMEM;

	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(engine->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}
int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
{
	unsigned int flags;
	enum i915_map_type map;
	struct i915_vma *vma = ring->vma;
	void *addr;
	int ret;

	GEM_BUG_ON(ring->vaddr);

	map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;

	flags = PIN_GLOBAL;
	if (offset_bias)
		flags |= PIN_OFFSET_BIAS | offset_bias;
	if (vma->obj->stolen)
		flags |= PIN_MAPPABLE;

	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
			ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
		else
			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
		if (unlikely(ret))
			return ret;
	}

	ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
	if (unlikely(ret))
		return ret;

	if (i915_vma_is_map_and_fenceable(vma))
		addr = (void __force *)i915_vma_pin_iomap(vma);
	else
		addr = i915_gem_object_pin_map(vma->obj, map);
	if (IS_ERR(addr))
		goto err;

	ring->vaddr = addr;
	return 0;

err:
	i915_vma_unpin(vma);
	return PTR_ERR(addr);
}
void intel_ring_unpin(struct intel_ring *ring)
{
	GEM_BUG_ON(!ring->vma);
	GEM_BUG_ON(!ring->vaddr);

	if (i915_vma_is_map_and_fenceable(ring->vma))
		i915_vma_unpin_iomap(ring->vma);
	else
		i915_gem_object_unpin_map(ring->vma->obj);
	ring->vaddr = NULL;

	i915_vma_unpin(ring->vma);
}
static struct i915_vma *
intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_stolen(dev_priv, size);
	if (!obj)
		obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/* mark ring buffers as read-only from GPU side by default */
	obj->gt_ro = 1;

	vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	ring->engine = engine;

	INIT_LIST_HEAD(&ring->request_list);

	ring->size = size;
	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;

	ring->last_retired_head = -1;
	intel_ring_update_space(ring);

	vma = intel_ring_create_vma(engine->i915, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}
void
intel_ring_free(struct intel_ring *ring)
{
	struct drm_i915_gem_object *obj = ring->vma->obj;

	i915_vma_close(ring->vma);
	__i915_gem_object_release_unless_active(obj);

	kfree(ring);
}
static int context_pin(struct i915_gem_context *ctx)
{
	struct i915_vma *vma = ctx->engine[RCS].state;
	int ret;

	/* Clear this page out of any CPU caches for coherent swap-in/out.
	 * We only want to do this on the first bind so that we do not stall
	 * on an active context (which by nature is already on the GPU).
	 */
	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
		if (ret)
			return ret;
	}

	return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | PIN_HIGH);
}
static int intel_ring_context_pin(struct intel_engine_cs *engine,
				  struct i915_gem_context *ctx)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	int ret;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);

	if (ce->pin_count++)
		return 0;

	if (ce->state) {
		ret = context_pin(ctx);
		if (ret)
			goto error;
	}

	/* The kernel context is only used as a placeholder for flushing the
	 * active context. It is never used for submitting user rendering and
	 * as such never requires the golden render context, and so we can skip
	 * emitting it when we switch to the kernel context. This is required
	 * as during eviction we cannot allocate and pin the renderstate in
	 * order to initialise the context.
	 */
	if (i915_gem_context_is_kernel(ctx))
		ce->initialised = true;

	i915_gem_context_get(ctx);
	return 0;

error:
	ce->pin_count = 0;
	return ret;
}
static void intel_ring_context_unpin(struct intel_engine_cs *engine,
				     struct i915_gem_context *ctx)
{
	struct intel_context *ce = &ctx->engine[engine->id];

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	GEM_BUG_ON(ce->pin_count == 0);

	if (--ce->pin_count)
		return;

	if (ce->state)
		i915_vma_unpin(ce->state);

	i915_gem_context_put(ctx);
}
static int intel_init_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring;
	int ret;

	WARN_ON(engine->buffer);

	intel_engine_setup_common(engine);

	ret = intel_engine_init_common(engine);
	if (ret)
		goto error;

	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
	if (IS_ERR(ring)) {
		ret = PTR_ERR(ring);
		goto error;
	}

	if (HWS_NEEDS_PHYSICAL(dev_priv)) {
		WARN_ON(engine->id != RCS);
		ret = init_phys_status_page(engine);
		if (ret)
			goto error;
	} else {
		ret = init_status_page(engine);
		if (ret)
			goto error;
	}

	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
	ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE);
	if (ret) {
		intel_ring_free(ring);
		goto error;
	}
	engine->buffer = ring;

	return 0;

error:
	intel_engine_cleanup(engine);
	return ret;
}
void intel_engine_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv;

	dev_priv = engine->i915;

	if (engine->buffer) {
		WARN_ON(INTEL_GEN(dev_priv) > 2 &&
			(I915_READ_MODE(engine) & MODE_IDLE) == 0);

		intel_ring_unpin(engine->buffer);
		intel_ring_free(engine->buffer);
		engine->buffer = NULL;
	}

	if (engine->cleanup)
		engine->cleanup(engine);

	if (HWS_NEEDS_PHYSICAL(dev_priv)) {
		WARN_ON(engine->id != RCS);
		cleanup_phys_status_page(engine);
	} else {
		cleanup_status_page(engine);
	}

	intel_engine_cleanup_common(engine);

	engine->i915 = NULL;
	dev_priv->engine[engine->id] = NULL;
	kfree(engine);
}
void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, dev_priv, id) {
		engine->buffer->head = engine->buffer->tail;
		engine->buffer->last_retired_head = -1;
	}
}
static int ring_request_alloc(struct drm_i915_gem_request *request)
{
	u32 *cs;

	GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);

	/* Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	GEM_BUG_ON(!request->engine->buffer);
	request->ring = request->engine->buffer;

	cs = intel_ring_begin(request, 0);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}
static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
{
	struct intel_ring *ring = req->ring;
	struct drm_i915_gem_request *target;
	long timeout;

	lockdep_assert_held(&req->i915->drm.struct_mutex);

	intel_ring_update_space(ring);
	if (ring->space >= bytes)
		return 0;

	/*
	 * Space is reserved in the ringbuffer for finalising the request,
	 * as that cannot be allowed to fail. During request finalisation,
	 * reserved_space is set to 0 to stop the overallocation and the
	 * assumption is that then we never need to wait (which has the
	 * risk of failing with EINTR).
	 *
	 * See also i915_gem_request_alloc() and i915_add_request().
	 */
	GEM_BUG_ON(!req->reserved_space);

	list_for_each_entry(target, &ring->request_list, ring_link) {
		unsigned space;

		/* Would completion of this request free enough space? */
		space = __intel_ring_space(target->postfix, ring->tail,
					   ring->size);
		if (space >= bytes)
			break;
	}

	if (WARN_ON(&target->ring_link == &ring->request_list))
		return -ENOSPC;

	timeout = i915_wait_request(target,
				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	i915_gem_request_retire_upto(target);

	intel_ring_update_space(ring);
	GEM_BUG_ON(ring->space < bytes);
	return 0;
}
u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
{
	struct intel_ring *ring = req->ring;
	int remain_actual = ring->size - ring->tail;
	int remain_usable = ring->effective_size - ring->tail;
	int bytes = num_dwords * sizeof(u32);
	int total_bytes, wait_bytes;
	bool need_wrap = false;
	u32 *cs;

	total_bytes = bytes + req->reserved_space;

	if (unlikely(bytes > remain_usable)) {
		/*
		 * Not enough space for the basic request. So need to flush
		 * out the remainder and then wait for base + reserved.
		 */
		wait_bytes = remain_actual + total_bytes;
		need_wrap = true;
	} else if (unlikely(total_bytes > remain_usable)) {
		/*
		 * The base request will fit but the reserved space
		 * falls off the end. So we don't need an immediate wrap
		 * and only need to effectively wait for the reserved
		 * size space from the start of ringbuffer.
		 */
		wait_bytes = remain_actual + req->reserved_space;
	} else {
		/* No wrapping required, just waiting. */
		wait_bytes = total_bytes;
	}

	if (wait_bytes > ring->space) {
		int ret = wait_for_space(req, wait_bytes);
		if (unlikely(ret))
			return ERR_PTR(ret);
	}

	if (unlikely(need_wrap)) {
		GEM_BUG_ON(remain_actual > ring->space);
		GEM_BUG_ON(ring->tail + remain_actual > ring->size);

		/* Fill the tail with MI_NOOP */
		memset(ring->vaddr + ring->tail, 0, remain_actual);
		ring->tail = 0;
		ring->space -= remain_actual;
	}

	GEM_BUG_ON(ring->tail > ring->size - bytes);
	cs = ring->vaddr + ring->tail;
	ring->tail += bytes;
	ring->space -= bytes;
	GEM_BUG_ON(ring->space < 0);

	return cs;
}
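/*
 * Example: with CACHELINE_BYTES == 64, a tail 40 bytes (10 dwords) into
 * a cacheline gets 6 MI_NOOPs of padding from the helper below.
 */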
/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
{
	int num_dwords =
		(req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
	u32 *cs;

	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
	cs = intel_ring_begin(req, num_dwords);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	while (num_dwords--)
		*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);

	return 0;
}
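/*
 * Gen6 VCS wa: the tail pointer may only be written while the ring is
 * awake, so bracket the tail update with an explicit wake/sleep cycle.
 */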
static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
{
	struct drm_i915_private *dev_priv = request->i915;

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
		      _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (intel_wait_for_register_fw(dev_priv,
				       GEN6_BSD_SLEEP_PSMI_CONTROL,
				       GEN6_BSD_SLEEP_INDICATOR,
				       0,
				       50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	i9xx_submit_request(request);

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
		      _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
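/*
 * MI_FLUSH_DW as emitted below: command dword (length grown by one on
 * gen8 for the extra address dword), post-sync write address in the
 * HWSP scratch slot, then upper address and value on gen8, or zero and
 * MI_NOOP padding on gen6/7.
 */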
static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;
	if (INTEL_GEN(req->i915) >= 8)
		cmd += 1;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;

	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	if (INTEL_GEN(req->i915) >= 8) {
		*cs++ = 0; /* upper addr */
		*cs++ = 0; /* value */
	} else {
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(req, cs);
	return 0;
}
static int
gen8_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	bool ppgtt = USES_PPGTT(req->i915) &&
			!(dispatch_flags & I915_DISPATCH_SECURE);
	u32 *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* FIXME(BDW): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
		I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}
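/* Haswell selects PPGTT vs GGTT and the resource streamer per batch. */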
static int
hsw_emit_bb_start(struct drm_i915_gem_request *req,
		  u64 offset, u32 len,
		  unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
		0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
		(dispatch_flags & I915_DISPATCH_RS ?
		MI_BATCH_RESOURCE_STREAMER : 0);
	/* bit0-7 is the length on GEN6+ */
	*cs++ = offset;
	intel_ring_advance(req, cs);

	return 0;
}
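/* Gen6/7 have only the secure-dispatch selector; no RS or PPGTT bits. */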
static int
gen6_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
		0 : MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	*cs++ = offset;
	intel_ring_advance(req, cs);

	return 0;
}
/* Blitter support (SandyBridge+) */

static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;
	if (INTEL_GEN(req->i915) >= 8)
		cmd += 1;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_INVALIDATE_TLB;
	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	if (INTEL_GEN(req->i915) >= 8) {
		*cs++ = 0; /* upper addr */
		*cs++ = 0; /* value */
	} else {
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(req, cs);

	return 0;
}
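/*
 * Inter-engine semaphore setup: gen8 signals through per-engine-pair
 * slots in a single GGTT page, gen6/7 through the fixed mailbox
 * register pairs in sem_data below. Any failure here just disables
 * semaphores instead of failing engine init.
 */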
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
				       struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	int ret, i;

	if (!i915.semaphores)
		return;

	if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
		struct i915_vma *vma;

		obj = i915_gem_object_create(dev_priv, PAGE_SIZE);
		if (IS_ERR(obj))
			goto err;

		vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
		if (IS_ERR(vma))
			goto err_obj;

		ret = i915_gem_object_set_to_gtt_domain(obj, false);
		if (ret)
			goto err_obj;

		ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
		if (ret)
			goto err_obj;

		dev_priv->semaphore = vma;
	}

	if (INTEL_GEN(dev_priv) >= 8) {
		u32 offset = i915_ggtt_offset(dev_priv->semaphore);

		engine->semaphore.sync_to = gen8_ring_sync_to;
		engine->semaphore.signal = gen8_xcs_signal;

		for (i = 0; i < I915_NUM_ENGINES; i++) {
			u32 ring_offset;

			if (i != engine->id)
				ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
			else
				ring_offset = MI_SEMAPHORE_SYNC_INVALID;

			engine->semaphore.signal_ggtt[i] = ring_offset;
		}
	} else if (INTEL_GEN(dev_priv) >= 6) {
		engine->semaphore.sync_to = gen6_ring_sync_to;
		engine->semaphore.signal = gen6_signal;

		/*
		 * The current semaphore is only applied on pre-gen8
		 * platform. And there is no VCS2 ring on the pre-gen8
		 * platform. So the semaphore between RCS and VCS2 is
		 * initialized as INVALID. Gen8 will initialize the
		 * sema between VCS2 and RCS later.
		 */
		for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
			static const struct {
				u32 wait_mbox;
				i915_reg_t mbox_reg;
			} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
				[RCS_HW] = {
					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
				},
				[VCS_HW] = {
					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
				},
				[BCS_HW] = {
					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
					[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
				},
				[VECS_HW] = {
					[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
					[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
					[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
				},
			};
			u32 wait_mbox;
			i915_reg_t mbox_reg;

			if (i == engine->hw_id) {
				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
				mbox_reg = GEN6_NOSYNC;
			} else {
				wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
				mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
			}

			engine->semaphore.mbox.wait[i] = wait_mbox;
			engine->semaphore.mbox.signal[i] = mbox_reg;
		}
	}

	return;

err_obj:
	i915_gem_object_put(obj);
err:
	DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
	i915.semaphores = 0;
}
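/* Select user-interrupt enable/disable hooks by hardware generation. */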
static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
				struct intel_engine_cs *engine)
{
	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;

	if (INTEL_GEN(dev_priv) >= 8) {
		engine->irq_enable = gen8_irq_enable;
		engine->irq_disable = gen8_irq_disable;
		engine->irq_seqno_barrier = gen6_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 6) {
		engine->irq_enable = gen6_irq_enable;
		engine->irq_disable = gen6_irq_disable;
		engine->irq_seqno_barrier = gen6_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 5) {
		engine->irq_enable = gen5_irq_enable;
		engine->irq_disable = gen5_irq_disable;
		engine->irq_seqno_barrier = gen5_seqno_barrier;
	} else if (INTEL_GEN(dev_priv) >= 3) {
		engine->irq_enable = i9xx_irq_enable;
		engine->irq_disable = i9xx_irq_disable;
	} else {
		engine->irq_enable = i8xx_irq_enable;
		engine->irq_disable = i8xx_irq_disable;
	}
}
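/*
 * Defaults shared by all legacy ringbuffer engines; the per-engine init
 * functions below override flush, breadcrumb and batch-start hooks
 * where a generation or an engine needs something different.
 */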
static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
				      struct intel_engine_cs *engine)
{
	intel_ring_init_irq(dev_priv, engine);
	intel_ring_init_semaphores(dev_priv, engine);

	engine->init_hw = init_ring_common;
	engine->reset_hw = reset_ring_common;

	engine->context_pin = intel_ring_context_pin;
	engine->context_unpin = intel_ring_context_unpin;

	engine->request_alloc = ring_request_alloc;

	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
	if (i915.semaphores) {
		int num_rings;

		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;

		num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
		if (INTEL_GEN(dev_priv) >= 8) {
			engine->emit_breadcrumb_sz += num_rings * 6;
		} else {
			engine->emit_breadcrumb_sz += num_rings * 3;
			if (num_rings & 1)
				engine->emit_breadcrumb_sz++;
		}
	}
	engine->submit_request = i9xx_submit_request;

	if (INTEL_GEN(dev_priv) >= 8)
		engine->emit_bb_start = gen8_emit_bb_start;
	else if (INTEL_GEN(dev_priv) >= 6)
		engine->emit_bb_start = gen6_emit_bb_start;
	else if (INTEL_GEN(dev_priv) >= 4)
		engine->emit_bb_start = i965_emit_bb_start;
	else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
		engine->emit_bb_start = i830_emit_bb_start;
	else
		engine->emit_bb_start = i915_emit_bb_start;
}
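/*
 * The render engine additionally needs its own flush and breadcrumb
 * implementations plus a scratch page, used for PIPE_CONTROL post-sync
 * writes on gen6+ and for the i830 CS TLB workaround batch.
 */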
int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	intel_ring_default_vfuncs(dev_priv, engine);

	if (HAS_L3_DPF(dev_priv))
		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;

	if (INTEL_GEN(dev_priv) >= 8) {
		engine->init_context = intel_rcs_ctx_init;
		engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
		engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
		engine->emit_flush = gen8_render_ring_flush;
		if (i915.semaphores) {
			int num_rings;

			engine->semaphore.signal = gen8_rcs_signal;

			num_rings =
				hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
			engine->emit_breadcrumb_sz += num_rings * 6;
		}
	} else if (INTEL_GEN(dev_priv) >= 6) {
		engine->init_context = intel_rcs_ctx_init;
		engine->emit_flush = gen7_render_ring_flush;
		if (IS_GEN6(dev_priv))
			engine->emit_flush = gen6_render_ring_flush;
	} else if (IS_GEN5(dev_priv)) {
		engine->emit_flush = gen4_render_ring_flush;
	} else {
		if (INTEL_GEN(dev_priv) < 4)
			engine->emit_flush = gen2_render_ring_flush;
		else
			engine->emit_flush = gen4_render_ring_flush;
		engine->irq_enable_mask = I915_USER_INTERRUPT;
	}

	if (IS_HASWELL(dev_priv))
		engine->emit_bb_start = hsw_emit_bb_start;

	engine->init_hw = init_render_ring;
	engine->cleanup = render_ring_cleanup;

	ret = intel_init_ring_buffer(engine);
	if (ret)
		return ret;

	if (INTEL_GEN(dev_priv) >= 6) {
		ret = intel_engine_create_scratch(engine, PAGE_SIZE);
		if (ret)
			return ret;
	} else if (HAS_BROKEN_CS_TLB(dev_priv)) {
		ret = intel_engine_create_scratch(engine, I830_WA_SIZE);
		if (ret)
			return ret;
	}

	return 0;
}
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	if (INTEL_GEN(dev_priv) >= 6) {
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev_priv))
			engine->submit_request = gen6_bsd_submit_request;
		engine->emit_flush = gen6_bsd_ring_flush;
		if (INTEL_GEN(dev_priv) < 8)
			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
	} else {
		engine->mmio_base = BSD_RING_BASE;
		engine->emit_flush = bsd_ring_flush;
		if (IS_GEN5(dev_priv))
			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
		else
			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
	}

	return intel_init_ring_buffer(engine);
}
/**
 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
 */
int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_bsd_ring_flush;

	return intel_init_ring_buffer(engine);
}
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_ring_flush;
	if (INTEL_GEN(dev_priv) < 8)
		engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;

	return intel_init_ring_buffer(engine);
}
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	intel_ring_default_vfuncs(dev_priv, engine);

	engine->emit_flush = gen6_ring_flush;

	if (INTEL_GEN(dev_priv) < 8) {
		engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
		engine->irq_enable = hsw_vebox_irq_enable;
		engine->irq_disable = hsw_vebox_irq_disable;
	}

	return intel_init_ring_buffer(engine);
}