/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 */

#include <linux/log2.h>
#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

/* Rough estimate of the typical request size, performing a flush,
 * set-context and then emitting the batch.
 */
#define LEGACY_REQUEST_SIZE 200

static int __intel_ring_space(int head, int tail, int size)
{
	int space = head - tail;
	if (space <= 0)
		space += size;
	return space - I915_RING_FREE_SPACE;
}

void intel_ring_update_space(struct intel_ring *ring)
{
	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
}

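/*
 * Worked example (added for illustration): with size == 4096, head == 512
 * and tail == 3072, head - tail is -2560, so the tail has wrapped past the
 * end of the buffer; adding the ring size back gives 1536 bytes between
 * tail and head, from which I915_RING_FREE_SPACE is subtracted so that the
 * tail can never fully catch up with the head.
 */
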
static int
gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	cmd = MI_FLUSH;

	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static int
gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 cmd, *cs;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(req->i915) || IS_GEN5(req->i915))
			cmd |= MI_INVALIDATE_ISP;
	}

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs;

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static int
gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(req);
	if (ret)
		return ret;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}

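/*
 * For instance (added for illustration): a combined flush and invalidate,
 * i.e. mode == (EMIT_FLUSH | EMIT_INVALIDATE), takes both branches above,
 * so the render-target/depth flush bits are set together with the full set
 * of invalidate bits, plus the CS stall and post-sync QW write that the
 * TLB invalidation requires.
 */
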
static int
gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
{
	u32 *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}

static int
gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 scratch_addr =
		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
	u32 *cs, flags = 0;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(req);
	}

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	intel_ring_advance(req, cs);

	return 0;
}

static int
gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 flags;
	u32 *cs;

	cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	flags = PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
		cs = gen8_emit_pipe_control(cs,
					    PIPE_CONTROL_CS_STALL |
					    PIPE_CONTROL_STALL_AT_SCOREBOARD,
					    0);
	}

	cs = gen8_emit_pipe_control(cs, flags,
				    i915_ggtt_offset(req->engine->scratch) +
				    2 * CACHELINE_BYTES);

	intel_ring_advance(req, cs);

	return 0;
}

static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 addr;

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_GEN(dev_priv) >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);
}

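/*
 * Note (added for clarity): the shift-and-mask above folds bits [35:32] of
 * the DMA address into bits [7:4] of the value written to HWS_PGA on gen4+,
 * presumably so a status page placed above 4GiB can still be addressed.
 */
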
static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	i915_reg_t mmio;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev_priv)) {
		switch (engine->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		/*
		 * VCS2 actually doesn't exist on Gen7. Only shut up
		 * gcc switch check warning
		 */
		case VCS2:
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev_priv)) {
		mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
	} else {
		/* XXX: gen8 returns to sanity */
		mmio = RING_HWS_PGA(engine->mmio_base);
	}

	I915_WRITE(mmio, engine->status_page.ggtt_offset);
	POSTING_READ(mmio);

	/*
	 * Flush the TLB for this page
	 *
	 * FIXME: These two bits have disappeared on gen8, so a question
	 * arises: do we still need this and if so how should we go about
	 * invalidating the TLB?
	 */
	if (IS_GEN(dev_priv, 6, 7)) {
		i915_reg_t reg = RING_INSTPM(engine->mmio_base);

		/* ring should be idle before issuing a sync flush */
		WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		if (intel_wait_for_register(dev_priv,
					    reg, INSTPM_SYNC_FLUSH, 0,
					    1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  engine->name);
	}
}

static bool stop_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) > 2) {
		I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
		if (intel_wait_for_register(dev_priv,
					    RING_MI_MODE(engine->mmio_base),
					    MODE_IDLE,
					    MODE_IDLE,
					    1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n",
				  engine->name);
			/* Sometimes we observe that the idle flag is not
			 * set even though the ring is empty. So double
			 * check before giving up.
			 */
			if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
				return false;
		}
	}

	I915_WRITE_CTL(engine, 0);
	I915_WRITE_HEAD(engine, 0);
	I915_WRITE_TAIL(engine, 0);

	if (INTEL_GEN(dev_priv) > 2) {
		(void)I915_READ_CTL(engine);
		I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
	}

	return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
}

static int init_ring_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring = engine->buffer;
	int ret = 0;

	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(engine)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      engine->name,
			      I915_READ_CTL(engine),
			      I915_READ_HEAD(engine),
			      I915_READ_TAIL(engine),
			      I915_READ_START(engine));

		if (!stop_ring(engine)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  engine->name,
				  I915_READ_CTL(engine),
				  I915_READ_HEAD(engine),
				  I915_READ_TAIL(engine),
				  I915_READ_START(engine));
			ret = -EIO;
			goto out;
		}
	}

	if (HWS_NEEDS_PHYSICAL(dev_priv))
		ring_setup_phys_status_page(engine);
	else
		intel_ring_setup_status_page(engine);

	intel_engine_reset_breadcrumbs(engine);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(engine);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(engine, i915_ggtt_offset(ring->vma));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (I915_READ_HEAD(engine))
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
			  engine->name, I915_READ_HEAD(engine));

	intel_ring_update_space(ring);
	I915_WRITE_HEAD(engine, ring->head);
	I915_WRITE_TAIL(engine, ring->tail);
	(void)I915_READ_TAIL(engine);

	I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
				       RING_VALID, RING_VALID,
				       50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
			  engine->name,
			  I915_READ_CTL(engine),
			  I915_READ_CTL(engine) & RING_VALID,
			  I915_READ_HEAD(engine), ring->head,
			  I915_READ_TAIL(engine), ring->tail,
			  I915_READ_START(engine),
			  i915_ggtt_offset(ring->vma));
		ret = -EIO;
		goto out;
	}

	intel_engine_init_hangcheck(engine);

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}

static void reset_ring_common(struct intel_engine_cs *engine,
			      struct drm_i915_gem_request *request)
{
	/* Try to restore the logical GPU state to match the continuation
	 * of the request queue. If we skip the context/PD restore, then
	 * the next request may try to execute assuming that its context
	 * is valid and loaded on the GPU and so may try to access invalid
	 * memory, prompting repeated GPU hangs.
	 *
	 * If the request was guilty, we still restore the logical state
	 * in case the next request requires it (e.g. the aliasing ppgtt),
	 * but skip over the hung batch.
	 *
	 * If the request was innocent, we try to replay the request with
	 * the restored context.
	 */
	if (request) {
		struct drm_i915_private *dev_priv = request->i915;
		struct intel_context *ce = &request->ctx->engine[engine->id];
		struct i915_hw_ppgtt *ppgtt;

		/* FIXME consider gen8 reset */

		if (ce->state) {
			I915_WRITE(CCID,
				   i915_ggtt_offset(ce->state) |
				   BIT(8) /* must be set! */ |
				   CCID_EXTENDED_STATE_SAVE |
				   CCID_EXTENDED_STATE_RESTORE |
				   CCID_EN);
		}

		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
		if (ppgtt) {
			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;

			I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
			I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);

			/* Wait for the PD reload to complete */
			if (intel_wait_for_register(dev_priv,
						    RING_PP_DIR_BASE(engine),
						    BIT(0), 0,
						    10))
				DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		/* If the rq hung, jump to its breadcrumb and skip the batch */
		if (request->fence.error == -EIO)
			request->ring->head = request->postfix;
	} else {
		engine->legacy_active_context = NULL;
	}
}

static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int ret;

	ret = intel_ring_workarounds_emit(req);
	if (ret != 0)
		return ret;

	ret = i915_gem_render_state_emit(req);
	if (ret)
		return ret;

	return 0;
}

static int init_render_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret = init_ring_common(engine);
	if (ret)
		return ret;

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (IS_GEN(dev_priv, 4, 6))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
	 */
	if (IS_GEN(dev_priv, 6, 7))
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (IS_GEN6(dev_priv))
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev_priv))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (IS_GEN6(dev_priv)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (IS_GEN(dev_priv, 6, 7))
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (INTEL_INFO(dev_priv)->gen >= 6)
		I915_WRITE_IMR(engine, ~engine->irq_keep_mask);

	return init_workarounds_ring(engine);
}

static void render_ring_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	i915_vma_unpin_and_release(&dev_priv->semaphore);
}

static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *waiter;
	enum intel_engine_id id;

	for_each_engine(waiter, dev_priv, id) {
		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		*cs++ = GFX_OP_PIPE_CONTROL(6);
		*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_CS_STALL;
		*cs++ = lower_32_bits(gtt_offset);
		*cs++ = upper_32_bits(gtt_offset);
		*cs++ = req->global_seqno;
		*cs++ = 0;
		*cs++ = MI_SEMAPHORE_SIGNAL |
			MI_SEMAPHORE_TARGET(waiter->hw_id);
		*cs++ = 0;
	}

	return cs;
}

static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *waiter;
	enum intel_engine_id id;

	for_each_engine(waiter, dev_priv, id) {
		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
		*cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
		*cs++ = upper_32_bits(gtt_offset);
		*cs++ = req->global_seqno;
		*cs++ = MI_SEMAPHORE_SIGNAL |
			MI_SEMAPHORE_TARGET(waiter->hw_id);
		*cs++ = 0;
	}

	return cs;
}

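/*
 * Note (added for clarity): the signal helpers above and gen6_signal()
 * below only append dwords at *cs and return the advanced pointer; they do
 * not advance the ring themselves. The emit_breadcrumb callbacks further
 * down feed them the ring pointer while writing a request's closing
 * commands.
 */
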
static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int num_rings = 0;

	for_each_engine(engine, dev_priv, id) {
		i915_reg_t mbox_reg;

		if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
			continue;

		mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
		if (i915_mmio_reg_valid(mbox_reg)) {
			*cs++ = MI_LOAD_REGISTER_IMM(1);
			*cs++ = i915_mmio_reg_offset(mbox_reg);
			*cs++ = req->global_seqno;
			num_rings++;
		}
	}
	if (num_rings & 1)
		*cs++ = MI_NOOP;

	return cs;
}

static void i9xx_submit_request(struct drm_i915_gem_request *request)
{
	struct drm_i915_private *dev_priv = request->i915;

	i915_gem_request_submit(request);

	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
	I915_WRITE_TAIL(request->engine, request->tail);
}

static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
{
	*cs++ = MI_STORE_DWORD_INDEX;
	*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
	*cs++ = req->global_seqno;
	*cs++ = MI_USER_INTERRUPT;

	req->tail = intel_ring_offset(req, cs);
	GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
}

static const int i9xx_emit_breadcrumb_sz = 4;

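/*
 * Note (added for clarity): the *_emit_breadcrumb_sz constants record how
 * many dwords the matching *_emit_breadcrumb() callback writes -- four
 * above (store-index command, index, seqno, user interrupt) -- presumably
 * so enough ring space can be reserved before the callback runs. The same
 * convention is assumed for gen8_render_emit_breadcrumb_sz below.
 */
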
/**
 * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
 *
 * @req - request to write to the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
{
	return i9xx_emit_breadcrumb(req,
				    req->engine->semaphore.signal(req, cs));
}

static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
					u32 *cs)
{
	struct intel_engine_cs *engine = req->engine;

	if (engine->semaphore.signal)
		cs = engine->semaphore.signal(req, cs);

	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = intel_hws_seqno_address(engine);
	*cs++ = 0;
	*cs++ = req->global_seqno;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;
	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	req->tail = intel_ring_offset(req, cs);
	GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
}

static const int gen8_render_emit_breadcrumb_sz = 8;

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */

static int
gen8_ring_sync_to(struct drm_i915_gem_request *req,
		  struct drm_i915_gem_request *signal)
{
	struct drm_i915_private *dev_priv = req->i915;
	u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id);
	struct i915_hw_ppgtt *ppgtt;
	u32 *cs;

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = signal->global_seqno;
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);
	intel_ring_advance(req, cs);

	/* When the !RCS engines idle waiting upon a semaphore, they lose their
	 * pagetables and we must reload them before executing the batch.
	 * We do this on the i915_switch_context() following the wait and
	 * before the dispatch.
	 */
	ppgtt = req->ctx->ppgtt;
	if (ppgtt && req->engine->id != RCS)
		ppgtt->pd_dirty_rings |= intel_engine_flag(req->engine);
	return 0;
}

static int
gen6_ring_sync_to(struct drm_i915_gem_request *req,
		  struct drm_i915_gem_request *signal)
{
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
	u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id];
	u32 *cs;

	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);

	cs = intel_ring_begin(req, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = dw1 | wait_mbox;
	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	*cs++ = signal->global_seqno - 1;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static void
gen5_seqno_barrier(struct intel_engine_cs *engine)
{
	/* MI_STORE are internally buffered by the GPU and not flushed
	 * either by MI_FLUSH or SyncFlush or any other combination of
	 * MI commands.
	 *
	 * "Only the submission of the store operation is guaranteed.
	 * The write result will be complete (coherent) some time later
	 * (this is practically a finite period but there is no guaranteed
	 * latency)."
	 *
	 * Empirically, we observe that we need a delay of at least 75us to
	 * be sure that the seqno write is visible by the CPU.
	 */
	usleep_range(125, 250);
}

static void
gen6_seqno_barrier(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page.
	 *
	 * Note that this effectively stalls the read by the time it takes to
	 * do a memory transaction, which more or less ensures that the write
	 * from the GPU has sufficient time to invalidate the CPU cacheline.
	 * Alternatively we could delay the interrupt from the CS ring to give
	 * the write time to land, but that would incur a delay after every
	 * batch i.e. much more frequent than a delay when waiting for the
	 * interrupt (with the same net latency).
	 *
	 * Also note that to prevent whole machine hangs on gen7, we have to
	 * take the spinlock to guard against concurrent cacheline access.
	 */
	spin_lock_irq(&dev_priv->uncore.lock);
	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
	spin_unlock_irq(&dev_priv->uncore.lock);
}

static void
gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
}

static void
i9xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void
i9xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
}

static void
i8xx_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask &= ~engine->irq_enable_mask;
	I915_WRITE16(IMR, dev_priv->irq_mask);
	POSTING_READ16(RING_IMR(engine->mmio_base));
}

static void
i8xx_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->irq_mask |= engine->irq_enable_mask;
	I915_WRITE16(IMR, dev_priv->irq_mask);
}

static int
bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);
	return 0;
}

static void
gen6_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask |
			 engine->irq_keep_mask));
	gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
}

static void
gen6_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
	gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
}

static void
hsw_vebox_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
	gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask);
}

static void
hsw_vebox_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~0);
	gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask);
}

static void
gen8_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine,
		       ~(engine->irq_enable_mask |
			 engine->irq_keep_mask));
	POSTING_READ_FW(RING_IMR(engine->mmio_base));
}

static void
gen8_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
}

static int
i965_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 length,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags &
		I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965);
	*cs++ = offset;
	intel_ring_advance(req, cs);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(req, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/* Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(req, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(req, cs);

	return 0;
}

static int
i915_emit_bb_start(struct drm_i915_gem_request *req,
		   u64 offset, u32 len,
		   unsigned int dispatch_flags)
{
	u32 *cs;

	cs = intel_ring_begin(req, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 :
		MI_BATCH_NON_SECURE);
	intel_ring_advance(req, cs);

	return 0;
}

static void cleanup_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (!dev_priv->status_page_dmah)
		return;

	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
	engine->status_page.page_addr = NULL;
}

static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;
	struct drm_i915_gem_object *obj;

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	obj = vma->obj;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_gem_object_unpin_map(obj);
	__i915_gem_object_release_unless_active(obj);
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int flags;
	void *vaddr;
	int ret;

	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
	if (ret)
		goto err;

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/* On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	ret = i915_vma_pin(vma, 0, 4096, flags);
	if (ret)
		goto err;

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err_unpin;
	}

	engine->status_page.vma = vma;
	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);

	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 engine->name, i915_ggtt_offset(vma));
	return 0;

err_unpin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int init_phys_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->status_page_dmah =
		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
	if (!dev_priv->status_page_dmah)
		return -ENOMEM;

	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(engine->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
{
	unsigned int flags;
	enum i915_map_type map;
	struct i915_vma *vma = ring->vma;
	void *addr;
	int ret;

	GEM_BUG_ON(ring->vaddr);

	map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC;

	flags = PIN_GLOBAL;
	if (offset_bias)
		flags |= PIN_OFFSET_BIAS | offset_bias;
	if (vma->obj->stolen)
		flags |= PIN_MAPPABLE;

	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
			ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
		else
			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
		if (unlikely(ret))
			return ret;
	}

	ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags);
	if (unlikely(ret))
		return ret;

	if (i915_vma_is_map_and_fenceable(vma))
		addr = (void __force *)i915_vma_pin_iomap(vma);
	else
		addr = i915_gem_object_pin_map(vma->obj, map);
	if (IS_ERR(addr))
		goto err;

	ring->vaddr = addr;
	return 0;

err:
	i915_vma_unpin(vma);
	return PTR_ERR(addr);
}

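/*
 * Cross-reference (added for clarity): intel_init_ring_buffer() below pins
 * the ring with an offset bias of I915_GTT_PAGE_SIZE, keeping the ring away
 * from GGTT offset 0 because, as noted there, "Ring wraparound at offset 0
 * sometimes hangs. No idea why."
 */
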
void intel_ring_unpin(struct intel_ring *ring)
{
	GEM_BUG_ON(!ring->vma);
	GEM_BUG_ON(!ring->vaddr);

	if (i915_vma_is_map_and_fenceable(ring->vma))
		i915_vma_unpin_iomap(ring->vma);
	else
		i915_gem_object_unpin_map(ring->vma->obj);
	ring->vaddr = NULL;

	i915_vma_unpin(ring->vma);
}

static struct i915_vma *
intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_stolen(dev_priv, size);
	if (!obj)
		obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/* mark ring buffers as read-only from GPU side by default */
	obj->gt_ro = 1;

	vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return vma;
}

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	ring->engine = engine;

	INIT_LIST_HEAD(&ring->request_list);

	ring->size = size;
	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;

	intel_ring_update_space(ring);

	vma = intel_ring_create_vma(engine->i915, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}

void
intel_ring_free(struct intel_ring *ring)
{
	struct drm_i915_gem_object *obj = ring->vma->obj;

	i915_vma_close(ring->vma);
	__i915_gem_object_release_unless_active(obj);

	kfree(ring);
}

static int context_pin(struct i915_gem_context *ctx)
{
	struct i915_vma *vma = ctx->engine[RCS].state;
	int ret;

	/* Clear this page out of any CPU caches for coherent swap-in/out.
	 * We only want to do this on the first bind so that we do not stall
	 * on an active context (which by nature is already on the GPU).
	 */
	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
		if (ret)
			return ret;
	}

	return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT,
			    PIN_GLOBAL | PIN_HIGH);
}

static int intel_ring_context_pin(struct intel_engine_cs *engine,
				  struct i915_gem_context *ctx)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	int ret;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);

	if (ce->pin_count++)
		return 0;
	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */

	if (ce->state) {
		ret = context_pin(ctx);
		if (ret)
			goto error;

		ce->state->obj->mm.dirty = true;
	}

	/* The kernel context is only used as a placeholder for flushing the
	 * active context. It is never used for submitting user rendering and
	 * as such never requires the golden render context, and so we can skip
	 * emitting it when we switch to the kernel context. This is required
	 * as during eviction we cannot allocate and pin the renderstate in
	 * order to initialise the context.
	 */
	if (i915_gem_context_is_kernel(ctx))
		ce->initialised = true;

	i915_gem_context_get(ctx);
	return 0;

error:
	ce->pin_count = 0;
	return ret;
}

static void intel_ring_context_unpin(struct intel_engine_cs *engine,
				     struct i915_gem_context *ctx)
{
	struct intel_context *ce = &ctx->engine[engine->id];

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	GEM_BUG_ON(ce->pin_count == 0);

	if (--ce->pin_count)
		return;

	if (ce->state)
		i915_vma_unpin(ce->state);

	i915_gem_context_put(ctx);
}

static int intel_init_ring_buffer(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct intel_ring *ring;
	int ret;

	WARN_ON(engine->buffer);

	intel_engine_setup_common(engine);

	ret = intel_engine_init_common(engine);
	if (ret)
		goto error;

	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
	if (IS_ERR(ring)) {
		ret = PTR_ERR(ring);
		goto error;
	}

	if (HWS_NEEDS_PHYSICAL(dev_priv)) {
		WARN_ON(engine->id != RCS);
		ret = init_phys_status_page(engine);
		if (ret)
			goto error;
	} else {
		ret = init_status_page(engine);
		if (ret)
			goto error;
	}

	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
	ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE);
	if (ret) {
		intel_ring_free(ring);
		goto error;
	}
	engine->buffer = ring;

	return 0;

error:
	intel_engine_cleanup(engine);
	return ret;
}

void intel_engine_cleanup(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv;

	dev_priv = engine->i915;

	if (engine->buffer) {
		WARN_ON(INTEL_GEN(dev_priv) > 2 &&
			(I915_READ_MODE(engine) & MODE_IDLE) == 0);

		intel_ring_unpin(engine->buffer);
		intel_ring_free(engine->buffer);
		engine->buffer = NULL;
	}

	if (engine->cleanup)
		engine->cleanup(engine);

	if (HWS_NEEDS_PHYSICAL(dev_priv)) {
		WARN_ON(engine->id != RCS);
		cleanup_phys_status_page(engine);
	} else {
		cleanup_status_page(engine);
	}

	intel_engine_cleanup_common(engine);

	engine->i915 = NULL;
	dev_priv->engine[engine->id] = NULL;
	kfree(engine);
}

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, dev_priv, id)
		engine->buffer->head = engine->buffer->tail;
}

static int ring_request_alloc(struct drm_i915_gem_request *request)
{
	u32 *cs;

	GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);

	/* Flush enough space to reduce the likelihood of waiting after
	 * we start building the request - in which case we will just
	 * have to repeat work.
	 */
	request->reserved_space += LEGACY_REQUEST_SIZE;

	GEM_BUG_ON(!request->engine->buffer);
	request->ring = request->engine->buffer;

	cs = intel_ring_begin(request, 0);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	request->reserved_space -= LEGACY_REQUEST_SIZE;
	return 0;
}

static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
{
	struct intel_ring *ring = req->ring;
	struct drm_i915_gem_request *target;
	long timeout;

	lockdep_assert_held(&req->i915->drm.struct_mutex);

	intel_ring_update_space(ring);
	if (ring->space >= bytes)
		return 0;

	/*
	 * Space is reserved in the ringbuffer for finalising the request,
	 * as that cannot be allowed to fail. During request finalisation,
	 * reserved_space is set to 0 to stop the overallocation and the
	 * assumption is that then we never need to wait (which has the
	 * risk of failing with EINTR).
	 *
	 * See also i915_gem_request_alloc() and i915_add_request().
	 */
	GEM_BUG_ON(!req->reserved_space);

	list_for_each_entry(target, &ring->request_list, ring_link) {
		unsigned space;

		/* Would completion of this request free enough space? */
		space = __intel_ring_space(target->postfix, ring->tail,
					   ring->size);
		if (space >= bytes)
			break;
	}

	if (WARN_ON(&target->ring_link == &ring->request_list))
		return -ENOSPC;

	timeout = i915_wait_request(target,
				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	i915_gem_request_retire_upto(target);

	intel_ring_update_space(ring);
	GEM_BUG_ON(ring->space < bytes);
	return 0;
}

73dec95e 1641u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
cbcc80df 1642{
7e37f889 1643 struct intel_ring *ring = req->ring;
1dae2dfb
CW
1644 int remain_actual = ring->size - ring->tail;
1645 int remain_usable = ring->effective_size - ring->tail;
987046ad
CW
1646 int bytes = num_dwords * sizeof(u32);
1647 int total_bytes, wait_bytes;
79bbcc29 1648 bool need_wrap = false;
73dec95e 1649 u32 *cs;
29b1b415 1650
0251a963 1651 total_bytes = bytes + req->reserved_space;
29b1b415 1652
79bbcc29
JH
1653 if (unlikely(bytes > remain_usable)) {
1654 /*
1655 * Not enough space for the basic request. So need to flush
1656 * out the remainder and then wait for base + reserved.
1657 */
1658 wait_bytes = remain_actual + total_bytes;
1659 need_wrap = true;
987046ad
CW
1660 } else if (unlikely(total_bytes > remain_usable)) {
1661 /*
1662 * The base request will fit but the reserved space
1663 * falls off the end. So we don't need an immediate wrap,
1664 * and only need to effectively wait until the reserved
1665 * size of space is free from the start of the ringbuffer.
1666 */
0251a963 1667 wait_bytes = remain_actual + req->reserved_space;
79bbcc29 1668 } else {
987046ad
CW
1669 /* No wrapping required, just waiting. */
1670 wait_bytes = total_bytes;
cbcc80df
MK
1671 }
1672
1dae2dfb 1673 if (wait_bytes > ring->space) {
987046ad 1674 int ret = wait_for_space(req, wait_bytes);
cbcc80df 1675 if (unlikely(ret))
73dec95e 1676 return ERR_PTR(ret);
cbcc80df
MK
1677 }
1678
987046ad 1679 if (unlikely(need_wrap)) {
1dae2dfb
CW
1680 GEM_BUG_ON(remain_actual > ring->space);
1681 GEM_BUG_ON(ring->tail + remain_actual > ring->size);
78501eac 1682
987046ad 1683 /* Fill the tail with MI_NOOP */
1dae2dfb
CW
1684 memset(ring->vaddr + ring->tail, 0, remain_actual);
1685 ring->tail = 0;
1686 ring->space -= remain_actual;
987046ad 1687 }
304d695c 1688
73dec95e
TU
1689 GEM_BUG_ON(ring->tail > ring->size - bytes);
1690 cs = ring->vaddr + ring->tail;
1691 ring->tail += bytes;
1dae2dfb
CW
1692 ring->space -= bytes;
1693 GEM_BUG_ON(ring->space < 0);
73dec95e
TU
1694
1695 return cs;
8187a2b7 1696}
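/*
 * Editor's note: illustrative usage sketch, the same pattern followed
 * by the emitters below; it is not new driver code. Callers request an
 * exact number of dwords, write exactly that many, then advance:
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(req, 2);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	intel_ring_advance(req, cs);
 *
 * intel_ring_begin() has already charged ring->space and advanced
 * ring->tail for those dwords, so the caller must emit exactly the
 * number it asked for before calling intel_ring_advance().
 */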
78501eac 1697
753b1ad4 1698/* Align the ring tail to a cacheline boundary */
bba09b12 1699int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
753b1ad4 1700{
b5321f30 1701 int num_dwords =
73dec95e
TU
1702 (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
1703 u32 *cs;
753b1ad4
VS
1704
1705 if (num_dwords == 0)
1706 return 0;
1707
18393f63 1708 num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
73dec95e
TU
1709 cs = intel_ring_begin(req, num_dwords);
1710 if (IS_ERR(cs))
1711 return PTR_ERR(cs);
753b1ad4
VS
1712
1713 while (num_dwords--)
73dec95e 1714 *cs++ = MI_NOOP;
753b1ad4 1715
73dec95e 1716 intel_ring_advance(req, cs);
753b1ad4
VS
1717
1718 return 0;
1719}
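/*
 * Editor's note: worked example (assuming CACHELINE_BYTES is 64, i.e.
 * 16 dwords per cacheline), not part of the original file. With
 * ring->tail == 0x1234 the tail sits 52 bytes (13 dwords) into its
 * cacheline, so num_dwords becomes 16 - 13 = 3 and three MI_NOOPs are
 * emitted, leaving the tail at 0x1240, which is cacheline aligned.
 */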
1720
c5efa1ad 1721static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
881f47b6 1722{
c5efa1ad 1723 struct drm_i915_private *dev_priv = request->i915;
881f47b6 1724
76f8421f
CW
1725 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
1726
881f47b6 1727 /* Every tail move must follow the sequence below */
12f55818
CW
1728
1729 /* Disable notification that the ring is IDLE. The GT
1730 * will then assume that it is busy and bring it out of rc6.
1731 */
76f8421f
CW
1732 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
1733 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
12f55818
CW
1734
1735 /* Clear the context id. Here be magic! */
76f8421f 1736 I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
0206e353 1737
12f55818 1738 /* Wait for the ring not to be idle, i.e. for it to wake up. */
76f8421f
CW
1739 if (intel_wait_for_register_fw(dev_priv,
1740 GEN6_BSD_SLEEP_PSMI_CONTROL,
1741 GEN6_BSD_SLEEP_INDICATOR,
1742 0,
1743 50))
12f55818 1744 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
0206e353 1745
12f55818 1746 /* Now that the ring is fully powered up, update the tail */
b0411e7d 1747 i9xx_submit_request(request);
12f55818
CW
1748
1749 /* Let the ring send IDLE messages to the GT again,
1750 * and so let it sleep to conserve power when idle.
1751 */
76f8421f
CW
1752 I915_WRITE_FW(GEN6_BSD_SLEEP_PSMI_CONTROL,
1753 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1754
1755 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
881f47b6
XH
1756}
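/*
 * Editor's note: summary of the ordering enforced above, added for
 * clarity; it introduces no new behaviour. Under forcewake the
 * sequence is:
 *
 *	1. set GEN6_BSD_SLEEP_MSG_DISABLE so the ring stops reporting
 *	   idle and is brought out of rc6;
 *	2. clear GEN6_BSD_RNCID;
 *	3. poll GEN6_BSD_SLEEP_PSMI_CONTROL until GEN6_BSD_SLEEP_INDICATOR
 *	   reads back as zero (up to the 50 unit timeout passed above);
 *	4. write the new tail via i9xx_submit_request();
 *	5. clear GEN6_BSD_SLEEP_MSG_DISABLE again so the ring may sleep.
 */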
1757
7c9cf4e3 1758static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode)
881f47b6 1759{
73dec95e 1760 u32 cmd, *cs;
b72f3acb 1761
73dec95e
TU
1762 cs = intel_ring_begin(req, 4);
1763 if (IS_ERR(cs))
1764 return PTR_ERR(cs);
b72f3acb 1765
71a77e07 1766 cmd = MI_FLUSH_DW;
c033666a 1767 if (INTEL_GEN(req->i915) >= 8)
075b3bba 1768 cmd += 1;
f0a1fb10
CW
1769
1770 /* We always require a command barrier so that subsequent
1771 * commands, such as breadcrumb interrupts, are strictly ordered
1772 * wrt the contents of the write cache being flushed to memory
1773 * (and thus being coherent from the CPU).
1774 */
1775 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1776
9a289771
JB
1777 /*
1778 * Bspec vol 1c.5 - video engine command streamer:
1779 * "If ENABLED, all TLBs will be invalidated once the flush
1780 * operation is complete. This bit is only valid when the
1781 * Post-Sync Operation field is a value of 1h or 3h."
1782 */
7c9cf4e3 1783 if (mode & EMIT_INVALIDATE)
f0a1fb10
CW
1784 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1785
73dec95e
TU
1786 *cs++ = cmd;
1787 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
c033666a 1788 if (INTEL_GEN(req->i915) >= 8) {
73dec95e
TU
1789 *cs++ = 0; /* upper addr */
1790 *cs++ = 0; /* value */
075b3bba 1791 } else {
73dec95e
TU
1792 *cs++ = 0;
1793 *cs++ = MI_NOOP;
075b3bba 1794 }
73dec95e 1795 intel_ring_advance(req, cs);
b72f3acb 1796 return 0;
881f47b6
XH
1797}
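/*
 * Editor's note: illustrative layout of the four dwords emitted above,
 * not new code. For the video engine on gen6/7 the packet is
 *
 *	MI_FLUSH_DW | MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW
 *		    [| MI_INVALIDATE_TLB | MI_INVALIDATE_BSD]
 *	I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT
 *	0
 *	MI_NOOP
 *
 * while gen8+ uses the longer form (cmd length + 1) with an upper
 * address dword and an explicit value dword in place of the trailing
 * 0 / MI_NOOP pair.
 */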
1798
1c7a0623 1799static int
803688ba
CW
1800gen8_emit_bb_start(struct drm_i915_gem_request *req,
1801 u64 offset, u32 len,
1802 unsigned int dispatch_flags)
1c7a0623 1803{
b5321f30 1804 bool ppgtt = USES_PPGTT(req->i915) &&
8e004efc 1805 !(dispatch_flags & I915_DISPATCH_SECURE);
73dec95e 1806 u32 *cs;
1c7a0623 1807
73dec95e
TU
1808 cs = intel_ring_begin(req, 4);
1809 if (IS_ERR(cs))
1810 return PTR_ERR(cs);
1c7a0623
BW
1811
1812 /* FIXME(BDW): Address space and security selectors. */
73dec95e
TU
1813 *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags &
1814 I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
1815 *cs++ = lower_32_bits(offset);
1816 *cs++ = upper_32_bits(offset);
1817 *cs++ = MI_NOOP;
1818 intel_ring_advance(req, cs);
1c7a0623
BW
1819
1820 return 0;
1821}
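/*
 * Editor's note: the four dwords emitted above, shown for reference
 * (no new behaviour):
 *
 *	MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8)
 *				   [| MI_BATCH_RESOURCE_STREAMER]
 *	lower_32_bits(offset)
 *	upper_32_bits(offset)
 *	MI_NOOP
 *
 * where, as the variable name suggests, the ppgtt bit selects the
 * per-process address space for non-secure dispatch on PPGTT-enabled
 * platforms.
 */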
1822
d7d4eedd 1823static int
803688ba
CW
1824hsw_emit_bb_start(struct drm_i915_gem_request *req,
1825 u64 offset, u32 len,
1826 unsigned int dispatch_flags)
d7d4eedd 1827{
73dec95e 1828 u32 *cs;
d7d4eedd 1829
73dec95e
TU
1830 cs = intel_ring_begin(req, 2);
1831 if (IS_ERR(cs))
1832 return PTR_ERR(cs);
d7d4eedd 1833
73dec95e
TU
1834 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
1835 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
1836 (dispatch_flags & I915_DISPATCH_RS ?
1837 MI_BATCH_RESOURCE_STREAMER : 0);
d7d4eedd 1838 /* bits 0-7 are the length on GEN6+ */
73dec95e
TU
1839 *cs++ = offset;
1840 intel_ring_advance(req, cs);
d7d4eedd
CW
1841
1842 return 0;
1843}
1844
881f47b6 1845static int
803688ba
CW
1846gen6_emit_bb_start(struct drm_i915_gem_request *req,
1847 u64 offset, u32 len,
1848 unsigned int dispatch_flags)
881f47b6 1849{
73dec95e 1850 u32 *cs;
ab6f8e32 1851
73dec95e
TU
1852 cs = intel_ring_begin(req, 2);
1853 if (IS_ERR(cs))
1854 return PTR_ERR(cs);
e1f99ce6 1855
73dec95e
TU
1856 *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
1857 0 : MI_BATCH_NON_SECURE_I965);
0206e353 1858 /* bits 0-7 are the length on GEN6+ */
73dec95e
TU
1859 *cs++ = offset;
1860 intel_ring_advance(req, cs);
ab6f8e32 1861
0206e353 1862 return 0;
881f47b6
XH
1863}
1864
549f7365
CW
1865/* Blitter support (SandyBridge+) */
1866
7c9cf4e3 1867static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode)
8d19215b 1868{
73dec95e 1869 u32 cmd, *cs;
b72f3acb 1870
73dec95e
TU
1871 cs = intel_ring_begin(req, 4);
1872 if (IS_ERR(cs))
1873 return PTR_ERR(cs);
b72f3acb 1874
71a77e07 1875 cmd = MI_FLUSH_DW;
c033666a 1876 if (INTEL_GEN(req->i915) >= 8)
075b3bba 1877 cmd += 1;
f0a1fb10
CW
1878
1879 /* We always require a command barrier so that subsequent
1880 * commands, such as breadcrumb interrupts, are strictly ordered
1881 * wrt the contents of the write cache being flushed to memory
1882 * (and thus being coherent from the CPU).
1883 */
1884 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1885
9a289771
JB
1886 /*
1887 * Bspec vol 1c.3 - blitter engine command streamer:
1888 * "If ENABLED, all TLBs will be invalidated once the flush
1889 * operation is complete. This bit is only valid when the
1890 * Post-Sync Operation field is a value of 1h or 3h."
1891 */
7c9cf4e3 1892 if (mode & EMIT_INVALIDATE)
f0a1fb10 1893 cmd |= MI_INVALIDATE_TLB;
73dec95e
TU
1894 *cs++ = cmd;
1895 *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
c033666a 1896 if (INTEL_GEN(req->i915) >= 8) {
73dec95e
TU
1897 *cs++ = 0; /* upper addr */
1898 *cs++ = 0; /* value */
075b3bba 1899 } else {
73dec95e
TU
1900 *cs++ = 0;
1901 *cs++ = MI_NOOP;
075b3bba 1902 }
73dec95e 1903 intel_ring_advance(req, cs);
fd3da6c9 1904
b72f3acb 1905 return 0;
8d19215b
ZN
1906}
1907
d9a64610
TU
1908static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
1909 struct intel_engine_cs *engine)
1910{
db3d4019 1911 struct drm_i915_gem_object *obj;
1b9e6650 1912 int ret, i;
db3d4019 1913
39df9190 1914 if (!i915.semaphores)
db3d4019
TU
1915 return;
1916
51d545d0
CW
1917 if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) {
1918 struct i915_vma *vma;
1919
f51455d4 1920 obj = i915_gem_object_create(dev_priv, PAGE_SIZE);
51d545d0
CW
1921 if (IS_ERR(obj))
1922 goto err;
db3d4019 1923
a01cb37a 1924 vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
51d545d0
CW
1925 if (IS_ERR(vma))
1926 goto err_obj;
1927
1928 ret = i915_gem_object_set_to_gtt_domain(obj, false);
1929 if (ret)
1930 goto err_obj;
1931
1932 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
1933 if (ret)
1934 goto err_obj;
1935
1936 dev_priv->semaphore = vma;
1937 }
d9a64610
TU
1938
1939 if (INTEL_GEN(dev_priv) >= 8) {
bde13ebd 1940 u32 offset = i915_ggtt_offset(dev_priv->semaphore);
1b9e6650 1941
ad7bdb2b 1942 engine->semaphore.sync_to = gen8_ring_sync_to;
d9a64610 1943 engine->semaphore.signal = gen8_xcs_signal;
1b9e6650
TU
1944
1945 for (i = 0; i < I915_NUM_ENGINES; i++) {
bde13ebd 1946 u32 ring_offset;
1b9e6650
TU
1947
1948 if (i != engine->id)
1949 ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i);
1950 else
1951 ring_offset = MI_SEMAPHORE_SYNC_INVALID;
1952
1953 engine->semaphore.signal_ggtt[i] = ring_offset;
1954 }
d9a64610 1955 } else if (INTEL_GEN(dev_priv) >= 6) {
ad7bdb2b 1956 engine->semaphore.sync_to = gen6_ring_sync_to;
d9a64610 1957 engine->semaphore.signal = gen6_signal;
4b8e38a9
TU
1958
1959 /*
1960 * This semaphore scheme is only used on pre-gen8
1961 * platforms, and there is no VCS2 ring on pre-gen8, so
1962 * the semaphore between RCS and VCS2 is initialized as
1963 * INVALID here. Gen8 initializes the semaphore between
1964 * VCS2 and RCS later.
1965 */
318f89ca 1966 for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
4b8e38a9
TU
1967 static const struct {
1968 u32 wait_mbox;
1969 i915_reg_t mbox_reg;
318f89ca
TU
1970 } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
1971 [RCS_HW] = {
1972 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
1973 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
1974 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
4b8e38a9 1975 },
318f89ca
TU
1976 [VCS_HW] = {
1977 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
1978 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
1979 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
4b8e38a9 1980 },
318f89ca
TU
1981 [BCS_HW] = {
1982 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
1983 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
1984 [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
4b8e38a9 1985 },
318f89ca
TU
1986 [VECS_HW] = {
1987 [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
1988 [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
1989 [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
4b8e38a9
TU
1990 },
1991 };
1992 u32 wait_mbox;
1993 i915_reg_t mbox_reg;
1994
318f89ca 1995 if (i == engine->hw_id) {
4b8e38a9
TU
1996 wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
1997 mbox_reg = GEN6_NOSYNC;
1998 } else {
318f89ca
TU
1999 wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
2000 mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
4b8e38a9
TU
2001 }
2002
2003 engine->semaphore.mbox.wait[i] = wait_mbox;
2004 engine->semaphore.mbox.signal[i] = mbox_reg;
2005 }
d9a64610 2006 }
51d545d0
CW
2007
2008 return;
2009
2010err_obj:
2011 i915_gem_object_put(obj);
2012err:
2013 DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n");
2014 i915.semaphores = 0;
d9a64610
TU
2015}
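/*
 * Editor's note: example of how the sem_data[][] table above is
 * consumed, added for clarity. For the render ring (RCS_HW) paired
 * with the video ring (VCS_HW), the loop stores
 *
 *	engine->semaphore.mbox.wait[VCS_HW]   = MI_SEMAPHORE_SYNC_RV;
 *	engine->semaphore.mbox.signal[VCS_HW] = GEN6_VRSYNC;
 *
 * i.e. per (this engine, other engine) pair the table provides the
 * MI_SEMAPHORE_SYNC_* wait mode and the GEN6_*SYNC mailbox register.
 * The diagonal entries (i == engine->hw_id) are filled with
 * MI_SEMAPHORE_SYNC_INVALID / GEN6_NOSYNC, since an engine never
 * synchronises with itself.
 */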
2016
ed003078
CW
2017static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
2018 struct intel_engine_cs *engine)
2019{
c78d6061
TU
2020 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
2021
ed003078 2022 if (INTEL_GEN(dev_priv) >= 8) {
31bb59cc
CW
2023 engine->irq_enable = gen8_irq_enable;
2024 engine->irq_disable = gen8_irq_disable;
ed003078
CW
2025 engine->irq_seqno_barrier = gen6_seqno_barrier;
2026 } else if (INTEL_GEN(dev_priv) >= 6) {
31bb59cc
CW
2027 engine->irq_enable = gen6_irq_enable;
2028 engine->irq_disable = gen6_irq_disable;
ed003078
CW
2029 engine->irq_seqno_barrier = gen6_seqno_barrier;
2030 } else if (INTEL_GEN(dev_priv) >= 5) {
31bb59cc
CW
2031 engine->irq_enable = gen5_irq_enable;
2032 engine->irq_disable = gen5_irq_disable;
f8973c21 2033 engine->irq_seqno_barrier = gen5_seqno_barrier;
ed003078 2034 } else if (INTEL_GEN(dev_priv) >= 3) {
31bb59cc
CW
2035 engine->irq_enable = i9xx_irq_enable;
2036 engine->irq_disable = i9xx_irq_disable;
ed003078 2037 } else {
31bb59cc
CW
2038 engine->irq_enable = i8xx_irq_enable;
2039 engine->irq_disable = i8xx_irq_disable;
ed003078
CW
2040 }
2041}
2042
ff44ad51
CW
2043static void i9xx_set_default_submission(struct intel_engine_cs *engine)
2044{
2045 engine->submit_request = i9xx_submit_request;
2046}
2047
2048static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
2049{
2050 engine->submit_request = gen6_bsd_submit_request;
2051}
2052
06a2fe22
TU
2053static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
2054 struct intel_engine_cs *engine)
2055{
618e4ca7
CW
2056 intel_ring_init_irq(dev_priv, engine);
2057 intel_ring_init_semaphores(dev_priv, engine);
2058
1d8a1337 2059 engine->init_hw = init_ring_common;
821ed7df 2060 engine->reset_hw = reset_ring_common;
7445a2a4 2061
e8a9c58f
CW
2062 engine->context_pin = intel_ring_context_pin;
2063 engine->context_unpin = intel_ring_context_unpin;
2064
f73e7399
CW
2065 engine->request_alloc = ring_request_alloc;
2066
9b81d556 2067 engine->emit_breadcrumb = i9xx_emit_breadcrumb;
98f29e8d
CW
2068 engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
2069 if (i915.semaphores) {
2070 int num_rings;
2071
9b81d556 2072 engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
98f29e8d
CW
2073
2074 num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
2075 if (INTEL_GEN(dev_priv) >= 8) {
2076 engine->emit_breadcrumb_sz += num_rings * 6;
2077 } else {
2078 engine->emit_breadcrumb_sz += num_rings * 3;
2079 if (num_rings & 1)
2080 engine->emit_breadcrumb_sz++;
2081 }
2082 }
ff44ad51
CW
2083
2084 engine->set_default_submission = i9xx_set_default_submission;
6f7bef75
CW
2085
2086 if (INTEL_GEN(dev_priv) >= 8)
803688ba 2087 engine->emit_bb_start = gen8_emit_bb_start;
6f7bef75 2088 else if (INTEL_GEN(dev_priv) >= 6)
803688ba 2089 engine->emit_bb_start = gen6_emit_bb_start;
6f7bef75 2090 else if (INTEL_GEN(dev_priv) >= 4)
803688ba 2091 engine->emit_bb_start = i965_emit_bb_start;
2a307c2e 2092 else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
803688ba 2093 engine->emit_bb_start = i830_emit_bb_start;
6f7bef75 2094 else
803688ba 2095 engine->emit_bb_start = i915_emit_bb_start;
06a2fe22
TU
2096}
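/*
 * Editor's note: worked example of the emit_breadcrumb_sz accounting
 * above (hypothetical configuration, no new behaviour). With
 * semaphores enabled and four engines set in ring_mask, num_rings is
 * 3: on gen8+ the breadcrumb grows by 3 * 6 = 18 dwords, while on
 * gen6/7 it grows by 3 * 3 = 9 dwords plus one dword of padding
 * (num_rings is odd), i.e. 10 dwords on top of the base
 * i9xx_emit_breadcrumb_sz.
 */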
2097
8b3e2d36 2098int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
5c1143bb 2099{
8b3e2d36 2100 struct drm_i915_private *dev_priv = engine->i915;
3e78998a 2101 int ret;
5c1143bb 2102
06a2fe22
TU
2103 intel_ring_default_vfuncs(dev_priv, engine);
2104
61ff75ac
CW
2105 if (HAS_L3_DPF(dev_priv))
2106 engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
f8973c21 2107
c033666a 2108 if (INTEL_GEN(dev_priv) >= 8) {
e2f80391 2109 engine->init_context = intel_rcs_ctx_init;
9b81d556 2110 engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
98f29e8d 2111 engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
c7fe7d25 2112 engine->emit_flush = gen8_render_ring_flush;
98f29e8d
CW
2113 if (i915.semaphores) {
2114 int num_rings;
2115
e2f80391 2116 engine->semaphore.signal = gen8_rcs_signal;
98f29e8d
CW
2117
2118 num_rings =
2119 hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
2120 engine->emit_breadcrumb_sz += num_rings * 6;
2121 }
c033666a 2122 } else if (INTEL_GEN(dev_priv) >= 6) {
e2f80391 2123 engine->init_context = intel_rcs_ctx_init;
c7fe7d25 2124 engine->emit_flush = gen7_render_ring_flush;
c033666a 2125 if (IS_GEN6(dev_priv))
c7fe7d25 2126 engine->emit_flush = gen6_render_ring_flush;
c033666a 2127 } else if (IS_GEN5(dev_priv)) {
c7fe7d25 2128 engine->emit_flush = gen4_render_ring_flush;
59465b5f 2129 } else {
c033666a 2130 if (INTEL_GEN(dev_priv) < 4)
c7fe7d25 2131 engine->emit_flush = gen2_render_ring_flush;
46f0f8d1 2132 else
c7fe7d25 2133 engine->emit_flush = gen4_render_ring_flush;
e2f80391 2134 engine->irq_enable_mask = I915_USER_INTERRUPT;
1ec14ad3 2135 }
707d9cf9 2136
c033666a 2137 if (IS_HASWELL(dev_priv))
803688ba 2138 engine->emit_bb_start = hsw_emit_bb_start;
6f7bef75 2139
e2f80391
TU
2140 engine->init_hw = init_render_ring;
2141 engine->cleanup = render_ring_cleanup;
59465b5f 2142
acd27845 2143 ret = intel_init_ring_buffer(engine);
99be1dfe
DV
2144 if (ret)
2145 return ret;
2146
f8973c21 2147 if (INTEL_GEN(dev_priv) >= 6) {
f51455d4 2148 ret = intel_engine_create_scratch(engine, PAGE_SIZE);
7d5ea807
CW
2149 if (ret)
2150 return ret;
2151 } else if (HAS_BROKEN_CS_TLB(dev_priv)) {
56c0f1a7 2152 ret = intel_engine_create_scratch(engine, I830_WA_SIZE);
99be1dfe
DV
2153 if (ret)
2154 return ret;
2155 }
2156
2157 return 0;
5c1143bb
XH
2158}
2159
8b3e2d36 2160int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
5c1143bb 2161{
8b3e2d36 2162 struct drm_i915_private *dev_priv = engine->i915;
58fa3835 2163
06a2fe22
TU
2164 intel_ring_default_vfuncs(dev_priv, engine);
2165
c033666a 2166 if (INTEL_GEN(dev_priv) >= 6) {
0fd2c201 2167 /* gen6 bsd needs a special wa for tail updates */
c033666a 2168 if (IS_GEN6(dev_priv))
ff44ad51 2169 engine->set_default_submission = gen6_bsd_set_default_submission;
c7fe7d25 2170 engine->emit_flush = gen6_bsd_ring_flush;
c78d6061 2171 if (INTEL_GEN(dev_priv) < 8)
e2f80391 2172 engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
58fa3835 2173 } else {
e2f80391 2174 engine->mmio_base = BSD_RING_BASE;
c7fe7d25 2175 engine->emit_flush = bsd_ring_flush;
8d228911 2176 if (IS_GEN5(dev_priv))
e2f80391 2177 engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
8d228911 2178 else
e2f80391 2179 engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
58fa3835 2180 }
58fa3835 2181
acd27845 2182 return intel_init_ring_buffer(engine);
5c1143bb 2183}
549f7365 2184
845f74a7 2185/**
62659920 2186 * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
845f74a7 2187 */
8b3e2d36 2188int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine)
845f74a7 2189{
8b3e2d36 2190 struct drm_i915_private *dev_priv = engine->i915;
06a2fe22
TU
2191
2192 intel_ring_default_vfuncs(dev_priv, engine);
2193
c7fe7d25 2194 engine->emit_flush = gen6_bsd_ring_flush;
845f74a7 2195
acd27845 2196 return intel_init_ring_buffer(engine);
845f74a7
ZY
2197}
2198
8b3e2d36 2199int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
549f7365 2200{
8b3e2d36 2201 struct drm_i915_private *dev_priv = engine->i915;
06a2fe22
TU
2202
2203 intel_ring_default_vfuncs(dev_priv, engine);
2204
c7fe7d25 2205 engine->emit_flush = gen6_ring_flush;
c78d6061 2206 if (INTEL_GEN(dev_priv) < 8)
e2f80391 2207 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
549f7365 2208
acd27845 2209 return intel_init_ring_buffer(engine);
549f7365 2210}
a7b9761d 2211
8b3e2d36 2212int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
9a8a2213 2213{
8b3e2d36 2214 struct drm_i915_private *dev_priv = engine->i915;
06a2fe22
TU
2215
2216 intel_ring_default_vfuncs(dev_priv, engine);
2217
c7fe7d25 2218 engine->emit_flush = gen6_ring_flush;
abd58f01 2219
c78d6061 2220 if (INTEL_GEN(dev_priv) < 8) {
e2f80391 2221 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
31bb59cc
CW
2222 engine->irq_enable = hsw_vebox_irq_enable;
2223 engine->irq_disable = hsw_vebox_irq_disable;
abd58f01 2224 }
9a8a2213 2225
acd27845 2226 return intel_init_ring_buffer(engine);
9a8a2213 2227}